Integrate with Hugging Face Accelerate¶
Hugging Face Accelerate was created for PyTorch users who want to write their own training loops but would rather not write and maintain the boilerplate code needed for multi-GPU, TPU, and fp16 training.
Instrument Accelerate with Comet to start managing experiments, create dataset versions, and track hyperparameters for faster and easier reproducibility and collaboration.
| Comet SDK | Minimum SDK version | Minimum accelerate version |
|---|---|---|
| Python-SDK | 3.31.5 | 0.17.0 |
Start logging¶
Select Comet as your tracker when you instantiate the Accelerator object in your code:
from accelerate import Accelerator

accelerator = Accelerator(log_with="comet_ml")
accelerator.init_trackers(
    project_name="comet-example-accelerate"
)

# Inside your training loop, log metrics through the Accelerator
accelerator.log({"batch_accuracy": batch_correct / batch_total})
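Accelerator.log also accepts an optional step argument if you want to control the x-axis of your charts. A minimal sketch; global_step here is a hypothetical counter maintained by your training loop:

# step is forwarded to the tracker; global_step is a hypothetical
# counter you increment once per batch
accelerator.log({"batch_accuracy": batch_correct / batch_total}, step=global_step)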
Log Hyperparameters¶
To log hyperparameters to Comet, pass your dictionary to the config parameter of the init_trackers function:
hyper_params = {"batch_size": 100, "num_epochs": 3, "learning_rate": 0.01}
accelerator = Accelerator(log_with="comet_ml")
accelerator.init_trackers(
project_name="comet-example-accelerate-notebook", config=hyper_params
)
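If you need to pass additional options to the underlying Comet experiment, init_trackers also accepts an init_kwargs parameter whose "comet_ml" entry is forwarded to the experiment. A minimal sketch, assuming you want to disable Comet's automatic metric logging:

accelerator = Accelerator(log_with="comet_ml")
accelerator.init_trackers(
    project_name="comet-example-accelerate-notebook",
    config=hyper_params,
    # Forwarded to the Comet experiment constructor
    init_kwargs={"comet_ml": {"auto_metric_logging": False}},
)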
Log Assets, Artifacts, or Other Data¶
Use the following code snippet to access the Comet Experiment object from the Accelerator. You can then use the Experiment API to log images, text, audio, and other assets to Comet:
experiment = accelerator.get_tracker("comet_ml").tracker
experiment.log_image(your_image, "your_image_name")
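The same Experiment handle exposes the rest of the Experiment API; for instance, log_text and log_audio work the same way (the audio path below is a placeholder):

experiment.log_text("sample prediction output")
experiment.log_audio("path/to/audio.wav")  # placeholder path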
End-to-end example¶
Get started with a basic example of using Comet with the Accelerate Accelerator.
You can check out the results of this example Accelerate experiment for a preview of what's to come.
Install dependencies¶
python -m pip install "comet_ml>=3.44.0" torch torchvision tqdm "accelerate>=0.17.0"
Run the example¶
import comet_ml
comet_ml.login()
from accelerate import Accelerator
from tqdm import tqdm
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.datasets as datasets
import torchvision.transforms as transforms
hyper_params = {"batch_size": 100, "num_epochs": 3, "learning_rate": 0.01}
# MNIST Dataset
train_dataset = datasets.MNIST(
root="./data/", train=True, transform=transforms.ToTensor(), download=True
)
test_dataset = datasets.MNIST(
root="./data/", train=False, transform=transforms.ToTensor()
)
# Data Loader (Input Pipeline)
train_loader = torch.utils.data.DataLoader(
dataset=train_dataset, batch_size=hyper_params["batch_size"], shuffle=True
)
test_loader = torch.utils.data.DataLoader(
dataset=test_dataset, batch_size=hyper_params["batch_size"], shuffle=False
)
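# Create the Accelerator with Comet experiment tracking enabled and
# initialize the Comet tracker with the run's hyperparameters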
accelerator = Accelerator(log_with="comet_ml")
accelerator.init_trackers(
project_name="comet-example-accelerate-notebook", config=hyper_params
)
device = accelerator.device
class Net(nn.Module):
def __init__(self):
super(Net, self).__init__()
self.conv1 = nn.Conv2d(1, 32, 3, 1)
self.conv2 = nn.Conv2d(32, 64, 3, 1)
self.dropout1 = nn.Dropout(0.25)
self.dropout2 = nn.Dropout(0.5)
self.fc1 = nn.Linear(9216, 128)
self.fc2 = nn.Linear(128, 10)
def forward(self, x):
x = self.conv1(x)
x = F.relu(x)
x = self.conv2(x)
x = F.relu(x)
x = F.max_pool2d(x, 2)
x = self.dropout1(x)
x = torch.flatten(x, 1)
x = self.fc1(x)
x = F.relu(x)
x = self.dropout2(x)
x = self.fc2(x)
return x
model = Net().to(device)
# Loss and Optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=hyper_params["learning_rate"])
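# Let Accelerate wrap the model, optimizer, and dataloader for the
# current device and distributed configuration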
model, optimizer, train_loader = accelerator.prepare(model, optimizer, train_loader)
def train(model, optimizer, criterion, dataloader, epoch):
model.train()
total_loss = 0
correct = 0
for batch_idx, (images, labels) in enumerate(
tqdm(dataloader, total=len(dataloader))
):
optimizer.zero_grad()
images = images.to(device)
labels = labels.to(device)
outputs = model(images)
loss = criterion(outputs, labels)
pred = outputs.argmax(
dim=1, keepdim=True
) # get the index of the max log-probability
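        # accelerator.backward() replaces loss.backward() so gradients
        # are handled correctly under mixed precision and distributed setups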
accelerator.backward(loss)
optimizer.step()
# Compute train accuracy
batch_correct = pred.eq(labels.view_as(pred)).sum().item()
batch_total = labels.size(0)
total_loss += loss.item()
correct += batch_correct
        # Log the batch accuracy to Comet once per batch
accelerator.log({"batch_accuracy": batch_correct / batch_total})
    total_loss /= len(dataloader)  # criterion returns per-batch means, so average over batches
correct /= len(dataloader.dataset)
accelerator.get_tracker("comet_ml").tracker.log_metrics(
{"accuracy": correct, "loss": total_loss}, epoch=epoch
)
# Train the Model
print("Running Model Training")

max_epochs = hyper_params["num_epochs"]
for epoch in range(1, max_epochs + 1):
    print("Epoch: {}/{}".format(epoch, max_epochs))
    train(model, optimizer, criterion, train_loader, epoch)
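Once training finishes, call Accelerator.end_training() so that all trackers, including Comet, can flush their data and end the run cleanly:

accelerator.end_training()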
Try it out!¶
Here's an example of using Comet with Accelerate.
Configure Comet for Accelerate¶
The Accelerate integration doesn't have any integration-specific configuration options; it follows the general Comet Configuration.
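For example, Comet can be configured through its standard environment variables before the script runs. A minimal sketch; replace the placeholder with your own API key:

import os

# Standard Comet configuration environment variables
os.environ["COMET_API_KEY"] = "<your-api-key>"
os.environ["COMET_PROJECT_NAME"] = "comet-example-accelerate"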