In [12]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, SubsetRandomSampler
from tqdm import tqdm

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
In [13]:
CIFAR10_MEAN = [0.4914, 0.4822, 0.4465]
CIFAR10_STD = [0.2470, 0.2435, 0.2616]

train_transform = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(CIFAR10_MEAN, CIFAR10_STD),
])

test_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(CIFAR10_MEAN, CIFAR10_STD)
])

train_dataset = datasets.CIFAR10(
    root='./data',
    train=True,
    transform=train_transform,
    download=True
)

test_dataset = datasets.CIFAR10(
    root='./data',
    train=False,
    transform=test_transform,
    download=True
)

train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=128,
    shuffle=True,
    num_workers=4,
    pin_memory=True,
    persistent_workers=True
)

val_loader = DataLoader(
    dataset=test_dataset,
    batch_size=128,
    shuffle=False,
    num_workers=4,
    pin_memory=True,
    persistent_workers=True
)
In [14]:
class NiN(nn.Module):
  def __init__(self, num_classes=10):
    super(NiN, self).__init__()
    self.features = nn.Sequential(
        # MLE Block 1
        nn.Conv2d(3, 192, kernel_size=5, stride=1, padding=2),
        nn.BatchNorm2d(192),
        nn.ReLU(),
        nn.Conv2d(192, 160, kernel_size=1, stride=1, padding=0),
        nn.BatchNorm2d(160),
        nn.ReLU(),
        nn.Conv2d(160, 96, kernel_size=1, stride=1, padding=0),
        nn.BatchNorm2d(96),
        nn.ReLU(),
        nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
        nn.Dropout(0.2),

        nn.Conv2d(96, 192, kernel_size=5, stride=1, padding=2),
        nn.BatchNorm2d(192),
        nn.ReLU(),
        nn.Conv2d(192, 192, kernel_size=1, stride=1, padding=0),
        nn.BatchNorm2d(192),
        nn.ReLU(),
        nn.Conv2d(192, 192, kernel_size=1, stride=1, padding=0),
        nn.BatchNorm2d(192),
        nn.ReLU(),
        nn.AvgPool2d(kernel_size=3, stride=2, padding=1),
        nn.Dropout(0.2),

        nn.Conv2d(192, 192, kernel_size=3, stride=1, padding=1),
        nn.BatchNorm2d(192),
        nn.ReLU(),
        nn.Conv2d(192, 192, kernel_size=1, stride=1, padding=0),
        nn.BatchNorm2d(192),
        nn.ReLU(),
        nn.Conv2d(192, num_classes, kernel_size=1, stride=1, padding=0),

        nn.AdaptiveAvgPool2d((1, 1))
    )

  def forward(self, x):
    x = self.features(x)
    x = torch.flatten(x, 1)
    return x
In [15]:
model = NiN().to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=0.9, weight_decay=5e-4)
scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[30, 60, 80], gamma=0.2)

best_acc = 0
epochs = 100

for epoch in range(epochs):
  model.train()
  running_loss = 0.0
  correct = 0
  total = 0

  train_pbar = tqdm(train_loader, desc=f"Epoch {epoch+1}/{epochs} [Train]")
  for inputs, targets in train_pbar:
    inputs, targets = inputs.to(device), targets.to(device)
    optimizer.zero_grad()
    outputs = model(inputs)
    loss = criterion(outputs, targets)
    loss.backward()
    optimizer.step()

    running_loss += loss.item()
    _, predicted = outputs.max(1)
    total += targets.size(0)
    correct += predicted.eq(targets).sum().item()

    train_pbar.set_postfix(loss=running_loss/(total/targets.size(0)), acc=100.*correct/total)

  train_loss = running_loss / len(train_loader)
  train_acc = 100. * correct / total

  model.eval()
  val_loss = 0.0
  val_correct = 0
  val_total = 0

  with torch.no_grad():
    val_pbar = tqdm(val_loader, desc=f"Epoch {epoch+1}/{epochs} [Val]")
    for inputs, targets in val_pbar:
      inputs, targets = inputs.to(device), targets.to(device)
      outputs = model(inputs)
      loss = criterion(outputs, targets)

      val_loss += loss.item()
      _, predicted = outputs.max(1)
      val_total += targets.size(0)
      val_correct += predicted.eq(targets).sum().item()
      val_pbar.set_postfix(loss=val_loss/(val_total/targets.size(0)), acc=100.*val_correct/val_total)

  validation_loss = val_loss / len(val_loader)
  validation_acc = 100. * val_correct / val_total

  print(f"Epoch {epoch+1}/{epochs} | Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.2f}% | Val Loss: {validation_loss:.4f}, Val Acc: {validation_acc:.2f}%")

  scheduler.step()

  if validation_acc > best_acc:
    best_acc = validation_acc
    torch.save(model.state_dict(), 'best_lenet_classic_model.pth')
    print(f"New best model found! Accuracy: {best_acc:.2f}%. Saving model...")

print("\nTraining finished.")
print(f"Best validation accuracy: {best_acc:.2f}%")
Epoch 1/100 [Train]: 100%|██████████| 391/391 [00:13<00:00, 28.27it/s, acc=47.6, loss=0.892]
Epoch 1/100 [Val]: 100%|██████████| 79/79 [00:01<00:00, 73.69it/s, acc=49.4, loss=0.192]
Epoch 1/100 | Train Loss: 1.4260, Train Acc: 47.60% | Val Loss: 1.5218, Val Acc: 49.36%
New best model found! Accuracy: 49.36%. Saving model...
Epoch 2/100 [Train]: 100%|██████████| 391/391 [00:13<00:00, 28.29it/s, acc=62.8, loss=0.651]
Epoch 2/100 [Val]: 100%|██████████| 79/79 [00:01<00:00, 78.96it/s, acc=63.8, loss=0.131]
Epoch 2/100 | Train Loss: 1.0411, Train Acc: 62.81% | Val Loss: 1.0336, Val Acc: 63.78%
New best model found! Accuracy: 63.78%. Saving model...
Epoch 3/100 [Train]: 100%|██████████| 391/391 [00:13<00:00, 28.31it/s, acc=69.9, loss=0.539]
Epoch 3/100 [Val]: 100%|██████████| 79/79 [00:00<00:00, 79.21it/s, acc=60.7, loss=0.161]
Epoch 3/100 | Train Loss: 0.8617, Train Acc: 69.90% | Val Loss: 1.2754, Val Acc: 60.69%
Epoch 4/100 [Train]: 100%|██████████| 391/391 [00:13<00:00, 28.45it/s, acc=73.1, loss=0.483]
Epoch 4/100 [Val]: 100%|██████████| 79/79 [00:00<00:00, 79.70it/s, acc=68.4, loss=0.12]
Epoch 4/100 | Train Loss: 0.7718, Train Acc: 73.09% | Val Loss: 0.9493, Val Acc: 68.37%
New best model found! Accuracy: 68.37%. Saving model...
Epoch 5/100 [Train]: 100%|██████████| 391/391 [00:13<00:00, 28.54it/s, acc=75.7, loss=0.442]
Epoch 5/100 [Val]: 100%|██████████| 79/79 [00:00<00:00, 79.67it/s, acc=73.2, loss=0.0999]
Epoch 5/100 | Train Loss: 0.7073, Train Acc: 75.65% | Val Loss: 0.7903, Val Acc: 73.24%
New best model found! Accuracy: 73.24%. Saving model...
Epoch 6/100 [Train]: 100%|██████████| 391/391 [00:13<00:00, 28.53it/s, acc=76.8, loss=0.421]
Epoch 6/100 [Val]: 100%|██████████| 79/79 [00:00<00:00, 80.70it/s, acc=71.6, loss=0.104]
Epoch 6/100 | Train Loss: 0.6736, Train Acc: 76.80% | Val Loss: 0.8239, Val Acc: 71.57%
Epoch 7/100 [Train]: 100%|██████████| 391/391 [00:13<00:00, 28.52it/s, acc=77.7, loss=0.405]
Epoch 7/100 [Val]: 100%|██████████| 79/79 [00:00<00:00, 80.52it/s, acc=76.2, loss=0.0878]
Epoch 7/100 | Train Loss: 0.6476, Train Acc: 77.71% | Val Loss: 0.6945, Val Acc: 76.24%
New best model found! Accuracy: 76.24%. Saving model...
Epoch 8/100 [Train]: 100%|██████████| 391/391 [00:13<00:00, 28.45it/s, acc=78.5, loss=0.388]
Epoch 8/100 [Val]: 100%|██████████| 79/79 [00:00<00:00, 79.83it/s, acc=66.3, loss=0.14]
Epoch 8/100 | Train Loss: 0.6202, Train Acc: 78.54% | Val Loss: 1.1089, Val Acc: 66.35%
Epoch 9/100 [Train]: 100%|██████████| 391/391 [00:13<00:00, 28.44it/s, acc=79.4, loss=0.372]
Epoch 9/100 [Val]: 100%|██████████| 79/79 [00:00<00:00, 79.10it/s, acc=69.3, loss=0.132]
Epoch 9/100 | Train Loss: 0.5949, Train Acc: 79.40% | Val Loss: 1.0447, Val Acc: 69.34%
Epoch 10/100 [Train]: 100%|██████████| 391/391 [00:13<00:00, 28.45it/s, acc=79.8, loss=0.367]
Epoch 10/100 [Val]: 100%|██████████| 79/79 [00:00<00:00, 79.83it/s, acc=70.7, loss=0.11]
Epoch 10/100 | Train Loss: 0.5867, Train Acc: 79.81% | Val Loss: 0.8685, Val Acc: 70.70%
Epoch 11/100 [Train]: 100%|██████████| 391/391 [00:13<00:00, 28.44it/s, acc=80.4, loss=0.357]
Epoch 11/100 [Val]: 100%|██████████| 79/79 [00:00<00:00, 80.51it/s, acc=68.3, loss=0.13]
Epoch 11/100 | Train Loss: 0.5704, Train Acc: 80.41% | Val Loss: 1.0278, Val Acc: 68.33%
Epoch 12/100 [Train]: 100%|██████████| 391/391 [00:13<00:00, 28.45it/s, acc=80.6, loss=0.352]
Epoch 12/100 [Val]: 100%|██████████| 79/79 [00:00<00:00, 80.54it/s, acc=70.9, loss=0.121]
Epoch 12/100 | Train Loss: 0.5630, Train Acc: 80.63% | Val Loss: 0.9609, Val Acc: 70.92%
Epoch 13/100 [Train]: 100%|██████████| 391/391 [00:13<00:00, 28.48it/s, acc=81.1, loss=0.343]
Epoch 13/100 [Val]: 100%|██████████| 79/79 [00:00<00:00, 80.31it/s, acc=70.3, loss=0.118]
Epoch 13/100 | Train Loss: 0.5485, Train Acc: 81.09% | Val Loss: 0.9312, Val Acc: 70.34%
Epoch 14/100 [Train]: 100%|██████████| 391/391 [00:13<00:00, 28.45it/s, acc=81.1, loss=0.345]
Epoch 14/100 [Val]: 100%|██████████| 79/79 [00:00<00:00, 80.87it/s, acc=63, loss=0.185]
Epoch 14/100 | Train Loss: 0.5512, Train Acc: 81.11% | Val Loss: 1.4632, Val Acc: 62.97%
Epoch 15/100 [Train]: 100%|██████████| 391/391 [00:13<00:00, 28.49it/s, acc=81.3, loss=0.341]
Epoch 15/100 [Val]: 100%|██████████| 79/79 [00:00<00:00, 80.80it/s, acc=78.1, loss=0.0823]
Epoch 15/100 | Train Loss: 0.5450, Train Acc: 81.25% | Val Loss: 0.6512, Val Acc: 78.07%
New best model found! Accuracy: 78.07%. Saving model...
Epoch 16/100 [Train]: 100%|██████████| 391/391 [00:13<00:00, 28.44it/s, acc=81.7, loss=0.332]
Epoch 16/100 [Val]: 100%|██████████| 79/79 [00:00<00:00, 79.59it/s, acc=76.1, loss=0.0884]
Epoch 16/100 | Train Loss: 0.5311, Train Acc: 81.71% | Val Loss: 0.6993, Val Acc: 76.06%
Epoch 17/100 [Train]: 100%|██████████| 391/391 [00:13<00:00, 28.44it/s, acc=82.1, loss=0.325]
Epoch 17/100 [Val]: 100%|██████████| 79/79 [00:00<00:00, 80.18it/s, acc=79.4, loss=0.0774]
Epoch 17/100 | Train Loss: 0.5199, Train Acc: 82.14% | Val Loss: 0.6124, Val Acc: 79.42%
New best model found! Accuracy: 79.42%. Saving model...
Epoch 18/100 [Train]: 100%|██████████| 391/391 [00:13<00:00, 28.50it/s, acc=82.2, loss=0.323]
Epoch 18/100 [Val]: 100%|██████████| 79/79 [00:00<00:00, 80.48it/s, acc=76.3, loss=0.0934]
Epoch 18/100 | Train Loss: 0.5169, Train Acc: 82.25% | Val Loss: 0.7389, Val Acc: 76.35%
Epoch 19/100 [Train]: 100%|██████████| 391/391 [00:13<00:00, 28.48it/s, acc=82.3, loss=0.324]
Epoch 19/100 [Val]: 100%|██████████| 79/79 [00:00<00:00, 80.39it/s, acc=63.6, loss=0.159]
Epoch 19/100 | Train Loss: 0.5184, Train Acc: 82.28% | Val Loss: 1.2561, Val Acc: 63.62%
Epoch 20/100 [Train]: 100%|██████████| 391/391 [00:13<00:00, 28.49it/s, acc=82.2, loss=0.323]
Epoch 20/100 [Val]: 100%|██████████| 79/79 [00:00<00:00, 79.38it/s, acc=76.1, loss=0.0965]
Epoch 20/100 | Train Loss: 0.5170, Train Acc: 82.20% | Val Loss: 0.7638, Val Acc: 76.14%
Epoch 21/100 [Train]: 100%|██████████| 391/391 [00:13<00:00, 28.49it/s, acc=82.6, loss=0.316]
Epoch 21/100 [Val]: 100%|██████████| 79/79 [00:00<00:00, 80.94it/s, acc=74.8, loss=0.101]
Epoch 21/100 | Train Loss: 0.5057, Train Acc: 82.59% | Val Loss: 0.7983, Val Acc: 74.81%
Epoch 22/100 [Train]: 100%|██████████| 391/391 [00:13<00:00, 28.43it/s, acc=83.1, loss=0.311]
Epoch 22/100 [Val]: 100%|██████████| 79/79 [00:00<00:00, 80.29it/s, acc=75.7, loss=0.0957]
Epoch 22/100 | Train Loss: 0.4972, Train Acc: 83.08% | Val Loss: 0.7569, Val Acc: 75.70%
Epoch 23/100 [Train]: 100%|██████████| 391/391 [00:13<00:00, 28.46it/s, acc=82.9, loss=0.314]
Epoch 23/100 [Val]: 100%|██████████| 79/79 [00:00<00:00, 80.85it/s, acc=74.3, loss=0.101]
Epoch 23/100 | Train Loss: 0.5018, Train Acc: 82.87% | Val Loss: 0.8007, Val Acc: 74.33%
Epoch 24/100 [Train]: 100%|██████████| 391/391 [00:13<00:00, 28.46it/s, acc=83.1, loss=0.309]
Epoch 24/100 [Val]: 100%|██████████| 79/79 [00:00<00:00, 80.30it/s, acc=80.1, loss=0.0761]
Epoch 24/100 | Train Loss: 0.4947, Train Acc: 83.07% | Val Loss: 0.6019, Val Acc: 80.07%
New best model found! Accuracy: 80.07%. Saving model...
Epoch 25/100 [Train]: 100%|██████████| 391/391 [00:13<00:00, 28.51it/s, acc=83.3, loss=0.304]
Epoch 25/100 [Val]: 100%|██████████| 79/79 [00:00<00:00, 79.63it/s, acc=77.9, loss=0.0843]
Epoch 25/100 | Train Loss: 0.4864, Train Acc: 83.30% | Val Loss: 0.6670, Val Acc: 77.93%
Epoch 26/100 [Train]: 100%|██████████| 391/391 [00:13<00:00, 28.47it/s, acc=83.1, loss=0.308]
Epoch 26/100 [Val]: 100%|██████████| 79/79 [00:00<00:00, 80.05it/s, acc=76.3, loss=0.0932]
Epoch 26/100 | Train Loss: 0.4919, Train Acc: 83.06% | Val Loss: 0.7372, Val Acc: 76.32%
Epoch 27/100 [Train]: 100%|██████████| 391/391 [00:13<00:00, 28.46it/s, acc=83.6, loss=0.301]
Epoch 27/100 [Val]: 100%|██████████| 79/79 [00:01<00:00, 78.72it/s, acc=73.8, loss=0.102]
Epoch 27/100 | Train Loss: 0.4806, Train Acc: 83.59% | Val Loss: 0.8100, Val Acc: 73.85%
Epoch 28/100 [Train]: 100%|██████████| 391/391 [00:13<00:00, 28.47it/s, acc=83.4, loss=0.301]
Epoch 28/100 [Val]: 100%|██████████| 79/79 [00:00<00:00, 80.44it/s, acc=78.9, loss=0.0794]
Epoch 28/100 | Train Loss: 0.4804, Train Acc: 83.44% | Val Loss: 0.6284, Val Acc: 78.88%
Epoch 29/100 [Train]: 100%|██████████| 391/391 [00:13<00:00, 28.45it/s, acc=83.1, loss=0.304]
Epoch 29/100 [Val]: 100%|██████████| 79/79 [00:00<00:00, 80.87it/s, acc=78.3, loss=0.0819]
Epoch 29/100 | Train Loss: 0.4860, Train Acc: 83.11% | Val Loss: 0.6481, Val Acc: 78.35%
Epoch 30/100 [Train]: 100%|██████████| 391/391 [00:13<00:00, 28.48it/s, acc=83.5, loss=0.298]
Epoch 30/100 [Val]: 100%|██████████| 79/79 [00:00<00:00, 79.73it/s, acc=81.1, loss=0.0711]
Epoch 30/100 | Train Loss: 0.4767, Train Acc: 83.55% | Val Loss: 0.5625, Val Acc: 81.08%
New best model found! Accuracy: 81.08%. Saving model...
Epoch 31/100 [Train]: 100%|██████████| 391/391 [00:13<00:00, 28.49it/s, acc=89.5, loss=0.192]
Epoch 31/100 [Val]: 100%|██████████| 79/79 [00:00<00:00, 79.70it/s, acc=88.1, loss=0.0423]
Epoch 31/100 | Train Loss: 0.3074, Train Acc: 89.52% | Val Loss: 0.3344, Val Acc: 88.07%
New best model found! Accuracy: 88.07%. Saving model...
Epoch 32/100 [Train]: 100%|██████████| 391/391 [00:13<00:00, 28.48it/s, acc=90.8, loss=0.167]
Epoch 32/100 [Val]: 100%|██████████| 79/79 [00:00<00:00, 80.35it/s, acc=89.1, loss=0.0411]
Epoch 32/100 | Train Loss: 0.2675, Train Acc: 90.77% | Val Loss: 0.3248, Val Acc: 89.10%
New best model found! Accuracy: 89.10%. Saving model...
Epoch 33/100 [Train]: 100%|██████████| 391/391 [00:13<00:00, 28.48it/s, acc=91.3, loss=0.155]
Epoch 33/100 [Val]: 100%|██████████| 79/79 [00:00<00:00, 80.73it/s, acc=89, loss=0.041]
Epoch 33/100 | Train Loss: 0.2473, Train Acc: 91.33% | Val Loss: 0.3243, Val Acc: 89.00%
Epoch 34/100 [Train]: 100%|██████████| 391/391 [00:13<00:00, 28.47it/s, acc=91.8, loss=0.15]
Epoch 34/100 [Val]: 100%|██████████| 79/79 [00:00<00:00, 80.31it/s, acc=89.2, loss=0.0413]
Epoch 34/100 | Train Loss: 0.2395, Train Acc: 91.79% | Val Loss: 0.3264, Val Acc: 89.24%
New best model found! Accuracy: 89.24%. Saving model...
Epoch 35/100 [Train]: 100%|██████████| 391/391 [00:13<00:00, 28.45it/s, acc=92, loss=0.145]
Epoch 35/100 [Val]: 100%|██████████| 79/79 [00:00<00:00, 80.40it/s, acc=88.8, loss=0.0418]
Epoch 35/100 | Train Loss: 0.2324, Train Acc: 91.95% | Val Loss: 0.3308, Val Acc: 88.77%
Epoch 36/100 [Train]: 100%|██████████| 391/391 [00:13<00:00, 28.49it/s, acc=92.2, loss=0.14]
Epoch 36/100 [Val]: 100%|██████████| 79/79 [00:00<00:00, 80.40it/s, acc=87.9, loss=0.0459]
Epoch 36/100 | Train Loss: 0.2239, Train Acc: 92.19% | Val Loss: 0.3632, Val Acc: 87.89%
Epoch 37/100 [Train]: 100%|██████████| 391/391 [00:13<00:00, 28.48it/s, acc=92.1, loss=0.141]
Epoch 37/100 [Val]: 100%|██████████| 79/79 [00:00<00:00, 80.62it/s, acc=89, loss=0.0416]
Epoch 37/100 | Train Loss: 0.2252, Train Acc: 92.15% | Val Loss: 0.3291, Val Acc: 88.97%
Epoch 38/100 [Train]: 100%|██████████| 391/391 [00:13<00:00, 28.51it/s, acc=92.4, loss=0.138]
Epoch 38/100 [Val]: 100%|██████████| 79/79 [00:00<00:00, 79.27it/s, acc=87.2, loss=0.0481]
Epoch 38/100 | Train Loss: 0.2208, Train Acc: 92.39% | Val Loss: 0.3802, Val Acc: 87.18%
Epoch 39/100 [Train]: 100%|██████████| 391/391 [00:13<00:00, 28.47it/s, acc=92.2, loss=0.142]
Epoch 39/100 [Val]: 100%|██████████| 79/79 [00:00<00:00, 79.42it/s, acc=88.7, loss=0.0442]
Epoch 39/100 | Train Loss: 0.2273, Train Acc: 92.17% | Val Loss: 0.3497, Val Acc: 88.66%
Epoch 40/100 [Train]: 100%|██████████| 391/391 [00:13<00:00, 28.45it/s, acc=92.5, loss=0.134]
Epoch 40/100 [Val]: 100%|██████████| 79/79 [00:00<00:00, 80.86it/s, acc=87.4, loss=0.0479]
Epoch 40/100 | Train Loss: 0.2145, Train Acc: 92.54% | Val Loss: 0.3789, Val Acc: 87.44%
Epoch 41/100 [Train]: 100%|██████████| 391/391 [00:13<00:00, 28.48it/s, acc=92.5, loss=0.136]
Epoch 41/100 [Val]: 100%|██████████| 79/79 [00:00<00:00, 80.83it/s, acc=87, loss=0.0493]
Epoch 41/100 | Train Loss: 0.2174, Train Acc: 92.55% | Val Loss: 0.3904, Val Acc: 87.04%
Epoch 42/100 [Train]: 100%|██████████| 391/391 [00:13<00:00, 28.45it/s, acc=92.5, loss=0.136]
Epoch 42/100 [Val]: 100%|██████████| 79/79 [00:00<00:00, 80.60it/s, acc=87.7, loss=0.0484]
Epoch 42/100 | Train Loss: 0.2174, Train Acc: 92.48% | Val Loss: 0.3825, Val Acc: 87.74%
Epoch 43/100 [Train]: 100%|██████████| 391/391 [00:13<00:00, 28.49it/s, acc=92.5, loss=0.138]
Epoch 43/100 [Val]: 100%|██████████| 79/79 [00:00<00:00, 81.04it/s, acc=83.7, loss=0.0663]
Epoch 43/100 | Train Loss: 0.2207, Train Acc: 92.45% | Val Loss: 0.5248, Val Acc: 83.69%
Epoch 44/100 [Train]: 100%|██████████| 391/391 [00:13<00:00, 28.49it/s, acc=92.4, loss=0.138]
Epoch 44/100 [Val]: 100%|██████████| 79/79 [00:00<00:00, 79.45it/s, acc=86.6, loss=0.0541]
Epoch 44/100 | Train Loss: 0.2200, Train Acc: 92.39% | Val Loss: 0.4283, Val Acc: 86.57%
Epoch 45/100 [Train]: 100%|██████████| 391/391 [00:13<00:00, 28.49it/s, acc=92.5, loss=0.135]
Epoch 45/100 [Val]: 100%|██████████| 79/79 [00:00<00:00, 80.05it/s, acc=87.3, loss=0.0505]
Epoch 45/100 | Train Loss: 0.2164, Train Acc: 92.52% | Val Loss: 0.3993, Val Acc: 87.29%
Epoch 46/100 [Train]: 100%|██████████| 391/391 [00:13<00:00, 28.52it/s, acc=92.5, loss=0.137]
Epoch 46/100 [Val]: 100%|██████████| 79/79 [00:00<00:00, 80.02it/s, acc=87.1, loss=0.0511]
Epoch 46/100 | Train Loss: 0.2194, Train Acc: 92.47% | Val Loss: 0.4039, Val Acc: 87.11%
Epoch 47/100 [Train]: 100%|██████████| 391/391 [00:13<00:00, 28.50it/s, acc=92.4, loss=0.139]
Epoch 47/100 [Val]: 100%|██████████| 79/79 [00:00<00:00, 79.36it/s, acc=86.3, loss=0.0538]
Epoch 47/100 | Train Loss: 0.2218, Train Acc: 92.45% | Val Loss: 0.4258, Val Acc: 86.33%
Epoch 48/100 [Train]: 100%|██████████| 391/391 [00:13<00:00, 28.50it/s, acc=92.3, loss=0.138]
Epoch 48/100 [Val]: 100%|██████████| 79/79 [00:00<00:00, 80.58it/s, acc=86, loss=0.055]
Epoch 48/100 | Train Loss: 0.2198, Train Acc: 92.33% | Val Loss: 0.4349, Val Acc: 86.01%
Epoch 49/100 [Train]: 100%|██████████| 391/391 [00:13<00:00, 28.51it/s, acc=92.6, loss=0.134]
Epoch 49/100 [Val]: 100%|██████████| 79/79 [00:00<00:00, 80.24it/s, acc=87, loss=0.0506]
Epoch 49/100 | Train Loss: 0.2146, Train Acc: 92.63% | Val Loss: 0.4004, Val Acc: 87.00%
Epoch 50/100 [Train]: 100%|██████████| 391/391 [00:13<00:00, 28.46it/s, acc=92.4, loss=0.137]
Epoch 50/100 [Val]: 100%|██████████| 79/79 [00:00<00:00, 81.22it/s, acc=85.7, loss=0.0584]
Epoch 50/100 | Train Loss: 0.2194, Train Acc: 92.42% | Val Loss: 0.4621, Val Acc: 85.73%
Epoch 51/100 [Train]: 100%|██████████| 391/391 [00:13<00:00, 28.47it/s, acc=92.6, loss=0.136]
Epoch 51/100 [Val]: 100%|██████████| 79/79 [00:00<00:00, 80.89it/s, acc=87, loss=0.0494]
Epoch 51/100 | Train Loss: 0.2168, Train Acc: 92.61% | Val Loss: 0.3910, Val Acc: 87.05%
Epoch 52/100 [Train]: 100%|██████████| 391/391 [00:13<00:00, 28.50it/s, acc=92.8, loss=0.133]
Epoch 52/100 [Val]: 100%|██████████| 79/79 [00:00<00:00, 80.78it/s, acc=87.9, loss=0.0478]
Epoch 52/100 | Train Loss: 0.2129, Train Acc: 92.76% | Val Loss: 0.3782, Val Acc: 87.89%
Epoch 53/100 [Train]: 100%|██████████| 391/391 [00:13<00:00, 28.48it/s, acc=92.7, loss=0.133]
Epoch 53/100 [Val]: 100%|██████████| 79/79 [00:00<00:00, 80.28it/s, acc=85.6, loss=0.0569]
Epoch 53/100 | Train Loss: 0.2132, Train Acc: 92.68% | Val Loss: 0.4503, Val Acc: 85.63%
Epoch 54/100 [Train]: 100%|██████████| 391/391 [00:13<00:00, 28.51it/s, acc=92.6, loss=0.134]
Epoch 54/100 [Val]: 100%|██████████| 79/79 [00:00<00:00, 80.36it/s, acc=87, loss=0.0519]
Epoch 54/100 | Train Loss: 0.2147, Train Acc: 92.57% | Val Loss: 0.4106, Val Acc: 86.97%
Epoch 55/100 [Train]: 100%|██████████| 391/391 [00:13<00:00, 28.50it/s, acc=92.9, loss=0.129]
Epoch 55/100 [Val]: 100%|██████████| 79/79 [00:00<00:00, 80.57it/s, acc=85.6, loss=0.0575]
Epoch 55/100 | Train Loss: 0.2056, Train Acc: 92.87% | Val Loss: 0.4547, Val Acc: 85.61%
Epoch 56/100 [Train]: 100%|██████████| 391/391 [00:13<00:00, 28.51it/s, acc=92.9, loss=0.129]
Epoch 56/100 [Val]: 100%|██████████| 79/79 [00:00<00:00, 80.80it/s, acc=85.4, loss=0.0586]
Epoch 56/100 | Train Loss: 0.2060, Train Acc: 92.86% | Val Loss: 0.4635, Val Acc: 85.38%
Epoch 57/100 [Train]: 100%|██████████| 391/391 [00:13<00:00, 28.45it/s, acc=92.7, loss=0.132]
Epoch 57/100 [Val]: 100%|██████████| 79/79 [00:01<00:00, 78.08it/s, acc=86.2, loss=0.055]
Epoch 57/100 | Train Loss: 0.2107, Train Acc: 92.69% | Val Loss: 0.4349, Val Acc: 86.24%
Epoch 58/100 [Train]: 100%|██████████| 391/391 [00:13<00:00, 28.45it/s, acc=92.6, loss=0.133]
Epoch 58/100 [Val]: 100%|██████████| 79/79 [00:00<00:00, 80.05it/s, acc=87.2, loss=0.0508]
Epoch 58/100 | Train Loss: 0.2132, Train Acc: 92.56% | Val Loss: 0.4022, Val Acc: 87.15%
Epoch 59/100 [Train]: 100%|██████████| 391/391 [00:13<00:00, 28.44it/s, acc=92.9, loss=0.13]
Epoch 59/100 [Val]: 100%|██████████| 79/79 [00:00<00:00, 80.60it/s, acc=87.5, loss=0.0495]
Epoch 59/100 | Train Loss: 0.2086, Train Acc: 92.92% | Val Loss: 0.3920, Val Acc: 87.55%
Epoch 60/100 [Train]: 100%|██████████| 391/391 [00:13<00:00, 28.44it/s, acc=93, loss=0.128]
Epoch 60/100 [Val]: 100%|██████████| 79/79 [00:00<00:00, 80.01it/s, acc=87.3, loss=0.0495]
Epoch 60/100 | Train Loss: 0.2041, Train Acc: 92.99% | Val Loss: 0.3913, Val Acc: 87.33%
Epoch 61/100 [Train]: 100%|██████████| 391/391 [00:13<00:00, 28.43it/s, acc=96, loss=0.0774]
Epoch 61/100 [Val]: 100%|██████████| 79/79 [00:00<00:00, 80.64it/s, acc=91.6, loss=0.0321]
Epoch 61/100 | Train Loss: 0.1237, Train Acc: 96.00% | Val Loss: 0.2536, Val Acc: 91.61%
New best model found! Accuracy: 91.61%. Saving model...
Epoch 62/100 [Train]: 100%|██████████| 391/391 [00:13<00:00, 28.42it/s, acc=97.1, loss=0.0603]
Epoch 62/100 [Val]: 100%|██████████| 79/79 [00:00<00:00, 81.04it/s, acc=91.8, loss=0.0319]
Epoch 62/100 | Train Loss: 0.0963, Train Acc: 97.13% | Val Loss: 0.2525, Val Acc: 91.85%
New best model found! Accuracy: 91.85%. Saving model...
Epoch 63/100 [Train]: 100%|██████████| 391/391 [00:13<00:00, 28.47it/s, acc=97.3, loss=0.0552]
Epoch 63/100 [Val]: 100%|██████████| 79/79 [00:00<00:00, 80.76it/s, acc=91.9, loss=0.0316]
Epoch 63/100 | Train Loss: 0.0883, Train Acc: 97.34% | Val Loss: 0.2503, Val Acc: 91.89%
New best model found! Accuracy: 91.89%. Saving model...
Epoch 64/100 [Train]: 100%|██████████| 391/391 [00:13<00:00, 28.46it/s, acc=97.5, loss=0.0516]
Epoch 64/100 [Val]: 100%|██████████| 79/79 [00:01<00:00, 78.78it/s, acc=91.8, loss=0.0321]
Epoch 64/100 | Train Loss: 0.0824, Train Acc: 97.51% | Val Loss: 0.2537, Val Acc: 91.75%
Epoch 65/100 [Train]: 100%|██████████| 391/391 [00:13<00:00, 28.44it/s, acc=97.7, loss=0.0485]
Epoch 65/100 [Val]: 100%|██████████| 79/79 [00:00<00:00, 79.83it/s, acc=91.9, loss=0.0326]
Epoch 65/100 | Train Loss: 0.0776, Train Acc: 97.68% | Val Loss: 0.2577, Val Acc: 91.87%
Epoch 66/100 [Train]: 100%|██████████| 391/391 [00:13<00:00, 28.49it/s, acc=97.9, loss=0.0455]
Epoch 66/100 [Val]: 100%|██████████| 79/79 [00:00<00:00, 80.59it/s, acc=91.8, loss=0.0325]
Epoch 66/100 | Train Loss: 0.0727, Train Acc: 97.93% | Val Loss: 0.2571, Val Acc: 91.77%
Epoch 67/100 [Train]: 100%|██████████| 391/391 [00:13<00:00, 28.45it/s, acc=98, loss=0.0435]
Epoch 67/100 [Val]: 100%|██████████| 79/79 [00:00<00:00, 80.58it/s, acc=91.7, loss=0.0332]
Epoch 67/100 | Train Loss: 0.0696, Train Acc: 97.97% | Val Loss: 0.2625, Val Acc: 91.65%
Epoch 68/100 [Train]: 100%|██████████| 391/391 [00:13<00:00, 28.44it/s, acc=98, loss=0.0426]
Epoch 68/100 [Val]: 100%|██████████| 79/79 [00:00<00:00, 79.05it/s, acc=91.9, loss=0.0329]
Epoch 68/100 | Train Loss: 0.0681, Train Acc: 98.00% | Val Loss: 0.2602, Val Acc: 91.92%
New best model found! Accuracy: 91.92%. Saving model...
Epoch 69/100 [Train]: 100%|██████████| 391/391 [00:13<00:00, 28.47it/s, acc=98.2, loss=0.0397]
Epoch 69/100 [Val]: 100%|██████████| 79/79 [00:00<00:00, 81.05it/s, acc=92, loss=0.0337]
Epoch 69/100 | Train Loss: 0.0634, Train Acc: 98.21% | Val Loss: 0.2663, Val Acc: 92.00%
New best model found! Accuracy: 92.00%. Saving model...
Epoch 70/100 [Train]: 100%|██████████| 391/391 [00:13<00:00, 28.48it/s, acc=98.3, loss=0.0383]
Epoch 70/100 [Val]: 100%|██████████| 79/79 [00:00<00:00, 80.72it/s, acc=92, loss=0.0339]
Epoch 70/100 | Train Loss: 0.0613, Train Acc: 98.25% | Val Loss: 0.2681, Val Acc: 92.01%
New best model found! Accuracy: 92.01%. Saving model...
Epoch 71/100 [Train]: 100%|██████████| 391/391 [00:13<00:00, 28.51it/s, acc=98.3, loss=0.0378]
Epoch 71/100 [Val]: 100%|██████████| 79/79 [00:00<00:00, 80.68it/s, acc=92.1, loss=0.0333]
Epoch 71/100 | Train Loss: 0.0605, Train Acc: 98.28% | Val Loss: 0.2638, Val Acc: 92.12%
New best model found! Accuracy: 92.12%. Saving model...
Epoch 72/100 [Train]: 100%|██████████| 391/391 [00:13<00:00, 28.46it/s, acc=98.4, loss=0.0358]
Epoch 72/100 [Val]: 100%|██████████| 79/79 [00:00<00:00, 79.93it/s, acc=91.8, loss=0.035]
Epoch 72/100 | Train Loss: 0.0572, Train Acc: 98.38% | Val Loss: 0.2769, Val Acc: 91.80%
Epoch 73/100 [Train]: 100%|██████████| 391/391 [00:13<00:00, 28.48it/s, acc=98.4, loss=0.0356]
Epoch 73/100 [Val]: 100%|██████████| 79/79 [00:00<00:00, 79.61it/s, acc=91.8, loss=0.0343]
Epoch 73/100 | Train Loss: 0.0569, Train Acc: 98.44% | Val Loss: 0.2713, Val Acc: 91.75%
Epoch 74/100 [Train]: 100%|██████████| 391/391 [00:13<00:00, 28.47it/s, acc=98.4, loss=0.0358]
Epoch 74/100 [Val]: 100%|██████████| 79/79 [00:00<00:00, 80.75it/s, acc=91.7, loss=0.034]
Epoch 74/100 | Train Loss: 0.0573, Train Acc: 98.39% | Val Loss: 0.2689, Val Acc: 91.71%
Epoch 75/100 [Train]: 100%|██████████| 391/391 [00:13<00:00, 28.47it/s, acc=98.6, loss=0.0335]
Epoch 75/100 [Val]: 100%|██████████| 79/79 [00:00<00:00, 80.77it/s, acc=91.6, loss=0.0362]
Epoch 75/100 | Train Loss: 0.0535, Train Acc: 98.56% | Val Loss: 0.2861, Val Acc: 91.64%
Epoch 76/100 [Train]: 100%|██████████| 391/391 [00:13<00:00, 28.47it/s, acc=98.5, loss=0.0337]
Epoch 76/100 [Val]: 100%|██████████| 79/79 [00:00<00:00, 80.61it/s, acc=91.6, loss=0.0349]
Epoch 76/100 | Train Loss: 0.0539, Train Acc: 98.50% | Val Loss: 0.2761, Val Acc: 91.63%
Epoch 77/100 [Train]: 100%|██████████| 391/391 [00:13<00:00, 28.47it/s, acc=98.6, loss=0.0328]
Epoch 77/100 [Val]: 100%|██████████| 79/79 [00:00<00:00, 80.33it/s, acc=91.8, loss=0.0351]
Epoch 77/100 | Train Loss: 0.0524, Train Acc: 98.62% | Val Loss: 0.2775, Val Acc: 91.77%
Epoch 78/100 [Train]: 100%|██████████| 391/391 [00:13<00:00, 28.53it/s, acc=98.7, loss=0.0317]
Epoch 78/100 [Val]: 100%|██████████| 79/79 [00:00<00:00, 80.66it/s, acc=91.9, loss=0.0352]
Epoch 78/100 | Train Loss: 0.0507, Train Acc: 98.65% | Val Loss: 0.2787, Val Acc: 91.86%
Epoch 79/100 [Train]: 100%|██████████| 391/391 [00:13<00:00, 28.52it/s, acc=98.6, loss=0.0313]
Epoch 79/100 [Val]: 100%|██████████| 79/79 [00:01<00:00, 79.00it/s, acc=91.8, loss=0.0354]
Epoch 79/100 | Train Loss: 0.0501, Train Acc: 98.61% | Val Loss: 0.2804, Val Acc: 91.76%
Epoch 80/100 [Train]: 100%|██████████| 391/391 [00:13<00:00, 28.48it/s, acc=98.6, loss=0.0318]
Epoch 80/100 [Val]: 100%|██████████| 79/79 [00:00<00:00, 80.80it/s, acc=91.6, loss=0.0359]
Epoch 80/100 | Train Loss: 0.0509, Train Acc: 98.61% | Val Loss: 0.2840, Val Acc: 91.58%
Epoch 81/100 [Train]: 100%|██████████| 391/391 [00:13<00:00, 28.50it/s, acc=99, loss=0.0253]
Epoch 81/100 [Val]: 100%|██████████| 79/79 [00:00<00:00, 80.53it/s, acc=91.9, loss=0.0339]
Epoch 81/100 | Train Loss: 0.0404, Train Acc: 99.00% | Val Loss: 0.2680, Val Acc: 91.94%
Epoch 82/100 [Train]: 100%|██████████| 391/391 [00:13<00:00, 28.45it/s, acc=99.1, loss=0.0237]
Epoch 82/100 [Val]: 100%|██████████| 79/79 [00:00<00:00, 80.46it/s, acc=92, loss=0.0339]
Epoch 82/100 | Train Loss: 0.0379, Train Acc: 99.11% | Val Loss: 0.2684, Val Acc: 91.97%
Epoch 83/100 [Train]:  71%|███████   | 276/391 [00:09<00:04, 28.32it/s, acc=99.2, loss=0.0364]
---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
/tmp/ipython-input-2310262104.py in <cell line: 0>()
     23     optimizer.step()
     24 
---> 25     running_loss += loss.item()
     26     _, predicted = outputs.max(1)
     27     total += targets.size(0)

KeyboardInterrupt: