Download as pdf or txt
Download as pdf or txt
You are on page 1of 60


March 6, 2024

0.1 Mobilenet
0.1.1 1. Load the Data
# In [2]:
import torch
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data import DataLoader

# Define data transformations (shared by the train and test splits)
transform = transforms.Compose([
    transforms.Resize((224, 224)),  # Resize to fit model input size
    transforms.ToTensor(),  # Normalize works on tensors, not PIL images
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),  # ImageNet normalization
])

# Load dataset
train_dataset = datasets.ImageFolder(root='train', transform=transform)
test_dataset = datasets.ImageFolder(root='test', transform=transform)

# Create data loaders (only the training split is shuffled)
train_loader = DataLoader(dataset=train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=64, shuffle=False)

[15]: train_dataset.classes

[15]: ['001.Black_footed_Albatross',





0.1.2 2. Initialise the Training Model
Here we use the MobileNet architecture, because it has proved to be fast while
still achieving good accuracy.

# In [5]:
import torch.nn as nn
import torchvision.models as models

# Initialize MobileNet model with ImageNet weights. The `weights=` enum
# replaces `pretrained=True`, which is deprecated since torchvision 0.13.
mobilenet = models.mobilenet_v2(
    weights=models.MobileNet_V2_Weights.IMAGENET1K_V1)

# Modify the last fully connected layer for the number of classes in your
# dataset; the input width is read from the layer being replaced.
num_classes = len(train_dataset.classes)
mobilenet.classifier[1] = nn.Linear(
    in_features=mobilenet.classifier[1].in_features,
    out_features=num_classes)

# Freeze every parameter first ...
for param in mobilenet.parameters():
    param.requires_grad = False

# ... then unfreeze only the classifier head for fine-tuning
for param in mobilenet.classifier.parameters():
    param.requires_grad = True

UserWarning: The parameter 'pretrained' is deprecated since 0.13 and may be
removed in the future, please use 'weights' instead.
UserWarning: Arguments other than a weight enum or `None` for 'weights' are
deprecated since 0.13 and may be removed in the future. The current behavior is
equivalent to passing `weights=MobileNet_V2_Weights.IMAGENET1K_V1`. You can also
use `weights=MobileNet_V2_Weights.DEFAULT` to get the most up-to-date weights.
Downloading: "" to
��| 13.6M/13.6M [00:02<00:00, 5.51MB/s]

0.1.3 3. Define the Training Loop

# In [10]:
import torch.optim as optim

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Move the model before building the optimizer so both refer to parameters
# that live on the training device.
mobilenet = mobilenet.to(device)

criterion = nn.CrossEntropyLoss()
# Only parameters that still require gradients are handed to the optimizer.
optimizer = optim.Adam(
    [p for p in mobilenet.parameters() if p.requires_grad], lr=0.001)

def train_m(model, train_loader, criterion, optimizer, num_epochs=30, device=None):
    """Train ``model`` and record the loss and accuracy of every epoch.

    Args:
        model: Network to optimise. It is moved to ``device`` and switched
            to training mode.
        train_loader: Sized iterable yielding ``(inputs, labels)`` batches.
        criterion: Callable mapping ``(outputs, labels)`` to a scalar loss.
        optimizer: Optimizer bound to the parameters of ``model``.
        num_epochs: Number of passes over ``train_loader``.
        device: Device to train on. Defaults to CUDA when it is available
            and to the CPU otherwise.

    Returns:
        ``(train_losses, train_accuracy)``: two lists with one entry per
        epoch, holding the mean batch loss and the accuracy in percent.
    """
    if device is None:
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    model.train()

    train_losses = []
    train_accuracy = []

    for epoch in range(num_epochs):
        running_loss = 0.0
        correct = 0
        total = 0

        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)

            # Standard optimisation step: reset, forward, backward, update.
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

            _, predicted = outputs.max(1)
            total += labels.size(0)
            correct += predicted.eq(labels).sum().item()

        # Guard both divisions so an empty loader reports zeros, not an error.
        epoch_loss = running_loss / max(len(train_loader), 1)
        epoch_accuracy = 100. * correct / total if total else 0.0

        # Keep the history; the plotting code consumes the returned lists.
        train_losses.append(epoch_loss)
        train_accuracy.append(epoch_accuracy)

        print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}, "
              f"Accuracy: {epoch_accuracy:.2f}%")

    return train_losses, train_accuracy

# Train with the default 30 epochs and keep the history for plotting.
train_losses_m, train_accuracy_m = train_m(mobilenet, train_loader, criterion,
                                           optimizer)


Epoch [1/30], Loss: 0.5475, Accuracy: 88.81%

Epoch [2/30], Loss: 0.4232, Accuracy: 92.99%
Epoch [3/30], Loss: 0.3650, Accuracy: 94.41%
Epoch [4/30], Loss: 0.3245, Accuracy: 95.31%
Epoch [5/30], Loss: 0.2908, Accuracy: 95.50%
Epoch [6/30], Loss: 0.2555, Accuracy: 96.66%
Epoch [7/30], Loss: 0.2373, Accuracy: 96.80%
Epoch [8/30], Loss: 0.2151, Accuracy: 97.36%

Epoch [9/30], Loss: 0.1964, Accuracy: 97.58%
Epoch [10/30], Loss: 0.1796, Accuracy: 98.01%
Epoch [11/30], Loss: 0.1658, Accuracy: 98.65%
Epoch [12/30], Loss: 0.1613, Accuracy: 97.85%
Epoch [13/30], Loss: 0.1466, Accuracy: 98.33%
Epoch [14/30], Loss: 0.1360, Accuracy: 98.72%
Epoch [15/30], Loss: 0.1280, Accuracy: 98.67%
Epoch [16/30], Loss: 0.1220, Accuracy: 98.88%
Epoch [17/30], Loss: 0.1200, Accuracy: 98.67%
Epoch [18/30], Loss: 0.1089, Accuracy: 99.03%
Epoch [19/30], Loss: 0.1046, Accuracy: 99.02%
Epoch [20/30], Loss: 0.0983, Accuracy: 99.03%
Epoch [21/30], Loss: 0.0903, Accuracy: 99.18%
Epoch [22/30], Loss: 0.0901, Accuracy: 99.20%
Epoch [23/30], Loss: 0.0840, Accuracy: 99.20%
Epoch [24/30], Loss: 0.0829, Accuracy: 99.17%
Epoch [25/30], Loss: 0.0815, Accuracy: 99.08%
Epoch [26/30], Loss: 0.0783, Accuracy: 99.17%
Epoch [27/30], Loss: 0.0738, Accuracy: 99.25%
Epoch [28/30], Loss: 0.0730, Accuracy: 99.12%
Epoch [29/30], Loss: 0.0727, Accuracy: 99.18%
Epoch [30/30], Loss: 0.0722, Accuracy: 99.12%

0.1.4 4. Plot the training results

# In [11]:
import matplotlib.pyplot as plt

# Plot architecture (prints the module tree of the network)
print(mobilenet)

# Plot training details
plt.plot(train_losses_m, label='Train Loss')
plt.plot(train_accuracy_m, label='Train Accuracy')
plt.title('Training Loss and Accuracy')
plt.xlabel('Epoch')
plt.legend()  # without a legend the two labelled curves cannot be told apart
plt.show()

(features): Sequential(
(0): Conv2dNormActivation(
(0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1),
(1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True,
(2): ReLU6(inplace=True)

(1): InvertedResidual(
(conv): Sequential(
(0): Conv2dNormActivation(
(0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1),
groups=32, bias=False)
(1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True,
(2): ReLU6(inplace=True)
(1): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
(2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True,
(2): InvertedResidual(
(conv): Sequential(
(0): Conv2dNormActivation(
(0): Conv2d(16, 96, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True,
(2): ReLU6(inplace=True)
(1): Conv2dNormActivation(
(0): Conv2d(96, 96, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1),
groups=96, bias=False)
(1): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True,
(2): ReLU6(inplace=True)
(2): Conv2d(96, 24, kernel_size=(1, 1), stride=(1, 1), bias=False)
(3): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True,
(3): InvertedResidual(
(conv): Sequential(
(0): Conv2dNormActivation(
(0): Conv2d(24, 144, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(144, eps=1e-05, momentum=0.1, affine=True,
(2): ReLU6(inplace=True)
(1): Conv2dNormActivation(
(0): Conv2d(144, 144, kernel_size=(3, 3), stride=(1, 1), padding=(1,
1), groups=144, bias=False)
(1): BatchNorm2d(144, eps=1e-05, momentum=0.1, affine=True,
(2): ReLU6(inplace=True)

(2): Conv2d(144, 24, kernel_size=(1, 1), stride=(1, 1), bias=False)
(3): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True,
(4): InvertedResidual(
(conv): Sequential(
(0): Conv2dNormActivation(
(0): Conv2d(24, 144, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(144, eps=1e-05, momentum=0.1, affine=True,
(2): ReLU6(inplace=True)
(1): Conv2dNormActivation(
(0): Conv2d(144, 144, kernel_size=(3, 3), stride=(2, 2), padding=(1,
1), groups=144, bias=False)
(1): BatchNorm2d(144, eps=1e-05, momentum=0.1, affine=True,
(2): ReLU6(inplace=True)
(2): Conv2d(144, 32, kernel_size=(1, 1), stride=(1, 1), bias=False)
(3): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True,
(5): InvertedResidual(
(conv): Sequential(
(0): Conv2dNormActivation(
(0): Conv2d(32, 192, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True,
(2): ReLU6(inplace=True)
(1): Conv2dNormActivation(
(0): Conv2d(192, 192, kernel_size=(3, 3), stride=(1, 1), padding=(1,
1), groups=192, bias=False)
(1): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True,
(2): ReLU6(inplace=True)
(2): Conv2d(192, 32, kernel_size=(1, 1), stride=(1, 1), bias=False)
(3): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True,
(6): InvertedResidual(
(conv): Sequential(

(0): Conv2dNormActivation(
(0): Conv2d(32, 192, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True,
(2): ReLU6(inplace=True)
(1): Conv2dNormActivation(
(0): Conv2d(192, 192, kernel_size=(3, 3), stride=(1, 1), padding=(1,
1), groups=192, bias=False)
(1): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True,
(2): ReLU6(inplace=True)
(2): Conv2d(192, 32, kernel_size=(1, 1), stride=(1, 1), bias=False)
(3): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True,
(7): InvertedResidual(
(conv): Sequential(
(0): Conv2dNormActivation(
(0): Conv2d(32, 192, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True,
(2): ReLU6(inplace=True)
(1): Conv2dNormActivation(
(0): Conv2d(192, 192, kernel_size=(3, 3), stride=(2, 2), padding=(1,
1), groups=192, bias=False)
(1): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True,
(2): ReLU6(inplace=True)
(2): Conv2d(192, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
(3): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True,
(8): InvertedResidual(
(conv): Sequential(
(0): Conv2dNormActivation(
(0): Conv2d(64, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True,
(2): ReLU6(inplace=True)
(1): Conv2dNormActivation(
(0): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1,

1), groups=384, bias=False)
(1): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True,
(2): ReLU6(inplace=True)
(2): Conv2d(384, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
(3): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True,
(9): InvertedResidual(
(conv): Sequential(
(0): Conv2dNormActivation(
(0): Conv2d(64, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True,
(2): ReLU6(inplace=True)
(1): Conv2dNormActivation(
(0): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1,
1), groups=384, bias=False)
(1): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True,
(2): ReLU6(inplace=True)
(2): Conv2d(384, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
(3): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True,
(10): InvertedResidual(
(conv): Sequential(
(0): Conv2dNormActivation(
(0): Conv2d(64, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True,
(2): ReLU6(inplace=True)
(1): Conv2dNormActivation(
(0): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1,
1), groups=384, bias=False)
(1): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True,
(2): ReLU6(inplace=True)
(2): Conv2d(384, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
(3): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True,

(11): InvertedResidual(
(conv): Sequential(
(0): Conv2dNormActivation(
(0): Conv2d(64, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True,
(2): ReLU6(inplace=True)
(1): Conv2dNormActivation(
(0): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1,
1), groups=384, bias=False)
(1): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True,
(2): ReLU6(inplace=True)
(2): Conv2d(384, 96, kernel_size=(1, 1), stride=(1, 1), bias=False)
(3): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True,
(12): InvertedResidual(
(conv): Sequential(
(0): Conv2dNormActivation(
(0): Conv2d(96, 576, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(576, eps=1e-05, momentum=0.1, affine=True,
(2): ReLU6(inplace=True)
(1): Conv2dNormActivation(
(0): Conv2d(576, 576, kernel_size=(3, 3), stride=(1, 1), padding=(1,
1), groups=576, bias=False)
(1): BatchNorm2d(576, eps=1e-05, momentum=0.1, affine=True,
(2): ReLU6(inplace=True)
(2): Conv2d(576, 96, kernel_size=(1, 1), stride=(1, 1), bias=False)
(3): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True,
(13): InvertedResidual(
(conv): Sequential(
(0): Conv2dNormActivation(
(0): Conv2d(96, 576, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(576, eps=1e-05, momentum=0.1, affine=True,

(2): ReLU6(inplace=True)
(1): Conv2dNormActivation(
(0): Conv2d(576, 576, kernel_size=(3, 3), stride=(1, 1), padding=(1,
1), groups=576, bias=False)
(1): BatchNorm2d(576, eps=1e-05, momentum=0.1, affine=True,
(2): ReLU6(inplace=True)
(2): Conv2d(576, 96, kernel_size=(1, 1), stride=(1, 1), bias=False)
(3): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True,
(14): InvertedResidual(
(conv): Sequential(
(0): Conv2dNormActivation(
(0): Conv2d(96, 576, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(576, eps=1e-05, momentum=0.1, affine=True,
(2): ReLU6(inplace=True)
(1): Conv2dNormActivation(
(0): Conv2d(576, 576, kernel_size=(3, 3), stride=(2, 2), padding=(1,
1), groups=576, bias=False)
(1): BatchNorm2d(576, eps=1e-05, momentum=0.1, affine=True,
(2): ReLU6(inplace=True)
(2): Conv2d(576, 160, kernel_size=(1, 1), stride=(1, 1), bias=False)
(3): BatchNorm2d(160, eps=1e-05, momentum=0.1, affine=True,
(15): InvertedResidual(
(conv): Sequential(
(0): Conv2dNormActivation(
(0): Conv2d(160, 960, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True,
(2): ReLU6(inplace=True)
(1): Conv2dNormActivation(
(0): Conv2d(960, 960, kernel_size=(3, 3), stride=(1, 1), padding=(1,
1), groups=960, bias=False)
(1): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True,
(2): ReLU6(inplace=True)

(2): Conv2d(960, 160, kernel_size=(1, 1), stride=(1, 1), bias=False)
(3): BatchNorm2d(160, eps=1e-05, momentum=0.1, affine=True,
(16): InvertedResidual(
(conv): Sequential(
(0): Conv2dNormActivation(
(0): Conv2d(160, 960, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True,
(2): ReLU6(inplace=True)
(1): Conv2dNormActivation(
(0): Conv2d(960, 960, kernel_size=(3, 3), stride=(1, 1), padding=(1,
1), groups=960, bias=False)
(1): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True,
(2): ReLU6(inplace=True)
(2): Conv2d(960, 160, kernel_size=(1, 1), stride=(1, 1), bias=False)
(3): BatchNorm2d(160, eps=1e-05, momentum=0.1, affine=True,
(17): InvertedResidual(
(conv): Sequential(
(0): Conv2dNormActivation(
(0): Conv2d(160, 960, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True,
(2): ReLU6(inplace=True)
(1): Conv2dNormActivation(
(0): Conv2d(960, 960, kernel_size=(3, 3), stride=(1, 1), padding=(1,
1), groups=960, bias=False)
(1): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True,
(2): ReLU6(inplace=True)
(2): Conv2d(960, 320, kernel_size=(1, 1), stride=(1, 1), bias=False)
(3): BatchNorm2d(320, eps=1e-05, momentum=0.1, affine=True,
(18): Conv2dNormActivation(
(0): Conv2d(320, 1280, kernel_size=(1, 1), stride=(1, 1), bias=False)

(1): BatchNorm2d(1280, eps=1e-05, momentum=0.1, affine=True,
(2): ReLU6(inplace=True)
(classifier): Sequential(
(0): Dropout(p=0.2, inplace=False)
(1): Linear(in_features=1280, out_features=200, bias=True)

[48]: device

[48]: device(type='cuda')

# In [12]:
# Save only the weights (state dict) of the fine-tuned network.
torch.save(mobilenet.state_dict(), 'mobilenet_checkpoint.pth')

0.1.5 5. Evaluate the Model

# In [13]:
def evaluate_m(model, test_loader, device=None):
    """Measure and print the classification accuracy of ``model``.

    Args:
        model: Network to evaluate. It is moved to ``device`` and switched
            to evaluation mode.
        test_loader: Iterable yielding ``(inputs, labels)`` batches.
        device: Device to run on. Defaults to CUDA when it is available and
            to the CPU otherwise.

    Returns:
        The accuracy in percent (``0.0`` when the loader yields no samples).
    """
    if device is None:
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    # Evaluation mode makes dropout / batch-norm layers deterministic.
    model.eval()

    correct = 0
    total = 0

    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs, labels = inputs.to(device), labels.to(device)

            outputs = model(inputs)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    # An empty loader would otherwise divide by zero.
    accuracy = 100. * correct / total if total else 0.0

    print(f"Test Accuracy: {accuracy:.2f}%")
    return accuracy

evaluate_m(mobilenet, test_loader)

Test Accuracy: 55.25%

0.2 Inception
[ ]:

0.2.1 1. Initialise the Training Model

Here we use the Inception architecture, which has proved to be reliable and
makes heavy use of 1×1 convolutions.

# In [49]:
import torch
import torch.nn as nn
import torchvision.models as models

# Load pre-trained Inception model. The `weights=` enum replaces
# `pretrained=True`, which is deprecated since torchvision 0.13.
inception_model = models.inception_v3(
    weights=models.Inception_V3_Weights.IMAGENET1K_V1)

# Modify the last fully connected layer for the desired number of classes
num_classes = len(train_dataset.classes)
inception_model.fc = nn.Linear(inception_model.fc.in_features, num_classes)

# Freeze every parameter first ...
for param in inception_model.parameters():
    param.requires_grad = False

# ... then unfreeze only the new final layer for fine-tuning
for param in inception_model.fc.parameters():
    param.requires_grad = True

0.2.2 2. Load the Data
# In [53]:
import torchvision.transforms as transforms
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader

# Define transformations for data preprocessing
transform = transforms.Compose([
    transforms.Resize((299, 299)),  # Resize images to match Inception input size
    transforms.ToTensor(),  # Convert images to tensors
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),  # Normalize images
])

# Load training and testing datasets
train_dataset = ImageFolder(root='train/', transform=transform)
test_dataset = ImageFolder(root='test/', transform=transform)

# Create data loaders (only the training split is shuffled)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

0.2.3 3. Define the Training Loop

# In [58]:
import torch.optim as optim
import torch.nn as nn

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Move the model before building the optimizer so both refer to parameters
# that live on the training device.
inception_model = inception_model.to(device)

criterion = nn.CrossEntropyLoss()
# Only parameters that still require gradients are handed to the optimizer.
optimizer = optim.Adam(
    [p for p in inception_model.parameters() if p.requires_grad], lr=0.001)

def train(model, train_loader, criterion, optimizer, num_epochs=25, device=None):
    """Train an Inception-style ``model`` and record per-epoch metrics.

    Args:
        model: Network to optimise. It is moved to ``device`` and switched
            to training mode. Its forward pass may return either the logits
            or a tuple whose first element is the logits.
        train_loader: Sized iterable yielding ``(inputs, labels)`` batches
            of images.
        criterion: Callable mapping ``(logits, labels)`` to a scalar loss.
        optimizer: Optimizer bound to the parameters of ``model``.
        num_epochs: Number of passes over ``train_loader``.
        device: Device to train on. Defaults to CUDA when it is available
            and to the CPU otherwise.

    Returns:
        ``(train_losses, train_accuracy)``: two lists with one entry per
        epoch, holding the mean batch loss and the accuracy in percent.
    """
    if device is None:
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    model.train()

    input_size = (299, 299)
    train_losses = []
    train_accuracy = []

    for epoch in range(num_epochs):
        running_loss = 0.0
        correct = 0
        total = 0

        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)

            # Resize input images to match Inception model's input size;
            # skipped when the batch already has that size.
            if tuple(inputs.shape[-2:]) != input_size:
                inputs = nn.functional.interpolate(
                    inputs, size=input_size, mode='bilinear',
                    align_corners=False)

            # Forward pass
            optimizer.zero_grad()
            outputs = model(inputs)

            # A tuple result carries the main logits first (followed by the
            # auxiliary output); only the main logits are used here.
            main_outputs = outputs[0] if isinstance(outputs, tuple) else outputs

            # Calculate loss
            loss = criterion(main_outputs, labels)

            # Backward pass and optimization
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

            # Calculate accuracy
            _, predicted = main_outputs.max(1)
            total += labels.size(0)
            correct += predicted.eq(labels).sum().item()

        # Guard both divisions so an empty loader reports zeros, not an error.
        epoch_loss = running_loss / max(len(train_loader), 1)
        epoch_accuracy = 100. * correct / total if total else 0.0

        # Keep the history; the plotting code consumes the returned lists.
        train_losses.append(epoch_loss)
        train_accuracy.append(epoch_accuracy)

        print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}, "
              f"Accuracy: {epoch_accuracy:.2f}%")

    return train_losses, train_accuracy

# In [59]:
# Keep the returned history: the plotting cell reads `train_losses` and
# `train_accuracy`.
train_losses, train_accuracy = train(inception_model, train_loader, criterion,
                                     optimizer, num_epochs=20)

Epoch [1/20], Loss: 4.5101, Accuracy: 12.18%

Epoch [2/20], Loss: 2.9086, Accuracy: 36.97%
Epoch [3/20], Loss: 2.2402, Accuracy: 49.20%
Epoch [4/20], Loss: 1.8574, Accuracy: 57.74%
Epoch [5/20], Loss: 1.6349, Accuracy: 61.95%
Epoch [6/20], Loss: 1.4252, Accuracy: 66.60%
Epoch [7/20], Loss: 1.2799, Accuracy: 69.70%
Epoch [8/20], Loss: 1.1566, Accuracy: 72.82%
Epoch [9/20], Loss: 1.0692, Accuracy: 74.02%
Epoch [10/20], Loss: 1.0009, Accuracy: 75.96%

Epoch [11/20], Loss: 0.9476, Accuracy: 76.24%
Epoch [12/20], Loss: 0.8681, Accuracy: 78.45%
Epoch [13/20], Loss: 0.8172, Accuracy: 79.13%
Epoch [14/20], Loss: 0.7826, Accuracy: 80.25%
Epoch [15/20], Loss: 0.7598, Accuracy: 80.53%
Epoch [16/20], Loss: 0.6949, Accuracy: 82.20%
Epoch [17/20], Loss: 0.7028, Accuracy: 81.26%
Epoch [18/20], Loss: 0.6456, Accuracy: 82.35%
Epoch [19/20], Loss: 0.6203, Accuracy: 83.60%
Epoch [20/20], Loss: 0.6106, Accuracy: 83.88%

[59]: ([4.510051303721489,


# In [61]:
# Save only the weights (state dict) of the fine-tuned network.
torch.save(inception_model.state_dict(), 'inception_model.pth')

0.2.4 4. Evaluate the Model

# In [63]:
def evaluate(model, test_loader, device=None):
    """Measure and print the classification accuracy of ``model``.

    Args:
        model: Network to evaluate. It is moved to ``device`` and switched
            to evaluation mode, so its forward pass must return the logits.
        test_loader: Iterable yielding ``(inputs, labels)`` batches.
        device: Device to run on. Defaults to CUDA when it is available and
            to the CPU otherwise.

    Returns:
        The accuracy in percent (``0.0`` when the loader yields no samples).
    """
    if device is None:
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    # Evaluation mode makes dropout / batch-norm layers deterministic.
    model.eval()

    correct = 0
    total = 0

    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs, labels = inputs.to(device), labels.to(device)

            outputs = model(inputs)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    # An empty loader would otherwise divide by zero.
    accuracy = 100.*correct / total if total else 0.0

    print(f"Test Accuracy: {accuracy:.4f}")
    return accuracy

evaluate(inception_model, test_loader)

Test Accuracy: 51.8813

0.2.5 5. Plot the training results

# In [122]:
import matplotlib.pyplot as plt

# Plot training loss and accuracy curves side by side
plt.figure(figsize=(10, 5))

plt.subplot(1, 2, 1)
plt.plot(train_losses, label='Train Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.title('Training Loss Curve')
plt.legend()

plt.subplot(1, 2, 2)
plt.plot(train_accuracy, label='Train Accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy (%)')
plt.title('Training Accuracy Curve')
plt.legend()

plt.tight_layout()
plt.show()


0.3 EfficientNet
[ ]:

[66]: !pip install efficientnet_pytorch

Collecting efficientnet_pytorch
Downloading efficientnet_pytorch-0.7.1.tar.gz (21 kB)
Preparing metadata ( started
Preparing metadata ( finished with status 'done'
Requirement already satisfied: torch in c:\users\asus\anaconda3\lib\site-
packages (from efficientnet_pytorch) (2.1.0+cu118)
Requirement already satisfied: filelock in c:\users\asus\anaconda3\lib\site-
packages (from torch->efficientnet_pytorch) (3.6.0)
Requirement already satisfied: typing-extensions in
c:\users\asus\anaconda3\lib\site-packages (from torch->efficientnet_pytorch)
Requirement already satisfied: sympy in c:\users\asus\anaconda3\lib\site-
packages (from torch->efficientnet_pytorch) (1.10.1)
Requirement already satisfied: networkx in c:\users\asus\anaconda3\lib\site-
packages (from torch->efficientnet_pytorch) (2.7.1)
Requirement already satisfied: jinja2 in c:\users\asus\anaconda3\lib\site-
packages (from torch->efficientnet_pytorch) (3.1.3)
Requirement already satisfied: fsspec in c:\users\asus\anaconda3\lib\site-

packages (from torch->efficientnet_pytorch) (2024.2.0)
Requirement already satisfied: MarkupSafe>=2.0 in
c:\users\asus\anaconda3\lib\site-packages (from
jinja2->torch->efficientnet_pytorch) (2.0.1)
Requirement already satisfied: mpmath>=0.19 in c:\users\asus\anaconda3\lib\site-
packages (from sympy->torch->efficientnet_pytorch) (1.2.1)
Building wheels for collected packages: efficientnet_pytorch
Building wheel for efficientnet_pytorch ( started
Building wheel for efficientnet_pytorch ( finished with status
Created wheel for efficientnet_pytorch:
filename=efficientnet_pytorch-0.7.1-py3-none-any.whl size=16428
Stored in directory: c:\users\asus\appdata\local\pip\cache\wheels\29\16\24\752
Successfully built efficientnet_pytorch
Installing collected packages: efficientnet_pytorch
Successfully installed efficientnet_pytorch-0.7.1

0.3.1 1 and 2. Initialise the Training Model and Load Data

Here, we are using the state-of-the-art EfficientNet architecture.

# In [68]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from efficientnet_pytorch import EfficientNet
import matplotlib.pyplot as plt

# Set device (GPU/CPU)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Define transforms. Images need a common size to be batched and must be
# tensors before Normalize is applied.
# NOTE(review): the original Resize size was lost; 224x224 is assumed.
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Load datasets
train_dataset = datasets.ImageFolder('train/', transform=transform)
test_dataset = datasets.ImageFolder('test/', transform=transform)

# Create data loaders (only the training split is shuffled)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32)

# Load pre-trained EfficientNet with ImageNet weights; the classification
# head is sized to the number of classes found in the training set.
efficientnet = EfficientNet.from_pretrained(
    'efficientnet-b0', num_classes=len(train_dataset.classes))

# Move the model before building the optimizer so both refer to parameters
# that live on the training device.
efficientnet = efficientnet.to(device)

# Modify the model to fit the desired number of parameters
# For example, reducing the number of parameters to 10M
# Modify model architecture accordingly to fit the constraint

# Define criterion and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(efficientnet.parameters(), lr=0.001)

Loaded pretrained weights for efficientnet-b0

[69]: efficientnet

[69]: EfficientNet(
(_conv_stem): Conv2dStaticSamePadding(
3, 32, kernel_size=(3, 3), stride=(2, 2), bias=False
(static_padding): ZeroPad2d((0, 1, 0, 1))
(_bn0): BatchNorm2d(32, eps=0.001, momentum=0.010000000000000009, affine=True,
(_blocks): ModuleList(
(0): MBConvBlock(
(_depthwise_conv): Conv2dStaticSamePadding(
32, 32, kernel_size=(3, 3), stride=[1, 1], groups=32, bias=False
(static_padding): ZeroPad2d((1, 1, 1, 1))
(_bn1): BatchNorm2d(32, eps=0.001, momentum=0.010000000000000009,
affine=True, track_running_stats=True)
(_se_reduce): Conv2dStaticSamePadding(
32, 8, kernel_size=(1, 1), stride=(1, 1)
(static_padding): Identity()
(_se_expand): Conv2dStaticSamePadding(
8, 32, kernel_size=(1, 1), stride=(1, 1)
(static_padding): Identity()
(_project_conv): Conv2dStaticSamePadding(
32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False
(static_padding): Identity()
(_bn2): BatchNorm2d(16, eps=0.001, momentum=0.010000000000000009,
affine=True, track_running_stats=True)

(_swish): MemoryEfficientSwish()
(1): MBConvBlock(
(_expand_conv): Conv2dStaticSamePadding(
16, 96, kernel_size=(1, 1), stride=(1, 1), bias=False
(static_padding): Identity()
(_bn0): BatchNorm2d(96, eps=0.001, momentum=0.010000000000000009,
affine=True, track_running_stats=True)
(_depthwise_conv): Conv2dStaticSamePadding(
96, 96, kernel_size=(3, 3), stride=[2, 2], groups=96, bias=False
(static_padding): ZeroPad2d((0, 1, 0, 1))
(_bn1): BatchNorm2d(96, eps=0.001, momentum=0.010000000000000009,
affine=True, track_running_stats=True)
(_se_reduce): Conv2dStaticSamePadding(
96, 4, kernel_size=(1, 1), stride=(1, 1)
(static_padding): Identity()
(_se_expand): Conv2dStaticSamePadding(
4, 96, kernel_size=(1, 1), stride=(1, 1)
(static_padding): Identity()
(_project_conv): Conv2dStaticSamePadding(
96, 24, kernel_size=(1, 1), stride=(1, 1), bias=False
(static_padding): Identity()
(_bn2): BatchNorm2d(24, eps=0.001, momentum=0.010000000000000009,
affine=True, track_running_stats=True)
(_swish): MemoryEfficientSwish()
(2): MBConvBlock(
(_expand_conv): Conv2dStaticSamePadding(
24, 144, kernel_size=(1, 1), stride=(1, 1), bias=False
(static_padding): Identity()
(_bn0): BatchNorm2d(144, eps=0.001, momentum=0.010000000000000009,
affine=True, track_running_stats=True)
(_depthwise_conv): Conv2dStaticSamePadding(
144, 144, kernel_size=(3, 3), stride=(1, 1), groups=144, bias=False
(static_padding): ZeroPad2d((1, 1, 1, 1))
(_bn1): BatchNorm2d(144, eps=0.001, momentum=0.010000000000000009,
affine=True, track_running_stats=True)
(_se_reduce): Conv2dStaticSamePadding(
144, 6, kernel_size=(1, 1), stride=(1, 1)
(static_padding): Identity()

(_se_expand): Conv2dStaticSamePadding(
6, 144, kernel_size=(1, 1), stride=(1, 1)
(static_padding): Identity()
(_project_conv): Conv2dStaticSamePadding(
144, 24, kernel_size=(1, 1), stride=(1, 1), bias=False
(static_padding): Identity()
(_bn2): BatchNorm2d(24, eps=0.001, momentum=0.010000000000000009,
affine=True, track_running_stats=True)
(_swish): MemoryEfficientSwish()
(3): MBConvBlock(
(_expand_conv): Conv2dStaticSamePadding(
24, 144, kernel_size=(1, 1), stride=(1, 1), bias=False
(static_padding): Identity()
(_bn0): BatchNorm2d(144, eps=0.001, momentum=0.010000000000000009,
affine=True, track_running_stats=True)
(_depthwise_conv): Conv2dStaticSamePadding(
144, 144, kernel_size=(5, 5), stride=[2, 2], groups=144, bias=False
(static_padding): ZeroPad2d((1, 2, 1, 2))
(_bn1): BatchNorm2d(144, eps=0.001, momentum=0.010000000000000009,
affine=True, track_running_stats=True)
(_se_reduce): Conv2dStaticSamePadding(
144, 6, kernel_size=(1, 1), stride=(1, 1)
(static_padding): Identity()
(_se_expand): Conv2dStaticSamePadding(
6, 144, kernel_size=(1, 1), stride=(1, 1)
(static_padding): Identity()
(_project_conv): Conv2dStaticSamePadding(
144, 40, kernel_size=(1, 1), stride=(1, 1), bias=False
(static_padding): Identity()
(_bn2): BatchNorm2d(40, eps=0.001, momentum=0.010000000000000009,
affine=True, track_running_stats=True)
(_swish): MemoryEfficientSwish()
(4): MBConvBlock(
(_expand_conv): Conv2dStaticSamePadding(
40, 240, kernel_size=(1, 1), stride=(1, 1), bias=False
(static_padding): Identity()

(_bn0): BatchNorm2d(240, eps=0.001, momentum=0.010000000000000009,
affine=True, track_running_stats=True)
(_depthwise_conv): Conv2dStaticSamePadding(
240, 240, kernel_size=(5, 5), stride=(1, 1), groups=240, bias=False
(static_padding): ZeroPad2d((2, 2, 2, 2))
(_bn1): BatchNorm2d(240, eps=0.001, momentum=0.010000000000000009,
affine=True, track_running_stats=True)
(_se_reduce): Conv2dStaticSamePadding(
240, 10, kernel_size=(1, 1), stride=(1, 1)
(static_padding): Identity()
(_se_expand): Conv2dStaticSamePadding(
10, 240, kernel_size=(1, 1), stride=(1, 1)
(static_padding): Identity()
(_project_conv): Conv2dStaticSamePadding(
240, 40, kernel_size=(1, 1), stride=(1, 1), bias=False
(static_padding): Identity()
(_bn2): BatchNorm2d(40, eps=0.001, momentum=0.010000000000000009,
affine=True, track_running_stats=True)
(_swish): MemoryEfficientSwish()
(5): MBConvBlock(
(_expand_conv): Conv2dStaticSamePadding(
40, 240, kernel_size=(1, 1), stride=(1, 1), bias=False
(static_padding): Identity()
(_bn0): BatchNorm2d(240, eps=0.001, momentum=0.010000000000000009,
affine=True, track_running_stats=True)
(_depthwise_conv): Conv2dStaticSamePadding(
240, 240, kernel_size=(3, 3), stride=[2, 2], groups=240, bias=False
(static_padding): ZeroPad2d((0, 1, 0, 1))
(_bn1): BatchNorm2d(240, eps=0.001, momentum=0.010000000000000009,
affine=True, track_running_stats=True)
(_se_reduce): Conv2dStaticSamePadding(
240, 10, kernel_size=(1, 1), stride=(1, 1)
(static_padding): Identity()
(_se_expand): Conv2dStaticSamePadding(
10, 240, kernel_size=(1, 1), stride=(1, 1)
(static_padding): Identity()
(_project_conv): Conv2dStaticSamePadding(
240, 80, kernel_size=(1, 1), stride=(1, 1), bias=False

(static_padding): Identity()
(_bn2): BatchNorm2d(80, eps=0.001, momentum=0.010000000000000009,
affine=True, track_running_stats=True)
(_swish): MemoryEfficientSwish()
(6-7): 2 x MBConvBlock(
(_expand_conv): Conv2dStaticSamePadding(
80, 480, kernel_size=(1, 1), stride=(1, 1), bias=False
(static_padding): Identity()
(_bn0): BatchNorm2d(480, eps=0.001, momentum=0.010000000000000009,
affine=True, track_running_stats=True)
(_depthwise_conv): Conv2dStaticSamePadding(
480, 480, kernel_size=(3, 3), stride=(1, 1), groups=480, bias=False
(static_padding): ZeroPad2d((1, 1, 1, 1))
(_bn1): BatchNorm2d(480, eps=0.001, momentum=0.010000000000000009,
affine=True, track_running_stats=True)
(_se_reduce): Conv2dStaticSamePadding(
480, 20, kernel_size=(1, 1), stride=(1, 1)
(static_padding): Identity()
(_se_expand): Conv2dStaticSamePadding(
20, 480, kernel_size=(1, 1), stride=(1, 1)
(static_padding): Identity()
(_project_conv): Conv2dStaticSamePadding(
480, 80, kernel_size=(1, 1), stride=(1, 1), bias=False
(static_padding): Identity()
(_bn2): BatchNorm2d(80, eps=0.001, momentum=0.010000000000000009,
affine=True, track_running_stats=True)
(_swish): MemoryEfficientSwish()
(8): MBConvBlock(
(_expand_conv): Conv2dStaticSamePadding(
80, 480, kernel_size=(1, 1), stride=(1, 1), bias=False
(static_padding): Identity()
(_bn0): BatchNorm2d(480, eps=0.001, momentum=0.010000000000000009,
affine=True, track_running_stats=True)
(_depthwise_conv): Conv2dStaticSamePadding(
480, 480, kernel_size=(5, 5), stride=[1, 1], groups=480, bias=False
(static_padding): ZeroPad2d((2, 2, 2, 2))
(_bn1): BatchNorm2d(480, eps=0.001, momentum=0.010000000000000009,

affine=True, track_running_stats=True)
(_se_reduce): Conv2dStaticSamePadding(
480, 20, kernel_size=(1, 1), stride=(1, 1)
(static_padding): Identity()
(_se_expand): Conv2dStaticSamePadding(
20, 480, kernel_size=(1, 1), stride=(1, 1)
(static_padding): Identity()
(_project_conv): Conv2dStaticSamePadding(
480, 112, kernel_size=(1, 1), stride=(1, 1), bias=False
(static_padding): Identity()
(_bn2): BatchNorm2d(112, eps=0.001, momentum=0.010000000000000009,
affine=True, track_running_stats=True)
(_swish): MemoryEfficientSwish()
(9-10): 2 x MBConvBlock(
(_expand_conv): Conv2dStaticSamePadding(
112, 672, kernel_size=(1, 1), stride=(1, 1), bias=False
(static_padding): Identity()
(_bn0): BatchNorm2d(672, eps=0.001, momentum=0.010000000000000009,
affine=True, track_running_stats=True)
(_depthwise_conv): Conv2dStaticSamePadding(
672, 672, kernel_size=(5, 5), stride=(1, 1), groups=672, bias=False
(static_padding): ZeroPad2d((2, 2, 2, 2))
(_bn1): BatchNorm2d(672, eps=0.001, momentum=0.010000000000000009,
affine=True, track_running_stats=True)
(_se_reduce): Conv2dStaticSamePadding(
672, 28, kernel_size=(1, 1), stride=(1, 1)
(static_padding): Identity()
(_se_expand): Conv2dStaticSamePadding(
28, 672, kernel_size=(1, 1), stride=(1, 1)
(static_padding): Identity()
(_project_conv): Conv2dStaticSamePadding(
672, 112, kernel_size=(1, 1), stride=(1, 1), bias=False
(static_padding): Identity()
(_bn2): BatchNorm2d(112, eps=0.001, momentum=0.010000000000000009,
affine=True, track_running_stats=True)
(_swish): MemoryEfficientSwish()
(11): MBConvBlock(

(_expand_conv): Conv2dStaticSamePadding(
112, 672, kernel_size=(1, 1), stride=(1, 1), bias=False
(static_padding): Identity()
(_bn0): BatchNorm2d(672, eps=0.001, momentum=0.010000000000000009,
affine=True, track_running_stats=True)
(_depthwise_conv): Conv2dStaticSamePadding(
672, 672, kernel_size=(5, 5), stride=[2, 2], groups=672, bias=False
(static_padding): ZeroPad2d((1, 2, 1, 2))
(_bn1): BatchNorm2d(672, eps=0.001, momentum=0.010000000000000009,
affine=True, track_running_stats=True)
(_se_reduce): Conv2dStaticSamePadding(
672, 28, kernel_size=(1, 1), stride=(1, 1)
(static_padding): Identity()
(_se_expand): Conv2dStaticSamePadding(
28, 672, kernel_size=(1, 1), stride=(1, 1)
(static_padding): Identity()
(_project_conv): Conv2dStaticSamePadding(
672, 192, kernel_size=(1, 1), stride=(1, 1), bias=False
(static_padding): Identity()
(_bn2): BatchNorm2d(192, eps=0.001, momentum=0.010000000000000009,
affine=True, track_running_stats=True)
(_swish): MemoryEfficientSwish()
(12-14): 3 x MBConvBlock(
(_expand_conv): Conv2dStaticSamePadding(
192, 1152, kernel_size=(1, 1), stride=(1, 1), bias=False
(static_padding): Identity()
(_bn0): BatchNorm2d(1152, eps=0.001, momentum=0.010000000000000009,
affine=True, track_running_stats=True)
(_depthwise_conv): Conv2dStaticSamePadding(
1152, 1152, kernel_size=(5, 5), stride=(1, 1), groups=1152, bias=False
(static_padding): ZeroPad2d((2, 2, 2, 2))
(_bn1): BatchNorm2d(1152, eps=0.001, momentum=0.010000000000000009,
affine=True, track_running_stats=True)
(_se_reduce): Conv2dStaticSamePadding(
1152, 48, kernel_size=(1, 1), stride=(1, 1)
(static_padding): Identity()
(_se_expand): Conv2dStaticSamePadding(
48, 1152, kernel_size=(1, 1), stride=(1, 1)

(static_padding): Identity()
(_project_conv): Conv2dStaticSamePadding(
1152, 192, kernel_size=(1, 1), stride=(1, 1), bias=False
(static_padding): Identity()
(_bn2): BatchNorm2d(192, eps=0.001, momentum=0.010000000000000009,
affine=True, track_running_stats=True)
(_swish): MemoryEfficientSwish()
(15): MBConvBlock(
(_expand_conv): Conv2dStaticSamePadding(
192, 1152, kernel_size=(1, 1), stride=(1, 1), bias=False
(static_padding): Identity()
(_bn0): BatchNorm2d(1152, eps=0.001, momentum=0.010000000000000009,
affine=True, track_running_stats=True)
(_depthwise_conv): Conv2dStaticSamePadding(
1152, 1152, kernel_size=(3, 3), stride=[1, 1], groups=1152, bias=False
(static_padding): ZeroPad2d((1, 1, 1, 1))
(_bn1): BatchNorm2d(1152, eps=0.001, momentum=0.010000000000000009,
affine=True, track_running_stats=True)
(_se_reduce): Conv2dStaticSamePadding(
1152, 48, kernel_size=(1, 1), stride=(1, 1)
(static_padding): Identity()
(_se_expand): Conv2dStaticSamePadding(
48, 1152, kernel_size=(1, 1), stride=(1, 1)
(static_padding): Identity()
(_project_conv): Conv2dStaticSamePadding(
1152, 320, kernel_size=(1, 1), stride=(1, 1), bias=False
(static_padding): Identity()
(_bn2): BatchNorm2d(320, eps=0.001, momentum=0.010000000000000009,
affine=True, track_running_stats=True)
(_swish): MemoryEfficientSwish()
(_conv_head): Conv2dStaticSamePadding(
320, 1280, kernel_size=(1, 1), stride=(1, 1), bias=False
(static_padding): Identity()
(_bn1): BatchNorm2d(1280, eps=0.001, momentum=0.010000000000000009,
affine=True, track_running_stats=True)
(_avg_pooling): AdaptiveAvgPool2d(output_size=1)

(_dropout): Dropout(p=0.2, inplace=False)
(_fc): Linear(in_features=1280, out_features=200, bias=True)
(_swish): MemoryEfficientSwish()

0.3.2 3. Define the Training Loop

[ ]:

[70]: # Training loop

def train_efficient(model, train_loader, criterion, optimizer, num_epochs=10):
    """Train ``model`` and record the per-epoch training loss and accuracy.

    Args:
        model: Network to optimise; called as ``model(inputs)``.
        train_loader: Iterable yielding ``(inputs, labels)`` batches.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
        optimizer: Optimizer that updates ``model``'s parameters.
        num_epochs: Number of full passes over ``train_loader``.

    Returns:
        A ``(train_losses, train_accuracy)`` tuple of lists with one entry
        per epoch: the mean batch loss and the accuracy in percent.
    """
    train_losses = []
    train_accuracy = []

    # Send each batch to the device the model already lives on, so the
    # function does not depend on a notebook-level ``device`` variable.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

    for epoch in range(num_epochs):
        model.train()  # enable dropout / batch-norm updates for training

        running_loss = 0.0
        correct = 0
        total = 0
        num_batches = 0

        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)

            # Standard optimisation step: clear stale gradients, forward,
            # backward, then update the weights.
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            num_batches += 1

        # Guard the divisions so an empty loader reports zeros instead of
        # raising ZeroDivisionError.
        epoch_loss = running_loss / num_batches if num_batches else 0.0
        epoch_accuracy = 100. * correct / total if total else 0.0

        # Keep the history so the curves can be plotted afterwards.
        train_losses.append(epoch_loss)
        train_accuracy.append(epoch_accuracy)

        print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}, Accuracy: {epoch_accuracy:.2f}%")

    return train_losses, train_accuracy

# Train the model and keep the per-epoch history for plotting
train_losses_e, train_accuracy_e = train_efficient(
    efficientnet, train_loader, criterion, optimizer
)

Epoch [1/10], Loss: 3.2045, Accuracy: 27.79%

Epoch [2/10], Loss: 1.1995, Accuracy: 65.97%
Epoch [3/10], Loss: 0.6673, Accuracy: 80.43%
Epoch [4/10], Loss: 0.3889, Accuracy: 88.67%
Epoch [5/10], Loss: 0.2651, Accuracy: 91.98%
Epoch [6/10], Loss: 0.2130, Accuracy: 93.76%
Epoch [7/10], Loss: 0.1359, Accuracy: 96.25%
Epoch [8/10], Loss: 0.1243, Accuracy: 96.65%
Epoch [9/10], Loss: 0.1648, Accuracy: 95.36%
Epoch [10/10], Loss: 0.1929, Accuracy: 94.26%

0.3.3 4. Plot the training results

[71]: # Plot training loss and accuracy curves

plt.figure(figsize=(10, 5))

# Left panel: mean training loss per epoch
plt.subplot(1, 2, 1)
plt.plot(train_losses_e, label='Training Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.title('Training Loss Curve')
plt.legend()  # without this the `label=` passed to plot() is never shown

# Right panel: training accuracy per epoch
plt.subplot(1, 2, 2)
plt.plot(train_accuracy_e, label='Training Accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy (%)')
plt.title('Training Accuracy Curve')
plt.legend()

plt.tight_layout()  # keep the two panels' labels from overlapping
plt.show()

0.3.4 5. Evaluate the Model
[72]: # Evaluate the model on the test set
def evaluate_efficient(model, test_loader):
    """Compute and print the top-1 accuracy of ``model`` on ``test_loader``.

    Args:
        model: Network to evaluate; called as ``model(inputs)``.
        test_loader: Iterable yielding ``(inputs, labels)`` batches.

    Returns:
        The accuracy in percent (``0.0`` when the loader yields no samples).
    """
    correct = 0
    total = 0

    # Send each batch to the device the model already lives on.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

    # Evaluate in inference mode (dropout off, batch-norm uses running
    # statistics) and restore the caller's mode afterwards.
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            for inputs, labels in test_loader:
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = model(inputs)
                _, predicted = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
    finally:
        model.train(was_training)

    # Avoid ZeroDivisionError on an empty loader.
    test_accuracy = 100. * correct / total if total else 0.0

    print(f"Test Accuracy: {test_accuracy:.2f}%")
    return test_accuracy

# Evaluate the model
evaluate_efficient(efficientnet, test_loader)

# Save the final model checkpoint
# NOTE(review): the original call was garbled in extraction; saving the
# state_dict is assumed here - confirm against the original notebook.
torch.save(efficientnet.state_dict(), 'efficientnet_checkpoint.pth')

Test Accuracy: 58.27%

[ ]:

[73]: model = efficientnet

0.4 Resnet
[ ]:

[82]: import torch

import torch.nn as nn
import torchvision.models as models
import torchvision.transforms as transforms
import torch.optim as optim
import torchvision.datasets as datasets
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt

0.4.1 1 and 2. Initialise the Training Model and Load Data

Here, we are using the good old ResNet model (ResNet-18, pretrained on ImageNet).

[84]: # Define data transforms

transform = transforms.Compose([
    transforms.Resize((224, 224)),  # fixed size so images can be batched
    transforms.ToTensor(),          # Normalize operates on tensors, not PIL images
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),  # ImageNet normalization
])

# Load dataset
train_dataset = datasets.ImageFolder('train', transform=transform)
test_dataset = datasets.ImageFolder('test', transform=transform)

# Create data loaders
train_loader_resnet = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader_resnet = DataLoader(test_dataset, batch_size=64, shuffle=False)

# Initialize the pretrained ResNet model.
# `weights=` replaces the deprecated `pretrained=True`; IMAGENET1K_V1 is the
# weight set that `pretrained=True` resolved to.
model_resnet = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)

# Modify the fully connected layer to match the number of classes in your
# dataset
num_classes = len(train_dataset.classes)
model_resnet.fc = nn.Linear(model_resnet.fc.in_features, num_classes)

# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model_resnet.parameters(), lr=0.001)

0.4.2 3. Define the Training Loop and also 5. Evaluating on the Go
[85]: # Training function
def train_resnet(model, train_loader, criterion, optimizer, num_epochs=10,
                 test_loader=None):
    """Train ``model`` and evaluate it on a test set after every epoch.

    Args:
        model: Network to optimise; called as ``model(images)``.
        train_loader: Iterable yielding ``(images, labels)`` training batches.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
        optimizer: Optimizer that updates ``model``'s parameters.
        num_epochs: Number of full passes over ``train_loader``.
        test_loader: Iterable yielding ``(images, labels)`` evaluation
            batches. When omitted, the module-level ``test_loader`` variable
            is used, which is what this function read before the parameter
            existed.

    Returns:
        ``(train_losses, train_accuracies, test_losses, test_accuracies)``:
        four lists with one entry per epoch. Losses are mean batch losses;
        accuracies are fractions in ``[0, 1]``.

    Raises:
        NameError: If ``test_loader`` is not passed and no module-level
            ``test_loader`` exists.

    Note:
        The model is left in evaluation mode when the function returns.
    """
    if test_loader is None:
        # Backward compatibility with callers that rely on the notebook global.
        try:
            test_loader = globals()["test_loader"]
        except KeyError:
            raise NameError("name 'test_loader' is not defined") from None

    train_losses_resnet = []
    train_accuracies_resnet = []
    test_losses_resnet = []
    test_accuracies_resnet = []

    # Send each batch to the device the model already lives on (a no-op when
    # data and model are both on the CPU).
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

    for epoch in range(num_epochs):
        # ---- Training pass ----
        model.train()
        running_loss = 0.0
        correct = 0
        total = 0
        num_batches = 0
        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            num_batches += 1

        train_loss = running_loss / num_batches if num_batches else 0.0
        train_accuracy = correct / total if total else 0.0

        # ---- Evaluation on test set ----
        model.eval()  # dropout off, batch-norm uses running statistics
        test_loss = 0.0
        correct = 0
        total = 0
        num_batches = 0
        with torch.no_grad():
            for images, labels in test_loader:
                images, labels = images.to(device), labels.to(device)
                outputs = model(images)
                loss = criterion(outputs, labels)
                test_loss += loss.item()
                _, predicted = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
                num_batches += 1

        test_loss = test_loss / num_batches if num_batches else 0.0
        test_accuracy = correct / total if total else 0.0

        # Record the history that the plotting cell consumes.
        train_losses_resnet.append(train_loss)
        train_accuracies_resnet.append(train_accuracy)
        test_losses_resnet.append(test_loss)
        test_accuracies_resnet.append(test_accuracy)

        print(f'Epoch {epoch+1}/{num_epochs}, '
              f'Train Loss: {train_loss:.4f}, Train Acc: {train_accuracy:.4f}, '
              f'Test Loss: {test_loss:.4f}, Test Acc: {test_accuracy:.4f}')

    return (train_losses_resnet, train_accuracies_resnet,
            test_losses_resnet, test_accuracies_resnet)


[86]: # Train the model

(train_losses_resnet, train_accuracies_resnet,
 test_losses_resnet, test_accuracies_resnet) = train_resnet(
    model_resnet, train_loader_resnet, criterion, optimizer)

# Save the trained model checkpoint
# NOTE(review): the original call was garbled in extraction; saving the
# state_dict is assumed here - confirm against the original notebook.
torch.save(model_resnet.state_dict(), 'pretrained_resnet_checkpoint.pth')

Epoch 1/10, Train Loss: 3.8045, Train Acc: 0.1775, Test Loss: 2.9566, Test Acc:
Epoch 2/10, Train Loss: 1.9243, Train Acc: 0.4973, Test Loss: 2.4500, Test Acc:
Epoch 3/10, Train Loss: 1.1237, Train Acc: 0.6862, Test Loss: 2.0689, Test Acc:
Epoch 4/10, Train Loss: 0.5782, Train Acc: 0.8465, Test Loss: 1.8525, Test Acc:
Epoch 5/10, Train Loss: 0.2601, Train Acc: 0.9451, Test Loss: 1.6606, Test Acc:
Epoch 6/10, Train Loss: 0.0960, Train Acc: 0.9823, Test Loss: 1.5652, Test Acc:
Epoch 7/10, Train Loss: 0.0349, Train Acc: 0.9970, Test Loss: 1.3196, Test Acc:
Epoch 8/10, Train Loss: 0.0117, Train Acc: 0.9995, Test Loss: 1.2459, Test Acc:
Epoch 9/10, Train Loss: 0.0047, Train Acc: 0.9998, Test Loss: 1.1885, Test Acc:
Epoch 10/10, Train Loss: 0.0029, Train Acc: 1.0000, Test Loss: 1.1718, Test Acc:

0.4.3 4. Plot the training results

[87]: # Plotting training curves
# Loss curves (train vs. test)
plt.figure(figsize=(10, 5))
plt.plot(train_losses_resnet, label='Train Loss')
plt.plot(test_losses_resnet, label='Test Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.title('Training and Test Loss')
plt.legend()  # needed to tell the two lines apart
plt.show()

# Accuracy curves (train vs. test)
plt.figure(figsize=(10, 5))
plt.plot(train_accuracies_resnet, label='Train Accuracy')
plt.plot(test_accuracies_resnet, label='Test Accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.title('Training and Test Accuracy')
plt.legend()
plt.show()

[121]: import torch
import contextlib
import io
import re

import torchvision.models as models
from torchsummary import summary
from graphviz import Digraph

# Capture the text that summary() prints. Wrapping the call in str()
# stringifies its return value, not the printed table (torchsummary's
# summary() is expected to return None - see its documentation).
_summary_buffer = io.StringIO()
with contextlib.redirect_stdout(_summary_buffer):
    summary(efficientnet, (3, 224, 224))
summary_str = _summary_buffer.getvalue()

# Print the summary of the model (once)
print(summary_str)

# One table row looks like: "<LayerType-N>  [-1, C, H, W]  <param count>".
# Header, separator and "Total params" lines do not match and are skipped.
_SUMMARY_ROW = re.compile(r'^\s*(\S+)\s+(\[.*\])\s+([\d,]+)\s*$')


# Function to convert the PyTorch summary to a graph
def torchsummary_to_graph(summary_str):
    """Build a linear Graphviz graph from the text of a torchsummary table.

    Args:
        summary_str: The table text as printed by ``torchsummary.summary``.

    Returns:
        A ``graphviz.Digraph`` with one node per layer row (labelled with
        the layer name, output shape and parameter count) and an edge from
        each layer to the next row. The graph is empty when no row matches.
    """
    # Initialize graph
    dot = Digraph()

    # Add nodes to the graph, remembering their order for the edges
    layer_names = []
    for line in summary_str.split('\n'):
        row = _SUMMARY_ROW.match(line)
        if row is None:
            continue  # header / separator / totals line
        layer_name, output_shape, params = row.groups()

        # Pass the shape on the node itself; dot.attr('node', ...) would
        # only affect nodes created after it.
        node_shape = 'rectangle' if 'Conv' in layer_name else 'ellipse'
        dot.node(layer_name,
                 label=f"{layer_name}\n{output_shape}\nParams: {params}",
                 shape=node_shape)
        layer_names.append(layer_name)

    # Chain the layers in the order the table lists them
    for src, dst in zip(layer_names, layer_names[1:]):
        dot.edge(src, dst)

    return dot

# Create the graph
import graphviz  # for the ExecutableNotFound exception type

graph = torchsummary_to_graph(summary_str)

# Save the graph as an image file
try:
    graph.render('model_resnet_architecture', format='png', cleanup=True)
except graphviz.ExecutableNotFound:
    # Rendering needs the Graphviz system binaries (the `dot` executable),
    # which the Python package does not install. Keep the DOT source so the
    # image can be produced later instead of aborting the cell.
    graph.save('model_resnet_architecture.gv')
    print("Graphviz executables not found on PATH; "
          "saved DOT source to 'model_resnet_architecture.gv' instead.")

Layer (type) Output Shape Param #
ZeroPad2d-1 [-1, 3, 225, 225] 0
Conv2dStaticSamePadding-2 [-1, 32, 112, 112] 864
BatchNorm2d-3 [-1, 32, 112, 112] 64
MemoryEfficientSwish-4 [-1, 32, 112, 112] 0
ZeroPad2d-5 [-1, 32, 114, 114] 0
Conv2dStaticSamePadding-6 [-1, 32, 112, 112] 288
BatchNorm2d-7 [-1, 32, 112, 112] 64
MemoryEfficientSwish-8 [-1, 32, 112, 112] 0
Identity-9 [-1, 32, 1, 1] 0
Conv2dStaticSamePadding-10 [-1, 8, 1, 1] 264
MemoryEfficientSwish-11 [-1, 8, 1, 1] 0
Identity-12 [-1, 8, 1, 1] 0
Conv2dStaticSamePadding-13 [-1, 32, 1, 1] 288
Identity-14 [-1, 32, 112, 112] 0
Conv2dStaticSamePadding-15 [-1, 16, 112, 112] 512
BatchNorm2d-16 [-1, 16, 112, 112] 32
MBConvBlock-17 [-1, 16, 112, 112] 0
Identity-18 [-1, 16, 112, 112] 0
Conv2dStaticSamePadding-19 [-1, 96, 112, 112] 1,536
BatchNorm2d-20 [-1, 96, 112, 112] 192
MemoryEfficientSwish-21 [-1, 96, 112, 112] 0
ZeroPad2d-22 [-1, 96, 113, 113] 0
Conv2dStaticSamePadding-23 [-1, 96, 56, 56] 864
BatchNorm2d-24 [-1, 96, 56, 56] 192
MemoryEfficientSwish-25 [-1, 96, 56, 56] 0
Identity-26 [-1, 96, 1, 1] 0
Conv2dStaticSamePadding-27 [-1, 4, 1, 1] 388
MemoryEfficientSwish-28 [-1, 4, 1, 1] 0

Identity-29 [-1, 4, 1, 1] 0
Conv2dStaticSamePadding-30 [-1, 96, 1, 1] 480
Identity-31 [-1, 96, 56, 56] 0
Conv2dStaticSamePadding-32 [-1, 24, 56, 56] 2,304
BatchNorm2d-33 [-1, 24, 56, 56] 48
MBConvBlock-34 [-1, 24, 56, 56] 0
Identity-35 [-1, 24, 56, 56] 0
Conv2dStaticSamePadding-36 [-1, 144, 56, 56] 3,456
BatchNorm2d-37 [-1, 144, 56, 56] 288
MemoryEfficientSwish-38 [-1, 144, 56, 56] 0
ZeroPad2d-39 [-1, 144, 58, 58] 0
Conv2dStaticSamePadding-40 [-1, 144, 56, 56] 1,296
BatchNorm2d-41 [-1, 144, 56, 56] 288
MemoryEfficientSwish-42 [-1, 144, 56, 56] 0
Identity-43 [-1, 144, 1, 1] 0
Conv2dStaticSamePadding-44 [-1, 6, 1, 1] 870
MemoryEfficientSwish-45 [-1, 6, 1, 1] 0
Identity-46 [-1, 6, 1, 1] 0
Conv2dStaticSamePadding-47 [-1, 144, 1, 1] 1,008
Identity-48 [-1, 144, 56, 56] 0
Conv2dStaticSamePadding-49 [-1, 24, 56, 56] 3,456
BatchNorm2d-50 [-1, 24, 56, 56] 48
MBConvBlock-51 [-1, 24, 56, 56] 0
Identity-52 [-1, 24, 56, 56] 0
Conv2dStaticSamePadding-53 [-1, 144, 56, 56] 3,456
BatchNorm2d-54 [-1, 144, 56, 56] 288
MemoryEfficientSwish-55 [-1, 144, 56, 56] 0
ZeroPad2d-56 [-1, 144, 59, 59] 0
Conv2dStaticSamePadding-57 [-1, 144, 28, 28] 3,600
BatchNorm2d-58 [-1, 144, 28, 28] 288
MemoryEfficientSwish-59 [-1, 144, 28, 28] 0
Identity-60 [-1, 144, 1, 1] 0
Conv2dStaticSamePadding-61 [-1, 6, 1, 1] 870
MemoryEfficientSwish-62 [-1, 6, 1, 1] 0
Identity-63 [-1, 6, 1, 1] 0
Conv2dStaticSamePadding-64 [-1, 144, 1, 1] 1,008
Identity-65 [-1, 144, 28, 28] 0
Conv2dStaticSamePadding-66 [-1, 40, 28, 28] 5,760
BatchNorm2d-67 [-1, 40, 28, 28] 80
MBConvBlock-68 [-1, 40, 28, 28] 0
Identity-69 [-1, 40, 28, 28] 0
Conv2dStaticSamePadding-70 [-1, 240, 28, 28] 9,600
BatchNorm2d-71 [-1, 240, 28, 28] 480
MemoryEfficientSwish-72 [-1, 240, 28, 28] 0
ZeroPad2d-73 [-1, 240, 32, 32] 0
Conv2dStaticSamePadding-74 [-1, 240, 28, 28] 6,000
BatchNorm2d-75 [-1, 240, 28, 28] 480
MemoryEfficientSwish-76 [-1, 240, 28, 28] 0

Identity-77 [-1, 240, 1, 1] 0
Conv2dStaticSamePadding-78 [-1, 10, 1, 1] 2,410
MemoryEfficientSwish-79 [-1, 10, 1, 1] 0
Identity-80 [-1, 10, 1, 1] 0
Conv2dStaticSamePadding-81 [-1, 240, 1, 1] 2,640
Identity-82 [-1, 240, 28, 28] 0
Conv2dStaticSamePadding-83 [-1, 40, 28, 28] 9,600
BatchNorm2d-84 [-1, 40, 28, 28] 80
MBConvBlock-85 [-1, 40, 28, 28] 0
Identity-86 [-1, 40, 28, 28] 0
Conv2dStaticSamePadding-87 [-1, 240, 28, 28] 9,600
BatchNorm2d-88 [-1, 240, 28, 28] 480
MemoryEfficientSwish-89 [-1, 240, 28, 28] 0
ZeroPad2d-90 [-1, 240, 29, 29] 0
Conv2dStaticSamePadding-91 [-1, 240, 14, 14] 2,160
BatchNorm2d-92 [-1, 240, 14, 14] 480
MemoryEfficientSwish-93 [-1, 240, 14, 14] 0
Identity-94 [-1, 240, 1, 1] 0
Conv2dStaticSamePadding-95 [-1, 10, 1, 1] 2,410
MemoryEfficientSwish-96 [-1, 10, 1, 1] 0
Identity-97 [-1, 10, 1, 1] 0
Conv2dStaticSamePadding-98 [-1, 240, 1, 1] 2,640
Identity-99 [-1, 240, 14, 14] 0
Conv2dStaticSamePadding-100 [-1, 80, 14, 14] 19,200
BatchNorm2d-101 [-1, 80, 14, 14] 160
MBConvBlock-102 [-1, 80, 14, 14] 0
Identity-103 [-1, 80, 14, 14] 0
Conv2dStaticSamePadding-104 [-1, 480, 14, 14] 38,400
BatchNorm2d-105 [-1, 480, 14, 14] 960
MemoryEfficientSwish-106 [-1, 480, 14, 14] 0
ZeroPad2d-107 [-1, 480, 16, 16] 0
Conv2dStaticSamePadding-108 [-1, 480, 14, 14] 4,320
BatchNorm2d-109 [-1, 480, 14, 14] 960
MemoryEfficientSwish-110 [-1, 480, 14, 14] 0
Identity-111 [-1, 480, 1, 1] 0
Conv2dStaticSamePadding-112 [-1, 20, 1, 1] 9,620
MemoryEfficientSwish-113 [-1, 20, 1, 1] 0
Identity-114 [-1, 20, 1, 1] 0
Conv2dStaticSamePadding-115 [-1, 480, 1, 1] 10,080
Identity-116 [-1, 480, 14, 14] 0
Conv2dStaticSamePadding-117 [-1, 80, 14, 14] 38,400
BatchNorm2d-118 [-1, 80, 14, 14] 160
MBConvBlock-119 [-1, 80, 14, 14] 0
Identity-120 [-1, 80, 14, 14] 0
Conv2dStaticSamePadding-121 [-1, 480, 14, 14] 38,400
BatchNorm2d-122 [-1, 480, 14, 14] 960
MemoryEfficientSwish-123 [-1, 480, 14, 14] 0
ZeroPad2d-124 [-1, 480, 16, 16] 0

Conv2dStaticSamePadding-125 [-1, 480, 14, 14] 4,320
BatchNorm2d-126 [-1, 480, 14, 14] 960
MemoryEfficientSwish-127 [-1, 480, 14, 14] 0
Identity-128 [-1, 480, 1, 1] 0
Conv2dStaticSamePadding-129 [-1, 20, 1, 1] 9,620
MemoryEfficientSwish-130 [-1, 20, 1, 1] 0
Identity-131 [-1, 20, 1, 1] 0
Conv2dStaticSamePadding-132 [-1, 480, 1, 1] 10,080
Identity-133 [-1, 480, 14, 14] 0
Conv2dStaticSamePadding-134 [-1, 80, 14, 14] 38,400
BatchNorm2d-135 [-1, 80, 14, 14] 160
MBConvBlock-136 [-1, 80, 14, 14] 0
Identity-137 [-1, 80, 14, 14] 0
Conv2dStaticSamePadding-138 [-1, 480, 14, 14] 38,400
BatchNorm2d-139 [-1, 480, 14, 14] 960
MemoryEfficientSwish-140 [-1, 480, 14, 14] 0
ZeroPad2d-141 [-1, 480, 18, 18] 0
Conv2dStaticSamePadding-142 [-1, 480, 14, 14] 12,000
BatchNorm2d-143 [-1, 480, 14, 14] 960
MemoryEfficientSwish-144 [-1, 480, 14, 14] 0
Identity-145 [-1, 480, 1, 1] 0
Conv2dStaticSamePadding-146 [-1, 20, 1, 1] 9,620
MemoryEfficientSwish-147 [-1, 20, 1, 1] 0
Identity-148 [-1, 20, 1, 1] 0
Conv2dStaticSamePadding-149 [-1, 480, 1, 1] 10,080
Identity-150 [-1, 480, 14, 14] 0
Conv2dStaticSamePadding-151 [-1, 112, 14, 14] 53,760
BatchNorm2d-152 [-1, 112, 14, 14] 224
MBConvBlock-153 [-1, 112, 14, 14] 0
Identity-154 [-1, 112, 14, 14] 0
Conv2dStaticSamePadding-155 [-1, 672, 14, 14] 75,264
BatchNorm2d-156 [-1, 672, 14, 14] 1,344
MemoryEfficientSwish-157 [-1, 672, 14, 14] 0
ZeroPad2d-158 [-1, 672, 18, 18] 0
Conv2dStaticSamePadding-159 [-1, 672, 14, 14] 16,800
BatchNorm2d-160 [-1, 672, 14, 14] 1,344
MemoryEfficientSwish-161 [-1, 672, 14, 14] 0
Identity-162 [-1, 672, 1, 1] 0
Conv2dStaticSamePadding-163 [-1, 28, 1, 1] 18,844
MemoryEfficientSwish-164 [-1, 28, 1, 1] 0
Identity-165 [-1, 28, 1, 1] 0
Conv2dStaticSamePadding-166 [-1, 672, 1, 1] 19,488
Identity-167 [-1, 672, 14, 14] 0
Conv2dStaticSamePadding-168 [-1, 112, 14, 14] 75,264
BatchNorm2d-169 [-1, 112, 14, 14] 224
MBConvBlock-170 [-1, 112, 14, 14] 0
Identity-171 [-1, 112, 14, 14] 0
Conv2dStaticSamePadding-172 [-1, 672, 14, 14] 75,264

BatchNorm2d-173 [-1, 672, 14, 14] 1,344
MemoryEfficientSwish-174 [-1, 672, 14, 14] 0
ZeroPad2d-175 [-1, 672, 18, 18] 0
Conv2dStaticSamePadding-176 [-1, 672, 14, 14] 16,800
BatchNorm2d-177 [-1, 672, 14, 14] 1,344
MemoryEfficientSwish-178 [-1, 672, 14, 14] 0
Identity-179 [-1, 672, 1, 1] 0
Conv2dStaticSamePadding-180 [-1, 28, 1, 1] 18,844
MemoryEfficientSwish-181 [-1, 28, 1, 1] 0
Identity-182 [-1, 28, 1, 1] 0
Conv2dStaticSamePadding-183 [-1, 672, 1, 1] 19,488
Identity-184 [-1, 672, 14, 14] 0
Conv2dStaticSamePadding-185 [-1, 112, 14, 14] 75,264
BatchNorm2d-186 [-1, 112, 14, 14] 224
MBConvBlock-187 [-1, 112, 14, 14] 0
Identity-188 [-1, 112, 14, 14] 0
Conv2dStaticSamePadding-189 [-1, 672, 14, 14] 75,264
BatchNorm2d-190 [-1, 672, 14, 14] 1,344
MemoryEfficientSwish-191 [-1, 672, 14, 14] 0
ZeroPad2d-192 [-1, 672, 17, 17] 0
Conv2dStaticSamePadding-193 [-1, 672, 7, 7] 16,800
BatchNorm2d-194 [-1, 672, 7, 7] 1,344
MemoryEfficientSwish-195 [-1, 672, 7, 7] 0
Identity-196 [-1, 672, 1, 1] 0
Conv2dStaticSamePadding-197 [-1, 28, 1, 1] 18,844
MemoryEfficientSwish-198 [-1, 28, 1, 1] 0
Identity-199 [-1, 28, 1, 1] 0
Conv2dStaticSamePadding-200 [-1, 672, 1, 1] 19,488
Identity-201 [-1, 672, 7, 7] 0
Conv2dStaticSamePadding-202 [-1, 192, 7, 7] 129,024
BatchNorm2d-203 [-1, 192, 7, 7] 384
MBConvBlock-204 [-1, 192, 7, 7] 0
Identity-205 [-1, 192, 7, 7] 0
Conv2dStaticSamePadding-206 [-1, 1152, 7, 7] 221,184
BatchNorm2d-207 [-1, 1152, 7, 7] 2,304
MemoryEfficientSwish-208 [-1, 1152, 7, 7] 0
ZeroPad2d-209 [-1, 1152, 11, 11] 0
Conv2dStaticSamePadding-210 [-1, 1152, 7, 7] 28,800
BatchNorm2d-211 [-1, 1152, 7, 7] 2,304
MemoryEfficientSwish-212 [-1, 1152, 7, 7] 0
Identity-213 [-1, 1152, 1, 1] 0
Conv2dStaticSamePadding-214 [-1, 48, 1, 1] 55,344
MemoryEfficientSwish-215 [-1, 48, 1, 1] 0
Identity-216 [-1, 48, 1, 1] 0
Conv2dStaticSamePadding-217 [-1, 1152, 1, 1] 56,448
Identity-218 [-1, 1152, 7, 7] 0
Conv2dStaticSamePadding-219 [-1, 192, 7, 7] 221,184
BatchNorm2d-220 [-1, 192, 7, 7] 384

MBConvBlock-221 [-1, 192, 7, 7] 0
Identity-222 [-1, 192, 7, 7] 0
Conv2dStaticSamePadding-223 [-1, 1152, 7, 7] 221,184
BatchNorm2d-224 [-1, 1152, 7, 7] 2,304
MemoryEfficientSwish-225 [-1, 1152, 7, 7] 0
ZeroPad2d-226 [-1, 1152, 11, 11] 0
Conv2dStaticSamePadding-227 [-1, 1152, 7, 7] 28,800
BatchNorm2d-228 [-1, 1152, 7, 7] 2,304
MemoryEfficientSwish-229 [-1, 1152, 7, 7] 0
Identity-230 [-1, 1152, 1, 1] 0
Conv2dStaticSamePadding-231 [-1, 48, 1, 1] 55,344
MemoryEfficientSwish-232 [-1, 48, 1, 1] 0
Identity-233 [-1, 48, 1, 1] 0
Conv2dStaticSamePadding-234 [-1, 1152, 1, 1] 56,448
Identity-235 [-1, 1152, 7, 7] 0
Conv2dStaticSamePadding-236 [-1, 192, 7, 7] 221,184
BatchNorm2d-237 [-1, 192, 7, 7] 384
MBConvBlock-238 [-1, 192, 7, 7] 0
Identity-239 [-1, 192, 7, 7] 0
Conv2dStaticSamePadding-240 [-1, 1152, 7, 7] 221,184
BatchNorm2d-241 [-1, 1152, 7, 7] 2,304
MemoryEfficientSwish-242 [-1, 1152, 7, 7] 0
ZeroPad2d-243 [-1, 1152, 11, 11] 0
Conv2dStaticSamePadding-244 [-1, 1152, 7, 7] 28,800
BatchNorm2d-245 [-1, 1152, 7, 7] 2,304
MemoryEfficientSwish-246 [-1, 1152, 7, 7] 0
Identity-247 [-1, 1152, 1, 1] 0
Conv2dStaticSamePadding-248 [-1, 48, 1, 1] 55,344
MemoryEfficientSwish-249 [-1, 48, 1, 1] 0
Identity-250 [-1, 48, 1, 1] 0
Conv2dStaticSamePadding-251 [-1, 1152, 1, 1] 56,448
Identity-252 [-1, 1152, 7, 7] 0
Conv2dStaticSamePadding-253 [-1, 192, 7, 7] 221,184
BatchNorm2d-254 [-1, 192, 7, 7] 384
MBConvBlock-255 [-1, 192, 7, 7] 0
Identity-256 [-1, 192, 7, 7] 0
Conv2dStaticSamePadding-257 [-1, 1152, 7, 7] 221,184
BatchNorm2d-258 [-1, 1152, 7, 7] 2,304
MemoryEfficientSwish-259 [-1, 1152, 7, 7] 0
ZeroPad2d-260 [-1, 1152, 9, 9] 0
Conv2dStaticSamePadding-261 [-1, 1152, 7, 7] 10,368
BatchNorm2d-262 [-1, 1152, 7, 7] 2,304
MemoryEfficientSwish-263 [-1, 1152, 7, 7] 0
Identity-264 [-1, 1152, 1, 1] 0
Conv2dStaticSamePadding-265 [-1, 48, 1, 1] 55,344
MemoryEfficientSwish-266 [-1, 48, 1, 1] 0
Identity-267 [-1, 48, 1, 1] 0
Conv2dStaticSamePadding-268 [-1, 1152, 1, 1] 56,448

Identity-269 [-1, 1152, 7, 7] 0
Conv2dStaticSamePadding-270 [-1, 320, 7, 7] 368,640
BatchNorm2d-271 [-1, 320, 7, 7] 640
MBConvBlock-272 [-1, 320, 7, 7] 0
Identity-273 [-1, 320, 7, 7] 0
Conv2dStaticSamePadding-274 [-1, 1280, 7, 7] 409,600
BatchNorm2d-275 [-1, 1280, 7, 7] 2,560
MemoryEfficientSwish-276 [-1, 1280, 7, 7] 0
AdaptiveAvgPool2d-277 [-1, 1280, 1, 1] 0
Dropout-278 [-1, 1280] 0
Linear-279 [-1, 200] 256,200
Total params: 4,263,748
Trainable params: 4,263,748
Non-trainable params: 0
Input size (MB): 0.57
Forward/backward pass size (MB): 212.79
Params size (MB): 16.26
Estimated Total Size (MB): 229.63
Layer (type) Output Shape Param #
ZeroPad2d-1 [-1, 3, 225, 225] 0
Conv2dStaticSamePadding-2 [-1, 32, 112, 112] 864
BatchNorm2d-3 [-1, 32, 112, 112] 64
MemoryEfficientSwish-4 [-1, 32, 112, 112] 0
ZeroPad2d-5 [-1, 32, 114, 114] 0
Conv2dStaticSamePadding-6 [-1, 32, 112, 112] 288
BatchNorm2d-7 [-1, 32, 112, 112] 64
MemoryEfficientSwish-8 [-1, 32, 112, 112] 0
Identity-9 [-1, 32, 1, 1] 0
Conv2dStaticSamePadding-10 [-1, 8, 1, 1] 264
MemoryEfficientSwish-11 [-1, 8, 1, 1] 0
Identity-12 [-1, 8, 1, 1] 0
Conv2dStaticSamePadding-13 [-1, 32, 1, 1] 288
Identity-14 [-1, 32, 112, 112] 0
Conv2dStaticSamePadding-15 [-1, 16, 112, 112] 512
BatchNorm2d-16 [-1, 16, 112, 112] 32
MBConvBlock-17 [-1, 16, 112, 112] 0
Identity-18 [-1, 16, 112, 112] 0
Conv2dStaticSamePadding-19 [-1, 96, 112, 112] 1,536
BatchNorm2d-20 [-1, 96, 112, 112] 192
MemoryEfficientSwish-21 [-1, 96, 112, 112] 0
ZeroPad2d-22 [-1, 96, 113, 113] 0
Conv2dStaticSamePadding-23 [-1, 96, 56, 56] 864
BatchNorm2d-24 [-1, 96, 56, 56] 192

MemoryEfficientSwish-25 [-1, 96, 56, 56] 0
Identity-26 [-1, 96, 1, 1] 0
Conv2dStaticSamePadding-27 [-1, 4, 1, 1] 388
MemoryEfficientSwish-28 [-1, 4, 1, 1] 0
Identity-29 [-1, 4, 1, 1] 0
Conv2dStaticSamePadding-30 [-1, 96, 1, 1] 480
Identity-31 [-1, 96, 56, 56] 0
Conv2dStaticSamePadding-32 [-1, 24, 56, 56] 2,304
BatchNorm2d-33 [-1, 24, 56, 56] 48
MBConvBlock-34 [-1, 24, 56, 56] 0
Identity-35 [-1, 24, 56, 56] 0
Conv2dStaticSamePadding-36 [-1, 144, 56, 56] 3,456
BatchNorm2d-37 [-1, 144, 56, 56] 288
MemoryEfficientSwish-38 [-1, 144, 56, 56] 0
ZeroPad2d-39 [-1, 144, 58, 58] 0
Conv2dStaticSamePadding-40 [-1, 144, 56, 56] 1,296
BatchNorm2d-41 [-1, 144, 56, 56] 288
MemoryEfficientSwish-42 [-1, 144, 56, 56] 0
Identity-43 [-1, 144, 1, 1] 0
Conv2dStaticSamePadding-44 [-1, 6, 1, 1] 870
MemoryEfficientSwish-45 [-1, 6, 1, 1] 0
Identity-46 [-1, 6, 1, 1] 0
Conv2dStaticSamePadding-47 [-1, 144, 1, 1] 1,008
Identity-48 [-1, 144, 56, 56] 0
Conv2dStaticSamePadding-49 [-1, 24, 56, 56] 3,456
BatchNorm2d-50 [-1, 24, 56, 56] 48
MBConvBlock-51 [-1, 24, 56, 56] 0
Identity-52 [-1, 24, 56, 56] 0
Conv2dStaticSamePadding-53 [-1, 144, 56, 56] 3,456
BatchNorm2d-54 [-1, 144, 56, 56] 288
MemoryEfficientSwish-55 [-1, 144, 56, 56] 0
ZeroPad2d-56 [-1, 144, 59, 59] 0
Conv2dStaticSamePadding-57 [-1, 144, 28, 28] 3,600
BatchNorm2d-58 [-1, 144, 28, 28] 288
MemoryEfficientSwish-59 [-1, 144, 28, 28] 0
Identity-60 [-1, 144, 1, 1] 0
Conv2dStaticSamePadding-61 [-1, 6, 1, 1] 870
MemoryEfficientSwish-62 [-1, 6, 1, 1] 0
Identity-63 [-1, 6, 1, 1] 0
Conv2dStaticSamePadding-64 [-1, 144, 1, 1] 1,008
Identity-65 [-1, 144, 28, 28] 0
Conv2dStaticSamePadding-66 [-1, 40, 28, 28] 5,760
BatchNorm2d-67 [-1, 40, 28, 28] 80
MBConvBlock-68 [-1, 40, 28, 28] 0
Identity-69 [-1, 40, 28, 28] 0
Conv2dStaticSamePadding-70 [-1, 240, 28, 28] 9,600
BatchNorm2d-71 [-1, 240, 28, 28] 480
MemoryEfficientSwish-72 [-1, 240, 28, 28] 0

ZeroPad2d-73 [-1, 240, 32, 32] 0
Conv2dStaticSamePadding-74 [-1, 240, 28, 28] 6,000
BatchNorm2d-75 [-1, 240, 28, 28] 480
MemoryEfficientSwish-76 [-1, 240, 28, 28] 0
Identity-77 [-1, 240, 1, 1] 0
Conv2dStaticSamePadding-78 [-1, 10, 1, 1] 2,410
MemoryEfficientSwish-79 [-1, 10, 1, 1] 0
Identity-80 [-1, 10, 1, 1] 0
Conv2dStaticSamePadding-81 [-1, 240, 1, 1] 2,640
Identity-82 [-1, 240, 28, 28] 0
Conv2dStaticSamePadding-83 [-1, 40, 28, 28] 9,600
BatchNorm2d-84 [-1, 40, 28, 28] 80
MBConvBlock-85 [-1, 40, 28, 28] 0
Identity-86 [-1, 40, 28, 28] 0
Conv2dStaticSamePadding-87 [-1, 240, 28, 28] 9,600
BatchNorm2d-88 [-1, 240, 28, 28] 480
MemoryEfficientSwish-89 [-1, 240, 28, 28] 0
ZeroPad2d-90 [-1, 240, 29, 29] 0
Conv2dStaticSamePadding-91 [-1, 240, 14, 14] 2,160
BatchNorm2d-92 [-1, 240, 14, 14] 480
MemoryEfficientSwish-93 [-1, 240, 14, 14] 0
Identity-94 [-1, 240, 1, 1] 0
Conv2dStaticSamePadding-95 [-1, 10, 1, 1] 2,410
MemoryEfficientSwish-96 [-1, 10, 1, 1] 0
Identity-97 [-1, 10, 1, 1] 0
Conv2dStaticSamePadding-98 [-1, 240, 1, 1] 2,640
Identity-99 [-1, 240, 14, 14] 0
Conv2dStaticSamePadding-100 [-1, 80, 14, 14] 19,200
BatchNorm2d-101 [-1, 80, 14, 14] 160
MBConvBlock-102 [-1, 80, 14, 14] 0
Identity-103 [-1, 80, 14, 14] 0
Conv2dStaticSamePadding-104 [-1, 480, 14, 14] 38,400
BatchNorm2d-105 [-1, 480, 14, 14] 960
MemoryEfficientSwish-106 [-1, 480, 14, 14] 0
ZeroPad2d-107 [-1, 480, 16, 16] 0
Conv2dStaticSamePadding-108 [-1, 480, 14, 14] 4,320
BatchNorm2d-109 [-1, 480, 14, 14] 960
MemoryEfficientSwish-110 [-1, 480, 14, 14] 0
Identity-111 [-1, 480, 1, 1] 0
Conv2dStaticSamePadding-112 [-1, 20, 1, 1] 9,620
MemoryEfficientSwish-113 [-1, 20, 1, 1] 0
Identity-114 [-1, 20, 1, 1] 0
Conv2dStaticSamePadding-115 [-1, 480, 1, 1] 10,080
Identity-116 [-1, 480, 14, 14] 0
Conv2dStaticSamePadding-117 [-1, 80, 14, 14] 38,400
BatchNorm2d-118 [-1, 80, 14, 14] 160
MBConvBlock-119 [-1, 80, 14, 14] 0
Identity-120 [-1, 80, 14, 14] 0

Conv2dStaticSamePadding-121 [-1, 480, 14, 14] 38,400
BatchNorm2d-122 [-1, 480, 14, 14] 960
MemoryEfficientSwish-123 [-1, 480, 14, 14] 0
ZeroPad2d-124 [-1, 480, 16, 16] 0
Conv2dStaticSamePadding-125 [-1, 480, 14, 14] 4,320
BatchNorm2d-126 [-1, 480, 14, 14] 960
MemoryEfficientSwish-127 [-1, 480, 14, 14] 0
Identity-128 [-1, 480, 1, 1] 0
Conv2dStaticSamePadding-129 [-1, 20, 1, 1] 9,620
MemoryEfficientSwish-130 [-1, 20, 1, 1] 0
Identity-131 [-1, 20, 1, 1] 0
Conv2dStaticSamePadding-132 [-1, 480, 1, 1] 10,080
Identity-133 [-1, 480, 14, 14] 0
Conv2dStaticSamePadding-134 [-1, 80, 14, 14] 38,400
BatchNorm2d-135 [-1, 80, 14, 14] 160
MBConvBlock-136 [-1, 80, 14, 14] 0
Identity-137 [-1, 80, 14, 14] 0
Conv2dStaticSamePadding-138 [-1, 480, 14, 14] 38,400
BatchNorm2d-139 [-1, 480, 14, 14] 960
MemoryEfficientSwish-140 [-1, 480, 14, 14] 0
ZeroPad2d-141 [-1, 480, 18, 18] 0
Conv2dStaticSamePadding-142 [-1, 480, 14, 14] 12,000
BatchNorm2d-143 [-1, 480, 14, 14] 960
MemoryEfficientSwish-144 [-1, 480, 14, 14] 0
Identity-145 [-1, 480, 1, 1] 0
Conv2dStaticSamePadding-146 [-1, 20, 1, 1] 9,620
MemoryEfficientSwish-147 [-1, 20, 1, 1] 0
Identity-148 [-1, 20, 1, 1] 0
Conv2dStaticSamePadding-149 [-1, 480, 1, 1] 10,080
Identity-150 [-1, 480, 14, 14] 0
Conv2dStaticSamePadding-151 [-1, 112, 14, 14] 53,760
BatchNorm2d-152 [-1, 112, 14, 14] 224
MBConvBlock-153 [-1, 112, 14, 14] 0
Identity-154 [-1, 112, 14, 14] 0
Conv2dStaticSamePadding-155 [-1, 672, 14, 14] 75,264
BatchNorm2d-156 [-1, 672, 14, 14] 1,344
MemoryEfficientSwish-157 [-1, 672, 14, 14] 0
ZeroPad2d-158 [-1, 672, 18, 18] 0
Conv2dStaticSamePadding-159 [-1, 672, 14, 14] 16,800
BatchNorm2d-160 [-1, 672, 14, 14] 1,344
MemoryEfficientSwish-161 [-1, 672, 14, 14] 0
Identity-162 [-1, 672, 1, 1] 0
Conv2dStaticSamePadding-163 [-1, 28, 1, 1] 18,844
MemoryEfficientSwish-164 [-1, 28, 1, 1] 0
Identity-165 [-1, 28, 1, 1] 0
Conv2dStaticSamePadding-166 [-1, 672, 1, 1] 19,488
Identity-167 [-1, 672, 14, 14] 0
Conv2dStaticSamePadding-168 [-1, 112, 14, 14] 75,264

BatchNorm2d-169 [-1, 112, 14, 14] 224
MBConvBlock-170 [-1, 112, 14, 14] 0
Identity-171 [-1, 112, 14, 14] 0
Conv2dStaticSamePadding-172 [-1, 672, 14, 14] 75,264
BatchNorm2d-173 [-1, 672, 14, 14] 1,344
MemoryEfficientSwish-174 [-1, 672, 14, 14] 0
ZeroPad2d-175 [-1, 672, 18, 18] 0
Conv2dStaticSamePadding-176 [-1, 672, 14, 14] 16,800
BatchNorm2d-177 [-1, 672, 14, 14] 1,344
MemoryEfficientSwish-178 [-1, 672, 14, 14] 0
Identity-179 [-1, 672, 1, 1] 0
Conv2dStaticSamePadding-180 [-1, 28, 1, 1] 18,844
MemoryEfficientSwish-181 [-1, 28, 1, 1] 0
Identity-182 [-1, 28, 1, 1] 0
Conv2dStaticSamePadding-183 [-1, 672, 1, 1] 19,488
Identity-184 [-1, 672, 14, 14] 0
Conv2dStaticSamePadding-185 [-1, 112, 14, 14] 75,264
BatchNorm2d-186 [-1, 112, 14, 14] 224
MBConvBlock-187 [-1, 112, 14, 14] 0
Identity-188 [-1, 112, 14, 14] 0
Conv2dStaticSamePadding-189 [-1, 672, 14, 14] 75,264
BatchNorm2d-190 [-1, 672, 14, 14] 1,344
MemoryEfficientSwish-191 [-1, 672, 14, 14] 0
ZeroPad2d-192 [-1, 672, 17, 17] 0
Conv2dStaticSamePadding-193 [-1, 672, 7, 7] 16,800
BatchNorm2d-194 [-1, 672, 7, 7] 1,344
MemoryEfficientSwish-195 [-1, 672, 7, 7] 0
Identity-196 [-1, 672, 1, 1] 0
Conv2dStaticSamePadding-197 [-1, 28, 1, 1] 18,844
MemoryEfficientSwish-198 [-1, 28, 1, 1] 0
Identity-199 [-1, 28, 1, 1] 0
Conv2dStaticSamePadding-200 [-1, 672, 1, 1] 19,488
Identity-201 [-1, 672, 7, 7] 0
Conv2dStaticSamePadding-202 [-1, 192, 7, 7] 129,024
BatchNorm2d-203 [-1, 192, 7, 7] 384
MBConvBlock-204 [-1, 192, 7, 7] 0
Identity-205 [-1, 192, 7, 7] 0
Conv2dStaticSamePadding-206 [-1, 1152, 7, 7] 221,184
BatchNorm2d-207 [-1, 1152, 7, 7] 2,304
MemoryEfficientSwish-208 [-1, 1152, 7, 7] 0
ZeroPad2d-209 [-1, 1152, 11, 11] 0
Conv2dStaticSamePadding-210 [-1, 1152, 7, 7] 28,800
BatchNorm2d-211 [-1, 1152, 7, 7] 2,304
MemoryEfficientSwish-212 [-1, 1152, 7, 7] 0
Identity-213 [-1, 1152, 1, 1] 0
Conv2dStaticSamePadding-214 [-1, 48, 1, 1] 55,344
MemoryEfficientSwish-215 [-1, 48, 1, 1] 0
Identity-216 [-1, 48, 1, 1] 0

Conv2dStaticSamePadding-217 [-1, 1152, 1, 1] 56,448
Identity-218 [-1, 1152, 7, 7] 0
Conv2dStaticSamePadding-219 [-1, 192, 7, 7] 221,184
BatchNorm2d-220 [-1, 192, 7, 7] 384
MBConvBlock-221 [-1, 192, 7, 7] 0
Identity-222 [-1, 192, 7, 7] 0
Conv2dStaticSamePadding-223 [-1, 1152, 7, 7] 221,184
BatchNorm2d-224 [-1, 1152, 7, 7] 2,304
MemoryEfficientSwish-225 [-1, 1152, 7, 7] 0
ZeroPad2d-226 [-1, 1152, 11, 11] 0
Conv2dStaticSamePadding-227 [-1, 1152, 7, 7] 28,800
BatchNorm2d-228 [-1, 1152, 7, 7] 2,304
MemoryEfficientSwish-229 [-1, 1152, 7, 7] 0
Identity-230 [-1, 1152, 1, 1] 0
Conv2dStaticSamePadding-231 [-1, 48, 1, 1] 55,344
MemoryEfficientSwish-232 [-1, 48, 1, 1] 0
Identity-233 [-1, 48, 1, 1] 0
Conv2dStaticSamePadding-234 [-1, 1152, 1, 1] 56,448
Identity-235 [-1, 1152, 7, 7] 0
Conv2dStaticSamePadding-236 [-1, 192, 7, 7] 221,184
BatchNorm2d-237 [-1, 192, 7, 7] 384
MBConvBlock-238 [-1, 192, 7, 7] 0
Identity-239 [-1, 192, 7, 7] 0
Conv2dStaticSamePadding-240 [-1, 1152, 7, 7] 221,184
BatchNorm2d-241 [-1, 1152, 7, 7] 2,304
MemoryEfficientSwish-242 [-1, 1152, 7, 7] 0
ZeroPad2d-243 [-1, 1152, 11, 11] 0
Conv2dStaticSamePadding-244 [-1, 1152, 7, 7] 28,800
BatchNorm2d-245 [-1, 1152, 7, 7] 2,304
MemoryEfficientSwish-246 [-1, 1152, 7, 7] 0
Identity-247 [-1, 1152, 1, 1] 0
Conv2dStaticSamePadding-248 [-1, 48, 1, 1] 55,344
MemoryEfficientSwish-249 [-1, 48, 1, 1] 0
Identity-250 [-1, 48, 1, 1] 0
Conv2dStaticSamePadding-251 [-1, 1152, 1, 1] 56,448
Identity-252 [-1, 1152, 7, 7] 0
Conv2dStaticSamePadding-253 [-1, 192, 7, 7] 221,184
BatchNorm2d-254 [-1, 192, 7, 7] 384
MBConvBlock-255 [-1, 192, 7, 7] 0
Identity-256 [-1, 192, 7, 7] 0
Conv2dStaticSamePadding-257 [-1, 1152, 7, 7] 221,184
BatchNorm2d-258 [-1, 1152, 7, 7] 2,304
MemoryEfficientSwish-259 [-1, 1152, 7, 7] 0
ZeroPad2d-260 [-1, 1152, 9, 9] 0
Conv2dStaticSamePadding-261 [-1, 1152, 7, 7] 10,368
BatchNorm2d-262 [-1, 1152, 7, 7] 2,304
MemoryEfficientSwish-263 [-1, 1152, 7, 7] 0
Identity-264 [-1, 1152, 1, 1] 0

Conv2dStaticSamePadding-265 [-1, 48, 1, 1] 55,344
MemoryEfficientSwish-266 [-1, 48, 1, 1] 0
Identity-267 [-1, 48, 1, 1] 0
Conv2dStaticSamePadding-268 [-1, 1152, 1, 1] 56,448
Identity-269 [-1, 1152, 7, 7] 0
Conv2dStaticSamePadding-270 [-1, 320, 7, 7] 368,640
BatchNorm2d-271 [-1, 320, 7, 7] 640
MBConvBlock-272 [-1, 320, 7, 7] 0
Identity-273 [-1, 320, 7, 7] 0
Conv2dStaticSamePadding-274 [-1, 1280, 7, 7] 409,600
BatchNorm2d-275 [-1, 1280, 7, 7] 2,560
MemoryEfficientSwish-276 [-1, 1280, 7, 7] 0
AdaptiveAvgPool2d-277 [-1, 1280, 1, 1] 0
Dropout-278 [-1, 1280] 0
Linear-279 [-1, 200] 256,200
Total params: 4,263,748
Trainable params: 4,263,748
Non-trainable params: 0
Input size (MB): 0.57
Forward/backward pass size (MB): 212.79
Params size (MB): 16.26
Estimated Total Size (MB): 229.63

FileNotFoundError Traceback (most recent call last)
File ~\anaconda3\lib\site-packages\graphviz\backend\, in␣
↪run_check(cmd, input_lines, encoding, quiet, **kwargs)

80 else:
---> 81 proc =, **kwargs)
82 except OSError as e:

File ~\anaconda3\lib\, in run(input, capture_output, timeout,␣

↪check, *popenargs, **kwargs)

503 kwargs['stderr'] = PIPE

--> 505 with Popen(*popenargs, **kwargs) as process:
506 try:

File ~\anaconda3\lib\, in Popen.__init__(self, args, bufsize,␣

↪executable, stdin, stdout, stderr, preexec_fn, close_fds, shell, cwd, env,␣
↪universal_newlines, startupinfo, creationflags, restore_signals,␣
↪start_new_session, pass_fds, user, group, extra_groups, encoding, errors,␣
↪text, umask)

948 self.stderr = io.TextIOWrapper(self.stderr,

949 encoding=encoding, errors=errors)
--> 951 self._execute_child(args, executable, preexec_fn, close_fds,

952 pass_fds, cwd, env,
953 startupinfo, creationflags, shell,
954 p2cread, p2cwrite,
955 c2pread, c2pwrite,
956 errread, errwrite,
957 restore_signals,
958 gid, gids, uid, umask,
959 start_new_session)
960 except:
961 # Cleanup if the child failed starting.

File ~\anaconda3\lib\, in Popen._execute_child(self, args,␣

↪executable, preexec_fn, close_fds, pass_fds, cwd, env, startupinfo,␣
↪creationflags, shell, p2cread, p2cwrite, c2pread, c2pwrite, errread, errwrite,␣
↪unused_restore_signals, unused_gid, unused_gids, unused_uid, unused_umask,␣

1419 try:
-> 1420 hp, ht, pid, tid = _winapi.CreateProcess(executable, args,
1421 # no special security
1422 None, None,
1423 int(not close_fds),
1424 creationflags,
1425 env,
1426 cwd,
1427 startupinfo)
1428 finally:
1429 # Child is launched. Close the parent's copy of those pipe
1430 # handles that only the child should have open. You need
1433 # pipe will not close when the child process exits and the
1434 # ReadFile will hang.

FileNotFoundError: [WinError 2] The system cannot find the file specified

The above exception was the direct cause of the following exception:

ExecutableNotFound Traceback (most recent call last)

Input In [121], in <cell line: 42>()
39 graph = torchsummary_to_graph(summary_str)
41 # Save the graph as an image file
---> 42 graph.render('model_resnet_architecture', format='png', cleanup=True)

File ~\anaconda3\lib\site-packages\graphviz\, in␣

↪deprecate_positional_args.<locals>.decorator.<locals>.wrapper(*args, **kwargs)

162 wanted = ', '.join(f'{name}={value!r}'

163 for name, value in deprecated.items())
164 warnings.warn(f'The signature of {func.__name__} will be reduced'
165 f' to {supported_number} positional args'

166 f' {list(supported)}: pass {wanted}'
167 ' as keyword arg(s)',
168 stacklevel=stacklevel,
169 category=category)
--> 171 return func(*args, **kwargs)

File ~\anaconda3\lib\site-packages\graphviz\, in Render.

↪render(self, filename, directory, view, cleanup, format, renderer, formatter,␣
↪neato_no_op, quiet, quiet_view, outfile, engine, raise_if_result_exists,␣

118 filepath =, directory=directory, skip_existing=None)

120 args.append(filepath)
--> 122 rendered = self._render(*args, **kwargs)
124 if cleanup:
125 log.debug('delete %r', filepath)

File ~\anaconda3\lib\site-packages\graphviz\, in␣

↪deprecate_positional_args.<locals>.decorator.<locals>.wrapper(*args, **kwargs)

162 wanted = ', '.join(f'{name}={value!r}'

163 for name, value in deprecated.items())
164 warnings.warn(f'The signature of {func.__name__} will be reduced'
165 f' to {supported_number} positional args'
166 f' {list(supported)}: pass {wanted}'
167 ' as keyword arg(s)',
168 stacklevel=stacklevel,
169 category=category)
--> 171 return func(*args, **kwargs)

File ~\anaconda3\lib\site-packages\graphviz\backend\, in␣

↪render(engine, format, filepath, renderer, formatter, neato_no_op, quiet,␣
↪outfile, raise_if_result_exists, overwrite_filepath)

320 raise exceptions.FileExistsError(f'output file exists: {os.


322 cmd += args

--> 324 execute.run_check(cmd,
325 cwd=filepath.parent if else None,
326 quiet=quiet,
327 capture_output=True)
329 return os.fspath(outfile)

File ~\anaconda3\lib\site-packages\graphviz\backend\, in␣

↪run_check(cmd, input_lines, encoding, quiet, **kwargs)

82 except OSError as e:
83 if e.errno == errno.ENOENT:
---> 84 raise ExecutableNotFound(cmd) from e
85 raise
87 if not quiet and proc.stderr:

ExecutableNotFound: failed to execute WindowsPath('dot'), make sure the Graphviz␣
↪executables are on your systems' PATH

[118]: # Move the model to GPU if available

# Prefer CUDA, but fall back to the CPU so this cell also works on machines
# without a usable GPU (a hard-coded "cuda" device fails there as soon as a
# model or tensor is moved to it).
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

[118]: ResNet(
(conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3),
(bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True,
(relu): ReLU(inplace=True)
(maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1,
(layer1): Sequential(
(0): BasicBlock(
(conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1),
(bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True,
(relu): ReLU(inplace=True)
(conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1),
(bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True,
(1): BasicBlock(
(conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1),
(bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True,
(relu): ReLU(inplace=True)
(conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1),
(bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True,
(layer2): Sequential(
(0): BasicBlock(
(conv1): Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1,
1), bias=False)
(bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True,
(relu): ReLU(inplace=True)
(conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1,

1), bias=False)
(bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True,
(downsample): Sequential(
(0): Conv2d(64, 128, kernel_size=(1, 1), stride=(2, 2), bias=False)
(1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True,
(1): BasicBlock(
(conv1): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1,
1), bias=False)
(bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True,
(relu): ReLU(inplace=True)
(conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1,
1), bias=False)
(bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True,
(layer3): Sequential(
(0): BasicBlock(
(conv1): Conv2d(128, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1,
1), bias=False)
(bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True,
(relu): ReLU(inplace=True)
(conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1,
1), bias=False)
(bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True,
(downsample): Sequential(
(0): Conv2d(128, 256, kernel_size=(1, 1), stride=(2, 2), bias=False)
(1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True,
(1): BasicBlock(
(conv1): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1,
1), bias=False)
(bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True,
(relu): ReLU(inplace=True)
(conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1,
1), bias=False)
(bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True,

(layer4): Sequential(
(0): BasicBlock(
(conv1): Conv2d(256, 512, kernel_size=(3, 3), stride=(2, 2), padding=(1,
1), bias=False)
(bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True,
(relu): ReLU(inplace=True)
(conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1,
1), bias=False)
(bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True,
(downsample): Sequential(
(0): Conv2d(256, 512, kernel_size=(1, 1), stride=(2, 2), bias=False)
(1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True,
(1): BasicBlock(
(conv1): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1,
1), bias=False)
(bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True,
(relu): ReLU(inplace=True)
(conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1,
1), bias=False)
(bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True,
(avgpool): AdaptiveAvgPool2d(output_size=(1, 1))
(fc): Linear(in_features=512, out_features=200, bias=True)

[ ]:

[109]: # Print the summary of the model

# torchsummary creates its dummy input on `device`, which defaults to CUDA
# when not given. Pass the device the model's weights actually live on so the
# input and the weights match; otherwise the forward pass raises
# "Input type (torch.cuda.FloatTensor) and weight type (torch.FloatTensor)
# should be the same".
summary(
    model_resnet,
    (3, 224, 224),
    device=next(model_resnet.parameters()).device.type,
)

RuntimeError Traceback (most recent call last)
Input In [109], in <cell line: 2>()
1 # Print the summary of the model
----> 2 summary(model_resnet, (3, 224, 224))

File ~\anaconda3\lib\site-packages\torchsummary\, in␣
↪summary(model, input_size, batch_size, device)

68 model.apply(register_hook)
70 # make a forward pass
71 # print(x.shape)
---> 72 model(*x)
74 # remove these hooks
75 for h in hooks:

File ~\anaconda3\lib\site-packages\torch\nn\modules\, in Module.

↪_wrapped_call_impl(self, *args, **kwargs)

1516 return self._compiled_call_impl(*args, **kwargs) # type:␣


1517 else:
-> 1518 return self._call_impl(*args, **kwargs)

File ~\anaconda3\lib\site-packages\torch\nn\modules\, in Module.

↪_call_impl(self, *args, **kwargs)

1522 # If we don't have any hooks, we want to skip the rest of the logic in
1523 # this function, and just call forward.
1524 if not (self._backward_hooks or self._backward_pre_hooks or self.
↪_forward_hooks or self._forward_pre_hooks

1525 or _global_backward_pre_hooks or _global_backward_hooks

1526 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1527 return forward_call(*args, **kwargs)
1529 try:
1530 result = None

File ~\anaconda3\lib\site-packages\torchvision\models\, in ResNet.

↪forward(self, x)

284 def forward(self, x: Tensor) -> Tensor:

--> 285 return self._forward_impl(x)

File ~\anaconda3\lib\site-packages\torchvision\models\, in ResNet.

↪_forward_impl(self, x)

266 def _forward_impl(self, x: Tensor) -> Tensor:

267 # See note [TorchScript super()]
--> 268 x = self.conv1(x)
269 x = self.bn1(x)
270 x = self.relu(x)

File ~\anaconda3\lib\site-packages\torch\nn\modules\, in Module.

↪_wrapped_call_impl(self, *args, **kwargs)

1516 return self._compiled_call_impl(*args, **kwargs) # type:␣


1517 else:
-> 1518 return self._call_impl(*args, **kwargs)

File ~\anaconda3\lib\site-packages\torch\nn\modules\, in Module.
↪_call_impl(self, *args, **kwargs)

1565 bw_hook = hooks.BackwardHook(self, full_backward_hooks,␣


1566 args = bw_hook.setup_input_hook(args)

-> 1568 result = forward_call(*args, **kwargs)
1569 if _global_forward_hooks or self._forward_hooks:
1570 for hook_id, hook in (
1571 *_global_forward_hooks.items(),
1572 *self._forward_hooks.items(),
1573 ):
1574 # mark that always called hook is run

File ~\anaconda3\lib\site-packages\torch\nn\modules\, in Conv2d.

↪forward(self, input)

459 def forward(self, input: Tensor) -> Tensor:

--> 460 return self._conv_forward(input, self.weight, self.bias)

File ~\anaconda3\lib\site-packages\torch\nn\modules\, in Conv2d.

↪_conv_forward(self, input, weight, bias)

452 if self.padding_mode != 'zeros':

453 return F.conv2d(F.pad(input, self._reversed_padding_repeated_twice,␣

454 weight, bias, self.stride,

455 _pair(0), self.dilation, self.groups)
--> 456 return F.conv2d(input, weight, bias, self.stride,
457 self.padding, self.dilation, self.groups)

RuntimeError: Input type (torch.cuda.FloatTensor) and weight type (torch.

↪FloatTensor) should be the same

Total number of parameters

[98]: import torch
import torchvision.models as models

# Add up the element count of every parameter tensor in MobileNet
# (all tensors returned by .parameters() are included).
total_params = sum(map(torch.numel, mobilenet.parameters()))
print("Total number of parameters:", total_params)

Total number of parameters: 2480072

[99]: import torch

import torchvision.models as models

# Sum the sizes of all EfficientNet parameter tensors
# (all tensors returned by .parameters() are included).
total_params = sum(
    [weights.numel() for weights in efficientnet.parameters()]
)
print("Total number of parameters:", total_params)

Total number of parameters: 4263748

[100]: import torch

import torchvision.models as models

# Total element count over every parameter tensor of the Inception model
# (all tensors returned by .parameters() are included).
total_params = sum(
    map(lambda tensor: tensor.numel(), inception_model.parameters())
)
print("Total number of parameters:", total_params)

Total number of parameters: 25522064

[91]: !pip install graphviz
# NOTE(review): this installs the `graphviz` Python package only. The
# ExecutableNotFound error raised by graph.render() ("failed to execute
# WindowsPath('dot')") indicates the Graphviz `dot` executable is still
# missing from PATH and has to be installed separately.

Collecting graphviz
Downloading graphviz-0.20.1-py3-none-any.whl.metadata (12 kB)
Downloading graphviz-0.20.1-py3-none-any.whl (47 kB)
---------------------------------------- 47.0/47.0 kB 2.3 MB/s eta 0:00:00
Installing collected packages: graphviz
Successfully installed graphviz-0.20.1

[101]: import torch

import torchvision.models as models

# Accumulate the number of elements held by each ResNet parameter tensor
# (all tensors returned by .parameters() are included).
total_params = sum(
    (layer_weights.numel() for layer_weights in model_resnet.parameters()),
    0,
)
print("Total number of parameters:", total_params)

Total number of parameters: 11279112

The trained weights of all the models are included in this Drive link:

[ ]:


You might also like