Computer_Vision/Chapter05/Implementing_VGG16_for_image_classification.ipynb

import torchvision
import torch.nn as nn
import torch
import torch.nn.functional as F
from torchvision import transforms,models,datasets
import matplotlib.pyplot as plt
from PIL import Image
import numpy as np
from torch import optim
device = 'cuda' if torch.cuda.is_available() else 'cpu'
import cv2, glob, numpy as np, pandas as pd
import matplotlib.pyplot as plt
from glob import glob
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, Dataset
!pip install -q kaggle
from google.colab import files
files.upload()
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
!ls ~/.kaggle
!chmod 600 ~/.kaggle/kaggle.json
Saving kaggle.json to kaggle.json
kaggle.json
!kaggle datasets download -d tongpython/cat-and-dog
!unzip cat-and-dog.zip
train_data_dir = 'training_set/training_set'
test_data_dir = 'test_set/test_set'
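A quick sanity check of the extracted folders helps catch path problems before building the Dataset; a minimal sketch, assuming the archive unzipped into the two directories above:
# Count images per class in each split (paths are the ones defined above)
for split in [train_data_dir, test_data_dir]:
    n_cats = len(glob(split + '/cats/*.jpg'))
    n_dogs = len(glob(split + '/dogs/*.jpg'))
    print(split, '-> cats:', n_cats, ', dogs:', n_dogs)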
class CatsDogs(Dataset):
    def __init__(self, folder):
        cats = glob(folder+'/cats/*.jpg')
        dogs = glob(folder+'/dogs/*.jpg')
        self.fpaths = cats[:500] + dogs[:500]
        self.normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],std=[0.229, 0.224, 0.225])
        from random import shuffle, seed; seed(10); shuffle(self.fpaths)
        self.targets = [fpath.split('/')[-1].startswith('dog') for fpath in self.fpaths] 
    def __len__(self): return len(self.fpaths)
    def __getitem__(self, ix):
        f = self.fpaths[ix]
        target = self.targets[ix]
        im = (cv2.imread(f)[:,:,::-1])
        im = cv2.resize(im, (224,224))
        im = torch.tensor(im/255)
        im = im.permute(2,0,1)
        im = self.normalize(im) 
        return im.float().to(device), torch.tensor([target]).float().to(device)
data = CatsDogs(train_data_dir)
im, label = data[200]
plt.imshow(im.permute(1,2,0).cpu())
print(label)
Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
tensor([0.], device='cuda:0')
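The clipping warning above appears because the tensor being plotted is already normalized; a minimal sketch that undoes the ImageNet normalization purely for display (same mean/std as in the Dataset):
# Invert the normalization only for visualization
mean = torch.tensor([0.485, 0.456, 0.406]).view(3,1,1)
std = torch.tensor([0.229, 0.224, 0.225]).view(3,1,1)
im_vis = (im.cpu() * std + mean).clamp(0, 1)   # back to the [0, 1] range imshow expects
plt.imshow(im_vis.permute(1,2,0))
plt.show()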
def get_model():
    model = models.vgg16(pretrained=True)
    for param in model.parameters():
        param.requires_grad = False
    model.avgpool = nn.AdaptiveAvgPool2d(output_size=(1,1))
    model.classifier = nn.Sequential(nn.Flatten(),
                                     nn.Linear(512, 128),
                                     nn.ReLU(),
                                     nn.Dropout(0.2),
                                     nn.Linear(128, 1),
                                     nn.Sigmoid())
    loss_fn = nn.BCELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr= 1e-3)
    return model.to(device), loss_fn, optimizer
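Because the convolutional backbone is frozen, an equivalent setup (a sketch, not what the rest of the notebook uses) is to hand only the trainable classifier parameters to Adam:
# Hypothetical variant: pass only parameters with requires_grad=True to the optimizer
model_alt, loss_fn_alt, _ = get_model()
optimizer_alt = torch.optim.Adam(
    (p for p in model_alt.parameters() if p.requires_grad), lr=1e-3)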
!pip install torch_summary
from torchsummary import summary
model, criterion, optimizer = get_model()
summary(model, torch.zeros(1,3,224,224))
Requirement already satisfied: torch_summary in /usr/local/lib/python3.6/dist-packages (1.4.3)
==========================================================================================
Layer (type:depth-idx)                   Output Shape              Param #
==========================================================================================
├─Sequential: 1-1                        [-1, 512, 7, 7]           --
|    └─Conv2d: 2-1                       [-1, 64, 224, 224]        (1,792)
|    └─ReLU: 2-2                         [-1, 64, 224, 224]        --
|    └─Conv2d: 2-3                       [-1, 64, 224, 224]        (36,928)
|    └─ReLU: 2-4                         [-1, 64, 224, 224]        --
|    └─MaxPool2d: 2-5                    [-1, 64, 112, 112]        --
|    └─Conv2d: 2-6                       [-1, 128, 112, 112]       (73,856)
|    └─ReLU: 2-7                         [-1, 128, 112, 112]       --
|    └─Conv2d: 2-8                       [-1, 128, 112, 112]       (147,584)
|    └─ReLU: 2-9                         [-1, 128, 112, 112]       --
|    └─MaxPool2d: 2-10                   [-1, 128, 56, 56]         --
|    └─Conv2d: 2-11                      [-1, 256, 56, 56]         (295,168)
|    └─ReLU: 2-12                        [-1, 256, 56, 56]         --
|    └─Conv2d: 2-13                      [-1, 256, 56, 56]         (590,080)
|    └─ReLU: 2-14                        [-1, 256, 56, 56]         --
|    └─Conv2d: 2-15                      [-1, 256, 56, 56]         (590,080)
|    └─ReLU: 2-16                        [-1, 256, 56, 56]         --
|    └─MaxPool2d: 2-17                   [-1, 256, 28, 28]         --
|    └─Conv2d: 2-18                      [-1, 512, 28, 28]         (1,180,160)
|    └─ReLU: 2-19                        [-1, 512, 28, 28]         --
|    └─Conv2d: 2-20                      [-1, 512, 28, 28]         (2,359,808)
|    └─ReLU: 2-21                        [-1, 512, 28, 28]         --
|    └─Conv2d: 2-22                      [-1, 512, 28, 28]         (2,359,808)
|    └─ReLU: 2-23                        [-1, 512, 28, 28]         --
|    └─MaxPool2d: 2-24                   [-1, 512, 14, 14]         --
|    └─Conv2d: 2-25                      [-1, 512, 14, 14]         (2,359,808)
|    └─ReLU: 2-26                        [-1, 512, 14, 14]         --
|    └─Conv2d: 2-27                      [-1, 512, 14, 14]         (2,359,808)
|    └─ReLU: 2-28                        [-1, 512, 14, 14]         --
|    └─Conv2d: 2-29                      [-1, 512, 14, 14]         (2,359,808)
|    └─ReLU: 2-30                        [-1, 512, 14, 14]         --
|    └─MaxPool2d: 2-31                   [-1, 512, 7, 7]           --
├─AdaptiveAvgPool2d: 1-2                 [-1, 512, 1, 1]           --
├─Sequential: 1-3                        [-1, 1]                   --
|    └─Flatten: 2-32                     [-1, 512]                 --
|    └─Linear: 2-33                      [-1, 128]                 65,664
|    └─ReLU: 2-34                        [-1, 128]                 --
|    └─Dropout: 2-35                     [-1, 128]                 --
|    └─Linear: 2-36                      [-1, 1]                   129
|    └─Sigmoid: 2-37                     [-1, 1]                   --
==========================================================================================
Total params: 14,780,481
Trainable params: 65,793
Non-trainable params: 14,714,688
Total mult-adds (G): 15.36
==========================================================================================
Input size (MB): 0.57
Forward/backward pass size (MB): 103.36
Params size (MB): 56.38
Estimated Total Size (MB): 160.32
==========================================================================================
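The trainable-parameter count reported by the summary can be cross-checked directly on the model; a minimal sketch:
# Should print totals matching the summary: 14,780,481 total, 65,793 trainable
n_total = sum(p.numel() for p in model.parameters())
n_trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f'total: {n_total:,}, trainable: {n_trainable:,}')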
def train_batch(x, y, model, opt, loss_fn):
    model.train()
    prediction = model(x)
    batch_loss = loss_fn(prediction, y)
    batch_loss.backward()
    opt.step()        # use the optimizer passed in, not the global one
    opt.zero_grad()
    return batch_loss.item()
@torch.no_grad()
def accuracy(x, y, model):
    model.eval()
    prediction = model(x)
    is_correct = (prediction > 0.5) == y
    return is_correct.cpu().numpy().tolist()
def get_data():
    train = CatsDogs(train_data_dir)
    trn_dl = DataLoader(train, batch_size=32, shuffle=True, drop_last = True)
    val = CatsDogs(test_data_dir)
    val_dl = DataLoader(val, batch_size=32, shuffle=True, drop_last = True)
    return trn_dl, val_dl
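Before training, pulling one batch from a fresh loader is a quick way to confirm shapes, devices, and the target range; a minimal sketch (the _trn_dl name is only for this check):
_trn_dl, _ = get_data()
x_sample, y_sample = next(iter(_trn_dl))
print(x_sample.shape, y_sample.shape)   # expected: [32, 3, 224, 224] and [32, 1]
print(y_sample.unique())                # targets should be 0. and 1.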
trn_dl, val_dl = get_data()
model, loss_fn, optimizer = get_model()
train_losses, train_accuracies = [], []
val_accuracies = []
for epoch in range(5):
    print(f" epoch {epoch + 1}/5")
    train_epoch_losses, train_epoch_accuracies = [], []
    val_epoch_accuracies = []

    for ix, batch in enumerate(iter(trn_dl)):
        x, y = batch
        batch_loss = train_batch(x, y, model, optimizer, loss_fn)
        train_epoch_losses.append(batch_loss) 
    train_epoch_loss = np.array(train_epoch_losses).mean()

    for ix, batch in enumerate(iter(trn_dl)):
        x, y = batch
        is_correct = accuracy(x, y, model)
        train_epoch_accuracies.extend(is_correct)
    train_epoch_accuracy = np.mean(train_epoch_accuracies)

    for ix, batch in enumerate(iter(val_dl)):
        x, y = batch
        val_is_correct = accuracy(x, y, model)
        val_epoch_accuracies.extend(val_is_correct)
    val_epoch_accuracy = np.mean(val_epoch_accuracies)

    train_losses.append(train_epoch_loss)
    train_accuracies.append(train_epoch_accuracy)
    val_accuracies.append(val_epoch_accuracy)
 epoch 1/5
 epoch 2/5
 epoch 3/5
 epoch 4/5
 epoch 5/5
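After training, the fine-tuned weights can be saved for later reuse via the standard state_dict mechanism; a minimal sketch (the filename is an arbitrary choice):
torch.save(model.state_dict(), 'vgg16_cats_dogs.pth')   # weights only, not the class definition
# To reload later: rebuild the architecture, then load the weights
# model, _, _ = get_model()
# model.load_state_dict(torch.load('vgg16_cats_dogs.pth', map_location=device))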
epochs = np.arange(5)+1
import matplotlib.ticker as mticker
%matplotlib inline
plt.plot(epochs, train_accuracies, 'bo', label='Training accuracy')
plt.plot(epochs, val_accuracies, 'r', label='Validation accuracy')
plt.gca().xaxis.set_major_locator(mticker.MultipleLocator(1))
plt.title('Training and validation accuracy with VGG16 \nand 1K training data points')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.ylim(0.95,1)
plt.gca().yaxis.set_major_formatter(mticker.PercentFormatter(xmax=1, decimals=0))
plt.legend()
plt.grid(False)
plt.show()
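Finally, a minimal sketch of scoring a single held-out image with the trained model (the index and the 0.5 decision threshold are assumptions):
val_ds = CatsDogs(test_data_dir)
img, lbl = val_ds[0]
model.eval()
with torch.no_grad():
    prob = model(img[None]).item()     # sigmoid output, interpreted as P(dog)
print(f'P(dog) = {prob:.3f} -> predicted {"dog" if prob > 0.5 else "cat"}, actual label {lbl.item()}')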