177 KiB
177 KiB
import torchvision
import torch.nn as nn
import torch
import torch.nn.functional as F
from torchvision import transforms,models,datasets
import matplotlib.pyplot as plt
from PIL import Image
import numpy as np
from torch import optim
device = 'cuda' if torch.cuda.is_available() else 'cpu'
import cv2, glob, numpy as np, pandas as pd
import matplotlib.pyplot as plt
from glob import glob
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, Dataset
# --- Kaggle API credential setup (Colab notebook cell) ---
# Install the Kaggle command-line client; -q keeps pip output quiet.
!pip install -q kaggle
from google.colab import files
# Opens the Colab upload widget; the user is expected to pick kaggle.json
# (the lines below assume that exact file name in the working directory).
files.upload()
# Copy the uploaded token into ~/.kaggle, creating the directory if needed.
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
!ls ~/.kaggle
# Make the token readable by its owner only.
# NOTE(review): this uses the absolute /root path while the lines above use ~;
# they refer to the same place only when the home directory is /root - confirm.
!chmod 600 /root/.kaggle/kaggle.json
Upload widget is only available when the cell has been executed in the
current browser session. Please rerun this cell to enable.
Saving kaggle.json to kaggle.json kaggle.json
# Download the tongpython/cat-and-dog dataset archive with the Kaggle CLI
# and unpack it into the current working directory.
!kaggle datasets download -d tongpython/cat-and-dog
!unzip cat-and-dog.zip
# Root folders handed to CatsDogs; each is expected to contain a cats/ and a
# dogs/ sub-folder of .jpg files (that is what CatsDogs globs for).
train_data_dir = 'training_set/training_set'
test_data_dir = 'test_set/test_set'
class CatsDogs(Dataset):
    """Cat-vs-dog image dataset read from ``<folder>/cats`` and ``<folder>/dogs``.

    Indexing returns ``(image, target)`` where ``image`` is a float32 tensor of
    shape ``(3, 224, 224)`` that has been scaled by 1/255 and channel-normalized,
    and ``target`` is a float32 tensor of shape ``(1,)`` holding 1.0 for a dog
    and 0.0 for a cat. Both tensors are moved to the module-level ``device``.
    """

    def __init__(self, folder, per_class=500):
        """Collect the image paths and their labels.

        Args:
            folder: Directory containing ``cats/`` and ``dogs/`` sub-folders.
            per_class: Maximum number of images taken from each class
                (``None`` keeps every image found).
        """
        # A private generator gives the same fixed-seed shuffle without
        # reseeding the process-wide ``random`` module as a side effect.
        from random import Random

        self.fpaths = self._select_paths(folder, per_class)
        # Per-channel mean/std applied after scaling pixels to [0, 1]; these
        # are the values conventionally paired with torchvision's pretrained
        # ImageNet models.
        self.normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                              std=[0.229, 0.224, 0.225])
        Random(10).shuffle(self.fpaths)
        self.targets = [self._is_dog(fpath) for fpath in self.fpaths]

    @staticmethod
    def _select_paths(folder, per_class):
        """Return up to ``per_class`` cat paths followed by as many dog paths."""
        # glob returns files in arbitrary filesystem order; sorting makes the
        # chosen subset (and therefore the seeded shuffle) reproducible.
        cats = sorted(glob(folder + '/cats/*.jpg'))
        dogs = sorted(glob(folder + '/dogs/*.jpg'))
        return cats[:per_class] + dogs[:per_class]

    @staticmethod
    def _is_dog(fpath):
        """Return True when the file name (not the directory) starts with 'dog'."""
        from os.path import basename

        return basename(fpath).startswith('dog')

    def __len__(self):
        """Return the number of selected images."""
        return len(self.fpaths)

    def __getitem__(self, ix):
        """Load, preprocess and return the ``ix``-th ``(image, target)`` pair.

        Raises:
            OSError: If OpenCV cannot read the image file.
        """
        f = self.fpaths[ix]
        target = self.targets[ix]
        im = cv2.imread(f)
        if im is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise OSError(f'could not read image file: {f}')
        im = im[:, :, ::-1]             # reverse the channel axis (BGR -> RGB)
        im = cv2.resize(im, (224, 224))
        im = torch.tensor(im / 255)     # scale pixel values down by 255
        im = im.permute(2, 0, 1)        # HWC -> CHW
        im = self.normalize(im)
        return im.float().to(device), torch.tensor([target]).float().to(device)
# Sanity check: display one training sample together with its label.
data = CatsDogs(train_data_dir)
im, label = data[200]
# The dataset returns channel-normalized tensors, whose values fall outside
# [0, 1]; undo the normalization so imshow shows the real colours instead of
# clipping them.
norm_mean = torch.tensor(data.normalize.mean)
norm_std = torch.tensor(data.normalize.std)
preview = (im.permute(1, 2, 0).cpu() * norm_std + norm_mean).clamp(0, 1)
plt.imshow(preview)
print(label)
Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
tensor([0.], device='cuda:0')
def get_model():
    """Build a frozen pretrained VGG16 with a new binary-classification head.

    Returns:
        A ``(model, loss_fn, optimizer)`` tuple: the network moved to the
        module-level ``device``, a ``BCELoss`` instance, and an Adam optimizer
        (lr 1e-3) created over the network's parameters.
    """
    net = models.vgg16(pretrained=True)

    # Freeze everything that came with the pretrained network; the layers
    # assigned below are created afterwards and therefore stay trainable.
    for weight in net.parameters():
        weight.requires_grad = False

    # Pool each of the 512 feature maps down to a single value so the head
    # can start from a 512-dimensional vector.
    net.avgpool = nn.AdaptiveAvgPool2d(output_size=(1, 1))

    head_layers = [
        nn.Flatten(),
        nn.Linear(512, 128),
        nn.ReLU(),
        nn.Dropout(0.2),
        nn.Linear(128, 1),
        nn.Sigmoid(),  # emits a probability, as BCELoss expects
    ]
    net.classifier = nn.Sequential(*head_layers)

    criterion = nn.BCELoss()
    adam = torch.optim.Adam(net.parameters(), lr=1e-3)
    return net.to(device), criterion, adam
# Install the torch_summary package, which provides the `torchsummary` module.
!pip install torch_summary
from torchsummary import summary
# Build the model (the loss is named `criterion` in this cell only).
model, criterion, optimizer = get_model()
# Print a per-layer table of output shapes and parameter counts for a single
# 3x224x224 input.
summary(model, torch.zeros(1,3,224,224))
Requirement already satisfied: torch_summary in /usr/local/lib/python3.6/dist-packages (1.4.3) ========================================================================================== Layer (type:depth-idx) Output Shape Param # ========================================================================================== ├─Sequential: 1-1 [-1, 512, 7, 7] -- | └─Conv2d: 2-1 [-1, 64, 224, 224] (1,792) | └─ReLU: 2-2 [-1, 64, 224, 224] -- | └─Conv2d: 2-3 [-1, 64, 224, 224] (36,928) | └─ReLU: 2-4 [-1, 64, 224, 224] -- | └─MaxPool2d: 2-5 [-1, 64, 112, 112] -- | └─Conv2d: 2-6 [-1, 128, 112, 112] (73,856) | └─ReLU: 2-7 [-1, 128, 112, 112] -- | └─Conv2d: 2-8 [-1, 128, 112, 112] (147,584) | └─ReLU: 2-9 [-1, 128, 112, 112] -- | └─MaxPool2d: 2-10 [-1, 128, 56, 56] -- | └─Conv2d: 2-11 [-1, 256, 56, 56] (295,168) | └─ReLU: 2-12 [-1, 256, 56, 56] -- | └─Conv2d: 2-13 [-1, 256, 56, 56] (590,080) | └─ReLU: 2-14 [-1, 256, 56, 56] -- | └─Conv2d: 2-15 [-1, 256, 56, 56] (590,080) | └─ReLU: 2-16 [-1, 256, 56, 56] -- | └─MaxPool2d: 2-17 [-1, 256, 28, 28] -- | └─Conv2d: 2-18 [-1, 512, 28, 28] (1,180,160) | └─ReLU: 2-19 [-1, 512, 28, 28] -- | └─Conv2d: 2-20 [-1, 512, 28, 28] (2,359,808) | └─ReLU: 2-21 [-1, 512, 28, 28] -- | └─Conv2d: 2-22 [-1, 512, 28, 28] (2,359,808) | └─ReLU: 2-23 [-1, 512, 28, 28] -- | └─MaxPool2d: 2-24 [-1, 512, 14, 14] -- | └─Conv2d: 2-25 [-1, 512, 14, 14] (2,359,808) | └─ReLU: 2-26 [-1, 512, 14, 14] -- | └─Conv2d: 2-27 [-1, 512, 14, 14] (2,359,808) | └─ReLU: 2-28 [-1, 512, 14, 14] -- | └─Conv2d: 2-29 [-1, 512, 14, 14] (2,359,808) | └─ReLU: 2-30 [-1, 512, 14, 14] -- | └─MaxPool2d: 2-31 [-1, 512, 7, 7] -- ├─AdaptiveAvgPool2d: 1-2 [-1, 512, 1, 1] -- ├─Sequential: 1-3 [-1, 1] -- | └─Flatten: 2-32 [-1, 512] -- | └─Linear: 2-33 [-1, 128] 65,664 | └─ReLU: 2-34 [-1, 128] -- | └─Dropout: 2-35 [-1, 128] -- | └─Linear: 2-36 [-1, 1] 129 | └─Sigmoid: 2-37 [-1, 1] -- ========================================================================================== Total params: 14,780,481 
Trainable params: 65,793 Non-trainable params: 14,714,688 Total mult-adds (G): 15.36 ========================================================================================== Input size (MB): 0.57 Forward/backward pass size (MB): 103.36 Params size (MB): 56.38 Estimated Total Size (MB): 160.32 ==========================================================================================
========================================================================================== Layer (type:depth-idx) Output Shape Param # ========================================================================================== ├─Sequential: 1-1 [-1, 512, 7, 7] -- | └─Conv2d: 2-1 [-1, 64, 224, 224] (1,792) | └─ReLU: 2-2 [-1, 64, 224, 224] -- | └─Conv2d: 2-3 [-1, 64, 224, 224] (36,928) | └─ReLU: 2-4 [-1, 64, 224, 224] -- | └─MaxPool2d: 2-5 [-1, 64, 112, 112] -- | └─Conv2d: 2-6 [-1, 128, 112, 112] (73,856) | └─ReLU: 2-7 [-1, 128, 112, 112] -- | └─Conv2d: 2-8 [-1, 128, 112, 112] (147,584) | └─ReLU: 2-9 [-1, 128, 112, 112] -- | └─MaxPool2d: 2-10 [-1, 128, 56, 56] -- | └─Conv2d: 2-11 [-1, 256, 56, 56] (295,168) | └─ReLU: 2-12 [-1, 256, 56, 56] -- | └─Conv2d: 2-13 [-1, 256, 56, 56] (590,080) | └─ReLU: 2-14 [-1, 256, 56, 56] -- | └─Conv2d: 2-15 [-1, 256, 56, 56] (590,080) | └─ReLU: 2-16 [-1, 256, 56, 56] -- | └─MaxPool2d: 2-17 [-1, 256, 28, 28] -- | └─Conv2d: 2-18 [-1, 512, 28, 28] (1,180,160) | └─ReLU: 2-19 [-1, 512, 28, 28] -- | └─Conv2d: 2-20 [-1, 512, 28, 28] (2,359,808) | └─ReLU: 2-21 [-1, 512, 28, 28] -- | └─Conv2d: 2-22 [-1, 512, 28, 28] (2,359,808) | └─ReLU: 2-23 [-1, 512, 28, 28] -- | └─MaxPool2d: 2-24 [-1, 512, 14, 14] -- | └─Conv2d: 2-25 [-1, 512, 14, 14] (2,359,808) | └─ReLU: 2-26 [-1, 512, 14, 14] -- | └─Conv2d: 2-27 [-1, 512, 14, 14] (2,359,808) | └─ReLU: 2-28 [-1, 512, 14, 14] -- | └─Conv2d: 2-29 [-1, 512, 14, 14] (2,359,808) | └─ReLU: 2-30 [-1, 512, 14, 14] -- | └─MaxPool2d: 2-31 [-1, 512, 7, 7] -- ├─AdaptiveAvgPool2d: 1-2 [-1, 512, 1, 1] -- ├─Sequential: 1-3 [-1, 1] -- | └─Flatten: 2-32 [-1, 512] -- | └─Linear: 2-33 [-1, 128] 65,664 | └─ReLU: 2-34 [-1, 128] -- | └─Dropout: 2-35 [-1, 128] -- | └─Linear: 2-36 [-1, 1] 129 | └─Sigmoid: 2-37 [-1, 1] -- ========================================================================================== Total params: 14,780,481 Trainable params: 65,793 Non-trainable params: 14,714,688 Total mult-adds (G): 15.36 
========================================================================================== Input size (MB): 0.57 Forward/backward pass size (MB): 103.36 Params size (MB): 56.38 Estimated Total Size (MB): 160.32 ==========================================================================================
def train_batch(x, y, model, opt, loss_fn):
    """Run one optimization step on a single batch and return its loss.

    Args:
        x: Input batch passed to ``model``.
        y: Targets passed to ``loss_fn`` together with the predictions.
        model: Network to train; it is switched to training mode.
        opt: Optimizer used for the update step.
        loss_fn: Callable ``loss_fn(prediction, y)`` returning a scalar tensor.

    Returns:
        The batch loss as a Python float.
    """
    model.train()
    prediction = model(x)
    batch_loss = loss_fn(prediction, y)
    batch_loss.backward()
    # Use the optimizer that was passed in, not whatever global happens to be
    # named ``optimizer``.
    opt.step()
    # Clear gradients after the step so the next call starts from a clean state.
    opt.zero_grad()
    return batch_loss.item()
@torch.no_grad()
def accuracy(x, y, model, threshold=0.5):
    """Return per-sample correctness of ``model`` on one batch.

    Args:
        x: Input batch passed to ``model``.
        y: Targets, compared element-wise with the thresholded predictions.
        model: Network to evaluate; it is switched to evaluation mode.
        threshold: Predictions strictly above this value count as the
            positive class. Defaults to 0.5.

    Returns:
        A (nested) list of booleans with the same shape as the comparison
        result, one entry per prediction.
    """
    model.eval()
    prediction = model(x)
    is_correct = (prediction > threshold) == y
    return is_correct.cpu().numpy().tolist()
def get_data(batch_size=32):
    """Create the training and validation data loaders.

    Args:
        batch_size: Number of samples per batch for both loaders.

    Returns:
        A ``(trn_dl, val_dl)`` tuple of ``DataLoader`` objects built from
        ``train_data_dir`` and ``test_data_dir`` respectively.
    """
    train = CatsDogs(train_data_dir)
    # Training batches are reshuffled every epoch and kept at a uniform size.
    trn_dl = DataLoader(train, batch_size=batch_size, shuffle=True, drop_last=True)
    val = CatsDogs(test_data_dir)
    # Evaluation should see every sample exactly once: no shuffling needed and
    # no dropping of the final partial batch, which would skew the metric.
    val_dl = DataLoader(val, batch_size=batch_size, shuffle=False, drop_last=False)
    return trn_dl, val_dl
# Build the loaders and a fresh model, then train for five epochs while
# recording the mean training loss and the train/validation accuracy per epoch.
trn_dl, val_dl = get_data()
model, loss_fn, optimizer = get_model()
train_losses, train_accuracies = [], []
val_accuracies = []

for epoch in range(5):
    print(f" epoch {epoch + 1}/5")
    train_epoch_losses, train_epoch_accuracies = [], []
    val_epoch_accuracies = []

    # Pass 1: one optimization step per training batch.
    for x, y in trn_dl:
        train_epoch_losses.append(train_batch(x, y, model, optimizer, loss_fn))
    train_epoch_loss = np.mean(train_epoch_losses)

    # Pass 2: training accuracy, measured after the epoch's updates.
    for x, y in trn_dl:
        train_epoch_accuracies.extend(accuracy(x, y, model))
    train_epoch_accuracy = np.mean(train_epoch_accuracies)

    # Pass 3: validation accuracy.
    for x, y in val_dl:
        val_epoch_accuracies.extend(accuracy(x, y, model))
    val_epoch_accuracy = np.mean(val_epoch_accuracies)

    train_losses.append(train_epoch_loss)
    train_accuracies.append(train_epoch_accuracy)
    val_accuracies.append(val_epoch_accuracy)
epoch 1/5 epoch 2/5 epoch 3/5 epoch 4/5 epoch 5/5
epochs = np.arange(5)+1
import matplotlib.ticker as mtick
import matplotlib.pyplot as plt
import matplotlib.ticker as mticker
%matplotlib inline
plt.plot(epochs, train_accuracies, 'bo', label='Training accuracy')
plt.plot(epochs, val_accuracies, 'r', label='Validation accuracy')
plt.gca().xaxis.set_major_locator(mticker.MultipleLocator(1))
plt.title('Training and validation accuracy with VGG16 \nand 1K training data points')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.ylim(0.95,1)
plt.gca().set_yticklabels(['{:.0f}%'.format(x*100) for x in plt.gca().get_yticks()])
plt.legend()
plt.grid('off')
plt.show()