begin image recognition neural network implementation
This commit is contained in:
parent
b9fba20676
commit
d4e382a7f0
42
neural_network/datasets.py
Normal file
42
neural_network/datasets.py
Normal file
@ -0,0 +1,42 @@
|
|||||||
|
import torchvision
|
||||||
|
import torch
|
||||||
|
import torchvision.transforms as transforms
|
||||||
|
|
||||||
|
from torch.utils.data import DataLoader
|
||||||
|
|
||||||
|
# Number of samples per mini-batch used by the data loaders.
BATCH_SIZE = 64

# Values shared by the training and validation pipelines.
_IMAGE_SIZE = (224, 224)
_NORM_MEAN = [0.5, 0.5, 0.5]
_NORM_STD = [0.5, 0.5, 0.5]

# Training pipeline: resize, random augmentations, then tensor conversion
# and per-channel normalisation.
train_transform = transforms.Compose([
    # Resize (not merely check) every image to 224x224.
    transforms.Resize(_IMAGE_SIZE),
    # The random effects below are applied to reduce overfitting.
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomVerticalFlip(p=0.5),
    transforms.GaussianBlur(kernel_size=(5, 9), sigma=(0.1, 5)),
    transforms.RandomRotation(degrees=(30, 70)),
    transforms.ToTensor(),
    transforms.Normalize(mean=_NORM_MEAN, std=_NORM_STD),
])

# Validation pipeline: same resize and normalisation, no random augmentation.
valid_transform = transforms.Compose([
    transforms.Resize(_IMAGE_SIZE),
    transforms.ToTensor(),
    transforms.Normalize(mean=_NORM_MEAN, std=_NORM_STD),
])
|
||||||
|
|
||||||
|
# Image datasets read from disk; each split gets its own transform pipeline.
train_dataset = torchvision.datasets.ImageFolder(
    root='./Vegetable Images/train',
    transform=train_transform,
)
validation_dataset = torchvision.datasets.ImageFolder(
    root='./Vegetable Images/validation',
    transform=valid_transform,
)

# Pinned host memory is only useful when batches are copied to a CUDA device;
# requesting it on a CPU-only machine just produces a warning.
_PIN_MEMORY = torch.cuda.is_available()

# Training batches are reshuffled every epoch.
train_loader = DataLoader(
    train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=0,
    pin_memory=_PIN_MEMORY,
)

# Validation batches keep a fixed order.
valid_loader = DataLoader(
    validation_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=0,
    pin_memory=_PIN_MEMORY,
)
|
70
neural_network/inference.py
Normal file
70
neural_network/inference.py
Normal file
@ -0,0 +1,70 @@
|
|||||||
|
import torch
|
||||||
|
import cv2
|
||||||
|
import torchvision.transforms as transforms
|
||||||
|
import argparse
|
||||||
|
from model import CNNModel
|
||||||
|
# Construct the argument parser.
# The input path is mandatory: with an empty path the script cannot read an
# image and would fail later with an unrelated error, so let argparse report
# the missing option up front.
parser = argparse.ArgumentParser()
parser.add_argument(
    '-i', '--input',
    required=True,
    help='path to the input image',
)
args = vars(parser.parse_args())
|
||||||
|
|
||||||
|
# Pick the computation device: CUDA when available, otherwise the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

# Class labels, indexed by the class id predicted by the model.
labels = [
    'bean',
    'bitter gourd',
    'bottle gourd',
    'brinjal',
    'broccoli',
    'cabbage',
    'capsicum',
    'carrot',
    'cauliflower',
    'cucumber',
    'papaya',
    'potato',
    'pumpkin',
    'radish',
    'tomato',
]
|
||||||
|
|
||||||
|
# Initialize the model and load the trained weights from the checkpoint.
# NOTE(review): the checkpoint written by the training script also stores the
# loss-function object; depending on the installed PyTorch version,
# torch.load may need explicit options to unpickle it -- confirm.
model = CNNModel().to(device)
checkpoint = torch.load('outputs/model.pth', map_location=device)
model.load_state_dict(checkpoint['model_state_dict'])
model.eval()  # inference mode

# Preprocessing for a single image given as an RGB array.
# Resize takes an explicit (height, width) so the image is scaled to exactly
# 224x224, matching the Resize((224, 224)) used for training and validation.
# A bare integer would only scale the shorter edge.
transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.5, 0.5, 0.5],
        std=[0.5, 0.5, 0.5],
    ),
])
|
||||||
|
|
||||||
|
|
||||||
|
# Read the image; cv2.imread reports failure by returning None instead of
# raising, so check explicitly to give a clear error message.
image = cv2.imread(args['input'])
if image is None:
    raise FileNotFoundError(f"Could not read input image: {args['input']!r}")

# The ground-truth class is the name of the directory containing the image.
# Accept both '/' and '\\' separators, and fall back to a placeholder when
# the path has no parent directory component.
path_parts = args['input'].replace('\\', '/').split('/')
gt_class = path_parts[-2] if len(path_parts) > 1 else 'unknown'
image_stem = path_parts[-1].split('.')[0]

# Keep an untouched BGR copy for drawing and saving.
orig_image = image.copy()

# Convert to RGB, run the preprocessing pipeline and add a batch dimension.
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
image = transform(image)
image = torch.unsqueeze(image, 0)

with torch.no_grad():
    outputs = model(image.to(device))

# The network may emit more logits than there are known labels, so only the
# logits that have a label are considered when picking the prediction.
pred_index = int(outputs[0, :len(labels)].argmax())
pred_class = labels[pred_index]

# Draw the ground truth (green) and the prediction (red) onto the image.
cv2.putText(
    orig_image,
    f"GT: {gt_class}",
    (10, 25),
    cv2.FONT_HERSHEY_SIMPLEX,
    0.6, (0, 255, 0), 2, cv2.LINE_AA
)
cv2.putText(
    orig_image,
    f"Pred: {pred_class}",
    (10, 55),
    cv2.FONT_HERSHEY_SIMPLEX,
    0.6, (0, 0, 255), 2, cv2.LINE_AA
)
print(f"GT: {gt_class}, pred: {pred_class}")

# Show the annotated image until a key is pressed, then write it to disk.
cv2.imshow('Result', orig_image)
cv2.waitKey(0)
cv2.imwrite(f"outputs/{gt_class}{image_stem}.png", orig_image)
|
24
neural_network/model.py
Normal file
24
neural_network/model.py
Normal file
@ -0,0 +1,24 @@
|
|||||||
|
import torch.nn as nn
|
||||||
|
import torch.nn.functional as F
|
||||||
|
|
||||||
|
class CNNModel(nn.Module):
    """Small convolutional image classifier.

    Four convolution + ReLU + 2x2 max-pool stages are followed by global
    average pooling and a single linear layer that produces the class logits.

    Args:
        num_classes: Number of output logits. Defaults to 50, the size used
            by previously saved checkpoints, so existing weights keep
            loading unchanged.
    """

    def __init__(self, num_classes=50):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 32, 5)
        self.conv2 = nn.Conv2d(32, 64, 5)
        self.conv3 = nn.Conv2d(64, 128, 3)
        self.conv4 = nn.Conv2d(128, 256, 5)

        # Classifier head on top of the 256 globally pooled channels.
        self.fc1 = nn.Linear(256, num_classes)

        # One parameter-free pooling layer shared by all four stages.
        self.pool = nn.MaxPool2d(2, 2)

    def forward(self, x):
        """Return the logits of shape (batch, num_classes) for image batch x."""
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = self.pool(F.relu(self.conv3(x)))
        x = self.pool(F.relu(self.conv4(x)))
        # Global average pooling reduces each channel to a single value, which
        # makes the head independent of the spatial size of the feature map.
        x = F.adaptive_avg_pool2d(x, 1).flatten(1)
        return self.fc1(x)
|
BIN
neural_network/outputs/accuracy.png
Normal file
BIN
neural_network/outputs/accuracy.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 40 KiB |
BIN
neural_network/outputs/loss.png
Normal file
BIN
neural_network/outputs/loss.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 41 KiB |
BIN
neural_network/outputs/model.pth
Normal file
BIN
neural_network/outputs/model.pth
Normal file
Binary file not shown.
119
neural_network/train.py
Normal file
119
neural_network/train.py
Normal file
@ -0,0 +1,119 @@
|
|||||||
|
import torch
|
||||||
|
import argparse
|
||||||
|
import torch.nn as nn
|
||||||
|
import torch.optim as optim
|
||||||
|
import time
|
||||||
|
from tqdm.auto import tqdm
|
||||||
|
from model import CNNModel
|
||||||
|
from datasets import train_loader, valid_loader
|
||||||
|
from utils import save_model, save_plots
|
||||||
|
|
||||||
|
# Command-line interface: only the number of training epochs is configurable.
parser = argparse.ArgumentParser()
parser.add_argument(
    '-e', '--epochs',
    type=int,
    default=20,
    help='number of epochs to train our network for',
)
args = vars(parser.parse_args())

# Learning rate and number of epochs.
lr = 1e-3
epochs = args['epochs']

# Train on CUDA when available, otherwise on the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print(f"Computation device: {device}\n")
|
||||||
|
|
||||||
|
# Build the network on the selected device and show its structure.
model = CNNModel().to(device)
print(model)

# Report how many parameters exist and how many of them are trainable.
_param_sizes = [(p.numel(), p.requires_grad) for p in model.parameters()]
total_params = sum(size for size, _ in _param_sizes)
print(f"{total_params:,} total parameters.")
total_trainable_params = sum(
    size for size, trainable in _param_sizes if trainable
)
print(f"{total_trainable_params:,} training parameters.")

# Adam optimizer over all model parameters, and cross-entropy loss.
optimizer = optim.Adam(model.parameters(), lr=lr)
criterion = nn.CrossEntropyLoss()
|
||||||
|
|
||||||
|
|
||||||
|
# training
|
||||||
|
def train(model, trainloader, optimizer, criterion):
    """Run one training epoch over ``trainloader``.

    Batches are moved to the module-level ``device`` before the forward pass.

    Returns:
        A tuple ``(epoch_loss, epoch_acc)``: the mean of the per-batch loss
        values, and the percentage of correctly classified samples relative
        to ``len(trainloader.dataset)``.
    """
    model.train()
    print('Training')
    loss_sum = 0.0
    num_correct = 0
    num_batches = 0
    progress = tqdm(enumerate(trainloader), total=len(trainloader))
    for _, (image, labels) in progress:
        num_batches += 1
        image = image.to(device)
        labels = labels.to(device)

        # Clear gradients left over from the previous step.
        optimizer.zero_grad()

        # Forward pass and loss.
        outputs = model(image)
        loss = criterion(outputs, labels)
        loss_sum += loss.item()

        # The predicted class is the index of the largest logit.
        preds = torch.max(outputs.data, 1)[1]
        num_correct += (preds == labels).sum().item()

        # Backpropagate and update the parameters.
        loss.backward()
        optimizer.step()

    # Loss and accuracy for the complete epoch.
    epoch_loss = loss_sum / num_batches
    epoch_acc = 100. * (num_correct / len(trainloader.dataset))
    return epoch_loss, epoch_acc
|
||||||
|
|
||||||
|
# validation
|
||||||
|
def validate(model, testloader, criterion):
    """Evaluate ``model`` on ``testloader`` without computing gradients.

    Batches are moved to the module-level ``device`` before the forward pass.

    Returns:
        A tuple ``(epoch_loss, epoch_acc)``: the mean of the per-batch loss
        values, and the percentage of correctly classified samples relative
        to ``len(testloader.dataset)``.
    """
    model.eval()
    print('Validation')
    loss_sum = 0.0
    num_correct = 0
    num_batches = 0
    with torch.no_grad():
        progress = tqdm(enumerate(testloader), total=len(testloader))
        for _, (image, labels) in progress:
            num_batches += 1
            image = image.to(device)
            labels = labels.to(device)

            # Forward pass and loss.
            outputs = model(image)
            loss = criterion(outputs, labels)
            loss_sum += loss.item()

            # The predicted class is the index of the largest logit.
            preds = torch.max(outputs.data, 1)[1]
            num_correct += (preds == labels).sum().item()

    # Loss and accuracy for the complete epoch.
    epoch_loss = loss_sum / num_batches
    epoch_acc = 100. * (num_correct / len(testloader.dataset))
    return epoch_loss, epoch_acc
|
||||||
|
|
||||||
|
# Per-epoch history of losses and accuracies, used for the plots at the end.
train_loss, valid_loss = [], []
train_acc, valid_acc = [], []

# Main loop: one training pass followed by one validation pass per epoch.
for epoch in range(epochs):
    print(f"[INFO]: Epoch {epoch + 1} of {epochs}")
    train_epoch_loss, train_epoch_acc = train(
        model, train_loader, optimizer, criterion
    )
    valid_epoch_loss, valid_epoch_acc = validate(
        model, valid_loader, criterion
    )

    # Record this epoch's metrics.
    train_loss.append(train_epoch_loss)
    valid_loss.append(valid_epoch_loss)
    train_acc.append(train_epoch_acc)
    valid_acc.append(valid_epoch_acc)

    print(
        f"Training loss: {train_epoch_loss:.3f}, "
        f"training acc: {train_epoch_acc:.3f}"
    )
    print(
        f"Validation loss: {valid_epoch_loss:.3f}, "
        f"validation acc: {valid_epoch_acc:.3f}"
    )
    print('-' * 50)
    # Pause for five seconds between epochs.
    time.sleep(5)

# Save the trained model weights, then the loss and accuracy plots.
save_model(epochs, model, optimizer, criterion)
save_plots(train_acc, valid_acc, train_loss, valid_loss)
print('TRAINING COMPLETE')
|
49
neural_network/utils.py
Normal file
49
neural_network/utils.py
Normal file
@ -0,0 +1,49 @@
|
|||||||
|
import torch
|
||||||
|
import matplotlib
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
# Use the 'ggplot' style sheet for every figure created by this module.
plt.style.use('ggplot')
|
||||||
|
|
||||||
|
def save_model(epochs, model, optimizer, criterion):
    """Save a training checkpoint to ``outputs/model.pth``.

    The ``outputs`` directory is created when it does not exist yet, so the
    function also works in a fresh working directory.

    Args:
        epochs: Value stored under the ``'epoch'`` key.
        model: Module whose ``state_dict()`` is stored under
            ``'model_state_dict'``.
        optimizer: Optimizer whose ``state_dict()`` is stored under
            ``'optimizer_state_dict'``.
        criterion: Loss object stored as-is under ``'loss'``.
    """
    import os

    os.makedirs('outputs', exist_ok=True)
    # NOTE(review): the criterion is pickled as a full object rather than as
    # plain tensors; loaders restricted to weights-only unpickling may refuse
    # such a checkpoint -- confirm against the PyTorch version in use.
    checkpoint = {
        'epoch': epochs,
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'loss': criterion,
    }
    torch.save(checkpoint, 'outputs/model.pth')
|
||||||
|
|
||||||
|
def _save_curve_plot(curves, ylabel, path):
    """Plot each ``(values, color, label)`` curve against epochs and save it."""
    plt.figure(figsize=(10, 7))
    for values, color, label in curves:
        plt.plot(values, color=color, linestyle='-', label=label)
    plt.xlabel('Epochs')
    plt.ylabel(ylabel)
    plt.legend()
    plt.savefig(path)
    # Close the figure so repeated calls do not accumulate open figures.
    plt.close()


def save_plots(train_acc, valid_acc, train_loss, valid_loss):
    """Save the accuracy and loss curves to disk.

    Writes ``outputs/accuracy.png`` and ``outputs/loss.png``; the ``outputs``
    directory is created when it does not exist yet.

    Args:
        train_acc: Per-epoch training accuracies.
        valid_acc: Per-epoch validation accuracies.
        train_loss: Per-epoch training losses.
        valid_loss: Per-epoch validation losses.
    """
    import os

    os.makedirs('outputs', exist_ok=True)

    # accuracy plots
    _save_curve_plot(
        [
            (train_acc, 'green', 'train accuracy'),
            (valid_acc, 'blue', 'validation accuracy'),
        ],
        'Accuracy',
        'outputs/accuracy.png',
    )

    # loss plots
    _save_curve_plot(
        [
            (train_loss, 'orange', 'train loss'),
            (valid_loss, 'red', 'validation loss'),
        ],
        'Loss',
        'outputs/loss.png',
    )
|
Loading…
Reference in New Issue
Block a user