{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import os\n", "import cv2\n", "from torch.utils.data import DataLoader, Dataset\n", "from torch.utils.data import random_split\n", "import torch\n", "import torch.nn as nn\n", "import torch.nn.functional as F\n", "import torch.optim as optim\n", "import torchvision\n", "from torch.utils.tensorboard import SummaryWriter\n", "\n", "writer = SummaryWriter()" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "class TreesDataset(Dataset):\n", " def __init__(self, data_links) -> None:\n", " self.X, self.Y = readData(data_links)\n", "\n", " def __len__(self):\n", " return len(self.X)\n", "\n", " def __getitem__(self, index):\n", " return (self.X[index], self.Y[index])\n", "\n", "\n", "class Net(nn.Module):\n", " def __init__(self):\n", " super().__init__()\n", " self.conv1 = nn.Conv2d(3, 6, 5)\n", " self.pool = nn.MaxPool2d(2, 2)\n", " self.conv2 = nn.Conv2d(6, 16, 5)\n", " self.fc1 = nn.Linear(3264, 120)\n", " self.fc2 = nn.Linear(120, 84)\n", " self.fc3 = nn.Linear(84, 2)\n", "\n", " def forward(self, x):\n", " x = self.pool(F.relu(self.conv1(x)))\n", " x = self.pool(F.relu(self.conv2(x)))\n", " x = torch.flatten(x, 1)\n", " x = F.relu(self.fc1(x))\n", " x = F.relu(self.fc2(x))\n", " x = self.fc3(x)\n", " return x" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "def create_datalinks(root_dir):\n", " data_links = os.listdir(root_dir)\n", " data_links = [root_dir + \"/\" + x for x in data_links]\n", " return data_links\n", "\n", "def preprocess(img):\n", " scale_percent = 10\n", " width = int(img.shape[1] * scale_percent / 100)\n", " height = int(img.shape[0] * scale_percent / 100)\n", " dim = (width, height)\n", " resized = cv2.resize(img, dim, interpolation = cv2.INTER_AREA)\n", " resized = torchvision.transforms.functional.to_tensor(resized)\n", " return resized\n", "\n", "def readData(data_links):\n", " x, y = [], []\n", " for link in data_links:\n", " img = cv2.imread(link, cv2.IMREAD_COLOR)\n", " img = preprocess(img)\n", " if(\"ground\" in link):\n", " label = 1\n", " elif(\"AS12\" in link):\n", " label = 0\n", " else:\n", " label = 0\n", " x.append(img)\n", " y.append(label)\n", "\n", " return x, y" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "links_3_plus_ground = create_datalinks(\"new_data/AS12_3\") + create_datalinks(\"new_data/ground\")\n", "\n", "dataset = TreesDataset(links_3_plus_ground)\n", "\n", "train_set, test_set = random_split(dataset, [300, 50], generator=torch.Generator().manual_seed(42))\n", "\n", "trainloader = DataLoader(train_set, batch_size=10, shuffle=True, num_workers=2)\n", "testloader = DataLoader(test_set, batch_size=10, shuffle=True, num_workers=2)\n", "\n", "classes = ('tree', 'ground')\n", "epochs_num = 15\n", "\n", "net = Net()\n", "criterion = nn.CrossEntropyLoss()\n", "optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[1, 1] loss: 0.074\n", "[1, 11] loss: 0.725\n", "[1, 21] loss: 0.695\n", "[2, 1] loss: 0.063\n", "[2, 11] loss: 0.606\n", "[2, 21] loss: 0.594\n", "[3, 1] loss: 0.071\n", "[3, 11] loss: 0.405\n", "[3, 21] loss: 0.477\n", "[4, 1] loss: 0.015\n", "[4, 11] loss: 0.327\n", "[4, 21] loss: 0.484\n", "[5, 1] loss: 0.052\n", "[5, 11] loss: 0.486\n", "[5, 21] loss: 0.370\n", "[6, 1] loss: 0.014\n", "[6, 11] loss: 0.454\n", "[6, 21] loss: 0.317\n", "[7, 1] loss: 0.052\n", "[7, 11] loss: 0.434\n", "[7, 21] loss: 0.467\n", "[8, 1] loss: 0.051\n", "[8, 11] loss: 0.438\n", "[8, 21] loss: 0.457\n", "[9, 1] loss: 0.071\n", "[9, 11] loss: 0.422\n", "[9, 21] loss: 0.358\n", "[10, 1] loss: 0.013\n", "[10, 11] loss: 0.447\n", "[10, 21] loss: 0.373\n", "[11, 1] loss: 0.052\n", "[11, 11] loss: 0.314\n", "[11, 21] loss: 0.387\n", "[12, 1] loss: 0.037\n", "[12, 11] loss: 0.437\n", "[12, 21] loss: 0.395\n", "[13, 1] loss: 0.013\n", "[13, 11] loss: 0.431\n", "[13, 21] loss: 0.423\n", "[14, 1] loss: 0.017\n", "[14, 11] loss: 0.466\n", "[14, 21] loss: 0.371\n", "[15, 1] loss: 0.032\n", "[15, 11] loss: 0.441\n", "[15, 21] loss: 0.324\n", "Finished Training\n" ] } ], "source": [ "\n", "\n", "for epoch in range(epochs_num):\n", " correct = 0\n", " total = 0\n", " running_loss = 0.0\n", " for i, data in enumerate(trainloader, 0):\n", " inputs, labels = data\n", " optimizer.zero_grad()\n", " outputs = net(inputs)\n", " loss = criterion(outputs, labels)\n", " loss.backward()\n", " optimizer.step()\n", " running_loss += loss.item()\n", "\n", " _, predicted = torch.max(outputs.data, 1)\n", " total += labels.size(0)\n", " correct += (predicted == labels).sum().item()\n", "\n", " if i % 10 == 0: \n", " print('[%d, %5d] loss: %.3f' %\n", " (epoch + 1, i + 1, running_loss / 10))\n", " running_loss = 0.0\n", "\n", " writer.add_scalar(\"Loss/train\", loss.item(), i + epoch)\n", " writer.add_scalar(\"Accuracy/train\", correct/total, i + epoch)\n", "\n", "print('Finished Training')" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Accuracy : 84 %\n" ] } ], "source": [ "correct = 0\n", "total = 0\n", "i=0\n", "with torch.no_grad():\n", " for data in testloader:\n", " images, labels = data\n", " outputs = net(images)\n", " loss = criterion(outputs, labels)\n", " _, predicted = torch.max(outputs.data, 1)\n", " total += labels.size(0)\n", " correct += (predicted == labels).sum().item()\n", " writer.add_scalar(\"Accuracy/test\", correct/total, i + epoch)\n", " writer.add_scalar(\"Loss/test\", loss.item(), i + epoch)\n", " i += 1\n", "\n", "print('Accuracy : %d %%' % (100 * correct / total))" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "\n", "images, labels = next(iter(trainloader))\n", "grid = torchvision.utils.make_grid(images)\n", "writer.add_image('images', grid, 0)\n", "writer.add_graph(net, images)\n", "writer.close()" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [], "source": [ "multi = create_datalinks(\"new_data/multi\")\n", "multiset = TreesDataset(multi)\n", "multiloader = DataLoader(multiset, batch_size=10, shuffle=True, num_workers=2)\n", "criterion2 = nn.Softmax(dim=1)" ] }, { "cell_type": "code", "execution_count": 28, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "tensor([[0.8594, 0.1406],\n", " [0.8276, 0.1724],\n", " [0.8850, 0.1150],\n", " [0.8887, 0.1113],\n", " [0.8737, 0.1263],\n", " [0.8814, 0.1186],\n", " [0.8911, 0.1089],\n", " [0.8364, 0.1636],\n", " [0.8846, 0.1154],\n", " [0.8726, 0.1274]])\n", "tensor([[0.8452, 0.1548],\n", " [0.8533, 0.1467],\n", " [0.8536, 0.1464],\n", " [0.8593, 0.1407],\n", " [0.8557, 0.1443],\n", " [0.8811, 0.1189],\n", " [0.8727, 0.1273],\n", " [0.8529, 0.1471],\n", " [0.9053, 0.0947],\n", " [0.8824, 0.1176]])\n", "tensor([[0.8593, 0.1407],\n", " [0.8952, 0.1048],\n", " [0.8780, 0.1220],\n", " [0.8724, 0.1276],\n", " [0.8451, 0.1549],\n", " [0.8424, 0.1576],\n", " [0.8332, 0.1668],\n", " [0.8567, 0.1433],\n", " [0.8487, 0.1513],\n", " [0.8839, 0.1161]])\n", "tensor([[0.8759, 0.1241],\n", " [0.8340, 0.1660],\n", " [0.9141, 0.0859],\n", " [0.9075, 0.0925],\n", " [0.8674, 0.1326],\n", " [0.8431, 0.1569],\n", " [0.8933, 0.1067],\n", " [0.8475, 0.1525],\n", " [0.8363, 0.1637],\n", " [0.8789, 0.1211]])\n", "tensor([[0.8495, 0.1505],\n", " [0.8402, 0.1598],\n", " [0.8482, 0.1518],\n", " [0.8470, 0.1530],\n", " [0.8733, 0.1267],\n", " [0.8362, 0.1638],\n", " [0.8909, 0.1091],\n", " [0.8568, 0.1432],\n", " [0.8577, 0.1423],\n", " [0.8678, 0.1322]])\n", "1.0\n" ] } ], "source": [ "correct = 0\n", "total = 0\n", "i=0\n", "with torch.no_grad():\n", " for data in multiloader:\n", " images, labels = data\n", " outputs = net(images)\n", " loss = criterion2(outputs)\n", " loss2 = criterion(outputs, labels)\n", " print(loss)\n", " _, predicted = torch.max(outputs, 1)\n", " total += labels.size(0)\n", " correct += (predicted == labels).sum().item()\n", " writer.add_scalar(\"Multi/Accuracy\", correct/total, i)\n", " writer.add_scalar(\"Multi/Loss\", loss2.item(), i + epoch)\n", "\n", " i += 1\n", "\n", "print(correct/total)" ] }, { "cell_type": "code", "execution_count": 44, "metadata": {}, "outputs": [], "source": [ "from IPython.display import Image\n", "from torchvision import models\n", "from torchsummary import summary\n", "from matplotlib import pyplot" ] }, { "cell_type": "code", "execution_count": 38, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "----------------------------------------------------------------\n", " Layer (type) Output Shape Param #\n", "================================================================\n", " Conv2d-1 [-1, 6, 76, 56] 456\n", " MaxPool2d-2 [-1, 6, 38, 28] 0\n", " Conv2d-3 [-1, 16, 34, 24] 2,416\n", " MaxPool2d-4 [-1, 16, 17, 12] 0\n", " Linear-5 [-1, 120] 391,800\n", " Linear-6 [-1, 84] 10,164\n", " Linear-7 [-1, 2] 170\n", "================================================================\n", "Total params: 405,006\n", "Trainable params: 405,006\n", "Non-trainable params: 0\n", "----------------------------------------------------------------\n", "Input size (MB): 0.05\n", "Forward/backward pass size (MB): 0.37\n", "Params size (MB): 1.54\n", "Estimated Total Size (MB): 1.97\n", "----------------------------------------------------------------\n" ] } ], "source": [ "summary(net, (3, 80, 60))\n", "link = \"new_data/AS12_3/AS12_3_1.png\"" ] }, { "cell_type": "code", "execution_count": 60, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "torch.Size([1, 6, 28, 38])\n" ] }, { "data": { "image/png": "", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "img = cv2.imread(link, cv2.IMREAD_COLOR)\n", "img = preprocess(img)\n", "img = img.view(1, 3, 60, 80)\n", "\n", "output = net.pool(F.relu(net.conv1(img)))\n", "print(output.size())\n", "\n", "square = 2\n", "ix = 1\n", "pyplot.figure(figsize=(16, 16))\n", "with torch.no_grad():\n", " for _ in range(square):\n", " for _ in range(square):\n", " ax = pyplot.subplot(square, square, ix)\n", " ax.set_xticks([])\n", " ax.set_yticks([])\n", " pyplot.imshow(output[0, ix-1, :, :])\n", " ix += 1\n", "\n", "pyplot.show()" ] }, { "cell_type": "code", "execution_count": 57, "metadata": {}, "outputs": [ { "data": { "image/png": "", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "img = cv2.imread(link, cv2.IMREAD_COLOR)\n", "img = preprocess(img)\n", "img = img.view(1, 3, 60, 80)\n", "\n", "output = net.pool(F.relu(net.conv1(img)))\n", "output = net.pool(F.relu(net.conv2(output)))\n", "output.size()\n", "\n", "square = 4\n", "ix = 1\n", "pyplot.figure(figsize=(16, 16))\n", "with torch.no_grad():\n", " for _ in range(square):\n", " for _ in range(square):\n", " ax = pyplot.subplot(square, square, ix)\n", " ax.set_xticks([])\n", " ax.set_yticks([])\n", " pyplot.imshow(output[0, ix-1, :, :])\n", " ix += 1\n", "\n", "pyplot.show()" ] } ], "metadata": { "interpreter": { "hash": "3c791669b07b322e46c2c9e5f9e8a4c39f8cc206c386431b147b2f78281d9ccb" }, "kernelspec": { "display_name": "Python 3.8.10 64-bit ('venv': venv)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.10" }, "orig_nbformat": 4 }, "nbformat": 4, "nbformat_minor": 2 }