raport

2020-05-18 21:34:26 +02:00 · 2020-05-18 21:34:26 +02:00 · cedb309f02
commit cedb309f02
parent c0b52698a7
5 changed files with 108 additions and 83 deletions
--- a/raport_adamB.md
+++ b/raport_adamB.md
@ -0,0 +1,101 @@
+# Sztuczna Inteligencja
+
+**Temat projektu:** Inteligentna Śmieciarka
+
+**Zespół:** Kacper Borkowski, Adam Borowski, Adam Osiowy
+
+**Podprojekt:** Adam Borowski
+
+---
+
+## 1. Temat podprojektu:
+
+Celem projektu było utworzenie klasyfikatora rodzajów danych wejściowych (śmieci) na podstawie zdjęć. Do tego celu wykorzystano bibliotekę [PyTorch](https://pytorch.org/docs/stable/index.html). Cały podprojekt opiera się na utworzeniu sieci konwolucyjnej i przetworzeniu inputu przez kolejne jej warstwy.
+
+## 2. Model sieci:
+
+```
+class Net(nn.Module):  # klasa Net dziedziczaca po klasie bazowej nn.Module
+    def __init__(self):
+        super(Net, self).__init__()
+        self.conv1 = nn.Conv2d(3, 6, 5)
+        self.pool = nn.MaxPool2d(2, 2)
+        self.conv2 = nn.Conv2d(6, 16, 5)
+        self.fc1 = nn.Linear(16 * 71 * 71, 120)
+        self.fc2 = nn.Linear(120, 84)
+        self.fc3 = nn.Linear(84, 4)
+
+    def forward(self, x):
+        x = self.pool(F.relu(self.conv1(x)))
+        x = self.pool(F.relu(self.conv2(x)))
+        x = x.view(x.size(0), 16 * 71 * 71)
+        x = F.relu(self.fc1(x))
+        x = F.relu(self.fc2(x))
+        x = self.fc3(x)
+        return x
+```
+
+- conv1, conv2 – warstwy konwolucyjne o rozmiarze filtra 5×5; conv1 ma 3 kanały wejściowe (RGB) i 6 wyjściowych, a conv2 – 6 kanałów wejściowych i 16 wyjściowych, przekazywanych do następnych warstw
+- pool - operacja `max-poolingu` - wyciąganie najważniejszej informacji z zadanego obszaru obrazu
+  ![model](resources/screenShots/maxpool.png)
+- fc1, fc2, fc3 - warstwy liniowe - `fully connected layers` - w odróżnieniu od warstw konwolucyjnych, każdy neuron dostaje input od każdego neuronu z poprzedniej warstwy. W warstwie konwolucyjnej neurony wiedzą tylko o określonych neuronach z poprzedniej warstwy
+  ![model](resources/screenShots/fc.png)
+- metoda `forward` - określa cały przepływ (flow) inputu przez warstwy aż do outputu. W pierwszej części tensor danej wejściowej (tensor zdjęcia) przepuszczany jest przez dwie warstwy konwolucyjne; po każdej z nich stosowana jest funkcja aktywacji ReLU i wcześniej wspomniana operacja `max-poolingu`. W następnej części wypłaszczamy x: wszystkie wymiary przechowujące dane obrazu – 16 kanałów o rozmiarach 71×71 – rozciągamy w jeden długi wektor. Na koniec przepuszczamy tensor przez warstwy liniowe i zwracamy output.
+
+## 3. Trening:
+
+```
+def train():
+    net = Net()
+    trainset = torchvision.datasets.ImageFolder(
+        root='./resources/zbior_uczacy', transform=transform)
+    trainloader = torch.utils.data.DataLoader(
+        trainset, batch_size=1, shuffle=True, num_workers=2)
+
+    classes = ('glass', 'metal', 'paper', 'plastic')
+    criterion = nn.CrossEntropyLoss()
+    optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)
+
+    for epoch in range(10):
+        running_loss = 0.0
+        for i, data in enumerate(trainloader, 0):
+            inputs, labels = data
+            optimizer.zero_grad()
+            outputs = net(inputs)
+            loss = criterion(outputs, labels)
+            loss.backward()
+            optimizer.step()
+            running_loss += loss.item()
+            if i:
+                print('[%d, %5d] loss: %.3f' %
+                      (epoch + 1, i + 1, running_loss))
+                running_loss = 0.0
+
+    print('Finished Training')
+    PATH = './wytrenowaned.pth'
+    torch.save(net.state_dict(), PATH)
+```
+
+- na początku zainicjowano sieć i wczytano zbiór uczący, przekształcając każde zdjęcie do postaci znormalizowanego tensora (tego wymaga model sieci)
+- następnie zdefiniowano kryterium do wyznaczania jakości klasyfikacji zdjęć do klas (funkcję straty `CrossEntropyLoss`) i wyznaczono optymalizator (w tym przypadku SGD, mógłby też być Adam)
+- potem wchodzimy do pętli i iterujemy po zbiorze danych: pobieramy inputy, czyścimy gradienty z poprzedniej iteracji, przepuszczamy inputy przez sieć i liczymy wartość funkcji straty, za pomocą algorytmu propagacji wstecznej liczymy jej pochodne, aktualizujemy wagi optymalizatorem i wyświetlamy w konsoli loss z danej iteracji
+- następnie zapisujemy wytrenowany model
+
+## 4. Przewidywanie:
+
+```
+def predict(img_path):
+    net = Net()
+    PATH = './wytrenowaned.pth'
+    img = Image.open(img_path)
+    pil_to_tensor = transforms.ToTensor()(img).unsqueeze_(0)
+    classes = ('glass', 'metal', 'paper', 'plastic')
+    net.load_state_dict(torch.load(PATH))
+    net.eval()
+    outputs = net(pil_to_tensor)
+    return classes[torch.max(outputs, 1)[1]]
+```
+
+- zainicjowano sieć, ustalono ścieżkę do wytrenowanego modelu, przetransformowano argument funkcji (zdjęcie) do pożądanego formatu (tensora)
+- następnie przekazano tensor jako argument do instancji klasy sieci
+- w ostatnim kroku za pomocą funkcji `max` wyznaczono indeks największej wartości na wyjściu sieci i na jego podstawie rozpoznano klasę
--- a/resources/screenShots/fc.png
+++ b/resources/screenShots/fc.png
--- a/resources/screenShots/maxpool.png
+++ b/resources/screenShots/maxpool.png
--- a/uczenie_adamB.py
+++ b/uczenie_adamB.py
@ -9,27 +9,12 @@ import numpy as np
 import torch.optim as optim
 from PIL import Image

+
 transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])


-# def imshow(img):
-#     img = img / 2 + 0.5
-#     npimg = img.numpy()
-#     plt.imshow(np.transpose(npimg, (1, 2, 0)))
-#     plt.show()
-
-
-# dataiter = iter(trainloader)
-# images, labels = dataiter.next()
-
-# # show images
-# imshow(torchvision.utils.make_grid(images))
-# # print labels
-# print(' '.join('%5s' % classes[labels[j]] for j in range(4)))
-
-
 class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
@ -38,7 +23,7 @@ class Net(nn.Module):
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 71 * 71, 120)
        self.fc2 = nn.Linear(120, 84)
-        self.fc3 = nn.Linear(84, 10)
+        self.fc3 = nn.Linear(84, 4)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
@ -50,40 +35,31 @@ class Net(nn.Module):
        return x


-
 def train():
+    net = Net()
    trainset = torchvision.datasets.ImageFolder(
        root='./resources/zbior_uczacy', transform=transform)
    trainloader = torch.utils.data.DataLoader(
-        trainset, batch_size=1, shuffle=True, num_workers=2)
+        trainset, batch_size=2, shuffle=True, num_workers=2)

    classes = ('glass', 'metal', 'paper', 'plastic')
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

-    for epoch in range(10):  # loop over the dataset multiple times
-        print("siema")
+    for epoch in range(10):
        running_loss = 0.0
        for i, data in enumerate(trainloader, 0):
-            # get the inputs; data is a list of [inputs, labels]
            inputs, labels = data
-
-            # zero the parameter gradients
            optimizer.zero_grad()
-
-            # forward + backward + optimize
            outputs = net(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
-
-            # print statistics
            running_loss += loss.item()
-            if i:    # print every 2000 mini-batches
+            if i:
                print('[%d, %5d] loss: %.3f' %
                      (epoch + 1, i + 1, running_loss))
                running_loss = 0.0
-                print("kyrw")

    print('Finished Training')
    PATH = './wytrenowaned.pth'
@ -95,61 +71,9 @@ def predict(img_path):
    PATH = './wytrenowaned.pth'
    img = Image.open(img_path)
    pil_to_tensor = transforms.ToTensor()(img).unsqueeze_(0)
-    if(pil_to_tensor.shape[1] == 1):
-        print(img_path)
    classes = ('glass', 'metal', 'paper', 'plastic')
-    # testset = torchvision.datasets.ImageFolder(
-    #     root='./resources/smieci', transform=transform)
-    # testloader = torch.utils.data.DataLoader(
-    #     testset, batch_size=4, shuffle=True, num_workers=2)
-    # dataiter = iter(testloader)
-    # images, labels = dataiter.next()
-
-
-# print images
-# imshow(torchvision.utils.make_grid(images))
-    # print('GroundTruth: ', ' '.join('%5s' %
-    #                                 classes[labels[j]] for j in range(4)))
-    # print('---')
-    # print(images)
-    # print('---')
    net.load_state_dict(torch.load(PATH))
+    net.eval()
    outputs = net(pil_to_tensor)
    return classes[torch.max(outputs, 1)[1]]

-    # print(classes[torch.max(outputs, 1)[1]])
-    # print('Predicted: ', ' '.join('%5s' % classes[predicted[j]]
-    #                               for j in range(1)))
-
-# correct = 0
-# total = 0
-# with torch.no_grad():
-#     for data in testloader:
-#         images, labels = data
-#         outputs = net(images)
-#         _, predicted = torch.max(outputs.data, 1)
-#         total += labels.size(0)
-#         correct += (predicted == labels).sum().item()
-
-# print('Accuracy of the network on the test images: %d %%' % (
-#     100 * correct / total))
-
-# class_correct = list(0. for i in range(4))
-# class_total = list(0. for i in range(4))
-# with torch.no_grad():
-#     for data in testloader:
-#         images, labels = data
-#         outputs = net(images)
-#         _, predicted = torch.max(outputs, 1)
-#         c = (predicted == labels).squeeze()
-#         for i in range(3):
-#             label = labels[i]
-#             print(labels)
-#             class_correct[label] += c[i].item()
-#             class_total[label] += 1
-
-
-# for i in range(4):
-#     print('Accuracy of %5s : %2d %%' % (
-#         classes[i], 100 * class_correct[i] / class_total[i]))
-# train()
--- a/wytrenowaned.pth
+++ b/wytrenowaned.pth