raport

2020-05-18 21:34:26 +02:00 · 2020-05-18 21:34:26 +02:00 · cedb309f02
commit cedb309f02
parent c0b52698a7
5 changed files with 108 additions and 83 deletions
--- a/raport_adamB.md
+++ b/raport_adamB.md
@ -0,0 +1,101 @@
 # Sztuczna Inteligencja
 **Temat projektu:** Inteligentna Śmieciarka
 **Zespół:** Kacper Borkowski, Adam Borowski, Adam Osiowy
 **Podprojekt:** Adam Borowski
 ---
 ## 1. Temat podprojektu:
 Celem projektu było utworzenie klasyfikatora rodzajów danych wejściowych(śmieci) na podstawie zdjęć. Do tego celu wykorzystano bibliotekę [PyTorch](https://pytorch.org/docs/stable/index.html). Cały podprojekt opiera się na utworzeniu sieci konwolucyjnej i przetworzeniu inputu przez kolejne jej warstwy.
 ## 2. Model sieci:
 ```
 class Net(nn.Module):
    """Convolutional classifier that outputs four class scores per image.

    Two stages of 5x5 convolution + ReLU + 2x2 max-pooling are followed by
    three fully connected layers. ``fc1`` is sized for 16 feature maps of
    71x71, so inputs must reduce to that spatial size after both stages
    (e.g. 3-channel 296x296 images).
    """
    def __init__(self):
        super().__init__()
        # Attribute names are the keys of the saved state_dict; keep them.
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=6, kernel_size=5)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)
        self.fc1 = nn.Linear(in_features=16 * 71 * 71, out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=84)
        self.fc3 = nn.Linear(in_features=84, out_features=4)
    def forward(self, x):
        """Return raw (unnormalised) class scores of shape (batch, 4)."""
        # Feature extraction: conv -> ReLU -> max-pool, applied twice.
        features = self.pool(F.relu(self.conv1(x)))
        features = self.pool(F.relu(self.conv2(features)))
        # Flatten the 16 x 71 x 71 feature maps into one vector per sample.
        flat = features.view(features.size(0), 16 * 71 * 71)
        hidden = F.relu(self.fc1(flat))
        hidden = F.relu(self.fc2(hidden))
        return self.fc3(hidden)
 ```
 - conv1, conv2 – warstwy konwolucyjne o rozmiarze filtra 5×5; conv1 przyjmuje 3 kanały wejściowe (RGB) i zwraca 6 kanałów, a conv2 przyjmuje te 6 kanałów i zwraca 16 kanałów wyjściowych dla następnych warstw
 - pool - operacja `max-poolingu` - wyciąganie najważniejszej informacji z zadanego obszaru obrazu
  ![model](resources/screenShots/maxpool.png)
 - fc1, fc2, fc3 - warstwy liniowe - `fully connected layers` - w odróżnieniu od warstw konwolucyjnych, każdy neuron dostaje input od wszystkich neuronów z poprzedniej warstwy. W warstwie konwolucyjnej neurony wiedzą tylko o określonych neuronach z poprzedniego layera
  ![model](resources/screenShots/fc.png)
 - metoda `forward` - określa cały przepływ (flow) inputu przez warstwy aż do outputu. W pierwszej części tensor danej wejściowej (tensor zdjęcia) przepuszczany jest przez dwie warstwy konwolucyjne, a po każdej z nich wykonywana jest wcześniej wspomniana operacja `max-poolingu`. W następnej części wypłaszczamy x: wszystkie wymiary przechowujące dane obrazu – 16 kanałów o rozmiarach 71×71 – rozciągamy w jeden długi wektor. Na koniec przepuszczamy tensor przez warstwy liniowe i zwracamy output.
 ## 3. Trening:
 ```
 def train():
    """Train ``Net`` on the image-folder dataset and save its weights.

    Loads images from ``./resources/zbior_uczacy`` through ``ImageFolder``
    (labels come from the sub-directory names), runs 10 epochs of SGD with
    cross-entropy loss, prints the loss of every mini-batch and finally
    writes the model's ``state_dict`` to ``./wytrenowaned.pth``.
    """
    net = Net()
    trainset = torchvision.datasets.ImageFolder(
        root='./resources/zbior_uczacy', transform=transform)
    trainloader = torch.utils.data.DataLoader(
        trainset, batch_size=1, shuffle=True, num_workers=2)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)
    for epoch in range(10):
        for i, (inputs, labels) in enumerate(trainloader):
            # Gradients accumulate across backward() calls, so reset them.
            optimizer.zero_grad()
            outputs = net(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            # Report every batch on its own. The previous `if i:` guard
            # skipped batch 0 and added its loss to the value printed for
            # batch 1.
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, loss.item()))
    print('Finished Training')
    PATH = './wytrenowaned.pth'
    torch.save(net.state_dict(), PATH)
 ```
 - na początku zainicjowano sieć, pobrano zbiór uczący i znormalizowano jego wnętrze, aby każde zdjęcie było pod postacią Tensora(tego wymaga model sieci)
 - następnie zdefiniowano kryterium do wyznaczania jakości klasyfikacji zdjęć do klas i wyznaczono optymalizator (w tym przypadku SGD, mógłby też być Adam)
 - potem wchodzimy do pętli i iterujemy po data secie: pobieramy inputy, czyścimy gradienty z poprzedniej iteracji, przepuszczamy inputy przez sieć i wyliczamy wartość funkcji straty (loss), za pomocą algorytmu propagacji wstecznej liczymy jej pochodne, aktualizujemy wagi optymalizatorem i wyświetlamy w konsoli bieżący loss
 - następnie zapisujemy wytrenowany model
 ## 4. Przewidywanie:
 ```
 def predict(img_path):
    """Classify a single image file with the saved network weights.

    Args:
        img_path: location of the image, as accepted by ``Image.open``.

    Returns:
        The predicted class name: ``'glass'``, ``'metal'``, ``'paper'`` or
        ``'plastic'``.
    """
    net = Net()
    PATH = './wytrenowaned.pth'
    classes = ('glass', 'metal', 'paper', 'plastic')
    # Mirror the training preprocessing (ToTensor + Normalize with mean and
    # std 0.5); feeding un-normalised pixels gives the network inputs from a
    # different range than the one it was trained on.
    preprocess = transforms.Compose(
        [transforms.ToTensor(),
         transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
    # conv1 expects exactly 3 channels, so grayscale/RGBA files are converted
    # to RGB first; the context manager closes the file handle afterwards.
    with Image.open(img_path) as img:
        pil_to_tensor = preprocess(img.convert('RGB')).unsqueeze(0)
    net.load_state_dict(torch.load(PATH))
    net.eval()
    # No gradients are needed for inference.
    with torch.no_grad():
        outputs = net(pil_to_tensor)
    # torch.max(..., 1) returns (values, indices); the index selects the class.
    return classes[torch.max(outputs, 1)[1].item()]
 ```
 - zainicjowano sieć, wczytano ścieżkę, przetransformowano argument funkcji (zdjęcie) do pożądanego formatu
 - następnie przekazano tensor jako argument do instancji klasy sieci
 - w ostatnim kroku za pomocą funkcji `max` wyciągnięto indeks największej wartości na wyjściu sieci i na jego podstawie rozpoznano klasę
--- a/resources/screenShots/fc.png
+++ b/resources/screenShots/fc.png
--- a/resources/screenShots/maxpool.png
+++ b/resources/screenShots/maxpool.png
--- a/uczenie_adamB.py
+++ b/uczenie_adamB.py
@ -9,27 +9,12 @@ import numpy as np
 import torch.optim as optim
 from PIL import Image
 transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
 # def imshow(img):
 #     img = img / 2 + 0.5
 #     npimg = img.numpy()
 #     plt.imshow(np.transpose(npimg, (1, 2, 0)))
 #     plt.show()
 # dataiter = iter(trainloader)
 # images, labels = dataiter.next()
 # # show images
 # imshow(torchvision.utils.make_grid(images))
 # # print labels
 # print(' '.join('%5s' % classes[labels[j]] for j in range(4)))
 class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
@ -38,7 +23,7 @@ class Net(nn.Module):
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 71 * 71, 120)
        self.fc2 = nn.Linear(120, 84)
-        self.fc3 = nn.Linear(84, 10)
+        self.fc3 = nn.Linear(84, 4)
    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
@ -50,40 +35,31 @@ class Net(nn.Module):
        return x
 def train():
    net = Net()
    trainset = torchvision.datasets.ImageFolder(
        root='./resources/zbior_uczacy', transform=transform)
    trainloader = torch.utils.data.DataLoader(
-        trainset, batch_size=1, shuffle=True, num_workers=2)
+        trainset, batch_size=2, shuffle=True, num_workers=2)
    classes = ('glass', 'metal', 'paper', 'plastic')
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)
-    for epoch in range(10):  # loop over the dataset multiple times
+    for epoch in range(10):
        print("siema")
        running_loss = 0.0
        for i, data in enumerate(trainloader, 0):
            # get the inputs; data is a list of [inputs, labels]
            inputs, labels = data
            # zero the parameter gradients
            optimizer.zero_grad()
            # forward + backward + optimize
            outputs = net(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            # print statistics
            running_loss += loss.item()
-            if i:    # print every 2000 mini-batches
+            if i:
                print('[%d, %5d] loss: %.3f' %
                      (epoch + 1, i + 1, running_loss))
                running_loss = 0.0
                print("kyrw")
    print('Finished Training')
    PATH = './wytrenowaned.pth'
@ -95,61 +71,9 @@ def predict(img_path):
    PATH = './wytrenowaned.pth'
    img = Image.open(img_path)
    pil_to_tensor = transforms.ToTensor()(img).unsqueeze_(0)
    if(pil_to_tensor.shape[1] == 1):
        print(img_path)
    classes = ('glass', 'metal', 'paper', 'plastic')
    # testset = torchvision.datasets.ImageFolder(
    #     root='./resources/smieci', transform=transform)
    # testloader = torch.utils.data.DataLoader(
    #     testset, batch_size=4, shuffle=True, num_workers=2)
    # dataiter = iter(testloader)
    # images, labels = dataiter.next()
 # print images
 # imshow(torchvision.utils.make_grid(images))
    # print('GroundTruth: ', ' '.join('%5s' %
    #                                 classes[labels[j]] for j in range(4)))
    # print('---')
    # print(images)
    # print('---')
    net.load_state_dict(torch.load(PATH))
    net.eval()
    outputs = net(pil_to_tensor)
    return classes[torch.max(outputs, 1)[1]]
    # print(classes[torch.max(outputs, 1)[1]])
    # print('Predicted: ', ' '.join('%5s' % classes[predicted[j]]
    #                               for j in range(1)))
 # correct = 0
 # total = 0
 # with torch.no_grad():
 #     for data in testloader:
 #         images, labels = data
 #         outputs = net(images)
 #         _, predicted = torch.max(outputs.data, 1)
 #         total += labels.size(0)
 #         correct += (predicted == labels).sum().item()
 # print('Accuracy of the network on the test images: %d %%' % (
 #     100 * correct / total))
 # class_correct = list(0. for i in range(4))
 # class_total = list(0. for i in range(4))
 # with torch.no_grad():
 #     for data in testloader:
 #         images, labels = data
 #         outputs = net(images)
 #         _, predicted = torch.max(outputs, 1)
 #         c = (predicted == labels).squeeze()
 #         for i in range(3):
 #             label = labels[i]
 #             print(labels)
 #             class_correct[label] += c[i].item()
 #             class_total[label] += 1
 # for i in range(4):
 #     print('Accuracy of %5s : %2d %%' % (
 #         classes[i], 100 * class_correct[i] / class_total[i]))
 # train()
--- a/wytrenowaned.pth
+++ b/wytrenowaned.pth