diff --git a/.idea/workspace.xml b/.idea/workspace.xml
index ad5314e..19317c7 100644
--- a/.idea/workspace.xml
+++ b/.idea/workspace.xml
@@ -19,12 +19,17 @@
-
-
-
+
+
+
+
+
+
+
-
+
+
@@ -73,17 +78,17 @@
-
+
-
+
-
+
@@ -110,6 +115,9 @@
+
+
+
@@ -129,6 +137,9 @@
+
+
+
@@ -185,7 +196,7 @@
-
+
@@ -198,7 +209,7 @@
-
+
@@ -209,18 +220,18 @@
-
+
-
-
+
+
+
-
@@ -258,7 +269,10 @@
-
+
+
+
+
1589815443652
@@ -344,7 +358,14 @@
1590447313737
-
+
+ 1590538529471
+
+
+
+ 1590538529471
+
+
@@ -376,7 +397,8 @@
-
+
+
@@ -403,10 +425,10 @@
-
+
-
+
diff --git a/coder/12345.png b/coder/12345.png
new file mode 100644
index 0000000..af91483
Binary files /dev/null and b/coder/12345.png differ
diff --git a/coder/coder.py b/coder/coder.py
index e6f0d84..658fde4 100644
--- a/coder/coder.py
+++ b/coder/coder.py
@@ -4,47 +4,49 @@ import torchvision
import matplotlib.pyplot as plt
from time import time
from torchvision import datasets, transforms
-from torch import nn, optim, nn, optim
+from torch import nn, optim
import cv2
-def view_classify(img, ps):
- ''' Function for viewing an image and it's predicted classes.
- '''
- ps = ps.data.numpy().squeeze()
+transform = transforms.Compose([transforms.ToTensor(),
+ transforms.Normalize((0.5,), (0.5,)),
+ ])
+
- fig, (ax1, ax2) = plt.subplots(figsize=(6,9), ncols=2)
- ax1.imshow(img.resize_(1, 28, 28).numpy().squeeze())
- ax1.axis('off')
- ax2.barh(np.arange(10), ps)
- ax2.set_aspect(0.1)
- ax2.set_yticks(np.arange(10))
- ax2.set_yticklabels(np.arange(10))
- ax2.set_title('Class Probability')
- ax2.set_xlim(0, 1.1)
- plt.tight_layout()
# load nn model
-model = torch.load('digit_reco_model2.pt')
+input_size = 784 # = 28*28
+hidden_sizes = [128, 128, 64]
+output_size = 10
+model = nn.Sequential(nn.Linear(input_size, hidden_sizes[0]),
+ nn.ReLU(),
+ nn.Linear(hidden_sizes[0], hidden_sizes[1]),
+ nn.ReLU(),
+ nn.Linear(hidden_sizes[1], hidden_sizes[2]),
+ nn.ReLU(),
+ nn.Linear(hidden_sizes[2], output_size),
+ nn.LogSoftmax(dim=-1))
+model.load_state_dict(torch.load('digit_reco_model2.pt'))
+model.eval()
+# model = torch.load('digit_reco_model2.pt')
if model is None:
print("Model is not loaded.")
else:
print("Model is loaded.")
-# image
-img = cv2.cvtColor(cv2.imread('test3.png'), cv2.COLOR_BGR2GRAY)
-img = cv2.blur(img, (9, 9)) # poprawia jakosc
-img = cv2.resize(img, (28, 28), interpolation=cv2.INTER_AREA)
-img = img.reshape((len(img), -1))
-print(type(img))
-# print(img.shape)
-# plt.imshow(img ,cmap='binary')
-# plt.show()
-img = np.array(img, dtype=np.float32)
-img = torch.from_numpy(img)
-img = img.view(1, 784)
+# img from dataset
+val_set = datasets.MNIST('PATH_TO_STORE_TESTSET', download=True, train=False, transform=transform)
+
+val_loader = torch.utils.data.DataLoader(val_set, batch_size=64, shuffle=True)
+
+images, labels = next(iter(val_loader))
+print(type(images))
+img = images[0].view(1, 784)
+plt.imshow(images[0].numpy().squeeze(), cmap='gray_r')
+plt.show()
+
# recognizing
@@ -54,5 +56,3 @@ with torch.no_grad():
ps = torch.exp(logps)
probab = list(ps.numpy()[0])
print("Predicted Digit =", probab.index(max(probab)))
-
-view_classify(img.view(1, 28, 28), ps)
diff --git a/coder/digit_reco_model.pt b/coder/digit_reco_model.pt
index 391642b..5232e72 100644
Binary files a/coder/digit_reco_model.pt and b/coder/digit_reco_model.pt differ
diff --git a/coder/digit_reco_model2.pt b/coder/digit_reco_model2.pt
index f8f60db..56953d6 100644
Binary files a/coder/digit_reco_model2.pt and b/coder/digit_reco_model2.pt differ
diff --git a/coder/digits_recognizer.py b/coder/digits_recognizer.py
index 0788313..9f2fe0d 100644
--- a/coder/digits_recognizer.py
+++ b/coder/digits_recognizer.py
@@ -5,6 +5,7 @@ import matplotlib.pyplot as plt
from time import time
from torchvision import datasets, transforms
from torch import nn, optim
+import cv2
# IMG transform
transform = transforms.Compose([transforms.ToTensor(),
@@ -17,15 +18,6 @@ val_set = datasets.MNIST('PATH_TO_STORE_TESTSET', download=True, train=False, tr
train_loader = torch.utils.data.DataLoader(train_set, batch_size=64, shuffle=True)
val_loader = torch.utils.data.DataLoader(val_set, batch_size=64, shuffle=True)
-data_iter = iter(train_loader)
-images, labels = data_iter.next()
-
-print(images.shape)
-print(labels.shape)
-
-plt.imshow(images[0].numpy().squeeze(), cmap='gray_r')
-plt.show()
-
# building nn model
input_size = 784 # = 28*28
hidden_sizes = [128, 128, 64]
@@ -48,15 +40,12 @@ images = images.view(images.shape[0], -1)
logps = model(images) # log probabilities
loss = criterion(logps, labels) # calculate the NLL loss
-# print('Before backward pass: \n', model[0].weight.grad)
-loss.backward()
-# print('After backward pass: \n', model[0].weight.grad)
# training
optimizer = optim.SGD(model.parameters(), lr=0.003, momentum=0.9)
time0 = time()
-epochs = 100
+epochs = 1
for e in range(epochs):
running_loss = 0
for images, labels in train_loader:
@@ -84,7 +73,6 @@ print("\nTraining Time (in minutes) =", (time() - time0) / 60)
# testing
images, labels = next(iter(val_loader))
-print(type(images))
img = images[0].view(1, 784)
with torch.no_grad():
logps = model(img)
@@ -115,5 +103,5 @@ print("\nModel Accuracy =", (correct_count / all_count))
# saving model
-# torch.save(model, './digit_reco_model.pt')
-torch.save(model, './digit_reco_model2.pt')
\ No newline at end of file
+# torch.save(model.state_dict(), './digit_reco_model.pt')
+# torch.save(model.state_dict(), './digit_reco_model2.pt')
\ No newline at end of file
diff --git a/coder/image.py b/coder/image.py
index e4c651e..86e0068 100644
--- a/coder/image.py
+++ b/coder/image.py
@@ -6,6 +6,7 @@ from sklearn.metrics import accuracy_score
from sklearn.neural_network import MLPClassifier
import pandas as pd
import cv2
+import keras
# 28x28
train_data = np.genfromtxt('dataset/train.csv', delimiter=',', skip_header=1, max_rows=20000, encoding='utf-8')
diff --git a/coder/ll.png b/coder/ll.png
new file mode 100644
index 0000000..8ba51ad
Binary files /dev/null and b/coder/ll.png differ
diff --git a/coder/rocognizer.py b/coder/rocognizer.py
index 34905f6..455c312 100644
--- a/coder/rocognizer.py
+++ b/coder/rocognizer.py
@@ -3,35 +3,34 @@ import argparse
import imutils
import cv2
import matplotlib.pyplot as plt
+import torch
+from PIL import Image
-img = cv2.cvtColor(cv2.imread('barcode.jpg'), cv2.COLOR_BGR2GRAY)
+path = "test1.jpg"
-ddepth = cv2.cv.CV_32F if imutils.is_cv2() else cv2.CV_32F
-X = cv2.Sobel(img, ddepth=ddepth, dx=1, dy=0, ksize=-1)
-Y = cv2.Sobel(img, ddepth=ddepth, dx=0, dy=1, ksize=-1)
+img = cv2.imread(path)
-gradient = cv2.subtract(X, Y)
-gradient = cv2.convertScaleAbs(gradient)
+img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
+img_gray = cv2.GaussianBlur(img_gray, (5, 5), 0)
-blurred = cv2.blur(gradient, (9, 9))
-(_, thresh) = cv2.threshold(blurred, 225, 255, cv2.THRESH_BINARY)
+ret, im_th = cv2.threshold(img_gray, 90, 255, cv2.THRESH_BINARY_INV)
-kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (21, 7))
-closed = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel)
-closed = cv2.erode(closed, None, iterations=4)
-closed = cv2.dilate(closed, None, iterations=4)
+ctrs, hier = cv2.findContours(im_th.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
-cnts = cv2.findContours(closed.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
-cnts = imutils.grab_contours(cnts)
-c = sorted(cnts, key=cv2.contourArea, reverse=True)[0]
+rects = [cv2.boundingRect(ctr) for ctr in ctrs]
-rect = cv2.minAreaRect(c)
-box = cv2.cv.BoxPoints(rect) if imutils.is_cv2() else cv2.boxPoints(rect)
-box = np.int0(box)
+for rect in rects:
+ # Draw the rectangles
+ cv2.rectangle(img, (rect[0], rect[1]), (rect[0] + rect[2], rect[1] + rect[3]), (0, 255, 0), 3)
+ # Make the rectangular region around the digit
+ leng = int(rect[3] * 1.6)
+ pt1 = int(rect[1] + rect[3] // 2 - leng // 2)
+ pt2 = int(rect[0] + rect[2] // 2 - leng // 2)
+ roi = im_th[pt1:pt1+leng, pt2:pt2+leng]
+ # Resize the image
+ roi = cv2.resize(roi, (28, 28), interpolation=cv2.INTER_AREA)
+ roi = cv2.dilate(roi, (3, 3))
+ # Calculate the HOG features
-cv2.drawContours(img, [box], -1, (0, 255, 0), 3)
-cv2.imshow("Image", img)
-cv2.waitKey(0)
-
-plt.imshow(closed ,cmap='binary')
-plt.show()
\ No newline at end of file
+cv2.imshow("Resulting Image with Rectangular ROIs", img)
+cv2.waitKey()
\ No newline at end of file
diff --git a/coder/testno.png b/coder/testno.png
new file mode 100644
index 0000000..e20bf76
Binary files /dev/null and b/coder/testno.png differ