@ -4,47 +4,49 @@ import torchvision
import matplotlib.pyplot as plt
from time import time
from torchvision import datasets, transforms
from torch import nn, optim, nn, optim
from torch import nn, optim
import cv2
def view_classify(img, ps):
''' Function for viewing an image and its predicted classes.
ps = ps.data.numpy().squeeze()
transform = transforms.Compose([transforms.ToTensor(),
transforms.Normalize((0.5,), (0.5,)),
fig, (ax1, ax2) = plt.subplots(figsize=(6,9), ncols=2)
ax1.imshow(img.resize_(1, 28, 28).numpy().squeeze())
ax2.barh(np.arange(10), ps)
ax2.set_title('Class Probability')
ax2.set_xlim(0, 1.1)
# load nn model
model = torch.load('digit_reco_model2.pt')
input_size = 784 # = 28*28
hidden_sizes = [128, 128, 64]
output_size = 10
model = nn.Sequential(nn.Linear(input_size, hidden_sizes[0]),
nn.Linear(hidden_sizes[0], hidden_sizes[1]),
nn.Linear(hidden_sizes[1], hidden_sizes[2]),
nn.Linear(hidden_sizes[2], output_size),
# model = torch.load('digit_reco_model2.pt')
if model is None:
print("Model is not loaded.")
print("Model is loaded.")
# image
# Read the test image from disk and convert it from BGR to one grayscale channel.
img = cv2.cvtColor(cv2.imread('test3.png'), cv2.COLOR_BGR2GRAY)
img = cv2.blur(img, (9, 9)) # improves quality
# Shrink to 28x28 so the image can later be flattened to 784 values.
img = cv2.resize(img, (28, 28), interpolation=cv2.INTER_AREA)
# Keep the row count and fold every remaining axis into the second dimension.
img = img.reshape((len(img), -1))
# print(img.shape)
# plt.imshow(img ,cmap='binary')
# plt.show()
# Convert to a float32 tensor and lay it out as a single row of 784 values.
img = np.array(img, dtype=np.float32)
img = torch.from_numpy(img)
img = img.view(1, 784)
# NOTE(review): no ToTensor/Normalize step is applied on this path, so the pixel
# values presumably keep the range OpenCV produced - confirm this matches what
# the model saw during training.
# img from dataset
# MNIST split selected by train=False, fetched on demand (download=True) and
# passed through `transform` sample by sample.
val_set = datasets.MNIST('PATH_TO_STORE_TESTSET', download=True, train=False, transform=transform)
# Serve that split in shuffled batches of 64.
val_loader = torch.utils.data.DataLoader(val_set, batch_size=64, shuffle=True)
# Take one batch and flatten its first image into a single row of 784 values.
images, labels = next(iter(val_loader))
img = images[0].view(1, 784)
# Display the same sample with the reversed-gray colormap.
plt.imshow(images[0].numpy().squeeze(), cmap='gray_r')
# recognizing
@ -54,5 +56,3 @@ with torch.no_grad():
# Exponentiate the model output to get per-class scores
# (presumably `logps` holds log-probabilities - confirm against the model's last layer).
ps = torch.exp(logps)
# First row as a plain list; the position of its largest entry is the predicted digit.
probab = list(ps.numpy()[0])
print("Predicted Digit =", probab.index(max(probab)))
# Hand the input, viewed as 1x28x28, and the scores to the plotting helper.
view_classify(img.view(1, 28, 28), ps)

View File

@ -5,6 +5,7 @@ import matplotlib.pyplot as plt
from time import time
from torchvision import datasets, transforms
from torch import nn, optim
import cv2
# IMG transform
transform = transforms.Compose([transforms.ToTensor(),
@ -17,15 +18,6 @@ val_set = datasets.MNIST('PATH_TO_STORE_TESTSET', download=True, train=False, tr
# Wrap both datasets in loaders that yield shuffled batches of 64 samples.
train_loader = torch.utils.data.DataLoader(train_set, batch_size=64, shuffle=True)
val_loader = torch.utils.data.DataLoader(val_set, batch_size=64, shuffle=True)
# Pull a single training batch for a quick visual check.
# The built-in next() is used because the iterator's own .next() method is not
# available in recent PyTorch releases, while next() works on every version.
data_iter = iter(train_loader)
images, labels = next(data_iter)
# Show the first image of the batch with the reversed-gray colormap.
plt.imshow(images[0].numpy().squeeze(), cmap='gray_r')
# building nn model
input_size = 784 # = 28*28
hidden_sizes = [128, 128, 64]
@ -48,15 +40,12 @@ images = images.view(images.shape[0], -1)
# One forward pass over `images`, then score the output against `labels`.
logps = model(images) # log probabilities
loss = criterion(logps, labels) # calculate the NLL loss
# Disabled debug prints of the first layer's weight gradient.
# print('Before backward pass: \n', model[0].weight.grad)
# print('After backward pass: \n', model[0].weight.grad)
# training
optimizer = optim.SGD(model.parameters(), lr=0.003, momentum=0.9)
time0 = time()
epochs = 100
epochs = 1
for e in range(epochs):
running_loss = 0
for images, labels in train_loader:
@ -84,7 +73,6 @@ print("\nTraining Time (in minutes) =", (time() - time0) / 60)
# testing
images, labels = next(iter(val_loader))
img = images[0].view(1, 784)
with torch.no_grad():
logps = model(img)
@ -115,5 +103,5 @@ print("\nModel Accuracy =", (correct_count / all_count))
# saving model
# torch.save(model, './digit_reco_model.pt')
# Serialize the whole model object to disk; the commented-out calls below would
# store only its state_dict instead.
torch.save(model, './digit_reco_model2.pt')
# torch.save(model.state_dict(), './digit_reco_model.pt')
# torch.save(model.state_dict(), './digit_reco_model2.pt')

@ -6,6 +6,7 @@ from sklearn.metrics import accuracy_score
from sklearn.neural_network import MLPClassifier
import pandas as pd
import cv2
import keras
# 28x28
View File

@ -3,35 +3,34 @@ import argparse
import imutils
import cv2
import matplotlib.pyplot as plt
import torch
from PIL import Image
# NOTE(review): two pipelines appear interleaved in this span - a gradient and
# morphology pass that ends in one rotated box, and a threshold pass that ends in
# one bounding rectangle per contour. `img` is rebound midway, so the statement
# order below matters.
# Load 'barcode.jpg' and convert it to grayscale.
img = cv2.cvtColor(cv2.imread('barcode.jpg'), cv2.COLOR_BGR2GRAY)
path = "test1.jpg"
# Pick the 32-bit float depth constant for whichever OpenCV API imutils reports.
ddepth = cv2.cv.CV_32F if imutils.is_cv2() else cv2.CV_32F
# Sobel derivatives along x (dx=1) and along y (dy=1) of the grayscale image.
X = cv2.Sobel(img, ddepth=ddepth, dx=1, dy=0, ksize=-1)
Y = cv2.Sobel(img, ddepth=ddepth, dx=0, dy=1, ksize=-1)
# Rebind `img` to the image stored at `path`.
img = cv2.imread(path)
# Subtract the y response from the x response, then take scaled absolute values.
gradient = cv2.subtract(X, Y)
gradient = cv2.convertScaleAbs(gradient)
# Grayscale copy of the reloaded image, smoothed with a 5x5 Gaussian.
img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
img_gray = cv2.GaussianBlur(img_gray, (5, 5), 0)
# Box-blur the gradient with a 9x9 window and binarize it at 225.
blurred = cv2.blur(gradient, (9, 9))
(_, thresh) = cv2.threshold(blurred, 225, 255, cv2.THRESH_BINARY)
# Inverse binary threshold of the smoothed grayscale image at 90.
ret, im_th = cv2.threshold(img_gray, 90, 255, cv2.THRESH_BINARY_INV)
# Morphological closing with a 21x7 rectangular kernel, then four erosions
# followed by four dilations.
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (21, 7))
closed = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel)
closed = cv2.erode(closed, None, iterations=4)
closed = cv2.dilate(closed, None, iterations=4)
# External contours of the inverse-threshold image.
# NOTE(review): this unpacks exactly two return values, while the call below goes
# through imutils.grab_contours - confirm the OpenCV version in use.
ctrs, hier = cv2.findContours(im_th.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
# External contours of the closed mask; keep the one with the largest area.
cnts = cv2.findContours(closed.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = imutils.grab_contours(cnts)
c = sorted(cnts, key=cv2.contourArea, reverse=True)[0]
# One bounding rectangle per contour found in `im_th`.
rects = [cv2.boundingRect(ctr) for ctr in ctrs]
# Minimum-area rectangle around the largest contour, as integer corner points.
rect = cv2.minAreaRect(c)
box = cv2.cv.BoxPoints(rect) if imutils.is_cv2() else cv2.boxPoints(rect)
# NOTE(review): confirm `np.int0` exists in the NumPy version in use.
box = np.int0(box)
for rect in rects:
    # Each entry comes from cv2.boundingRect: (x, y, width, height).
    x, y, w, h = rect
    # Draw the rectangles
    cv2.rectangle(img, (x, y), (x + w, y + h), (0, 255, 0), 3)
    # Make the rectangular region around the digit: a square whose side is
    # 1.6x the box height, centred on the bounding box.
    leng = int(h * 1.6)
    pt1 = int(y + h // 2 - leng // 2)
    pt2 = int(x + w // 2 - leng // 2)
    # A box near the top or left edge gives a negative start index, which NumPy
    # would interpret as counting from the far end of the axis. Clamp the start
    # to 0 so the crop is the part of the square that lies inside the image.
    roi = im_th[max(pt1, 0):pt1 + leng, max(pt2, 0):pt2 + leng]
    if roi.size == 0:
        # Nothing of the square overlaps the image; resizing would fail.
        continue
    # Resize the image
    roi = cv2.resize(roi, (28, 28), interpolation=cv2.INTER_AREA)
    roi = cv2.dilate(roi, (3, 3))
    # Calculate the HOG features
# Outline `box` on `img` with colour (0, 255, 0) and thickness 3, then show it.
cv2.drawContours(img, [box], -1, (0, 255, 0), 3)
cv2.imshow("Image", img)
# Render the `closed` mask through matplotlib as well.
plt.imshow(closed ,cmap='binary')
# Second OpenCV window showing the same `img`.
cv2.imshow("Resulting Image with Rectangular ROIs", img)
# NOTE(review): no cv2.waitKey() or plt.show() call is visible in this span -
# confirm the windows are kept open further down.

