detecting digits

This commit is contained in:
shaaqu 2020-05-30 15:52:48 +02:00
parent 38d24273c5
commit a12cde0aa7
10 changed files with 98 additions and 88 deletions

View File

@ -19,12 +19,17 @@
<select />
</component>
<component name="ChangeListManager">
<list default="true" id="828778c9-9d97-422f-a727-18ddbd059b85" name="Default Changelist" comment="going to pytorch on conda eve">
<change afterPath="$PROJECT_DIR$/coder/coder.py" afterDir="false" />
<change afterPath="$PROJECT_DIR$/coder/digit_reco_model.pt" afterDir="false" />
<list default="true" id="828778c9-9d97-422f-a727-18ddbd059b85" name="Default Changelist" comment="add coder.py">
<change afterPath="$PROJECT_DIR$/coder/12345.png" afterDir="false" />
<change afterPath="$PROJECT_DIR$/coder/ll.png" afterDir="false" />
<change afterPath="$PROJECT_DIR$/coder/testno.png" afterDir="false" />
<change beforePath="$PROJECT_DIR$/.idea/workspace.xml" beforeDir="false" afterPath="$PROJECT_DIR$/.idea/workspace.xml" afterDir="false" />
<change beforePath="$PROJECT_DIR$/coder/coder.py" beforeDir="false" afterPath="$PROJECT_DIR$/coder/coder.py" afterDir="false" />
<change beforePath="$PROJECT_DIR$/coder/digit_reco_model.pt" beforeDir="false" afterPath="$PROJECT_DIR$/coder/digit_reco_model.pt" afterDir="false" />
<change beforePath="$PROJECT_DIR$/coder/digit_reco_model2.pt" beforeDir="false" afterPath="$PROJECT_DIR$/coder/digit_reco_model2.pt" afterDir="false" />
<change beforePath="$PROJECT_DIR$/coder/digits_recognizer.py" beforeDir="false" afterPath="$PROJECT_DIR$/coder/digits_recognizer.py" afterDir="false" />
<change beforePath="$PROJECT_DIR$/coder/gr_test.png" beforeDir="false" />
<change beforePath="$PROJECT_DIR$/coder/image.py" beforeDir="false" afterPath="$PROJECT_DIR$/coder/image.py" afterDir="false" />
<change beforePath="$PROJECT_DIR$/coder/rocognizer.py" beforeDir="false" afterPath="$PROJECT_DIR$/coder/rocognizer.py" afterDir="false" />
</list>
<option name="SHOW_DIALOG" value="false" />
<option name="HIGHLIGHT_CONFLICTS" value="true" />
@ -73,17 +78,17 @@
<property name="RunOnceActivity.ShowReadmeOnStart" value="true" />
<property name="SHARE_PROJECT_CONFIGURATION_FILES" value="true" />
<property name="WebServerToolWindowFactoryState" value="false" />
<property name="last_opened_file_path" value="$PROJECT_DIR$/coder/dataset" />
<property name="last_opened_file_path" value="$PROJECT_DIR$/coder" />
<property name="restartRequiresConfirmation" value="false" />
<property name="settings.editor.selected.configurable" value="com.jetbrains.python.configuration.PyActiveSdkModuleConfigurable" />
</component>
<component name="RecentsManager">
<key name="CopyFile.RECENT_KEYS">
<recent name="C:\Users\Pawel Lukaszewicz\PycharmProjects\AL-2020\coder\dataset" />
<recent name="C:\Users\Pawel Lukaszewicz\PycharmProjects\AL-2020\coder" />
<recent name="C:\Users\Pawel Lukaszewicz\PycharmProjects\AL-2020\coder\dataset" />
</key>
</component>
<component name="RunManager" selected="Python.digits_recognizer">
<component name="RunManager" selected="Python.rocognizer">
<configuration default="true" type="PythonConfigurationType" factoryName="Python">
<module name="wozek" />
<option name="INTERPRETER_OPTIONS" value="" />
@ -110,6 +115,9 @@
<module name="wozek" />
<option name="INTERPRETER_OPTIONS" value="" />
<option name="PARENT_ENVS" value="true" />
<envs>
<env name="PYTHONUNBUFFERED" value="1" />
</envs>
<option name="SDK_HOME" value="" />
<option name="WORKING_DIRECTORY" value="$PROJECT_DIR$/coder" />
<option name="IS_MODULE_SDK" value="true" />
@ -129,6 +137,9 @@
<module name="wozek" />
<option name="INTERPRETER_OPTIONS" value="" />
<option name="PARENT_ENVS" value="true" />
<envs>
<env name="PYTHONUNBUFFERED" value="1" />
</envs>
<option name="SDK_HOME" value="" />
<option name="WORKING_DIRECTORY" value="$PROJECT_DIR$/coder" />
<option name="IS_MODULE_SDK" value="true" />
@ -185,7 +196,7 @@
<option name="INPUT_FILE" value="" />
<method v="2" />
</configuration>
<configuration name="train_nn" type="PythonConfigurationType" factoryName="Python" temporary="true" nameIsGenerated="true">
<configuration name="z8" type="PythonConfigurationType" factoryName="Python" temporary="true" nameIsGenerated="true">
<module name="wozek" />
<option name="INTERPRETER_OPTIONS" value="" />
<option name="PARENT_ENVS" value="true" />
@ -198,7 +209,7 @@
<option name="ADD_CONTENT_ROOTS" value="true" />
<option name="ADD_SOURCE_ROOTS" value="true" />
<EXTENSION ID="PythonCoverageRunConfigurationExtension" runner="coverage.py" />
<option name="SCRIPT_NAME" value="C:\Users\Pawel Lukaszewicz\PycharmProjects\AL-2020\coder\train_nn.py" />
<option name="SCRIPT_NAME" value="$PROJECT_DIR$/coder/z8.py" />
<option name="PARAMETERS" value="" />
<option name="SHOW_COMMAND_LINE" value="true" />
<option name="EMULATE_TERMINAL" value="false" />
@ -209,18 +220,18 @@
</configuration>
<list>
<item itemvalue="Python.image" />
<item itemvalue="Python.train_nn" />
<item itemvalue="Python.rocognizer" />
<item itemvalue="Python.digits_recognizer" />
<item itemvalue="Python.coder" />
<item itemvalue="Python.z8" />
</list>
<recent_temporary>
<list>
<item itemvalue="Python.digits_recognizer" />
<item itemvalue="Python.coder" />
<item itemvalue="Python.rocognizer" />
<item itemvalue="Python.coder" />
<item itemvalue="Python.digits_recognizer" />
<item itemvalue="Python.z8" />
<item itemvalue="Python.image" />
<item itemvalue="Python.train_nn" />
</list>
</recent_temporary>
</component>
@ -258,7 +269,10 @@
<workItem from="1590436739719" duration="6325000" />
<workItem from="1590443664804" duration="2943000" />
<workItem from="1590497613517" duration="6041000" />
<workItem from="1590518246722" duration="12460000" />
<workItem from="1590518246722" duration="12616000" />
<workItem from="1590559069326" duration="13892000" />
<workItem from="1590575699320" duration="792000" />
<workItem from="1590766924835" duration="4315000" />
</task>
<task id="LOCAL-00001" summary="create Shelf">
<created>1589815443652</created>
@ -344,7 +358,14 @@
<option name="project" value="LOCAL" />
<updated>1590447313737</updated>
</task>
<option name="localTasksCounter" value="13" />
<task id="LOCAL-00013" summary="add coder.py">
<created>1590538529471</created>
<option name="number" value="00013" />
<option name="presentableId" value="LOCAL-00013" />
<option name="project" value="LOCAL" />
<updated>1590538529471</updated>
</task>
<option name="localTasksCounter" value="14" />
<servers />
</component>
<component name="TypeScriptGeneratedFilesManager">
@ -376,7 +397,8 @@
<MESSAGE value="po" />
<MESSAGE value="new dataset" />
<MESSAGE value="going to pytorch on conda eve" />
<option name="LAST_COMMIT_MESSAGE" value="going to pytorch on conda eve" />
<MESSAGE value="add coder.py" />
<option name="LAST_COMMIT_MESSAGE" value="add coder.py" />
</component>
<component name="WindowStateProjectService">
<state x="115" y="162" key="#com.intellij.refactoring.safeDelete.UnsafeUsagesDialog" timestamp="1589923610328">
@ -403,10 +425,10 @@
<screen x="0" y="0" width="1536" height="824" />
</state>
<state x="277" y="57" key="SettingsEditor/0.0.1536.824@0.0.1536.824" timestamp="1590443566792" />
<state x="361" y="145" key="Vcs.Push.Dialog.v2" timestamp="1590447321698">
<state x="361" y="145" key="Vcs.Push.Dialog.v2" timestamp="1590538563335">
<screen x="0" y="0" width="1536" height="824" />
</state>
<state x="361" y="145" key="Vcs.Push.Dialog.v2/0.0.1536.824@0.0.1536.824" timestamp="1590447321698" />
<state x="361" y="145" key="Vcs.Push.Dialog.v2/0.0.1536.824@0.0.1536.824" timestamp="1590538563335" />
<state x="54" y="145" width="672" height="678" key="search.everywhere.popup" timestamp="1589918982407">
<screen x="0" y="0" width="1536" height="824" />
</state>

BIN
coder/12345.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 10 KiB

View File

@ -4,47 +4,49 @@ import torchvision
import matplotlib.pyplot as plt
from time import time
from torchvision import datasets, transforms
from torch import nn, optim, nn, optim
from torch import nn, optim
import cv2
def view_classify(img, ps):
''' Function for viewing an image and it's predicted classes.
'''
ps = ps.data.numpy().squeeze()
transform = transforms.Compose([transforms.ToTensor(),
transforms.Normalize((0.5,), (0.5,)),
])
fig, (ax1, ax2) = plt.subplots(figsize=(6,9), ncols=2)
ax1.imshow(img.resize_(1, 28, 28).numpy().squeeze())
ax1.axis('off')
ax2.barh(np.arange(10), ps)
ax2.set_aspect(0.1)
ax2.set_yticks(np.arange(10))
ax2.set_yticklabels(np.arange(10))
ax2.set_title('Class Probability')
ax2.set_xlim(0, 1.1)
plt.tight_layout()
# load nn model
model = torch.load('digit_reco_model2.pt')
input_size = 784 # = 28*28
hidden_sizes = [128, 128, 64]
output_size = 10
model = nn.Sequential(nn.Linear(input_size, hidden_sizes[0]),
nn.ReLU(),
nn.Linear(hidden_sizes[0], hidden_sizes[1]),
nn.ReLU(),
nn.Linear(hidden_sizes[1], hidden_sizes[2]),
nn.ReLU(),
nn.Linear(hidden_sizes[2], output_size),
nn.LogSoftmax(dim=-1))
model.load_state_dict(torch.load('digit_reco_model2.pt'))
model.eval()
# model = torch.load('digit_reco_model2.pt')
if model is None:
print("Model is not loaded.")
else:
print("Model is loaded.")
# image
img = cv2.cvtColor(cv2.imread('test3.png'), cv2.COLOR_BGR2GRAY)
img = cv2.blur(img, (9, 9)) # poprawia jakosc
img = cv2.resize(img, (28, 28), interpolation=cv2.INTER_AREA)
img = img.reshape((len(img), -1))
print(type(img))
# print(img.shape)
# plt.imshow(img ,cmap='binary')
# plt.show()
img = np.array(img, dtype=np.float32)
img = torch.from_numpy(img)
img = img.view(1, 784)
# img from dataset
val_set = datasets.MNIST('PATH_TO_STORE_TESTSET', download=True, train=False, transform=transform)
val_loader = torch.utils.data.DataLoader(val_set, batch_size=64, shuffle=True)
images, labels = next(iter(val_loader))
print(type(images))
img = images[0].view(1, 784)
plt.imshow(images[0].numpy().squeeze(), cmap='gray_r')
plt.show()
# recognizing
@ -54,5 +56,3 @@ with torch.no_grad():
ps = torch.exp(logps)
probab = list(ps.numpy()[0])
print("Predicted Digit =", probab.index(max(probab)))
view_classify(img.view(1, 28, 28), ps)

Binary file not shown.

Binary file not shown.

View File

@ -5,6 +5,7 @@ import matplotlib.pyplot as plt
from time import time
from torchvision import datasets, transforms
from torch import nn, optim
import cv2
# IMG transform
transform = transforms.Compose([transforms.ToTensor(),
@ -17,15 +18,6 @@ val_set = datasets.MNIST('PATH_TO_STORE_TESTSET', download=True, train=False, tr
train_loader = torch.utils.data.DataLoader(train_set, batch_size=64, shuffle=True)
val_loader = torch.utils.data.DataLoader(val_set, batch_size=64, shuffle=True)
data_iter = iter(train_loader)
images, labels = data_iter.next()
print(images.shape)
print(labels.shape)
plt.imshow(images[0].numpy().squeeze(), cmap='gray_r')
plt.show()
# building nn model
input_size = 784 # = 28*28
hidden_sizes = [128, 128, 64]
@ -48,15 +40,12 @@ images = images.view(images.shape[0], -1)
logps = model(images) # log probabilities
loss = criterion(logps, labels) # calculate the NLL loss
# print('Before backward pass: \n', model[0].weight.grad)
loss.backward()
# print('After backward pass: \n', model[0].weight.grad)
# training
optimizer = optim.SGD(model.parameters(), lr=0.003, momentum=0.9)
time0 = time()
epochs = 100
epochs = 1
for e in range(epochs):
running_loss = 0
for images, labels in train_loader:
@ -84,7 +73,6 @@ print("\nTraining Time (in minutes) =", (time() - time0) / 60)
# testing
images, labels = next(iter(val_loader))
print(type(images))
img = images[0].view(1, 784)
with torch.no_grad():
logps = model(img)
@ -115,5 +103,5 @@ print("\nModel Accuracy =", (correct_count / all_count))
# saving model
# torch.save(model, './digit_reco_model.pt')
torch.save(model, './digit_reco_model2.pt')
# torch.save(model.state_dict(), './digit_reco_model.pt')
# torch.save(model.state_dict(), './digit_reco_model2.pt')

View File

@ -6,6 +6,7 @@ from sklearn.metrics import accuracy_score
from sklearn.neural_network import MLPClassifier
import pandas as pd
import cv2
import keras
# 28x28
train_data = np.genfromtxt('dataset/train.csv', delimiter=',', skip_header=1, max_rows=20000, encoding='utf-8')

BIN
coder/ll.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 14 KiB

View File

@ -3,35 +3,34 @@ import argparse
import imutils
import cv2
import matplotlib.pyplot as plt
import torch
from PIL import Image
img = cv2.cvtColor(cv2.imread('barcode.jpg'), cv2.COLOR_BGR2GRAY)
path = "test1.jpg"
ddepth = cv2.cv.CV_32F if imutils.is_cv2() else cv2.CV_32F
X = cv2.Sobel(img, ddepth=ddepth, dx=1, dy=0, ksize=-1)
Y = cv2.Sobel(img, ddepth=ddepth, dx=0, dy=1, ksize=-1)
img = cv2.imread(path)
gradient = cv2.subtract(X, Y)
gradient = cv2.convertScaleAbs(gradient)
img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
img_gray = cv2.GaussianBlur(img_gray, (5, 5), 0)
blurred = cv2.blur(gradient, (9, 9))
(_, thresh) = cv2.threshold(blurred, 225, 255, cv2.THRESH_BINARY)
ret, im_th = cv2.threshold(img_gray, 90, 255, cv2.THRESH_BINARY_INV)
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (21, 7))
closed = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel)
closed = cv2.erode(closed, None, iterations=4)
closed = cv2.dilate(closed, None, iterations=4)
ctrs, hier = cv2.findContours(im_th.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cv2.findContours(closed.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = imutils.grab_contours(cnts)
c = sorted(cnts, key=cv2.contourArea, reverse=True)[0]
rects = [cv2.boundingRect(ctr) for ctr in ctrs]
rect = cv2.minAreaRect(c)
box = cv2.cv.BoxPoints(rect) if imutils.is_cv2() else cv2.boxPoints(rect)
box = np.int0(box)
for rect in rects:
# Draw the rectangles
cv2.rectangle(img, (rect[0], rect[1]), (rect[0] + rect[2], rect[1] + rect[3]), (0, 255, 0), 3)
# Make the rectangular region around the digit
leng = int(rect[3] * 1.6)
pt1 = int(rect[1] + rect[3] // 2 - leng // 2)
pt2 = int(rect[0] + rect[2] // 2 - leng // 2)
roi = im_th[pt1:pt1+leng, pt2:pt2+leng]
# Resize the image
roi = cv2.resize(roi, (28, 28), interpolation=cv2.INTER_AREA)
roi = cv2.dilate(roi, (3, 3))
# Calculate the HOG features
cv2.drawContours(img, [box], -1, (0, 255, 0), 3)
cv2.imshow("Image", img)
cv2.waitKey(0)
plt.imshow(closed ,cmap='binary')
plt.show()
cv2.imshow("Resulting Image with Rectangular ROIs", img)
cv2.waitKey()

BIN
coder/testno.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.7 KiB