detecting digits

This commit is contained in:
shaaqu 2020-05-30 15:52:48 +02:00
parent 38d24273c5
commit a12cde0aa7
10 changed files with 98 additions and 88 deletions

View File

@ -19,12 +19,17 @@
<select /> <select />
</component> </component>
<component name="ChangeListManager"> <component name="ChangeListManager">
<list default="true" id="828778c9-9d97-422f-a727-18ddbd059b85" name="Default Changelist" comment="going to pytorch on conda eve"> <list default="true" id="828778c9-9d97-422f-a727-18ddbd059b85" name="Default Changelist" comment="add coder.py">
<change afterPath="$PROJECT_DIR$/coder/coder.py" afterDir="false" /> <change afterPath="$PROJECT_DIR$/coder/12345.png" afterDir="false" />
<change afterPath="$PROJECT_DIR$/coder/digit_reco_model.pt" afterDir="false" /> <change afterPath="$PROJECT_DIR$/coder/ll.png" afterDir="false" />
<change afterPath="$PROJECT_DIR$/coder/testno.png" afterDir="false" />
<change beforePath="$PROJECT_DIR$/.idea/workspace.xml" beforeDir="false" afterPath="$PROJECT_DIR$/.idea/workspace.xml" afterDir="false" /> <change beforePath="$PROJECT_DIR$/.idea/workspace.xml" beforeDir="false" afterPath="$PROJECT_DIR$/.idea/workspace.xml" afterDir="false" />
<change beforePath="$PROJECT_DIR$/coder/coder.py" beforeDir="false" afterPath="$PROJECT_DIR$/coder/coder.py" afterDir="false" />
<change beforePath="$PROJECT_DIR$/coder/digit_reco_model.pt" beforeDir="false" afterPath="$PROJECT_DIR$/coder/digit_reco_model.pt" afterDir="false" />
<change beforePath="$PROJECT_DIR$/coder/digit_reco_model2.pt" beforeDir="false" afterPath="$PROJECT_DIR$/coder/digit_reco_model2.pt" afterDir="false" />
<change beforePath="$PROJECT_DIR$/coder/digits_recognizer.py" beforeDir="false" afterPath="$PROJECT_DIR$/coder/digits_recognizer.py" afterDir="false" /> <change beforePath="$PROJECT_DIR$/coder/digits_recognizer.py" beforeDir="false" afterPath="$PROJECT_DIR$/coder/digits_recognizer.py" afterDir="false" />
<change beforePath="$PROJECT_DIR$/coder/gr_test.png" beforeDir="false" /> <change beforePath="$PROJECT_DIR$/coder/image.py" beforeDir="false" afterPath="$PROJECT_DIR$/coder/image.py" afterDir="false" />
<change beforePath="$PROJECT_DIR$/coder/rocognizer.py" beforeDir="false" afterPath="$PROJECT_DIR$/coder/rocognizer.py" afterDir="false" />
</list> </list>
<option name="SHOW_DIALOG" value="false" /> <option name="SHOW_DIALOG" value="false" />
<option name="HIGHLIGHT_CONFLICTS" value="true" /> <option name="HIGHLIGHT_CONFLICTS" value="true" />
@ -73,17 +78,17 @@
<property name="RunOnceActivity.ShowReadmeOnStart" value="true" /> <property name="RunOnceActivity.ShowReadmeOnStart" value="true" />
<property name="SHARE_PROJECT_CONFIGURATION_FILES" value="true" /> <property name="SHARE_PROJECT_CONFIGURATION_FILES" value="true" />
<property name="WebServerToolWindowFactoryState" value="false" /> <property name="WebServerToolWindowFactoryState" value="false" />
<property name="last_opened_file_path" value="$PROJECT_DIR$/coder/dataset" /> <property name="last_opened_file_path" value="$PROJECT_DIR$/coder" />
<property name="restartRequiresConfirmation" value="false" /> <property name="restartRequiresConfirmation" value="false" />
<property name="settings.editor.selected.configurable" value="com.jetbrains.python.configuration.PyActiveSdkModuleConfigurable" /> <property name="settings.editor.selected.configurable" value="com.jetbrains.python.configuration.PyActiveSdkModuleConfigurable" />
</component> </component>
<component name="RecentsManager"> <component name="RecentsManager">
<key name="CopyFile.RECENT_KEYS"> <key name="CopyFile.RECENT_KEYS">
<recent name="C:\Users\Pawel Lukaszewicz\PycharmProjects\AL-2020\coder\dataset" />
<recent name="C:\Users\Pawel Lukaszewicz\PycharmProjects\AL-2020\coder" /> <recent name="C:\Users\Pawel Lukaszewicz\PycharmProjects\AL-2020\coder" />
<recent name="C:\Users\Pawel Lukaszewicz\PycharmProjects\AL-2020\coder\dataset" />
</key> </key>
</component> </component>
<component name="RunManager" selected="Python.digits_recognizer"> <component name="RunManager" selected="Python.rocognizer">
<configuration default="true" type="PythonConfigurationType" factoryName="Python"> <configuration default="true" type="PythonConfigurationType" factoryName="Python">
<module name="wozek" /> <module name="wozek" />
<option name="INTERPRETER_OPTIONS" value="" /> <option name="INTERPRETER_OPTIONS" value="" />
@ -110,6 +115,9 @@
<module name="wozek" /> <module name="wozek" />
<option name="INTERPRETER_OPTIONS" value="" /> <option name="INTERPRETER_OPTIONS" value="" />
<option name="PARENT_ENVS" value="true" /> <option name="PARENT_ENVS" value="true" />
<envs>
<env name="PYTHONUNBUFFERED" value="1" />
</envs>
<option name="SDK_HOME" value="" /> <option name="SDK_HOME" value="" />
<option name="WORKING_DIRECTORY" value="$PROJECT_DIR$/coder" /> <option name="WORKING_DIRECTORY" value="$PROJECT_DIR$/coder" />
<option name="IS_MODULE_SDK" value="true" /> <option name="IS_MODULE_SDK" value="true" />
@ -129,6 +137,9 @@
<module name="wozek" /> <module name="wozek" />
<option name="INTERPRETER_OPTIONS" value="" /> <option name="INTERPRETER_OPTIONS" value="" />
<option name="PARENT_ENVS" value="true" /> <option name="PARENT_ENVS" value="true" />
<envs>
<env name="PYTHONUNBUFFERED" value="1" />
</envs>
<option name="SDK_HOME" value="" /> <option name="SDK_HOME" value="" />
<option name="WORKING_DIRECTORY" value="$PROJECT_DIR$/coder" /> <option name="WORKING_DIRECTORY" value="$PROJECT_DIR$/coder" />
<option name="IS_MODULE_SDK" value="true" /> <option name="IS_MODULE_SDK" value="true" />
@ -185,7 +196,7 @@
<option name="INPUT_FILE" value="" /> <option name="INPUT_FILE" value="" />
<method v="2" /> <method v="2" />
</configuration> </configuration>
<configuration name="train_nn" type="PythonConfigurationType" factoryName="Python" temporary="true" nameIsGenerated="true"> <configuration name="z8" type="PythonConfigurationType" factoryName="Python" temporary="true" nameIsGenerated="true">
<module name="wozek" /> <module name="wozek" />
<option name="INTERPRETER_OPTIONS" value="" /> <option name="INTERPRETER_OPTIONS" value="" />
<option name="PARENT_ENVS" value="true" /> <option name="PARENT_ENVS" value="true" />
@ -198,7 +209,7 @@
<option name="ADD_CONTENT_ROOTS" value="true" /> <option name="ADD_CONTENT_ROOTS" value="true" />
<option name="ADD_SOURCE_ROOTS" value="true" /> <option name="ADD_SOURCE_ROOTS" value="true" />
<EXTENSION ID="PythonCoverageRunConfigurationExtension" runner="coverage.py" /> <EXTENSION ID="PythonCoverageRunConfigurationExtension" runner="coverage.py" />
<option name="SCRIPT_NAME" value="C:\Users\Pawel Lukaszewicz\PycharmProjects\AL-2020\coder\train_nn.py" /> <option name="SCRIPT_NAME" value="$PROJECT_DIR$/coder/z8.py" />
<option name="PARAMETERS" value="" /> <option name="PARAMETERS" value="" />
<option name="SHOW_COMMAND_LINE" value="true" /> <option name="SHOW_COMMAND_LINE" value="true" />
<option name="EMULATE_TERMINAL" value="false" /> <option name="EMULATE_TERMINAL" value="false" />
@ -209,18 +220,18 @@
</configuration> </configuration>
<list> <list>
<item itemvalue="Python.image" /> <item itemvalue="Python.image" />
<item itemvalue="Python.train_nn" />
<item itemvalue="Python.rocognizer" /> <item itemvalue="Python.rocognizer" />
<item itemvalue="Python.digits_recognizer" /> <item itemvalue="Python.digits_recognizer" />
<item itemvalue="Python.coder" /> <item itemvalue="Python.coder" />
<item itemvalue="Python.z8" />
</list> </list>
<recent_temporary> <recent_temporary>
<list> <list>
<item itemvalue="Python.digits_recognizer" />
<item itemvalue="Python.coder" />
<item itemvalue="Python.rocognizer" /> <item itemvalue="Python.rocognizer" />
<item itemvalue="Python.coder" />
<item itemvalue="Python.digits_recognizer" />
<item itemvalue="Python.z8" />
<item itemvalue="Python.image" /> <item itemvalue="Python.image" />
<item itemvalue="Python.train_nn" />
</list> </list>
</recent_temporary> </recent_temporary>
</component> </component>
@ -258,7 +269,10 @@
<workItem from="1590436739719" duration="6325000" /> <workItem from="1590436739719" duration="6325000" />
<workItem from="1590443664804" duration="2943000" /> <workItem from="1590443664804" duration="2943000" />
<workItem from="1590497613517" duration="6041000" /> <workItem from="1590497613517" duration="6041000" />
<workItem from="1590518246722" duration="12460000" /> <workItem from="1590518246722" duration="12616000" />
<workItem from="1590559069326" duration="13892000" />
<workItem from="1590575699320" duration="792000" />
<workItem from="1590766924835" duration="4315000" />
</task> </task>
<task id="LOCAL-00001" summary="create Shelf"> <task id="LOCAL-00001" summary="create Shelf">
<created>1589815443652</created> <created>1589815443652</created>
@ -344,7 +358,14 @@
<option name="project" value="LOCAL" /> <option name="project" value="LOCAL" />
<updated>1590447313737</updated> <updated>1590447313737</updated>
</task> </task>
<option name="localTasksCounter" value="13" /> <task id="LOCAL-00013" summary="add coder.py">
<created>1590538529471</created>
<option name="number" value="00013" />
<option name="presentableId" value="LOCAL-00013" />
<option name="project" value="LOCAL" />
<updated>1590538529471</updated>
</task>
<option name="localTasksCounter" value="14" />
<servers /> <servers />
</component> </component>
<component name="TypeScriptGeneratedFilesManager"> <component name="TypeScriptGeneratedFilesManager">
@ -376,7 +397,8 @@
<MESSAGE value="po" /> <MESSAGE value="po" />
<MESSAGE value="new dataset" /> <MESSAGE value="new dataset" />
<MESSAGE value="going to pytorch on conda eve" /> <MESSAGE value="going to pytorch on conda eve" />
<option name="LAST_COMMIT_MESSAGE" value="going to pytorch on conda eve" /> <MESSAGE value="add coder.py" />
<option name="LAST_COMMIT_MESSAGE" value="add coder.py" />
</component> </component>
<component name="WindowStateProjectService"> <component name="WindowStateProjectService">
<state x="115" y="162" key="#com.intellij.refactoring.safeDelete.UnsafeUsagesDialog" timestamp="1589923610328"> <state x="115" y="162" key="#com.intellij.refactoring.safeDelete.UnsafeUsagesDialog" timestamp="1589923610328">
@ -403,10 +425,10 @@
<screen x="0" y="0" width="1536" height="824" /> <screen x="0" y="0" width="1536" height="824" />
</state> </state>
<state x="277" y="57" key="SettingsEditor/0.0.1536.824@0.0.1536.824" timestamp="1590443566792" /> <state x="277" y="57" key="SettingsEditor/0.0.1536.824@0.0.1536.824" timestamp="1590443566792" />
<state x="361" y="145" key="Vcs.Push.Dialog.v2" timestamp="1590447321698"> <state x="361" y="145" key="Vcs.Push.Dialog.v2" timestamp="1590538563335">
<screen x="0" y="0" width="1536" height="824" /> <screen x="0" y="0" width="1536" height="824" />
</state> </state>
<state x="361" y="145" key="Vcs.Push.Dialog.v2/0.0.1536.824@0.0.1536.824" timestamp="1590447321698" /> <state x="361" y="145" key="Vcs.Push.Dialog.v2/0.0.1536.824@0.0.1536.824" timestamp="1590538563335" />
<state x="54" y="145" width="672" height="678" key="search.everywhere.popup" timestamp="1589918982407"> <state x="54" y="145" width="672" height="678" key="search.everywhere.popup" timestamp="1589918982407">
<screen x="0" y="0" width="1536" height="824" /> <screen x="0" y="0" width="1536" height="824" />
</state> </state>

BIN
coder/12345.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 10 KiB

View File

@ -4,47 +4,49 @@ import torchvision
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
from time import time from time import time
from torchvision import datasets, transforms from torchvision import datasets, transforms
from torch import nn, optim, nn, optim from torch import nn, optim
import cv2 import cv2
def view_classify(img, ps): transform = transforms.Compose([transforms.ToTensor(),
''' Function for viewing an image and it's predicted classes. transforms.Normalize((0.5,), (0.5,)),
''' ])
ps = ps.data.numpy().squeeze()
fig, (ax1, ax2) = plt.subplots(figsize=(6,9), ncols=2)
ax1.imshow(img.resize_(1, 28, 28).numpy().squeeze())
ax1.axis('off')
ax2.barh(np.arange(10), ps)
ax2.set_aspect(0.1)
ax2.set_yticks(np.arange(10))
ax2.set_yticklabels(np.arange(10))
ax2.set_title('Class Probability')
ax2.set_xlim(0, 1.1)
plt.tight_layout()
# load nn model # load nn model
model = torch.load('digit_reco_model2.pt') input_size = 784 # = 28*28
hidden_sizes = [128, 128, 64]
output_size = 10
model = nn.Sequential(nn.Linear(input_size, hidden_sizes[0]),
nn.ReLU(),
nn.Linear(hidden_sizes[0], hidden_sizes[1]),
nn.ReLU(),
nn.Linear(hidden_sizes[1], hidden_sizes[2]),
nn.ReLU(),
nn.Linear(hidden_sizes[2], output_size),
nn.LogSoftmax(dim=-1))
model.load_state_dict(torch.load('digit_reco_model2.pt'))
model.eval()
# model = torch.load('digit_reco_model2.pt')
if model is None: if model is None:
print("Model is not loaded.") print("Model is not loaded.")
else: else:
print("Model is loaded.") print("Model is loaded.")
# image
img = cv2.cvtColor(cv2.imread('test3.png'), cv2.COLOR_BGR2GRAY)
img = cv2.blur(img, (9, 9)) # poprawia jakosc
img = cv2.resize(img, (28, 28), interpolation=cv2.INTER_AREA)
img = img.reshape((len(img), -1))
print(type(img)) # img from dataset
# print(img.shape) val_set = datasets.MNIST('PATH_TO_STORE_TESTSET', download=True, train=False, transform=transform)
# plt.imshow(img ,cmap='binary')
# plt.show() val_loader = torch.utils.data.DataLoader(val_set, batch_size=64, shuffle=True)
img = np.array(img, dtype=np.float32)
img = torch.from_numpy(img) images, labels = next(iter(val_loader))
img = img.view(1, 784) print(type(images))
img = images[0].view(1, 784)
plt.imshow(images[0].numpy().squeeze(), cmap='gray_r')
plt.show()
# recognizing # recognizing
@ -54,5 +56,3 @@ with torch.no_grad():
ps = torch.exp(logps) ps = torch.exp(logps)
probab = list(ps.numpy()[0]) probab = list(ps.numpy()[0])
print("Predicted Digit =", probab.index(max(probab))) print("Predicted Digit =", probab.index(max(probab)))
view_classify(img.view(1, 28, 28), ps)

Binary file not shown.

Binary file not shown.

View File

@ -5,6 +5,7 @@ import matplotlib.pyplot as plt
from time import time from time import time
from torchvision import datasets, transforms from torchvision import datasets, transforms
from torch import nn, optim from torch import nn, optim
import cv2
# IMG transform # IMG transform
transform = transforms.Compose([transforms.ToTensor(), transform = transforms.Compose([transforms.ToTensor(),
@ -17,15 +18,6 @@ val_set = datasets.MNIST('PATH_TO_STORE_TESTSET', download=True, train=False, tr
train_loader = torch.utils.data.DataLoader(train_set, batch_size=64, shuffle=True) train_loader = torch.utils.data.DataLoader(train_set, batch_size=64, shuffle=True)
val_loader = torch.utils.data.DataLoader(val_set, batch_size=64, shuffle=True) val_loader = torch.utils.data.DataLoader(val_set, batch_size=64, shuffle=True)
data_iter = iter(train_loader)
images, labels = data_iter.next()
print(images.shape)
print(labels.shape)
plt.imshow(images[0].numpy().squeeze(), cmap='gray_r')
plt.show()
# building nn model # building nn model
input_size = 784 # = 28*28 input_size = 784 # = 28*28
hidden_sizes = [128, 128, 64] hidden_sizes = [128, 128, 64]
@ -48,15 +40,12 @@ images = images.view(images.shape[0], -1)
logps = model(images) # log probabilities logps = model(images) # log probabilities
loss = criterion(logps, labels) # calculate the NLL loss loss = criterion(logps, labels) # calculate the NLL loss
# print('Before backward pass: \n', model[0].weight.grad)
loss.backward()
# print('After backward pass: \n', model[0].weight.grad)
# training # training
optimizer = optim.SGD(model.parameters(), lr=0.003, momentum=0.9) optimizer = optim.SGD(model.parameters(), lr=0.003, momentum=0.9)
time0 = time() time0 = time()
epochs = 100 epochs = 1
for e in range(epochs): for e in range(epochs):
running_loss = 0 running_loss = 0
for images, labels in train_loader: for images, labels in train_loader:
@ -84,7 +73,6 @@ print("\nTraining Time (in minutes) =", (time() - time0) / 60)
# testing # testing
images, labels = next(iter(val_loader)) images, labels = next(iter(val_loader))
print(type(images))
img = images[0].view(1, 784) img = images[0].view(1, 784)
with torch.no_grad(): with torch.no_grad():
logps = model(img) logps = model(img)
@ -115,5 +103,5 @@ print("\nModel Accuracy =", (correct_count / all_count))
# saving model # saving model
# torch.save(model, './digit_reco_model.pt') # torch.save(model.state_dict(), './digit_reco_model.pt')
torch.save(model, './digit_reco_model2.pt') # torch.save(model.state_dict(), './digit_reco_model2.pt')

View File

@ -6,6 +6,7 @@ from sklearn.metrics import accuracy_score
from sklearn.neural_network import MLPClassifier from sklearn.neural_network import MLPClassifier
import pandas as pd import pandas as pd
import cv2 import cv2
import keras
# 28x28 # 28x28
train_data = np.genfromtxt('dataset/train.csv', delimiter=',', skip_header=1, max_rows=20000, encoding='utf-8') train_data = np.genfromtxt('dataset/train.csv', delimiter=',', skip_header=1, max_rows=20000, encoding='utf-8')

BIN
coder/ll.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 14 KiB

View File

@ -3,35 +3,34 @@ import argparse
import imutils import imutils
import cv2 import cv2
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
import torch
from PIL import Image
img = cv2.cvtColor(cv2.imread('barcode.jpg'), cv2.COLOR_BGR2GRAY) path = "test1.jpg"
ddepth = cv2.cv.CV_32F if imutils.is_cv2() else cv2.CV_32F img = cv2.imread(path)
X = cv2.Sobel(img, ddepth=ddepth, dx=1, dy=0, ksize=-1)
Y = cv2.Sobel(img, ddepth=ddepth, dx=0, dy=1, ksize=-1)
gradient = cv2.subtract(X, Y) img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
gradient = cv2.convertScaleAbs(gradient) img_gray = cv2.GaussianBlur(img_gray, (5, 5), 0)
blurred = cv2.blur(gradient, (9, 9)) ret, im_th = cv2.threshold(img_gray, 90, 255, cv2.THRESH_BINARY_INV)
(_, thresh) = cv2.threshold(blurred, 225, 255, cv2.THRESH_BINARY)
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (21, 7)) ctrs, hier = cv2.findContours(im_th.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
closed = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel)
closed = cv2.erode(closed, None, iterations=4)
closed = cv2.dilate(closed, None, iterations=4)
cnts = cv2.findContours(closed.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) rects = [cv2.boundingRect(ctr) for ctr in ctrs]
cnts = imutils.grab_contours(cnts)
c = sorted(cnts, key=cv2.contourArea, reverse=True)[0]
rect = cv2.minAreaRect(c) for rect in rects:
box = cv2.cv.BoxPoints(rect) if imutils.is_cv2() else cv2.boxPoints(rect) # Draw the rectangles
box = np.int0(box) cv2.rectangle(img, (rect[0], rect[1]), (rect[0] + rect[2], rect[1] + rect[3]), (0, 255, 0), 3)
# Make the rectangular region around the digit
leng = int(rect[3] * 1.6)
pt1 = int(rect[1] + rect[3] // 2 - leng // 2)
pt2 = int(rect[0] + rect[2] // 2 - leng // 2)
roi = im_th[pt1:pt1+leng, pt2:pt2+leng]
# Resize the image
roi = cv2.resize(roi, (28, 28), interpolation=cv2.INTER_AREA)
roi = cv2.dilate(roi, (3, 3))
# Calculate the HOG features
cv2.drawContours(img, [box], -1, (0, 255, 0), 3) cv2.imshow("Resulting Image with Rectangular ROIs", img)
cv2.imshow("Image", img) cv2.waitKey()
cv2.waitKey(0)
plt.imshow(closed ,cmap='binary')
plt.show()

BIN
coder/testno.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.7 KiB