2020-05-20 07:32:13 +02:00
|
|
|
import numpy as np
|
|
|
|
from PIL import Image
|
|
|
|
import matplotlib.pyplot as plt
|
2020-05-20 08:24:33 +02:00
|
|
|
from sklearn import datasets
|
|
|
|
from sklearn.metrics import accuracy_score
|
|
|
|
from sklearn.neural_network import MLPClassifier
|
2020-05-25 00:24:34 +02:00
|
|
|
import pandas as pd
|
2020-05-20 07:32:13 +02:00
|
|
|
import cv2
|
|
|
|
|
2020-05-26 00:55:12 +02:00
|
|
|
# 28x28 (presumably the image size stored in each CSV row -- TODO confirm)
# Both files are parsed with identical options: comma-separated values,
# the header row skipped, and at most 20000 rows read.
_csv_options = dict(delimiter=',', skip_header=1, max_rows=20000, encoding='utf-8')
train_data = np.genfromtxt('dataset/train.csv', **_csv_options)
test_data = np.genfromtxt('dataset/test.csv', **_csv_options)
|
2020-05-25 00:24:34 +02:00
|
|
|
|
2020-05-26 00:55:12 +02:00
|
|
|
# train_data = pd.read_csv('dataset/train.csv')
|
|
|
|
# test_data = pd.read_csv('dataset/test.csv')
|
2020-05-20 07:32:13 +02:00
|
|
|
|
2020-05-20 08:24:33 +02:00
|
|
|
# training
# handwritten digits
digits = datasets.load_digits()
n_samples = digits.images.shape[0]
# Flatten every image into a single feature row (one row per sample).
x = digits.images.reshape(n_samples, -1)
y = digits.target
|
2020-05-25 00:24:34 +02:00
|
|
|
|
2020-05-26 00:55:12 +02:00
|
|
|
# print(type(y[0]), type(x[0]))
|
|
|
|
# TODO: sort out the dataset, increase the number of layers
|
|
|
|
|
|
|
|
# x_train = train_data.iloc[:, 1:].values.astype('float32')
|
|
|
|
# y_train = train_data.iloc[:, 0].values.astype('int32')
|
|
|
|
# x_test = test_data.values.astype('float32')
|
2020-05-25 00:24:34 +02:00
|
|
|
|
2020-05-26 00:55:12 +02:00
|
|
|
# Split the loaded rows into a fitting set and a hold-out set.
# Column 0 is used as the label; the remaining columns are the features.
n_train = 10000
x_train = train_data[:n_train, 1:]
# Labels are cast to int in both sets so the classifier learns (and later
# predicts) integer classes that compare directly against y_test.
y_train = train_data[:n_train, 0].astype('int')
# The hold-out slice starts exactly at n_train so that no row is skipped
# between the two sets.
x_test = train_data[n_train:20000, 1:]
y_test = train_data[n_train:20000, 0].astype('int')
print(type(y_test[0]), type(x_test[0]))
|
2020-05-20 07:32:13 +02:00
|
|
|
|
2020-05-25 00:24:34 +02:00
|
|
|
# x_train = x[:900]
|
|
|
|
# y_train = y[:900]
|
|
|
|
# x_test = x[900:]
|
|
|
|
# y_test = y[900:]
|
2020-05-20 07:32:13 +02:00
|
|
|
|
2020-05-26 00:55:12 +02:00
|
|
|
# 500, 500, 500, 500, 500 (presumably an alternative hidden-layer layout
# that was tried -- TODO confirm)
mlp = MLPClassifier(
    hidden_layer_sizes=(150, 100, 100, 100),
    activation='logistic',
    alpha=1e-4,
    solver='sgd',
    tol=0.000000000001,
    random_state=1,
    learning_rate_init=.1,
    verbose=True,
    max_iter=10000,
)

# fit() returns the fitted estimator, so the hold-out predictions can be
# taken straight from the same call chain.
predictions = mlp.fit(x_train, y_train).predict(x_test)

holdout_accuracy = accuracy_score(y_test, predictions)
print("Accuracy: ", holdout_accuracy)
|
2020-05-20 08:24:33 +02:00
|
|
|
|
|
|
|
# image
# Load the picture to classify and convert it to a 28x28 grayscale array.
image_path = 'test5.jpg'
img = cv2.imread(image_path)
if img is None:
    # cv2.imread reports a missing or undecodable file by returning None;
    # fail here with a clear message instead of inside cvtColor.
    raise OSError("Could not read image file: " + image_path)
img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
img = cv2.blur(img, (9, 9))  # improves quality
# The resized grayscale image is already a 2-D (28, 28) array, so no further
# reshape is needed before it is thresholded.
img = cv2.resize(img, (28, 28), interpolation=cv2.INTER_AREA)
|
2020-05-20 08:24:33 +02:00
|
|
|
|
2020-05-26 00:55:12 +02:00
|
|
|
# print(type(img))
|
|
|
|
# print(img.shape)
|
|
|
|
# plt.imshow(img ,cmap='binary')
|
|
|
|
# plt.show()
|
2020-05-20 08:24:33 +02:00
|
|
|
|
|
|
|
# Binarise the image and flatten it row by row into one feature vector:
# pixels with a value above 225 become 0 (no black), all others become 255.
# np.where applies the threshold to the whole array at once and ravel()
# walks it in row-major order, matching a row-by-row, column-by-column scan.
data = np.where(img > 225, 0, 255).astype(np.float64).ravel()
# print(data)
print(type(data))
|
2020-05-20 08:24:33 +02:00
|
|
|
|
|
|
|
# Classify the single preprocessed image: the classifier expects a 2-D
# input, so the flat vector is presented as a batch containing one row.
single_sample = data.reshape(1, -1)
predictions = mlp.predict(single_sample)
predicted_digit = predictions[0].astype('int')
print("Liczba to:", predicted_digit)
|