diff --git a/.idea/misc.xml b/.idea/misc.xml index 6649a8c..8eab719 100644 --- a/.idea/misc.xml +++ b/.idea/misc.xml @@ -3,5 +3,5 @@ - + \ No newline at end of file diff --git a/.idea/workspace.xml b/.idea/workspace.xml index 681305c..ce7d1f0 100644 --- a/.idea/workspace.xml +++ b/.idea/workspace.xml @@ -20,9 +20,10 @@ - - + + + - + - + @@ -257,6 +258,11 @@ + + + + + 1589815443652 @@ -328,7 +334,14 @@ - @@ -358,7 +371,8 @@ - @@ -381,14 +395,14 @@ - + - - + + - + diff --git a/.idea/wozek.iml b/.idea/wozek.iml index c4b5840..f0f253e 100644 --- a/.idea/wozek.iml +++ b/.idea/wozek.iml @@ -4,7 +4,7 @@ - + diff --git a/coder/PATH_TO_STORE_TESTSET/MNIST/processed/test.pt b/coder/PATH_TO_STORE_TESTSET/MNIST/processed/test.pt new file mode 100644 index 0000000..c551b36 Binary files /dev/null and b/coder/PATH_TO_STORE_TESTSET/MNIST/processed/test.pt differ diff --git a/coder/PATH_TO_STORE_TESTSET/MNIST/processed/training.pt b/coder/PATH_TO_STORE_TESTSET/MNIST/processed/training.pt new file mode 100644 index 0000000..fef0767 Binary files /dev/null and b/coder/PATH_TO_STORE_TESTSET/MNIST/processed/training.pt differ diff --git a/coder/PATH_TO_STORE_TESTSET/MNIST/raw/t10k-images-idx3-ubyte b/coder/PATH_TO_STORE_TESTSET/MNIST/raw/t10k-images-idx3-ubyte new file mode 100644 index 0000000..1170b2c Binary files /dev/null and b/coder/PATH_TO_STORE_TESTSET/MNIST/raw/t10k-images-idx3-ubyte differ diff --git a/coder/PATH_TO_STORE_TESTSET/MNIST/raw/t10k-images-idx3-ubyte.gz b/coder/PATH_TO_STORE_TESTSET/MNIST/raw/t10k-images-idx3-ubyte.gz new file mode 100644 index 0000000..5ace8ea Binary files /dev/null and b/coder/PATH_TO_STORE_TESTSET/MNIST/raw/t10k-images-idx3-ubyte.gz differ diff --git a/coder/PATH_TO_STORE_TESTSET/MNIST/raw/t10k-labels-idx1-ubyte b/coder/PATH_TO_STORE_TESTSET/MNIST/raw/t10k-labels-idx1-ubyte new file mode 100644 index 0000000..d1c3a97 Binary files /dev/null and b/coder/PATH_TO_STORE_TESTSET/MNIST/raw/t10k-labels-idx1-ubyte differ diff --git a/coder/PATH_TO_STORE_TESTSET/MNIST/raw/t10k-labels-idx1-ubyte.gz b/coder/PATH_TO_STORE_TESTSET/MNIST/raw/t10k-labels-idx1-ubyte.gz new file mode 100644 index 0000000..a7e1415 Binary files /dev/null and b/coder/PATH_TO_STORE_TESTSET/MNIST/raw/t10k-labels-idx1-ubyte.gz differ diff --git a/coder/PATH_TO_STORE_TESTSET/MNIST/raw/train-images-idx3-ubyte b/coder/PATH_TO_STORE_TESTSET/MNIST/raw/train-images-idx3-ubyte new file mode 100644 index 0000000..bbce276 Binary files /dev/null and b/coder/PATH_TO_STORE_TESTSET/MNIST/raw/train-images-idx3-ubyte differ diff --git a/coder/PATH_TO_STORE_TESTSET/MNIST/raw/train-images-idx3-ubyte.gz b/coder/PATH_TO_STORE_TESTSET/MNIST/raw/train-images-idx3-ubyte.gz new file mode 100644 index 0000000..b50e4b6 Binary files /dev/null and b/coder/PATH_TO_STORE_TESTSET/MNIST/raw/train-images-idx3-ubyte.gz differ diff --git a/coder/PATH_TO_STORE_TESTSET/MNIST/raw/train-labels-idx1-ubyte b/coder/PATH_TO_STORE_TESTSET/MNIST/raw/train-labels-idx1-ubyte new file mode 100644 index 0000000..d6b4c5d Binary files /dev/null and b/coder/PATH_TO_STORE_TESTSET/MNIST/raw/train-labels-idx1-ubyte differ diff --git a/coder/PATH_TO_STORE_TESTSET/MNIST/raw/train-labels-idx1-ubyte.gz b/coder/PATH_TO_STORE_TESTSET/MNIST/raw/train-labels-idx1-ubyte.gz new file mode 100644 index 0000000..707a576 Binary files /dev/null and b/coder/PATH_TO_STORE_TESTSET/MNIST/raw/train-labels-idx1-ubyte.gz differ diff --git a/coder/PATH_TO_STORE_TRAINSET/MNIST/processed/test.pt b/coder/PATH_TO_STORE_TRAINSET/MNIST/processed/test.pt new file mode 100644 index 0000000..80d7631 Binary files /dev/null and b/coder/PATH_TO_STORE_TRAINSET/MNIST/processed/test.pt differ diff --git a/coder/PATH_TO_STORE_TRAINSET/MNIST/processed/training.pt b/coder/PATH_TO_STORE_TRAINSET/MNIST/processed/training.pt new file mode 100644 index 0000000..38d1cf0 Binary files /dev/null and b/coder/PATH_TO_STORE_TRAINSET/MNIST/processed/training.pt differ diff --git a/coder/PATH_TO_STORE_TRAINSET/MNIST/raw/t10k-images-idx3-ubyte b/coder/PATH_TO_STORE_TRAINSET/MNIST/raw/t10k-images-idx3-ubyte new file mode 100644 index 0000000..1170b2c Binary files /dev/null and b/coder/PATH_TO_STORE_TRAINSET/MNIST/raw/t10k-images-idx3-ubyte differ diff --git a/coder/PATH_TO_STORE_TRAINSET/MNIST/raw/t10k-images-idx3-ubyte.gz b/coder/PATH_TO_STORE_TRAINSET/MNIST/raw/t10k-images-idx3-ubyte.gz new file mode 100644 index 0000000..5ace8ea Binary files /dev/null and b/coder/PATH_TO_STORE_TRAINSET/MNIST/raw/t10k-images-idx3-ubyte.gz differ diff --git a/coder/PATH_TO_STORE_TRAINSET/MNIST/raw/t10k-labels-idx1-ubyte b/coder/PATH_TO_STORE_TRAINSET/MNIST/raw/t10k-labels-idx1-ubyte new file mode 100644 index 0000000..d1c3a97 Binary files /dev/null and b/coder/PATH_TO_STORE_TRAINSET/MNIST/raw/t10k-labels-idx1-ubyte differ diff --git a/coder/PATH_TO_STORE_TRAINSET/MNIST/raw/t10k-labels-idx1-ubyte.gz b/coder/PATH_TO_STORE_TRAINSET/MNIST/raw/t10k-labels-idx1-ubyte.gz new file mode 100644 index 0000000..a7e1415 Binary files /dev/null and b/coder/PATH_TO_STORE_TRAINSET/MNIST/raw/t10k-labels-idx1-ubyte.gz differ diff --git a/coder/PATH_TO_STORE_TRAINSET/MNIST/raw/train-images-idx3-ubyte b/coder/PATH_TO_STORE_TRAINSET/MNIST/raw/train-images-idx3-ubyte new file mode 100644 index 0000000..bbce276 Binary files /dev/null and b/coder/PATH_TO_STORE_TRAINSET/MNIST/raw/train-images-idx3-ubyte differ diff --git a/coder/PATH_TO_STORE_TRAINSET/MNIST/raw/train-images-idx3-ubyte.gz b/coder/PATH_TO_STORE_TRAINSET/MNIST/raw/train-images-idx3-ubyte.gz new file mode 100644 index 0000000..b50e4b6 Binary files /dev/null and b/coder/PATH_TO_STORE_TRAINSET/MNIST/raw/train-images-idx3-ubyte.gz differ diff --git a/coder/PATH_TO_STORE_TRAINSET/MNIST/raw/train-labels-idx1-ubyte b/coder/PATH_TO_STORE_TRAINSET/MNIST/raw/train-labels-idx1-ubyte new file mode 100644 index 0000000..d6b4c5d Binary files /dev/null and b/coder/PATH_TO_STORE_TRAINSET/MNIST/raw/train-labels-idx1-ubyte differ diff --git a/coder/PATH_TO_STORE_TRAINSET/MNIST/raw/train-labels-idx1-ubyte.gz b/coder/PATH_TO_STORE_TRAINSET/MNIST/raw/train-labels-idx1-ubyte.gz new file mode 100644 index 0000000..707a576 Binary files /dev/null and b/coder/PATH_TO_STORE_TRAINSET/MNIST/raw/train-labels-idx1-ubyte.gz differ diff --git a/coder/digits_recognizer.py b/coder/digits_recognizer.py new file mode 100644 index 0000000..cc06540 --- /dev/null +++ b/coder/digits_recognizer.py @@ -0,0 +1,39 @@ +import numpy as np +import torch +import torchvision +import matplotlib.pyplot as plt +from time import time +from torchvision import datasets, transforms +from torch import nn, optim + +transform = transforms.Compose([transforms.ToTensor(), + transforms.Normalize((0.5,), (0.5,)), + ]) + +trainset = datasets.MNIST('PATH_TO_STORE_TRAINSET', download=True, train=True, transform=transform) +valset = datasets.MNIST('PATH_TO_STORE_TESTSET', download=True, train=False, transform=transform) +trainloader = torch.utils.data.DataLoader(trainset, batch_size=64, shuffle=True) +valloader = torch.utils.data.DataLoader(valset, batch_size=64, shuffle=True) + +dataiter = iter(trainloader) +images, labels = dataiter.next() + +print(images.shape) +print(labels.shape) + +plt.imshow(images[0].numpy().squeeze(), cmap='gray_r') +plt.show() + +# building nn model +input_size = 784 +hidden_sizes = [128, 64] +output_size = 10 + +model = nn.Sequential(nn.Linear(input_size, hidden_sizes[0]), + nn.ReLU(), + nn.Linear(hidden_sizes[0], hidden_sizes[1]), + nn.ReLU(), + nn.Linear(hidden_sizes[1], output_size), + nn.LogSoftmax(dim=1)) +print(model) + diff --git a/coder/image.py b/coder/image.py index 86c3ee5..e4c651e 100644 --- a/coder/image.py +++ b/coder/image.py @@ -7,11 +7,12 @@ from sklearn.neural_network import MLPClassifier import pandas as pd import cv2 -#28x28 -train_data = np.genfromtxt('dataset/train.csv', delimiter=',', skip_header=1 ,max_rows=20000, encoding='utf-8') -test_data = np.genfromtxt('dataset/test.csv', delimiter=',' , skip_header=1, max_rows=20000, encoding='utf-8') - +# 28x28 +train_data = np.genfromtxt('dataset/train.csv', delimiter=',', skip_header=1, max_rows=20000, encoding='utf-8') +test_data = np.genfromtxt('dataset/test.csv', delimiter=',', skip_header=1, max_rows=20000, encoding='utf-8') +# train_data = pd.read_csv('dataset/train.csv') +# test_data = pd.read_csv('dataset/test.csv') # training # recznie napisane cyfry @@ -20,45 +21,46 @@ digits = datasets.load_digits() y = digits.target x = digits.images.reshape((len(digits.images), -1)) +# print(type(y[0]), type(x[0])) +# ogarnac zbior, zwiekszyc warstwy -#ogarnac zbior, zwiekszyc warstwy +# x_train = train_data.iloc[:, 1:].values.astype('float32') +# y_train = train_data.iloc[:, 0].values.astype('int32') +# x_test = test_data.values.astype('float32') -x_train = train_data[0:20000, 1:] -y_train = train_data[0:20000, 0] -x_test = test_data[0:20000] -y_test = test_data[0:20000, 0] +x_train = train_data[0:10000, 1:] +y_train = train_data[0:10000, 0] +x_test = train_data[10001:20000, 1:] +y_test = train_data[10001:20000, 0].astype('int') + +print(type(y_test[0]), type(x_test[0])) # x_train = x[:900] # y_train = y[:900] # x_test = x[900:] # y_test = y[900:] -print(x_test[0].shape, y_test[9].shape) - -mlp = MLPClassifier(hidden_layer_sizes=(100, 100, 100, 100), activation='logistic', alpha=1e-4, +# 500, 500, 500, 500, 500 +mlp = MLPClassifier(hidden_layer_sizes=(150, 100, 100, 100), activation='logistic', alpha=1e-4, solver='sgd', tol=0.000000000001, random_state=1, - learning_rate_init=.1, verbose=True, max_iter=1000) + learning_rate_init=.1, verbose=True, max_iter=10000) mlp.fit(x_train, y_train) -print(123456789) predictions = mlp.predict(x_test) -print(123456789) print("Accuracy: ", accuracy_score(y_test, predictions)) - # image img = cv2.cvtColor(cv2.imread('test5.jpg'), cv2.COLOR_BGR2GRAY) -img = cv2.blur(img, (9, 9)) # poprawia jakosc +img = cv2.blur(img, (9, 9)) # poprawia jakosc img = cv2.resize(img, (28, 28), interpolation=cv2.INTER_AREA) img = img.reshape((len(img), -1)) -print(type(img)) -print(img.shape) -print(img) -plt.imshow(img ,cmap='binary') -plt.show() +# print(type(img)) +# print(img.shape) +# plt.imshow(img ,cmap='binary') +# plt.show() data = [] @@ -67,15 +69,16 @@ for i in range(rows): for j in range(cols): k = img[i, j] if k > 225: - k = 0 # brak czarnego + k = 0 # brak czarnego else: - k = 1 + k = 255 data.append(k) -data = np.asarray(data, dtype=np.float32) -print(data) +data = np.asarray(data, dtype=np.float64) +# print(data) +print(type(data)) predictions = mlp.predict([data]) -print("Liczba to:", predictions[0]) +print("Liczba to:", predictions[0].astype('int'))