Aleksandra Jonas, Aleksandra Gronowka, Iwona Christop
Data preparation
import os
import sys
import json
import subprocess

import cv2 as cv
import matplotlib.pyplot as plt
import numpy as np
import pkg_resources
from tensorflow import keras
%matplotlib inline

# Install scikit-image if it is not already present in the environment.
required = {'scikit-image'}
installed = {pkg.key for pkg in pkg_resources.working_set}
missing = required - installed
if missing:
    python = sys.executable
    subprocess.check_call([python, '-m', 'pip', 'install', *missing], stdout=subprocess.DEVNULL)
def load_train_data(input_dir, newSize=(227,227)):
    import os
    import numpy as np
    import cv2 as cv
    from pathlib import Path
    from skimage.io import imread

    image_dir = Path(input_dir)
    # Each subdirectory of input_dir is one category.
    categories_name = []
    for file in os.listdir(image_dir):
        d = os.path.join(image_dir, file)
        if os.path.isdir(d):
            categories_name.append(file)

    folders = [directory for directory in image_dir.iterdir() if directory.is_dir()]

    train_img = []
    categories_count = []
    labels = []
    for i, direc in enumerate(folders):
        count = 0
        for obj in direc.iterdir():
            if os.path.isfile(obj) and os.path.basename(os.path.normpath(obj)) != 'desktop.ini':
                labels.append(os.path.basename(os.path.normpath(direc)))
                count += 1
                img = imread(obj)  # returns an ndarray of shape (height, width, channels)
                if img.ndim == 2:
                    # Grayscale image: replicate the single channel three times.
                    img = np.repeat(img[..., np.newaxis], 3, axis=2)
                elif img.shape[-1] == 4:
                    # RGBA image: drop the alpha channel.
                    img = img[:, :, :3]
                img = cv.resize(img, newSize, interpolation=cv.INTER_AREA)
                img = img / 255  # normalize pixel values to [0, 1]
                train_img.append(img)
        categories_count.append(count)

    X = {}
    X["values"] = np.array(train_img)
    X["categories_name"] = categories_name
    X["categories_count"] = categories_count
    X["labels"] = labels
    return X
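The loader returns a plain dict; as a quick usage sketch (the directory is the same one loaded further below), its fields can be inspected like this:

data = load_train_data("./train_test_sw/train_sw_unity")
print(data["values"].shape)        # (num_images, 227, 227, 3)
print(data["categories_name"])     # one name per class subdirectory
print(data["categories_count"])    # image count per class
print(len(data["labels"]))         # one label string per image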
def load_test_data(input_dir, newSize=(227,227)):
    import os
    import json
    import numpy as np
    import cv2 as cv
    from pathlib import Path
    from skimage.io import imread

    image_path = Path(input_dir)
    # Ground-truth labels live next to the image directory in test_labels.json.
    labels_path = image_path.parents[0] / 'test_labels.json'
    jsonString = labels_path.read_text()
    objects = json.loads(jsonString)

    # Count consecutive runs of the same label to get per-category counts.
    categories_name = []
    categories_count = []
    count = 0
    c = objects[0]['value']
    for e in objects:
        if e['value'] != c:
            categories_count.append(count)
            c = e['value']
            count = 1
        else:
            count += 1
        if not e['value'] in categories_name:
            categories_name.append(e['value'])
    categories_count.append(count)

    test_img = []
    labels = []
    for e in objects:
        p = image_path / e['filename']
        img = imread(p)  # returns an ndarray of shape (height, width, channels)
        if img.shape[-1] == 4:
            # RGBA image: drop the alpha channel.
            img = img[:, :, :3]
        img = cv.resize(img, newSize, interpolation=cv.INTER_AREA)
        img = img / 255  # normalize pixel values to [0, 1]
        test_img.append(img)
        labels.append(e['value'])

    X = {}
    X["values"] = np.array(test_img)
    X["categories_name"] = categories_name
    X["categories_count"] = categories_count
    X["labels"] = labels
    return X
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import tensorflow as tf
data_train = load_train_data("./train_test_sw/train_sw_unity")
values_train = data_train['values']
labels_train = data_train['labels']
data_test = load_test_data("./train_test_sw/test_sw")
X_test = data_test['values']
y_test = data_test['labels']
X_train, X_validate, y_train, y_validate = train_test_split(values_train, labels_train, test_size=0.2, random_state=42)
class_le = LabelEncoder()
# Fit the encoder on the training labels only, then reuse the same mapping
# for the validation and test labels so the integer classes stay consistent.
y_train_enc = class_le.fit_transform(y_train)
y_validate_enc = class_le.transform(y_validate)
y_test_enc = class_le.transform(y_test)
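Re-fitting the encoder on every split would silently remap the integer classes whenever a split misses a category, which is why only transform is used on the validation and test labels. A minimal sketch with made-up label names:

from sklearn.preprocessing import LabelEncoder

le = LabelEncoder()
le.fit(["glass", "metal", "paper"])          # hypothetical classes, sorted internally
print(le.transform(["metal", "glass"]))      # [1 0] -- same mapping every time
print(le.inverse_transform([1, 0]))          # ['metal' 'glass']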
train_ds = tf.data.Dataset.from_tensor_slices((X_train, y_train_enc))
validation_ds = tf.data.Dataset.from_tensor_slices((X_validate, y_validate_enc))
test_ds = tf.data.Dataset.from_tensor_slices((X_test, y_test_enc))
train_ds_size = tf.data.experimental.cardinality(train_ds).numpy()
test_ds_size = tf.data.experimental.cardinality(test_ds).numpy()
validation_ds_size = tf.data.experimental.cardinality(validation_ds).numpy()
train_ds = (train_ds
            .shuffle(buffer_size=train_ds_size)
            .batch(batch_size=32, drop_remainder=True))
test_ds = (test_ds
           .shuffle(buffer_size=test_ds_size)
           .batch(batch_size=32, drop_remainder=True))
validation_ds = (validation_ds
                 .shuffle(buffer_size=validation_ds_size)
                 .batch(batch_size=32, drop_remainder=True))
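An optional sanity check (added here as a sketch, not part of the recorded run) confirms the pipeline emits batches in the shape AlexNet expects below:

# One batch from the training pipeline: images (32, 227, 227, 3), labels (32,)
for images, batch_labels in train_ds.take(1):
    print(images.shape, batch_labels.shape)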
AlexNet
from keras.callbacks import ModelCheckpoint, EarlyStopping
import matplotlib.pyplot as plt
import tensorflow as tf
alexnet = keras.models.Sequential([
    keras.layers.Conv2D(filters=96, kernel_size=(11,11), strides=(4,4), activation='relu', input_shape=(227,227,3)),
    keras.layers.BatchNormalization(),
    keras.layers.MaxPool2D(pool_size=(3,3), strides=(2,2)),
    keras.layers.Conv2D(filters=256, kernel_size=(5,5), strides=(1,1), activation='relu', padding="same"),
    keras.layers.BatchNormalization(),
    keras.layers.MaxPool2D(pool_size=(3,3), strides=(2,2)),
    keras.layers.Conv2D(filters=384, kernel_size=(3,3), strides=(1,1), activation='relu', padding="same"),
    keras.layers.BatchNormalization(),
    keras.layers.Conv2D(filters=384, kernel_size=(3,3), strides=(1,1), activation='relu', padding="same"),
    keras.layers.BatchNormalization(),
    keras.layers.Conv2D(filters=256, kernel_size=(3,3), strides=(1,1), activation='relu', padding="same"),
    keras.layers.BatchNormalization(),
    keras.layers.MaxPool2D(pool_size=(3,3), strides=(2,2)),
    keras.layers.Flatten(),
    keras.layers.Dense(4096, activation='relu'),
    keras.layers.Dropout(.5),
    keras.layers.Dense(4096, activation='relu'),
    keras.layers.Dropout(.5),
    keras.layers.Dense(10, activation='softmax')
])
alexnet.compile(loss='sparse_categorical_crossentropy', optimizer=tf.optimizers.SGD(learning_rate=.001), metrics=['accuracy'])
alexnet.summary()
Model: "sequential" _________________________________________________________________ Layer (type) Output Shape Param # ================================================================= conv2d (Conv2D) (None, 55, 55, 96) 34944 batch_normalization (BatchN (None, 55, 55, 96) 384 ormalization) max_pooling2d (MaxPooling2D (None, 27, 27, 96) 0 ) conv2d_1 (Conv2D) (None, 27, 27, 256) 614656 batch_normalization_1 (Batc (None, 27, 27, 256) 1024 hNormalization) max_pooling2d_1 (MaxPooling (None, 13, 13, 256) 0 2D) conv2d_2 (Conv2D) (None, 13, 13, 384) 885120 batch_normalization_2 (Batc (None, 13, 13, 384) 1536 hNormalization) conv2d_3 (Conv2D) (None, 13, 13, 384) 1327488 batch_normalization_3 (Batc (None, 13, 13, 384) 1536 hNormalization) conv2d_4 (Conv2D) (None, 13, 13, 256) 884992 batch_normalization_4 (Batc (None, 13, 13, 256) 1024 hNormalization) max_pooling2d_2 (MaxPooling (None, 6, 6, 256) 0 2D) flatten (Flatten) (None, 9216) 0 dense (Dense) (None, 4096) 37752832 dropout (Dropout) (None, 4096) 0 dense_1 (Dense) (None, 4096) 16781312 dropout_1 (Dropout) (None, 4096) 0 dense_2 (Dense) (None, 10) 40970 ================================================================= Total params: 58,327,818 Trainable params: 58,325,066 Non-trainable params: 2,752 _________________________________________________________________
checkpoint = ModelCheckpoint("alex_2.h5", monitor='val_accuracy', verbose=1, save_best_only=True, save_weights_only=False, mode='auto', save_freq='epoch')
early = EarlyStopping(monitor='val_accuracy', min_delta=0, patience=20, verbose=1, mode='auto')
alex = alexnet.fit(
    train_ds,
    steps_per_epoch=len(train_ds),
    validation_data=validation_ds,
    validation_steps=len(validation_ds),
    epochs=25,
    callbacks=[checkpoint, early])
Epoch   loss    accuracy  val_loss  val_accuracy
 1/25   3.8100  0.3950    1.8302    0.2969 *
 2/25   1.3961  0.5281    1.9363    0.3828 *
 3/25   1.0805  0.5956    2.2350    0.3438
 4/25   0.8866  0.6600    2.0590    0.3203
 5/25   0.7940  0.6981    2.4437    0.3672
 6/25   0.6635  0.7325    1.8824    0.4583 *
 7/25   0.6460  0.7406    1.3159    0.4948 *
 8/25   0.5407  0.7837    0.9668    0.6250 *
 9/25   0.4979  0.7994    1.1679    0.5677
10/25   0.4673  0.8256    0.6585    0.7448 *
11/25   0.4136  0.8313    0.8328    0.7188
12/25   0.3804  0.8519    0.6793    0.7630 *
13/25   0.3550  0.8587    0.6221    0.7630
14/25   0.3337  0.8744    0.6317    0.7578
15/25   0.2860  0.8950    0.6067    0.7917 *
16/25   0.2721  0.8881    0.5126    0.8125 *
17/25   0.2564  0.8969    0.5017    0.8151 *
18/25   0.2534  0.8981    0.4199    0.8229 *
19/25   0.2327  0.9075    0.4260    0.8464 *
20/25   0.2132  0.9219    0.6660    0.7995
21/25   0.1870  0.9287    0.5399    0.8203
22/25   0.1861  0.9294    0.5620    0.8151
23/25   0.1494  0.9375    0.3850    0.8828 *
24/25   0.1548  0.9481    0.4789    0.8646
25/25   0.1541  0.9456    0.4806    0.8411

* val_accuracy improved; checkpoint saved to alex_2.h5. Best: 0.88281 (epoch 23).
plt.plot(alex.history["accuracy"])
plt.plot(alex.history['val_accuracy'])
plt.plot(alex.history['loss'])
plt.plot(alex.history['val_loss'])
plt.title(f"Model accuracy - AlexNet")
plt.ylabel("Value")
plt.xlabel("Epoch")
plt.legend(["Accuracy","Validation Accuracy","Loss","Validation Loss"])
plt.show()
alexnet.evaluate(test_ds)
8/8 [==============================] - 4s 450ms/step - loss: 0.4419 - accuracy: 0.8516
[0.4419291615486145, 0.8515625]
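To map the network's soft-max outputs back to category names, the fitted LabelEncoder can be inverted; a sketch added here for illustration, not part of the recorded run:

# Predict on one test batch and decode the integer classes to label strings.
for images, _ in test_ds.take(1):
    probs = alexnet.predict(images)            # shape (32, 10)
    pred_classes = np.argmax(probs, axis=1)    # most probable class per image
    print(class_le.inverse_transform(pred_classes)[:5])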
MLP
# Flatten each image to a 1-D feature vector for the scikit-learn MLP.
X_train = X_train.reshape(X_train.shape[0], -1)
X_test = X_test.reshape(X_test.shape[0], -1)
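Each 227x227 RGB image becomes a single row of 227*227*3 = 154,587 features; a toy check of the reshape (the demo array is hypothetical):

# Four dummy images standing in for X_train.
demo = np.zeros((4, 227, 227, 3))
print(demo.reshape(demo.shape[0], -1).shape)   # (4, 154587)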
from sklearn.neural_network import MLPClassifier
from tqdm import tqdm
def test_mlp(X_train, y_train, X_val, y_val, X_test, y_test, hidden_layer_sizes, alpha, max_iter):
    mlp = MLPClassifier(hidden_layer_sizes=hidden_layer_sizes, alpha=alpha, max_iter=max_iter)
    accuracy = []
    result = {
        'num_layers': len(hidden_layer_sizes),
        'layer_sizes': hidden_layer_sizes,
        'regularization': alpha,
        'max_iter': max_iter
    }
    for i in tqdm(range(max_iter)):
        # Train one epoch at a time so the accuracy curve can be recorded.
        mlp.partial_fit(X_train, y_train, np.unique(y_train))
        accuracy.append(mlp.score(X_train, y_train))
        if i == 49:
            # Snapshot the metrics after 50 epochs for comparison.
            result['checkpoint_train_accuracy'] = np.mean(accuracy)
            result['checkpoint_val_accuracy'] = mlp.score(X_val, y_val)
            result['checkpoint_test_accuracy'] = mlp.score(X_test, y_test)
    result['full_train_accuracy'] = np.mean(accuracy)
    result['full_val_accuracy'] = mlp.score(X_val, y_val)
    result['full_test_accuracy'] = mlp.score(X_test, y_test)
    result['accuracy_curve'] = accuracy
    result['loss_curve'] = mlp.loss_curve_
    return result
def print_result(result):
    print(f"NUMBER OF HIDDEN LAYERS = {result['num_layers']}")
    print(f"HIDDEN LAYER SIZES = {result['layer_sizes']}")
    print(f"REGULARIZATION = {result['regularization']}")
    print("\n50 EPOCHS")
    print(f"train_accuracy = {round(result['checkpoint_train_accuracy'] * 100, 2)}%")
    print(f"val_accuracy = {round(result['checkpoint_val_accuracy'] * 100, 2)}%")
    print(f"test_accuracy = {round(result['checkpoint_test_accuracy'] * 100, 2)}%")
    print(f"\n{result['max_iter']} EPOCHS")
    print(f"train_accuracy = {round(result['full_train_accuracy'] * 100, 2)}%")
    print(f"val_accuracy = {round(result['full_val_accuracy'] * 100, 2)}%")
    print(f"test_accuracy = {round(result['full_test_accuracy'] * 100, 2)}%")
def get_plot(result):
    f = plt.figure(figsize=(12,6))
    plt.plot(result['loss_curve'], label='loss')
    plt.plot(result['accuracy_curve'], label='accuracy')
    plt.legend(loc='best')
    plt.xlabel('number of iterations')
    plt.grid()
    plt.show()
from sklearn.model_selection import train_test_split
NEW_SIZE = 64
ONE_LAYER = (286,)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.05, random_state=42)
all_results = []
all_results.append(test_mlp(X_train, y_train, X_val, y_val, X_test, y_test, hidden_layer_sizes=ONE_LAYER, alpha=0.1, max_iter=200))
all_results.append(test_mlp(X_train, y_train, X_val, y_val, X_test, y_test, hidden_layer_sizes=ONE_LAYER, alpha=0.001, max_iter=200))
100%|██████████| 200/200 [42:45<00:00, 12.83s/it] 100%|██████████| 200/200 [39:45<00:00, 11.93s/it]
for result in all_results:
    print_result(result)
    get_plot(result)
NUMBER OF HIDDEN LAYERS = 1
HIDDEN LAYER SIZES = (286,)
REGULARIZATION = 0.1

50 EPOCHS
train_accuracy = 68.78%
val_accuracy = 58.02%
test_accuracy = 53.67%

200 EPOCHS
train_accuracy = 90.29%
val_accuracy = 58.02%
test_accuracy = 59.07%

NUMBER OF HIDDEN LAYERS = 1
HIDDEN LAYER SIZES = (286,)
REGULARIZATION = 0.001

50 EPOCHS
train_accuracy = 74.1%
val_accuracy = 56.79%
test_accuracy = 50.19%

200 EPOCHS
train_accuracy = 92.63%
val_accuracy = 56.79%
test_accuracy = 54.44%