Symulowanie-wizualne/sw_lab9-10_3.ipynb

Aleksandra Jonas, Aleksandra Gronowka, Iwona Christop

Data preparation

import os
import sys
import subprocess
import json

import cv2 as cv
import matplotlib.pyplot as plt
import numpy as np
import pkg_resources
from tensorflow import keras
%matplotlib inline

required = {'scikit-image'}
installed = {pkg.key for pkg in pkg_resources.working_set}
missing = required - installed

# Install any missing packages into the current interpreter, quietly.
if missing:
    python = sys.executable
    subprocess.check_call([python, '-m', 'pip', 'install', *missing], stdout=subprocess.DEVNULL)

def load_train_data(input_dir, newSize=(227,227)):
    from pathlib import Path
    from skimage.io import imread
    import cv2 as cv
    import numpy as np
    import os

    image_dir = Path(input_dir)
    categories_name = []
    for file in os.listdir(image_dir):
        d = os.path.join(image_dir, file)
        if os.path.isdir(d):
            categories_name.append(file)

    folders = [directory for directory in image_dir.iterdir() if directory.is_dir()]

    train_img = []
    categories_count = []
    labels = []
    for direc in folders:
        count = 0
        for obj in direc.iterdir():
            if os.path.isfile(obj) and os.path.basename(os.path.normpath(obj)) != 'desktop.ini':
                labels.append(os.path.basename(os.path.normpath(direc)))
                count += 1
                img = imread(obj)  # returns an ndarray of shape (height, width, channels)
                if img.ndim == 2:  # grayscale: replicate the single channel to get 3 channels
                    img = np.repeat(img[..., np.newaxis], 3, axis=2)
                elif img.shape[-1] == 4:  # RGBA: drop the alpha channel
                    img = img[:, :, :3]
                img = cv.resize(img, newSize, interpolation=cv.INTER_AREA)
                img = img / 255  # normalize pixel values to [0, 1]
                train_img.append(img)
        categories_count.append(count)

    X = {}
    X["values"] = np.array(train_img)
    X["categories_name"] = categories_name
    X["categories_count"] = categories_count
    X["labels"] = labels
    return X
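A quick, optional sanity check of what load_train_data returns (a sketch, assuming the ./train_test_sw/train_sw_unity directory used later in this notebook):

# Hypothetical check: the loader should return aligned values/labels arrays.
sample = load_train_data("./train_test_sw/train_sw_unity")
print(sample["values"].shape)       # (n_samples, 227, 227, 3), values in [0, 1]
print(sample["categories_name"])    # one name per class subdirectory
print(sample["categories_count"])   # images found per class
assert sample["values"].shape[0] == len(sample["labels"])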

def load_test_data(input_dir, newSize=(227,227)):
    from pathlib import Path
    from skimage.io import imread
    import cv2 as cv
    import numpy as np
    import json

    image_path = Path(input_dir)

    # Ground-truth labels live next to the image directory.
    labels_path = image_path.parents[0] / 'test_labels.json'

    jsonString = labels_path.read_text()
    objects = json.loads(jsonString)

    # The JSON is assumed to be grouped by class: count the length of each
    # consecutive run of identical 'value' fields.
    categories_name = []
    categories_count = []
    count = 0
    c = objects[0]['value']
    for e in objects:
        if e['value'] != c:
            categories_count.append(count)
            c = e['value']
            count = 1
        else:
            count += 1
        if e['value'] not in categories_name:
            categories_name.append(e['value'])

    categories_count.append(count)

    test_img = []
    labels = []
    for e in objects:
        p = image_path / e['filename']
        img = imread(p)  # returns an ndarray of shape (height, width, channels)
        if img.shape[-1] == 4:  # RGBA: drop the alpha channel
            img = img[:, :, :3]
        img = cv.resize(img, newSize, interpolation=cv.INTER_AREA)
        img = img / 255  # normalize pixel values to [0, 1]
        test_img.append(img)
        labels.append(e['value'])

    X = {}
    X["values"] = np.array(test_img)
    X["categories_name"] = categories_name
    X["categories_count"] = categories_count
    X["labels"] = labels
    return X

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import tensorflow as tf

data_train = load_train_data("./train_test_sw/train_sw_unity")
values_train = data_train['values']
labels_train = data_train['labels']
data_test = load_test_data("./train_test_sw/test_sw")
X_test = data_test['values']
y_test = data_test['labels']

X_train, X_validate, y_train, y_validate = train_test_split(values_train, labels_train, test_size=0.2, random_state=42)

class_le = LabelEncoder()
y_train_enc = class_le.fit_transform(y_train)
# Fit the encoder only on the training labels, then reuse the same mapping
# for validation and test so the integer classes stay consistent.
y_validate_enc = class_le.transform(y_validate)
y_test_enc = class_le.transform(y_test)

train_ds = tf.data.Dataset.from_tensor_slices((X_train, y_train_enc))
validation_ds = tf.data.Dataset.from_tensor_slices((X_validate, y_validate_enc))
test_ds = tf.data.Dataset.from_tensor_slices((X_test, y_test_enc))

train_ds_size = tf.data.experimental.cardinality(train_ds).numpy()
test_ds_size = tf.data.experimental.cardinality(test_ds).numpy()
validation_ds_size = tf.data.experimental.cardinality(validation_ds).numpy()

train_ds = (train_ds
                .shuffle(buffer_size=train_ds_size)
                .batch(batch_size=32, drop_remainder=True))
test_ds = (test_ds
                .shuffle(buffer_size=test_ds_size)
                .batch(batch_size=32, drop_remainder=True))
validation_ds = (validation_ds
                .shuffle(buffer_size=validation_ds_size)
                .batch(batch_size=32, drop_remainder=True))
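Optionally, each pipeline can prefetch batches so preprocessing overlaps with training; a minimal sketch using tf.data (not part of the original pipeline):

# Optional: prepare the next batch while the model works on the current one.
# AUTOTUNE lets tf.data pick the prefetch buffer size dynamically.
train_ds = train_ds.prefetch(tf.data.AUTOTUNE)
validation_ds = validation_ds.prefetch(tf.data.AUTOTUNE)
test_ds = test_ds.prefetch(tf.data.AUTOTUNE)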

AlexNet

from keras.callbacks import ModelCheckpoint, EarlyStopping
import matplotlib.pyplot as plt
import tensorflow as tf

alexnet = keras.models.Sequential([
    keras.layers.Conv2D(filters=96, kernel_size=(11,11), strides=(4,4), activation='relu', input_shape=(227,227,3)),
    keras.layers.BatchNormalization(),
    keras.layers.MaxPool2D(pool_size=(3,3), strides=(2,2)),
    keras.layers.Conv2D(filters=256, kernel_size=(5,5), strides=(1,1), activation='relu', padding="same"),
    keras.layers.BatchNormalization(),
    keras.layers.MaxPool2D(pool_size=(3,3), strides=(2,2)),
    keras.layers.Conv2D(filters=384, kernel_size=(3,3), strides=(1,1), activation='relu', padding="same"),
    keras.layers.BatchNormalization(),
    keras.layers.Conv2D(filters=384, kernel_size=(3,3), strides=(1,1), activation='relu', padding="same"),
    keras.layers.BatchNormalization(),
    keras.layers.Conv2D(filters=256, kernel_size=(3,3), strides=(1,1), activation='relu', padding="same"),
    keras.layers.BatchNormalization(),
    keras.layers.MaxPool2D(pool_size=(3,3), strides=(2,2)),
    keras.layers.Flatten(),
    keras.layers.Dense(4096, activation='relu'),
    keras.layers.Dropout(.5),
    keras.layers.Dense(4096, activation='relu'),
    keras.layers.Dropout(.5),
    keras.layers.Dense(10, activation='softmax')
])
alexnet.compile(loss='sparse_categorical_crossentropy', optimizer=tf.optimizers.SGD(learning_rate=.001), metrics=['accuracy'])
alexnet.summary()
Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
=================================================================
 conv2d (Conv2D)             (None, 55, 55, 96)        34944     
                                                                 
 batch_normalization (BatchN  (None, 55, 55, 96)       384       
 ormalization)                                                   
                                                                 
 max_pooling2d (MaxPooling2D  (None, 27, 27, 96)       0         
 )                                                               
                                                                 
 conv2d_1 (Conv2D)           (None, 27, 27, 256)       614656    
                                                                 
 batch_normalization_1 (Batc  (None, 27, 27, 256)      1024      
 hNormalization)                                                 
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 13, 13, 256)      0         
 2D)                                                             
                                                                 
 conv2d_2 (Conv2D)           (None, 13, 13, 384)       885120    
                                                                 
 batch_normalization_2 (Batc  (None, 13, 13, 384)      1536      
 hNormalization)                                                 
                                                                 
 conv2d_3 (Conv2D)           (None, 13, 13, 384)       1327488   
                                                                 
 batch_normalization_3 (Batc  (None, 13, 13, 384)      1536      
 hNormalization)                                                 
                                                                 
 conv2d_4 (Conv2D)           (None, 13, 13, 256)       884992    
                                                                 
 batch_normalization_4 (Batc  (None, 13, 13, 256)      1024      
 hNormalization)                                                 
                                                                 
 max_pooling2d_2 (MaxPooling  (None, 6, 6, 256)        0         
 2D)                                                             
                                                                 
 flatten (Flatten)           (None, 9216)              0         
                                                                 
 dense (Dense)               (None, 4096)              37752832  
                                                                 
 dropout (Dropout)           (None, 4096)              0         
                                                                 
 dense_1 (Dense)             (None, 4096)              16781312  
                                                                 
 dropout_1 (Dropout)         (None, 4096)              0         
                                                                 
 dense_2 (Dense)             (None, 10)                40970     
                                                                 
=================================================================
Total params: 58,327,818
Trainable params: 58,325,066
Non-trainable params: 2,752
_________________________________________________________________
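As a quick check on the summary, the first layer's parameter count can be reproduced by hand: each of the 96 filters has 11×11×3 weights plus one bias.

# First Conv2D layer: (kernel_h * kernel_w * in_channels + 1 bias) per filter.
filters, k, in_ch = 96, 11, 3
print((k * k * in_ch + 1) * filters)  # 34944, matching the summary above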
checkpoint = ModelCheckpoint("alex_2.h5", monitor='val_accuracy', verbose=1, save_best_only=True, save_weights_only=False, mode='auto')
early = EarlyStopping(monitor='val_accuracy', min_delta=0, patience=20, verbose=1, mode='auto')

# Model.fit accepts tf.data datasets directly (fit_generator is deprecated);
# steps per epoch are inferred from the dataset cardinality.
alex = alexnet.fit(
    train_ds,
    validation_data=validation_ds,
    epochs=25,
    callbacks=[checkpoint, early])
Epoch 1/25
50/50 [==============================] - ETA: 0s - loss: 3.8100 - accuracy: 0.3950
Epoch 1: val_accuracy improved from -inf to 0.29688, saving model to alex_2.h5
50/50 [==============================] - 46s 896ms/step - loss: 3.8100 - accuracy: 0.3950 - val_loss: 1.8302 - val_accuracy: 0.2969
Epoch 2/25
50/50 [==============================] - ETA: 0s - loss: 1.3961 - accuracy: 0.5281
Epoch 2: val_accuracy improved from 0.29688 to 0.38281, saving model to alex_2.h5
50/50 [==============================] - 46s 918ms/step - loss: 1.3961 - accuracy: 0.5281 - val_loss: 1.9363 - val_accuracy: 0.3828
Epoch 3/25
50/50 [==============================] - ETA: 0s - loss: 1.0805 - accuracy: 0.5956
Epoch 3: val_accuracy did not improve from 0.38281
50/50 [==============================] - 48s 955ms/step - loss: 1.0805 - accuracy: 0.5956 - val_loss: 2.2350 - val_accuracy: 0.3438
Epoch 4/25
50/50 [==============================] - ETA: 0s - loss: 0.8866 - accuracy: 0.6600
Epoch 4: val_accuracy did not improve from 0.38281
50/50 [==============================] - 60s 1s/step - loss: 0.8866 - accuracy: 0.6600 - val_loss: 2.0590 - val_accuracy: 0.3203
Epoch 5/25
50/50 [==============================] - ETA: 0s - loss: 0.7940 - accuracy: 0.6981
Epoch 5: val_accuracy did not improve from 0.38281
50/50 [==============================] - 74s 1s/step - loss: 0.7940 - accuracy: 0.6981 - val_loss: 2.4437 - val_accuracy: 0.3672
Epoch 6/25
50/50 [==============================] - ETA: 0s - loss: 0.6635 - accuracy: 0.7325
Epoch 6: val_accuracy improved from 0.38281 to 0.45833, saving model to alex_2.h5
50/50 [==============================] - 68s 1s/step - loss: 0.6635 - accuracy: 0.7325 - val_loss: 1.8824 - val_accuracy: 0.4583
Epoch 7/25
50/50 [==============================] - ETA: 0s - loss: 0.6460 - accuracy: 0.7406
Epoch 7: val_accuracy improved from 0.45833 to 0.49479, saving model to alex_2.h5
50/50 [==============================] - 67s 1s/step - loss: 0.6460 - accuracy: 0.7406 - val_loss: 1.3159 - val_accuracy: 0.4948
Epoch 8/25
50/50 [==============================] - ETA: 0s - loss: 0.5407 - accuracy: 0.7837
Epoch 8: val_accuracy improved from 0.49479 to 0.62500, saving model to alex_2.h5
50/50 [==============================] - 70s 1s/step - loss: 0.5407 - accuracy: 0.7837 - val_loss: 0.9668 - val_accuracy: 0.6250
Epoch 9/25
50/50 [==============================] - ETA: 0s - loss: 0.4979 - accuracy: 0.7994
Epoch 9: val_accuracy did not improve from 0.62500
50/50 [==============================] - 66s 1s/step - loss: 0.4979 - accuracy: 0.7994 - val_loss: 1.1679 - val_accuracy: 0.5677
Epoch 10/25
50/50 [==============================] - ETA: 0s - loss: 0.4673 - accuracy: 0.8256
Epoch 10: val_accuracy improved from 0.62500 to 0.74479, saving model to alex_2.h5
50/50 [==============================] - 69s 1s/step - loss: 0.4673 - accuracy: 0.8256 - val_loss: 0.6585 - val_accuracy: 0.7448
Epoch 11/25
50/50 [==============================] - ETA: 0s - loss: 0.4136 - accuracy: 0.8313
Epoch 11: val_accuracy did not improve from 0.74479
50/50 [==============================] - 84s 2s/step - loss: 0.4136 - accuracy: 0.8313 - val_loss: 0.8328 - val_accuracy: 0.7188
Epoch 12/25
50/50 [==============================] - ETA: 0s - loss: 0.3804 - accuracy: 0.8519
Epoch 12: val_accuracy improved from 0.74479 to 0.76302, saving model to alex_2.h5
50/50 [==============================] - 80s 2s/step - loss: 0.3804 - accuracy: 0.8519 - val_loss: 0.6793 - val_accuracy: 0.7630
Epoch 13/25
50/50 [==============================] - ETA: 0s - loss: 0.3550 - accuracy: 0.8587
Epoch 13: val_accuracy did not improve from 0.76302
50/50 [==============================] - 81s 2s/step - loss: 0.3550 - accuracy: 0.8587 - val_loss: 0.6221 - val_accuracy: 0.7630
Epoch 14/25
50/50 [==============================] - ETA: 0s - loss: 0.3337 - accuracy: 0.8744
Epoch 14: val_accuracy did not improve from 0.76302
50/50 [==============================] - 96s 2s/step - loss: 0.3337 - accuracy: 0.8744 - val_loss: 0.6317 - val_accuracy: 0.7578
Epoch 15/25
50/50 [==============================] - ETA: 0s - loss: 0.2860 - accuracy: 0.8950
Epoch 15: val_accuracy improved from 0.76302 to 0.79167, saving model to alex_2.h5
50/50 [==============================] - 79s 2s/step - loss: 0.2860 - accuracy: 0.8950 - val_loss: 0.6067 - val_accuracy: 0.7917
Epoch 16/25
50/50 [==============================] - ETA: 0s - loss: 0.2721 - accuracy: 0.8881
Epoch 16: val_accuracy improved from 0.79167 to 0.81250, saving model to alex_2.h5
50/50 [==============================] - 77s 2s/step - loss: 0.2721 - accuracy: 0.8881 - val_loss: 0.5126 - val_accuracy: 0.8125
Epoch 17/25
50/50 [==============================] - ETA: 0s - loss: 0.2564 - accuracy: 0.8969
Epoch 17: val_accuracy improved from 0.81250 to 0.81510, saving model to alex_2.h5
50/50 [==============================] - 78s 2s/step - loss: 0.2564 - accuracy: 0.8969 - val_loss: 0.5017 - val_accuracy: 0.8151
Epoch 18/25
50/50 [==============================] - ETA: 0s - loss: 0.2534 - accuracy: 0.8981
Epoch 18: val_accuracy improved from 0.81510 to 0.82292, saving model to alex_2.h5
50/50 [==============================] - 72s 1s/step - loss: 0.2534 - accuracy: 0.8981 - val_loss: 0.4199 - val_accuracy: 0.8229
Epoch 19/25
50/50 [==============================] - ETA: 0s - loss: 0.2327 - accuracy: 0.9075
Epoch 19: val_accuracy improved from 0.82292 to 0.84635, saving model to alex_2.h5
50/50 [==============================] - 70s 1s/step - loss: 0.2327 - accuracy: 0.9075 - val_loss: 0.4260 - val_accuracy: 0.8464
Epoch 20/25
50/50 [==============================] - ETA: 0s - loss: 0.2132 - accuracy: 0.9219
Epoch 20: val_accuracy did not improve from 0.84635
50/50 [==============================] - 69s 1s/step - loss: 0.2132 - accuracy: 0.9219 - val_loss: 0.6660 - val_accuracy: 0.7995
Epoch 21/25
50/50 [==============================] - ETA: 0s - loss: 0.1870 - accuracy: 0.9287
Epoch 21: val_accuracy did not improve from 0.84635
50/50 [==============================] - 70s 1s/step - loss: 0.1870 - accuracy: 0.9287 - val_loss: 0.5399 - val_accuracy: 0.8203
Epoch 22/25
50/50 [==============================] - ETA: 0s - loss: 0.1861 - accuracy: 0.9294
Epoch 22: val_accuracy did not improve from 0.84635
50/50 [==============================] - 78s 2s/step - loss: 0.1861 - accuracy: 0.9294 - val_loss: 0.5620 - val_accuracy: 0.8151
Epoch 23/25
50/50 [==============================] - ETA: 0s - loss: 0.1494 - accuracy: 0.9375
Epoch 23: val_accuracy improved from 0.84635 to 0.88281, saving model to alex_2.h5
50/50 [==============================] - 80s 2s/step - loss: 0.1494 - accuracy: 0.9375 - val_loss: 0.3850 - val_accuracy: 0.8828
Epoch 24/25
50/50 [==============================] - ETA: 0s - loss: 0.1548 - accuracy: 0.9481
Epoch 24: val_accuracy did not improve from 0.88281
50/50 [==============================] - 81s 2s/step - loss: 0.1548 - accuracy: 0.9481 - val_loss: 0.4789 - val_accuracy: 0.8646
Epoch 25/25
50/50 [==============================] - ETA: 0s - loss: 0.1541 - accuracy: 0.9456
Epoch 25: val_accuracy did not improve from 0.88281
50/50 [==============================] - 82s 2s/step - loss: 0.1541 - accuracy: 0.9456 - val_loss: 0.4806 - val_accuracy: 0.8411
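Note that training ends with the epoch-25 weights in memory, while ModelCheckpoint saved the best validation score (0.8828, epoch 23) to alex_2.h5. A minimal sketch for evaluating the checkpointed model instead:

# Reload the best checkpoint written by ModelCheckpoint; the live `alexnet`
# object still carries the final-epoch weights, not the best ones.
best_alexnet = keras.models.load_model("alex_2.h5")
best_alexnet.evaluate(test_ds)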
plt.plot(alex.history["accuracy"])
plt.plot(alex.history['val_accuracy'])
plt.plot(alex.history['loss'])
plt.plot(alex.history['val_loss'])
plt.title("Training history - AlexNet")
plt.ylabel("Value")
plt.xlabel("Epoch")
plt.legend(["Accuracy","Validation Accuracy","Loss","Validation Loss"])
plt.show()
alexnet.evaluate(test_ds)
8/8 [==============================] - 4s 450ms/step - loss: 0.4419 - accuracy: 0.8516
[0.4419291615486145, 0.8515625]
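Accuracy alone does not show which classes get confused. A sketch of a per-class breakdown (assuming the class_le encoder from above; labels are collected batch-by-batch because the shuffled dataset reorders itself on every pass):

import numpy as np
from sklearn.metrics import classification_report, confusion_matrix

# Collect labels and predictions together so they stay aligned even though
# test_ds reshuffles between iterations.
y_true, y_pred = [], []
for batch_x, batch_y in test_ds:
    probs = alexnet.predict(batch_x, verbose=0)
    y_true.extend(batch_y.numpy())
    y_pred.extend(np.argmax(probs, axis=1))

print(confusion_matrix(y_true, y_pred))
print(classification_report(y_true, y_pred,
                            labels=np.arange(len(class_le.classes_)),
                            target_names=class_le.classes_))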

MLP

# Flatten each image into a single feature vector for the MLP.
X_train = X_train.reshape(X_train.shape[0], -1)
X_test = X_test.reshape(X_test.shape[0], -1)
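Each flattened sample therefore has 227 · 227 · 3 = 154587 features; a one-line check:

assert X_train.shape[1] == 227 * 227 * 3  # 154587 features per sample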
from sklearn.neural_network import MLPClassifier
from tqdm import tqdm

def test_mlp(X_train, y_train, X_val, y_val, X_test, y_test, hidden_layer_sizes, alpha, max_iter):
    mlp = MLPClassifier(hidden_layer_sizes=hidden_layer_sizes, alpha=alpha, max_iter=max_iter)
    accuracy = []

    result = {
        'num_layers': len(hidden_layer_sizes),
        'layer_sizes': hidden_layer_sizes,
        'regularization': alpha,
        'max_iter': max_iter
    }

    # Train one epoch at a time with partial_fit so a per-epoch accuracy curve
    # can be recorded; snapshot the metrics once after 50 epochs and again at
    # the end (train accuracy is reported as the mean over the epochs so far).
    for i in tqdm(range(max_iter)):
        mlp.partial_fit(X_train, y_train, np.unique(y_train))
        accuracy.append(mlp.score(X_train, y_train))
        if i == 49:  # 50 epochs completed
            result['checkpoint_train_accuracy'] = np.mean(accuracy)
            result['checkpoint_val_accuracy'] = mlp.score(X_val, y_val)
            result['checkpoint_test_accuracy'] = mlp.score(X_test, y_test)

    result['full_train_accuracy'] = np.mean(accuracy)
    result['full_val_accuracy'] = mlp.score(X_val, y_val)
    result['full_test_accuracy'] = mlp.score(X_test, y_test)
    result['accuracy_curve'] = accuracy
    result['loss_curve'] = mlp.loss_curve_

    return result
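As an aside, scikit-learn can handle validation-based stopping internally instead of this manual partial_fit loop; a minimal sketch with the built-in early_stopping flag (this gives up the hand-recorded accuracy curve used above):

from sklearn.neural_network import MLPClassifier

# Hypothetical alternative: hold out 10% of the training data internally and
# stop once the validation score plateaus for 20 consecutive epochs.
mlp = MLPClassifier(hidden_layer_sizes=(286,), alpha=0.1, max_iter=200,
                    early_stopping=True, validation_fraction=0.1,
                    n_iter_no_change=20)
mlp.fit(X_train, y_train)
print(mlp.score(X_test, y_test))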

def print_result(result):
    print(f"NUMBER OF HIDDEN LAYERS = {result['num_layers']}")
    print(f"HIDDEN LAYER SIZES = {result['layer_sizes']}")
    print(f"REGULARIZATION = {result['regularization']}")
    print("\n50 EPOCHS")
    print(f"train_accuracy = {round(result['checkpoint_train_accuracy'] * 100, 2)}%")
    print(f"val_accuracy = {round(result['checkpoint_val_accuracy'] * 100, 2)}%")
    print(f"test_accuracy = {round(result['checkpoint_test_accuracy'] * 100, 2)}%")
    print(f"\n{result['max_iter']} EPOCHS")
    print(f"train_accuracy = {round(result['full_train_accuracy'] * 100, 2)}%")
    print(f"val_accuracy = {round(result['checkpoint_val_accuracy'] * 100, 2)}%")
    print(f"test_accuracy = {round(result['full_test_accuracy'] * 100, 2)}%")

def get_plot(result):
    f = plt.figure(figsize=(12,6))
    plt.plot(result['loss_curve'], label='loss')
    plt.plot(result['accuracy_curve'], label='accuracy')
    plt.legend(loc='best')
    plt.xlabel('number of iterations')
    plt.grid()
    plt.show()

from sklearn.model_selection import train_test_split

ONE_LAYER = (286,)

X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.05, random_state=42)

all_results = []
all_results.append(test_mlp(X_train, y_train, X_val, y_val, X_test, y_test, hidden_layer_sizes=ONE_LAYER, alpha=0.1, max_iter=200))
all_results.append(test_mlp(X_train, y_train, X_val, y_val, X_test, y_test, hidden_layer_sizes=ONE_LAYER, alpha=0.001, max_iter=200))
100%|██████████| 200/200 [42:45<00:00, 12.83s/it]
100%|██████████| 200/200 [39:45<00:00, 11.93s/it]
for result in all_results:
    print_result(result)
    get_plot(result)
NUMBER OF HIDDEN LAYERS = 1
HIDDEN LAYER SIZES = (286,)
REGULARIZATION = 0.1

50 EPOCHS
train_accuracy = 68.78%
val_accuracy = 58.02%
test_accuracy = 53.67%

200 EPOCHS
train_accuracy = 90.29%
val_accuracy = 58.02%
test_accuracy = 59.07%
NUMBER OF HIDDEN LAYERS = 1
HIDDEN LAYER SIZES = (286,)
REGULARIZATION = 0.001

50 EPOCHS
train_accuracy = 74.1%
val_accuracy = 56.79%
test_accuracy = 50.19%

200 EPOCHS
train_accuracy = 92.63%
val_accuracy = 56.79%
test_accuracy = 54.44%