Symulowanie-wizualne/sw_lab9-10.ipynb

Aleksandra Jonas, Aleksandra Gronowska, Iwona Christop

Task 9-10

- VGG16 + ResNet

- AlexNet, VGG16, ResNet with the PlantVillage dataset

- data generation using Unity - Jacek Kaluzny

- data augmentation - edge filters, rotation, textures (see the sketch below)
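
A minimal sketch of what the edge-filter and rotation augmentations mentioned above could look like (illustrative only: the exact angles and filters used later in the notebook may differ, and scikit-image is assumed):

from skimage.transform import rotate
from skimage.filters import sobel
from skimage.color import rgb2gray

def augment_image(img):
    # hypothetical helper: produce a rotated copy and an edge-map copy of one RGB image
    rotated = rotate(img, angle=15, mode='edge')  # rotation augmentation
    edges = sobel(rgb2gray(img))                  # edge-filter augmentation on the grayscale image
    return rotated, edges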

VGG16 + ResNet on train_test_sw

Data preparation

from IPython.display import Image, display
import sys
import subprocess
import pkg_resources
import numpy as np

# VGG16 requires images of shape (224, 224, 3)
newSize = (224, 224)

required = {'scikit-image', 'seaborn'}
installed = {pkg.key for pkg in pkg_resources.working_set}
missing = required - installed

if missing:
    python = sys.executable
    subprocess.check_call([python, '-m', 'pip', 'install', *missing], stdout=subprocess.DEVNULL)

def load_train_data(input_dir):
    import numpy as np
    import pandas as pd
    import os
    from skimage.io import imread
    import cv2 as cv
    from pathlib import Path
    import random
    from shutil import copyfile, rmtree
    import json

    import seaborn as sns
    import matplotlib.pyplot as plt

    import matplotlib
    
    image_dir = Path(input_dir)
    categories_name = []
    for file in os.listdir(image_dir):
        d = os.path.join(image_dir, file)
        if os.path.isdir(d):
            categories_name.append(file)

    folders = [directory for directory in image_dir.iterdir() if directory.is_dir()]

    train_img = []
    categories_count=[]
    labels=[]
    for i, direc in enumerate(folders):
        count = 0
        for obj in direc.iterdir():
            if os.path.isfile(obj) and os.path.basename(os.path.normpath(obj)) != 'desktop.ini':
                labels.append(os.path.basename(os.path.normpath(direc)))
                count += 1
                img = imread(obj)  # returns an ndarray of shape (height, width, colorDepth)
                img = img[:, :, :3]
                img = cv.resize(img, newSize, interpolation=cv.INTER_AREA)  # returns an ndarray
                img = img / 255  # normalization to [0, 1]
                train_img.append(img)
        categories_count.append(count)
    X={}
    X["values"] = np.array(train_img)
    X["categories_name"] = categories_name
    X["categories_count"] = categories_count
    X["labels"]=labels
    return X

def load_test_data(input_dir):
    import numpy as np
    import pandas as pd
    import os
    from skimage.io import imread
    import cv2 as cv
    from pathlib import Path
    import random
    from shutil import copyfile, rmtree
    import json

    import seaborn as sns
    import matplotlib.pyplot as plt

    import matplotlib

    image_path = Path(input_dir)

    labels_path = image_path.parents[0] / 'test_labels.json'

    jsonString = labels_path.read_text()
    objects = json.loads(jsonString)

    categories_name = []
    categories_count=[]
    count = 0
    c = objects[0]['value']
    for e in  objects:
        if e['value'] != c:
            categories_count.append(count)
            c = e['value']
            count = 1
        else:
            count += 1
        if not e['value'] in categories_name:
            categories_name.append(e['value'])

    categories_count.append(count)
    
    test_img = []

    labels=[]
    for e in objects:
        p = image_path / e['filename']
        img = imread(p)  # returns an ndarray of shape (height, width, colorDepth)
        img = img[:, :, :3]
        img = cv.resize(img, newSize, interpolation=cv.INTER_AREA)  # returns an ndarray
        img = img / 255  # normalization to [0, 1]
        test_img.append(img)
        labels.append(e['value'])

    X={}
    X["values"] = np.array(test_img)
    X["categories_name"] = categories_name
    X["categories_count"] = categories_count
    X["labels"]=labels
    return X
def create_tf_ds(X_train, y_train_enc, X_validate, y_validate_enc, X_test, y_test_enc):
    import tensorflow as tf
    
    train_ds = tf.data.Dataset.from_tensor_slices((X_train, y_train_enc))
    validation_ds = tf.data.Dataset.from_tensor_slices((X_validate, y_validate_enc))
    test_ds = tf.data.Dataset.from_tensor_slices((X_test, y_test_enc))

    train_ds_size = tf.data.experimental.cardinality(train_ds).numpy()
    test_ds_size = tf.data.experimental.cardinality(test_ds).numpy()
    validation_ds_size = tf.data.experimental.cardinality(validation_ds).numpy()

    print("Training data size:", train_ds_size)
    print("Test data size:", test_ds_size)
    print("Validation data size:", validation_ds_size)

    train_ds = (train_ds
                .shuffle(buffer_size=train_ds_size)
                .batch(batch_size=32, drop_remainder=True))
    test_ds = (test_ds
               .shuffle(buffer_size=test_ds_size)
               .batch(batch_size=32, drop_remainder=True))
    validation_ds = (validation_ds
                     .shuffle(buffer_size=validation_ds_size)
                     .batch(batch_size=32, drop_remainder=True))
    
    return train_ds, test_ds, validation_ds
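
As an optional tweak (not part of the original pipeline), a prefetch step could be chained onto each dataset inside create_tf_ds, just before the return, so the input pipeline prepares the next batches while the model trains; a minimal sketch, assuming TensorFlow 2.4 or newer:

    # overlap data preprocessing with model execution (sketch, inside create_tf_ds)
    train_ds = train_ds.prefetch(buffer_size=tf.data.AUTOTUNE)
    test_ds = test_ds.prefetch(buffer_size=tf.data.AUTOTUNE)
    validation_ds = validation_ds.prefetch(buffer_size=tf.data.AUTOTUNE)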
def get_run_logdir(root_logdir):
    import os
    import time

    run_id = time.strftime("run_%Y_%m_%d-%H_%M_%S")
    return os.path.join(root_logdir, run_id)
def diagram_setup(model_name):
    from tensorflow import keras
    import os

    root_logdir = os.path.join(os.curdir, "logs", "fit", model_name)

    run_logdir = get_run_logdir(root_logdir)
    tensorboard_cb = keras.callbacks.TensorBoard(run_logdir)
    return tensorboard_cb
# Data load
data_train = load_train_data("./train_test_sw/train_sw")
values_train = data_train['values']
labels_train = data_train['labels']

data_test = load_test_data("./train_test_sw/test_sw")
X_test = data_test['values']
y_test = data_test['labels']
from sklearn.model_selection import train_test_split
X_train, X_validate, y_train, y_validate = train_test_split(values_train, labels_train, test_size=0.2, random_state=42)
from sklearn.preprocessing import LabelEncoder
class_le = LabelEncoder()
# fit the encoder only on the training labels, then reuse the same mapping
y_train_enc = class_le.fit_transform(y_train)
y_validate_enc = class_le.transform(y_validate)
y_test_enc = class_le.transform(y_test)
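
For reference, class_le.classes_ now holds the class names discovered on the training labels (their count determines the size of the network's output layer), and inverse_transform maps encoded labels back to names; a quick illustrative check:

print("Classes:", class_le.classes_)
print("Number of classes:", len(class_le.classes_))
print("First labels decoded back:", class_le.inverse_transform(y_train_enc[:5]))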
train_ds_vgg_sw, test_ds_vgg_sw, validation_ds_vgg_sw = create_tf_ds(X_train, y_train_enc, X_validate, y_validate_enc, X_test, y_test_enc)
tensorboard_cb = diagram_setup('vgg_sw')

VGG

import os
import keras
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Conv2D, MaxPool2D, Flatten
from keras.preprocessing.image import ImageDataGenerator

model_VGG = Sequential()
model_VGG.add(Conv2D(input_shape=(224,224,3),filters=64,kernel_size=(3,3),padding="same", activation="relu"))
model_VGG.add(Conv2D(filters=64,kernel_size=(3,3),padding="same", activation="relu"))
model_VGG.add(MaxPool2D(pool_size=(2,2),strides=(2,2)))
model_VGG.add(Conv2D(filters=128, kernel_size=(3,3), padding="same", activation="relu"))
model_VGG.add(Conv2D(filters=128, kernel_size=(3,3), padding="same", activation="relu"))
model_VGG.add(MaxPool2D(pool_size=(2,2),strides=(2,2)))
model_VGG.add(Conv2D(filters=256, kernel_size=(3,3), padding="same", activation="relu"))
model_VGG.add(Conv2D(filters=256, kernel_size=(3,3), padding="same", activation="relu"))
model_VGG.add(Conv2D(filters=256, kernel_size=(3,3), padding="same", activation="relu"))
model_VGG.add(MaxPool2D(pool_size=(2,2),strides=(2,2)))
model_VGG.add(Conv2D(filters=512, kernel_size=(3,3), padding="same", activation="relu"))
model_VGG.add(Conv2D(filters=512, kernel_size=(3,3), padding="same", activation="relu"))
model_VGG.add(Conv2D(filters=512, kernel_size=(3,3), padding="same", activation="relu"))
model_VGG.add(MaxPool2D(pool_size=(2,2),strides=(2,2)))
model_VGG.add(Conv2D(filters=512, kernel_size=(3,3), padding="same", activation="relu"))
model_VGG.add(Conv2D(filters=512, kernel_size=(3,3), padding="same", activation="relu"))
model_VGG.add(Conv2D(filters=512, kernel_size=(3,3), padding="same", activation="relu"))
model_VGG.add(MaxPool2D(pool_size=(2,2),strides=(2,2)))
model_VGG.add(Flatten())
model_VGG.add(Dense(units=4096,activation="relu"))
model_VGG.add(Dense(units=4096,activation="relu"))
model_VGG.add(Dense(units=len(class_le.classes_), activation="softmax"))  # one output unit per class
from keras.optimizers import Adam
opt = Adam(learning_rate=0.001)
# labels are integer-encoded (LabelEncoder), so the sparse variant of the loss is required
model_VGG.compile(optimizer=opt, loss=keras.losses.sparse_categorical_crossentropy, metrics=['accuracy'])
model_VGG.summary()
from keras.callbacks import ModelCheckpoint, EarlyStopping
checkpoint = ModelCheckpoint("vgg16_1.h5", monitor='val_accuracy', verbose=1, save_best_only=True, save_weights_only=False, mode='auto')
early = EarlyStopping(monitor='val_accuracy', min_delta=0, patience=20, verbose=1, mode='auto')
hist = model_VGG.fit(train_ds_vgg_sw, validation_data=validation_ds_vgg_sw, epochs=5, callbacks=[checkpoint, early, tensorboard_cb])
import matplotlib.pyplot as plt
plt.plot(hist.history["acc"])
plt.plot(hist.history['val_acc'])
plt.plot(hist.history['loss'])
plt.plot(hist.history['val_loss'])
plt.title("model accuracy")
plt.ylabel("Accuracy")
plt.xlabel("Epoch")
plt.legend(["Accuracy","Validation Accuracy","loss","Validation Loss"])
plt.show()
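
The test split prepared earlier (test_ds_vgg_sw) is not used in this section; a short follow-up along these lines could report held-out performance (sketch, assuming the training cell above ran to completion):

# evaluate the trained model on the held-out test set
test_loss, test_acc = model_VGG.evaluate(test_ds_vgg_sw)
print(f"Test loss: {test_loss:.4f}, test accuracy: {test_acc:.4f}")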