From c3b8c6d8c327b733fef9c13ab35ddc53ba6b3a58 Mon Sep 17 00:00:00 2001
From: jakubknczny
Date: Sun, 2 May 2021 22:01:32 +0200
Subject: [PATCH] add py scripts lab5

---
Review notes (text here is ignored when the patch is applied):

* create_dataset.py: write 'train.csv' instead of 'train.cs' (train.py
  reads 'train.csv'), and pass index=False to DataFrame.to_csv;
  index_col is a read_csv parameter and made to_csv raise TypeError.
* eval.py: file.write() takes exactly one str; the two-argument calls
  with a float raised TypeError. The metrics are now appended as one
  "loss,accuracy" row per run, inside a `with` block so the file is
  closed.
* test_eval.py: write model_eval.txt inside a `with` block so the handle
  is closed even if the write fails.
* requirements.txt: depend on scikit-learn, the real distribution name,
  instead of the deprecated 'sklearn' alias.
* Per-file line counts are unchanged, so the hunk headers and the
  diffstat below remain valid. The abbreviated blob ids on the `index`
  lines come from the original contents and were not recomputed.

 lab5/Dockerfile        | 19 +++++++++++++
 lab5/create_dataset.py | 35 ++++++++++++++++++++++++
 lab5/eval.py           | 23 ++++++++++++++++
 lab5/requirements.txt  |  5 ++++
 lab5/script.sh         |  6 +++++
 lab5/test_eval.py      | 61 ++++++++++++++++++++++++++++++++++++++++++
 lab5/train.py          | 36 +++++++++++++++++++++++++
 7 files changed, 185 insertions(+)
 create mode 100644 lab5/Dockerfile
 create mode 100644 lab5/create_dataset.py
 create mode 100644 lab5/eval.py
 create mode 100644 lab5/requirements.txt
 create mode 100644 lab5/script.sh
 create mode 100644 lab5/test_eval.py
 create mode 100644 lab5/train.py

diff --git a/lab5/Dockerfile b/lab5/Dockerfile
new file mode 100644
index 0000000..f3598ed
--- /dev/null
+++ b/lab5/Dockerfile
@@ -0,0 +1,19 @@
+FROM ubuntu:latest
+
+RUN apt update >>/dev/null
+RUN apt install -y apt-utils >>/dev/null
+RUN apt install -y python3.8 >>/dev/null
+RUN apt install -y python3-pip >>/dev/null
+RUN apt install -y unzip >>/dev/null
+
+WORKDIR /app
+
+COPY ./test_eval.py ./
+COPY ./script.sh ./
+RUN chmod +x script.sh
+
+COPY ./requirements.txt ./
+
+RUN pip3 install -r requirements.txt >>/dev/null
+
+CMD ./script.sh
diff --git a/lab5/create_dataset.py b/lab5/create_dataset.py
new file mode 100644
index 0000000..e600bcd
--- /dev/null
+++ b/lab5/create_dataset.py
@@ -0,0 +1,35 @@
+import pandas as pd
+from sklearn import preprocessing
+from sklearn.model_selection import train_test_split
+
+df = pd.read_csv('smart_grid_stability_augmented.csv')
+
+scaler = preprocessing.StandardScaler().fit(df.iloc[:, 0:-1])
+df_norm_array = scaler.transform(df.iloc[:, 0:-1])
+df_norm = pd.DataFrame(data=df_norm_array,
+                       columns=df.columns[:-1])
+df_norm['stabf'] = df['stabf']
+
+df_norm_data = df_norm.copy()
+df_norm_data = df_norm_data.drop('stab', axis=1)
+df_norm_labels = df_norm_data.pop('stabf')
+
+X_train, X_testAndValid, Y_train, Y_testAndValid = train_test_split(
+    df_norm_data,
+    df_norm_labels,
+    test_size=0.2,
+    random_state=42)
+
+X_test, X_valid, Y_test, Y_valid = train_test_split(
+    X_testAndValid,
+    Y_testAndValid,
+    test_size=0.5,
+    random_state=42)
+
+train = pd.concat([X_train, Y_train], axis=1)
+test = pd.concat([X_test, Y_test], axis=1)
+valid = pd.concat([X_valid, Y_valid], axis=1)
+
+train.to_csv('train.csv', index=False)
+test.to_csv('test.csv', index=False)
+valid.to_csv('valid.csv', index=False)
diff --git a/lab5/eval.py b/lab5/eval.py
new file mode 100644
index 0000000..57a1545
--- /dev/null
+++ b/lab5/eval.py
@@ -0,0 +1,23 @@
+import pandas as pd
+import numpy as np
+import tensorflow as tf
+
+def onezero(label):
+    return 0 if label == 'unstable' else 1
+
+
+X_test = pd.read_csv('test.csv')
+Y_test = X_test.pop('stabf')
+
+Y_test_one_zero = [onezero(x) for x in Y_test]
+Y_test_onehot = np.eye(2)[Y_test_one_zero]
+
+model = tf.keras.models.load_model('grid_stability.h5')
+
+results = model.evaluate(X_test, Y_test_onehot, batch_size=64)
+
+# Append one "loss,accuracy" row per run; `with` closes the file.
+with open('eval.csv', 'a+') as f:
+    f.write(str(results[0]) + ',')
+    f.write(str(results[1]) + '\n')
+
diff --git a/lab5/requirements.txt b/lab5/requirements.txt
new file mode 100644
index 0000000..c4eef4d
--- /dev/null
+++ b/lab5/requirements.txt
@@ -0,0 +1,5 @@
+kaggle
+numpy~=1.19.2
+pandas
+scikit-learn
+tensorflow
diff --git a/lab5/script.sh b/lab5/script.sh
new file mode 100644
index 0000000..b6fa232
--- /dev/null
+++ b/lab5/script.sh
@@ -0,0 +1,6 @@
+#!/bin/bash
+
+kaggle datasets download -d 'pcbreviglieri/smart-grid-stability'
+unzip smart-grid-stability.zip >>/dev/null 2>&1
+
+python3 test_eval.py
diff --git a/lab5/test_eval.py b/lab5/test_eval.py
new file mode 100644
index 0000000..2b95daa
--- /dev/null
+++ b/lab5/test_eval.py
@@ -0,0 +1,61 @@
+import numpy as np
+import pandas as pd
+import tensorflow as tf
+
+from sklearn import preprocessing
+from sklearn.model_selection import train_test_split
+from tensorflow.keras import layers
+
+
+def onezero(label):
+    return 0 if label == 'unstable' else 1
+
+
+df = pd.read_csv('smart_grid_stability_augmented.csv')
+
+scaler = preprocessing.StandardScaler().fit(df.iloc[:, 0:-1])
+df_norm_array = scaler.transform(df.iloc[:, 0:-1])
+df_norm = pd.DataFrame(data=df_norm_array,
+                       columns=df.columns[:-1])
+df_norm['stabf'] = df['stabf']
+
+df_norm_data = df_norm.copy()
+df_norm_data = df_norm_data.drop('stab', axis=1)
+df_norm_labels = df_norm_data.pop('stabf')
+
+X_train, X_testAndValid, Y_train, Y_testAndValid = train_test_split(
+    df_norm_data,
+    df_norm_labels,
+    test_size=0.2,
+    random_state=42)
+
+X_test, X_valid, Y_test, Y_valid = train_test_split(
+    X_testAndValid,
+    Y_testAndValid,
+    test_size=0.5,
+    random_state=42)
+
+model = tf.keras.Sequential([
+    layers.Input(shape=(12,)),
+    layers.Dense(32),
+    layers.Dense(16),
+    layers.Dense(2, activation='softmax')
+    ])
+
+model.compile(
+    loss=tf.losses.BinaryCrossentropy(),
+    optimizer=tf.optimizers.Adam(),
+    metrics=[tf.keras.metrics.BinaryAccuracy()])
+
+Y_train_one_zero = [onezero(x) for x in Y_train]
+Y_train_onehot = np.eye(2)[Y_train_one_zero]
+
+Y_test_one_zero = [onezero(x) for x in Y_test]
+Y_test_onehot = np.eye(2)[Y_test_one_zero]
+
+history = model.fit(tf.convert_to_tensor(X_train, np.float32), Y_train_onehot, epochs=5)
+
+results = model.evaluate(X_test, Y_test_onehot, batch_size=64)
+# Persist the test metrics; `with` closes the file even if the write fails.
+with open('model_eval.txt', 'w') as f:
+    f.write('test loss: ' + str(results[0]) + '\n' + 'test acc: ' + str(results[1]))
diff --git a/lab5/train.py b/lab5/train.py
new file mode 100644
index 0000000..686b0bf
--- /dev/null
+++ b/lab5/train.py
@@ -0,0 +1,36 @@
+import numpy as np
+import pandas as pd
+import tensorflow as tf
+from tensorflow.keras import layers
+
+def onezero(label):
+    return 0 if label == 'unstable' else 1
+
+
+X_train = pd.read_csv('train.csv')
+X_test = pd.read_csv('test.csv')
+
+Y_train = X_train.pop('stabf')
+Y_test = X_test.pop('stabf')
+
+Y_train_one_zero = [onezero(x) for x in Y_train]
+Y_train_onehot = np.eye(2)[Y_train_one_zero]
+
+Y_test_one_zero = [onezero(x) for x in Y_test]
+Y_test_onehot = np.eye(2)[Y_test_one_zero]
+
+model = tf.keras.Sequential([
+    layers.Input(shape=(12,)),
+    layers.Dense(32),
+    layers.Dense(16),
+    layers.Dense(2, activation='softmax')])
+
+model.compile(
+    loss=tf.losses.BinaryCrossentropy(),
+    optimizer=tf.optimizers.Adam(),
+    metrics=[tf.keras.metrics.BinaryAccuracy()])
+
+history = model.fit(tf.convert_to_tensor(X_train, np.float32),
+                    Y_train_onehot, epochs=5)
+
+model.save('grid_stability.h5')