From 67f0cc47fc1fe36be04edb18f1238e4973fa0911 Mon Sep 17 00:00:00 2001
From: Kamila
Date: Thu, 5 May 2022 20:05:14 +0200
Subject: [PATCH] sacred hw

---
 Dockerfile         |   2 +
 Jenkinsfile_sacred |  24 +++++++++++
 nn_train_sacred.py | 102 +++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 128 insertions(+)
 create mode 100644 Jenkinsfile_sacred
 create mode 100644 nn_train_sacred.py

diff --git a/Dockerfile b/Dockerfile
index a972862..6691742 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -15,6 +15,8 @@ RUN pip3 install matplotlib
 RUN pip3 install tensorflow
 RUN pip3 install keras
 RUN pip3 install sklearn
+RUN pip3 install pymongo
+RUN pip3 install sacred
 CMD python3 data_expl.py
 CMD python3 nn_train.py
\ No newline at end of file
diff --git a/Jenkinsfile_sacred b/Jenkinsfile_sacred
new file mode 100644
index 0000000..dfffdf1
--- /dev/null
+++ b/Jenkinsfile_sacred
@@ -0,0 +1,24 @@
+pipeline {
+    agent {
+        dockerfile true
+    }
+
+    stages {
+        stage('Stage 1') {
+            steps {
+                echo 'Hello world!'
+            }
+        }
+
+        stage('Run sacred on nn_train') {
+            steps {
+
+                sh 'python3 ./nn_train_sacred.py'
+                archiveArtifacts artifacts: 'sacred/_sources/*, sacred/1/*'
+                sh 'rm -r sacred'
+
+            }
+        }
+    }
+}
+
diff --git a/nn_train_sacred.py b/nn_train_sacred.py
new file mode 100644
index 0000000..3b85ea4
--- /dev/null
+++ b/nn_train_sacred.py
@@ -0,0 +1,102 @@
+from sacred.observers import MongoObserver
+from sacred import Experiment
+from sacred.observers import FileStorageObserver
+
+import pandas as pd
+import numpy as np
+
+from tensorflow.keras.models import Sequential
+from tensorflow.keras.layers import Dense
+from sklearn.preprocessing import LabelEncoder
+from sklearn.metrics import accuracy_score
+from keras.utils import np_utils
+from tensorflow import keras
+
+ex = Experiment("s444517_sacred")
+ex.observers.append(FileStorageObserver("sacred/"))
+ex.observers.append(MongoObserver(url='mongodb://mongo_user:mongo_password_IUM_2021@localhost:27017', db_name='sacred'))
+
+@ex.config
+def config_data():
+    epoch = 200
+    first_activation_funct = "relu"
+    second_activation_funct = "softmax"
+
+class MetricsLoggerCallback(keras.callbacks.Callback):
+    def __init__(self, _run):
+        super().__init__()
+        self._run = _run
+
+    def on_epoch_end(self, _, logs):
+        self._run.log_scalar("training.acc", logs.get('accuracy'))
+
+
+# reading data
+def read_data():
+    all_data = []
+    for name in ['train', 'test', 'validate']:
+        all_data.append(pd.read_csv(f'apps_{name}.csv', header=0))
+    return all_data
+
+def data_prep():
+    train_set, test_set, validate_set = read_data()
+    train_set = train_set.drop(columns=["Unnamed: 0"])
+    test_set = test_set.drop(columns=["Unnamed: 0"])
+    validate_set = validate_set.drop(columns=["Unnamed: 0"])
+    numeric_columns = ["Rating", "Reviews", "Installs", "Price", "Genres_numeric_value"]
+
+    # train set set-up
+    x_train_set = train_set[numeric_columns]
+    y_train_set = train_set["Category"]
+    encoder = LabelEncoder()
+    encoder.fit(y_train_set)
+    encoded_Y = encoder.transform(y_train_set)
+    dummy_y = np_utils.to_categorical(encoded_Y)
+
+    # validation set set-up
+    x_validate_set = validate_set[numeric_columns]
+    y_validate_set = validate_set["Category"]
+    encoder = LabelEncoder()
+    encoder.fit(y_validate_set)
+    encoded_Yv = encoder.transform(y_validate_set)
+    dummy_yv = np_utils.to_categorical(encoded_Yv)
+
+    # test set set-up
+    x_test_set = test_set[numeric_columns]
+    y_test_set = test_set["Category"]
+    y_class_names = train_set["Category"].unique()
+    encoder = LabelEncoder()
+    encoder.fit(y_test_set)
+    encoded_Ytt = encoder.transform(y_test_set)
+    dummy_ytt = np_utils.to_categorical(encoded_Ytt)
+    return x_train_set, dummy_y, x_validate_set, dummy_yv, x_test_set, y_test_set, y_class_names
+
+
+@ex.main
+def my_main(epoch, first_activation_funct, second_activation_funct, _log, _run):
+    x_train_set, dummy_y, x_validate_set, dummy_yv, x_test_set, y_test_set, y_class_names = data_prep()
+
+    _log.info(f"EPOCH: {epoch}, 1st activation function: {first_activation_funct}, 2nd activation function: {second_activation_funct}")
+    number_of_classes = 33
+    number_of_features = 5
+    model = Sequential()
+    model.add(Dense(number_of_classes, activation=first_activation_funct, input_dim=number_of_features))
+    model.add(Dense(number_of_classes, activation=second_activation_funct))
+    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy', 'categorical_accuracy'])
+    model.fit(x_train_set, dummy_y, epochs=epoch, validation_data=(x_validate_set, dummy_yv), callbacks=[MetricsLoggerCallback(_run)])
+
+    model.save("my_model/")
+    ex.add_artifact("my_model/saved_model.pb")
+
+    # model predictions
+    yhat = model.predict(x_test_set)
+    y_true = []
+    y_pred = []
+    for index, single_pred in enumerate(yhat):
+        y_pred.append(sorted(y_class_names)[np.argmax(single_pred)])
+        y_true.append(y_test_set[index])
+
+    _run.info["accuracy"] = accuracy_score(y_true, y_pred)
+
+if __name__ == "__main__":
+    ex.run()
\ No newline at end of file
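
Note (not part of the patch): because the script calls ex.run() directly instead of ex.run_commandline(), the usual `with key=value` command-line overrides are not parsed. A minimal sketch of overriding the @ex.config defaults from code instead, assuming the apps_{train,test,validate}.csv files are present and that nn_train_sacred.py is importable from the working directory; the file name run_with_overrides.py is hypothetical:

    # run_with_overrides.py -- hypothetical helper, not included in this patch
    from nn_train_sacred import ex

    # config_updates overrides the defaults (epoch=200, "relu", "softmax")
    run = ex.run(config_updates={
        "epoch": 50,
        "first_activation_funct": "relu",
        "second_activation_funct": "softmax",
    })

    print(run.config["epoch"])
    print(run.info.get("accuracy"))  # set by my_main via _run.info["accuracy"]

Importing the module is safe here only because ex.run() is now guarded by `if __name__ == "__main__":`; without the guard the experiment would start on import.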
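The Jenkins stage archives sacred/_sources/* and sacred/1/* before deleting the sacred/ directory. A sketch of inspecting those archived files afterwards, assuming Sacred's usual FileStorageObserver layout (config.json, run.json, metrics.json per run directory) and assuming the run of interest is number 1; the file name inspect_sacred_run.py is hypothetical:

    # inspect_sacred_run.py -- hypothetical helper, not included in this patch
    import json
    from pathlib import Path

    run_dir = Path("sacred") / "1"  # first run recorded by FileStorageObserver("sacred/")

    config = json.loads((run_dir / "config.json").read_text())    # epoch, activation functions
    run = json.loads((run_dir / "run.json").read_text())          # status, info, artifacts
    metrics = json.loads((run_dir / "metrics.json").read_text())  # values logged via log_scalar

    print("config:", config)
    print("info:", run.get("info"))  # includes the stored test accuracy
    print("training.acc:", metrics.get("training.acc", {}).get("values"))

The same run metadata also lands in the 'sacred' MongoDB database through the MongoObserver, so the FileStorageObserver copy is mainly what Jenkins archives as build artifacts.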