sacred hw

This commit is contained in:
Kamila 2022-05-05 20:05:14 +02:00
parent 4f6b9471c2
commit 67f0cc47fc
3 changed files with 128 additions and 0 deletions

View File

@ -15,6 +15,8 @@ RUN pip3 install matplotlib
RUN pip3 install tensorflow
RUN pip3 install keras
# "sklearn" on PyPI is only a deprecated alias; the real distribution is scikit-learn
# (it still provides the `sklearn` import name used by the training scripts).
RUN pip3 install scikit-learn
# pymongo is required by Sacred's MongoObserver; sacred provides the experiment tracking.
RUN pip3 install pymongo
RUN pip3 install sacred
# NOTE(review): Docker honours only the LAST CMD in a Dockerfile, so the
# data_expl.py line below is never the container's default command.
CMD python3 data_expl.py
CMD python3 nn_train.py

24
Jenkinsfile_sacred Normal file
View File

@ -0,0 +1,24 @@
// Declarative pipeline: runs the Sacred-instrumented training script inside a
// container and archives the files written by Sacred's file-storage observer.
pipeline {
    // Build the agent image from the Dockerfile in the root of the checked-out repository.
    agent {
        dockerfile true
    }
    stages {
        // Placeholder stage; it only prints a message.
        stage('Stage 1') {
            steps {
                echo 'Hello world!'
            }
        }
        stage('Run sacred on nn_train ') {
            steps {
                // Train the model; the script writes its run data under ./sacred/.
                sh 'python3 ./nn_train_sacred.py'
                // NOTE(review): 'sacred/1/*' matches only the run with id 1, which relies
                // on the directory being empty before the script starts - confirm.
                archiveArtifacts artifacts: 'sacred/_sources/*, sacred/1/*'
                // Remove the run directory after archiving (presumably so the next build's
                // run is numbered 1 again - confirm). This step is skipped when an
                // earlier step in the stage fails.
                sh 'rm -r sacred'
            }
        }
    }
}

102
nn_train_sacred.py Normal file
View File

@ -0,0 +1,102 @@
import os

from cv2 import FileStorage
from sacred.observers import MongoObserver
from sacred import Experiment
from sacred.observers import FileStorageObserver
import pandas as pd
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score
from keras.utils import np_utils
from tensorflow import keras
# Experiment object; every run of this script is recorded under this name.
ex = Experiment("s444517_sacred")

# Keep a copy of each run on disk under ./sacred/ (the Jenkins job archives
# files from this directory).
ex.observers.append(FileStorageObserver("sacred/"))

# Also record runs in MongoDB. The connection string and database name can be
# overridden through the environment so credentials need not live in the
# source; the defaults preserve the previous hard-coded values.
# NOTE(review): the default URL still embeds a password - consider dropping the
# default once every environment sets SACRED_MONGO_URL.
ex.observers.append(
    MongoObserver(
        url=os.environ.get(
            "SACRED_MONGO_URL",
            'mongodb://mongo_user:mongo_password_IUM_2021@localhost:27017',
        ),
        db_name=os.environ.get("SACRED_MONGO_DB", 'sacred'),
    )
)
@ex.config
def config_data():
    """Default hyper-parameters of the experiment.

    The local variables below are the configuration values that ``my_main``
    receives as arguments of the same names.
    """
    epoch = 200  # number of training epochs passed to model.fit
    first_activation_funct = "relu"  # activation of the first Dense layer
    second_activation_funct = "softmax"  # activation of the second (last) Dense layer
class MetricsLoggerCallback(keras.callbacks.Callback):
    """Keras callback that forwards the per-epoch training accuracy to a Sacred run.

    Args:
        _run: The Sacred run object whose ``log_scalar`` method receives the
            metric.
    """

    def __init__(self, _run):
        super().__init__()
        self._run = _run

    def on_epoch_end(self, _, logs=None):
        """Log ``logs['accuracy']`` as the ``training.acc`` scalar.

        Args:
            _: Epoch index supplied by Keras (unused).
            logs: Metric dictionary supplied by Keras; may be ``None``.
        """
        # Keras documents ``logs`` as optional, and the 'accuracy' key exists
        # only when that metric was requested at compile time; skip logging
        # rather than crash or record a ``None`` value.
        accuracy = (logs or {}).get('accuracy')
        if accuracy is not None:
            self._run.log_scalar("training.acc", accuracy)
def read_data():
    """Load the three dataset splits from CSV files in the working directory.

    Returns:
        A list of three DataFrames read from ``apps_train.csv``,
        ``apps_test.csv`` and ``apps_validate.csv``, in that order.
    """
    split_names = ('train', 'test', 'validate')
    return [pd.read_csv(f'apps_{split}.csv', header=0) for split in split_names]
def data_prep():
    """Split the raw frames into model inputs and one-hot encoded targets.

    The label encoder is fitted once, on the training labels, and reused for
    the validation labels so that a given category always maps to the same
    one-hot column and both target matrices have the same width.

    Returns:
        A 7-tuple ``(x_train_set, dummy_y, x_validate_set, dummy_yv,
        x_test_set, y_test_set, y_class_names)`` where the ``x_*`` items are
        DataFrames restricted to the numeric feature columns, ``dummy_y`` /
        ``dummy_yv`` are one-hot encoded "Category" labels of the train /
        validate split, ``y_test_set`` is the raw "Category" column of the
        test split and ``y_class_names`` holds the unique training categories.

    Raises:
        ValueError: If the validation split contains a category that does not
            occur in the training split.
    """
    label_column = "Category"
    numeric_columns = ["Rating", "Reviews", "Installs", "Price", "Genres_numeric_value"]

    # Drop the "Unnamed: 0" column from every split.
    train_set, test_set, validate_set = (
        frame.drop(columns=["Unnamed: 0"]) for frame in read_data()
    )

    # One encoder for all splits: fitting a fresh encoder per split (as was
    # done before) can assign different indices to the same category and
    # produce one-hot matrices of different widths.
    encoder = LabelEncoder()
    encoder.fit(train_set[label_column])
    number_of_classes = len(encoder.classes_)

    # train set set-up
    x_train_set = train_set[numeric_columns]
    dummy_y = np_utils.to_categorical(
        encoder.transform(train_set[label_column]), num_classes=number_of_classes
    )

    # validation set set-up
    x_validate_set = validate_set[numeric_columns]
    dummy_yv = np_utils.to_categorical(
        encoder.transform(validate_set[label_column]), num_classes=number_of_classes
    )

    # test set set-up; the labels stay as raw strings because the caller
    # compares them with predicted class names.
    x_test_set = test_set[numeric_columns]
    y_test_set = test_set[label_column]
    y_class_names = train_set[label_column].unique()

    return x_train_set, dummy_y, x_validate_set, dummy_yv, x_test_set, y_test_set, y_class_names
@ex.main
def my_main(epoch, first_activation_funct, second_activation_funct, _log, _run):
    """Train the classifier, save it as a run artifact and record test accuracy.

    Args:
        epoch: Number of training epochs.
        first_activation_funct: Activation of the first Dense layer.
        second_activation_funct: Activation of the second (last) Dense layer.
        _log: Logger supplied by Sacred.
        _run: Current Sacred run; receives per-epoch metrics and the final
            accuracy under ``info["accuracy"]``.
    """
    x_train_set, dummy_y, x_validate_set, dummy_yv, x_test_set, y_test_set, y_class_names = data_prep()
    _log.info(f"EPOCH: {epoch}, 1st activation function: {first_activation_funct}, 2nd activation function: {second_activation_funct}")

    # Take the layer sizes from the prepared data instead of hard-coding
    # 33 classes / 5 features, so the network always matches its inputs.
    number_of_classes = dummy_y.shape[1]
    number_of_features = x_train_set.shape[1]

    model = Sequential()
    # The input size belongs on the first layer of the stack.
    model.add(Dense(number_of_classes, activation=first_activation_funct, input_dim=number_of_features))
    model.add(Dense(number_of_classes, activation=second_activation_funct))
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy', 'categorical_accuracy'])
    model.fit(
        x_train_set,
        dummy_y,
        epochs=epoch,
        validation_data=(x_validate_set, dummy_yv),
        # Without this callback the "training.acc" metric is never logged.
        callbacks=[MetricsLoggerCallback(_run)],
    )
    model.save("my_model/")
    ex.add_artifact("my_model/saved_model.pb")

    # model predictions
    yhat = model.predict(x_test_set)
    # Column i of the one-hot targets corresponds to the i-th category in
    # sorted order, so sort once and index by the arg-max of each prediction.
    class_names = sorted(y_class_names)
    y_pred = [class_names[np.argmax(single_pred)] for single_pred in yhat]
    # Positional order of the test labels matches the order of the predictions.
    y_true = list(y_test_set)
    _run.info["accuracy"] = accuracy_score(y_true, y_pred)
if __name__ == "__main__":
    # Start the experiment only when the file is executed as a script, so that
    # importing this module does not trigger a training run.
    ex.run()