ium_444517/nn_train_sacred.py

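# Trains a small Keras feed-forward classifier on the apps_{train,test,validate}.csv
# splits and tracks the run (config, metrics, model artifact) with Sacred.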
from sacred import Experiment
from sacred.observers import FileStorageObserver, MongoObserver
import pandas as pd
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score
from keras.utils import np_utils
from tensorflow import keras

ex = Experiment("s444517_sacred", save_git_info=False)
ex.observers.append(FileStorageObserver("sacred/"))
ex.observers.append(MongoObserver(url='mongodb://mongo_user:mongo_password_IUM_2021@localhost:27017',
                                  db_name='sacred'))

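# Hyperparameters of the run; Sacred records these and injects them into my_main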
@ex.config
def config_data():
    epoch = 200
    first_activation_funct = "relu"
    second_activation_funct = "softmax"

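# Callback that logs the training accuracy of every epoch to the Sacred run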
class MetricsLoggerCallback(keras.callbacks.Callback):
    def __init__(self, _run):
        super().__init__()
        self._run = _run

    def on_epoch_end(self, _, logs):
        self._run.log_scalar("training.acc", logs.get('accuracy'))

# reading data
def read_data():
    all_data = []
    for name in ['train', 'test', 'validate']:
        all_data.append(pd.read_csv(f'apps_{name}.csv', header=0))
    return all_data

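# Builds the numeric feature matrices and one-hot encoded Category targets for each split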
def data_prep():
    train_set, test_set, validate_set = read_data()
    train_set = train_set.drop(columns=["Unnamed: 0"])
    test_set = test_set.drop(columns=["Unnamed: 0"])
    validate_set = validate_set.drop(columns=["Unnamed: 0"])
    numeric_columns = ["Rating", "Reviews", "Installs", "Price", "Genres_numeric_value"]

    # train set set-up
    x_train_set = train_set[numeric_columns]
    y_train_set = train_set["Category"]
    encoder = LabelEncoder()
    encoder.fit(y_train_set)
    encoded_Y = encoder.transform(y_train_set)
    dummy_y = np_utils.to_categorical(encoded_Y)

    # validation set set-up
    x_validate_set = validate_set[numeric_columns]
    y_validate_set = validate_set["Category"]
    encoder = LabelEncoder()
    encoder.fit(y_validate_set)
    encoded_Yv = encoder.transform(y_validate_set)
    dummy_yv = np_utils.to_categorical(encoded_Yv)

    # test set set-up
    x_test_set = test_set[numeric_columns]
    y_test_set = test_set["Category"]
    y_class_names = train_set["Category"].unique()
    encoder = LabelEncoder()
    encoder.fit(y_test_set)
    encoded_Ytt = encoder.transform(y_test_set)
    dummy_ytt = np_utils.to_categorical(encoded_Ytt)

    return x_train_set, dummy_y, x_validate_set, dummy_yv, x_test_set, y_test_set, y_class_names

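# Training entry point; Sacred injects the config values and the _log/_run objects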
@ex.main
def my_main(epoch, first_activation_funct, second_activation_funct, _log, _run):
    x_train_set, dummy_y, x_validate_set, dummy_yv, x_test_set, y_test_set, y_class_names = data_prep()
    _log.info(f"EPOCH: {epoch}, 1st activation function: {first_activation_funct}, "
              f"2nd activation function: {second_activation_funct}")

    number_of_classes = 33
    number_of_features = 5
    model = Sequential()
    # input_dim belongs on the first layer; the final layer uses the second activation (softmax)
    model.add(Dense(number_of_classes, activation=first_activation_funct, input_dim=number_of_features))
    model.add(Dense(number_of_classes, activation=second_activation_funct))
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy', 'categorical_accuracy'])
    model.fit(x_train_set, dummy_y, epochs=epoch, validation_data=(x_validate_set, dummy_yv),
              callbacks=[MetricsLoggerCallback(_run)])

    model.save("my_model/")
    ex.add_artifact("my_model/saved_model.pb")

    # model predictions: map each softmax output back to a class name and compare with the ground truth
    yhat = model.predict(x_test_set)
    y_true = []
    y_pred = []
    for numerator, single_pred in enumerate(yhat):
        # LabelEncoder orders classes alphabetically, so the sorted class names match the one-hot columns
        y_pred.append(sorted(y_class_names)[np.argmax(single_pred)])
        y_true.append(y_test_set[numerator])
    _run.info["accuracy"] = accuracy_score(y_true, y_pred)

if __name__ == "__main__":
    ex.run()