from sacred.observers import MongoObserver
from sacred import Experiment
from sacred.observers import FileStorageObserver
import pandas as pd
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score
from keras.utils import np_utils
from tensorflow import keras

ex = Experiment("s444517_sacred", save_git_info=False)
ex.observers.append(FileStorageObserver("sacred/"))
# NOTE(review): the MongoDB credentials are embedded in source; consider
# supplying the URL through the environment or a secrets store instead.
ex.observers.append(MongoObserver(url='mongodb://admin:IUM_2021@172.17.0.1:27017', db_name='sacred'))


# Default experiment configuration: number of training epochs and the
# activation functions of the first and second Dense layers.
@ex.config
def config_data():
    epoch = 200
    first_activation_funct = "relu"
    second_activation_funct = "softmax"


class MetricsLoggerCallback(keras.callbacks.Callback):
    """Keras callback that forwards the per-epoch accuracy to a Sacred run."""

    def __init__(self, _run):
        super().__init__()
        self._run = _run

    def on_epoch_end(self, _, logs=None):
        """Log the epoch's ``accuracy`` metric under ``training.acc``.

        Nothing is logged when ``logs`` is missing or has no ``accuracy``
        entry, so the metric series never receives a ``None`` value.
        """
        accuracy = (logs or {}).get('accuracy')
        if accuracy is not None:
            self._run.log_scalar("training.acc", accuracy)


def read_data():
    """Read the train, test and validate CSV files from the working directory.

    Returns:
        A list of three DataFrames in the order train, test, validate.
    """
    return [
        pd.read_csv(f'apps_{name}.csv', header=0)
        for name in ['train', 'test', 'validate']
    ]


def data_prep():
    """Load the three data splits and build model inputs and targets.

    The label encoder is fitted once, on the training labels, and reused for
    the validation labels so that class indices and the one-hot width agree
    across splits.

    Returns:
        A tuple ``(x_train_set, dummy_y, x_validate_set, dummy_yv,
        x_test_set, y_test_set, y_class_names)`` where the ``dummy_*`` values
        are one-hot encoded targets, ``y_test_set`` holds the raw test labels
        and ``y_class_names`` is the sorted array of training class names
        (position ``i`` is the class of one-hot column ``i``).

    Raises:
        ValueError: if the validation split contains a category that does
            not occur in the training split.
    """
    train_set, test_set, validate_set = (
        frame.drop(columns=["Unnamed: 0"]) for frame in read_data()
    )

    numeric_columns = ["Rating", "Reviews", "Installs", "Price", "Genres_numeric_value"]

    # train set set-up
    x_train_set = train_set[numeric_columns]
    encoder = LabelEncoder()
    encoded_y = encoder.fit_transform(train_set["Category"])
    class_count = len(encoder.classes_)
    dummy_y = np_utils.to_categorical(encoded_y, num_classes=class_count)

    # validation set set-up: same encoder and width as the training targets
    x_validate_set = validate_set[numeric_columns]
    encoded_yv = encoder.transform(validate_set["Category"])
    dummy_yv = np_utils.to_categorical(encoded_yv, num_classes=class_count)

    # test set set-up: labels stay raw, they are compared by name later
    x_test_set = test_set[numeric_columns]
    y_test_set = test_set["Category"]

    # classes_ is sorted, so it lines up with the one-hot column order
    y_class_names = encoder.classes_

    return x_train_set, dummy_y, x_validate_set, dummy_yv, x_test_set, y_test_set, y_class_names


@ex.main
def my_main(epoch, first_activation_funct, second_activation_funct, _log, _run):
    """Train the classifier, save it as an artifact and record test accuracy.

    Args:
        epoch: number of training epochs.
        first_activation_funct: activation of the hidden Dense layer.
        second_activation_funct: activation of the output Dense layer.
        _log: logger injected by Sacred.
        _run: current Sacred run; receives the per-epoch ``training.acc``
            metric and the final ``accuracy`` info entry.
    """
    x_train_set, dummy_y, x_validate_set, dummy_yv, x_test_set, y_test_set, y_class_names = data_prep()
    _log.info(f"EPOCH: {epoch}, 1st activation function: {first_activation_funct}, 2nd activation function: {second_activation_funct}")

    # Derive the sizes from the prepared data rather than hard-coding them.
    number_of_classes = dummy_y.shape[1]
    number_of_features = x_train_set.shape[1]

    model = Sequential()
    # The input dimension belongs on the first layer of the stack.
    model.add(Dense(number_of_classes, activation=first_activation_funct, input_dim=number_of_features))
    model.add(Dense(number_of_classes, activation=second_activation_funct))

    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy', 'categorical_accuracy'])
    model.fit(
        x_train_set,
        dummy_y,
        epochs=epoch,
        validation_data=(x_validate_set, dummy_yv),
        callbacks=[MetricsLoggerCallback(_run)],
    )

    model.save("my_model/")
    ex.add_artifact("my_model/saved_model.pb")

    # model predictions: map each row's most probable column to its class name
    yhat = model.predict(x_test_set)
    class_names = np.asarray(sorted(y_class_names))
    y_pred = class_names[np.argmax(yhat, axis=1)]
    # Positional values, so the result does not depend on the Series index.
    y_true = y_test_set.to_numpy()

    _run.info["accuracy"] = accuracy_score(y_true, y_pred)


ex.run()