diff --git a/Scripts/evaluate_neural_network.py b/Scripts/evaluate_neural_network.py
index 97d343e..765bbe1 100644
--- a/Scripts/evaluate_neural_network.py
+++ b/Scripts/evaluate_neural_network.py
@@ -7,21 +7,35 @@ from keras.models import load_model
 from keras.preprocessing.text import Tokenizer
 from keras.preprocessing.sequence import pad_sequences
 from sklearn.metrics import precision_score, recall_score, f1_score
-import logging
 import matplotlib.pyplot as plt
+from sacred.observers import MongoObserver
+from sacred.observers import FileStorageObserver
+from sacred import Experiment
 
-logging.getLogger("tensorflow").setLevel(logging.ERROR)
+ex = Experiment(name='fake_job_classification_evaluation')
+# ex.observers.append(MongoObserver(url='mongodb://mongo_user:mongo_password_IUM_2021@localhost:27017'))
+ex.observers.append(FileStorageObserver('my_runs'))
 
-build_number = ''
-data_path = ''
-num_words = 0
-epochs = 0
-batch_size = 0
-pad_length = 0
+build_number = sys.argv[1]
+data_path = sys.argv[2]
+epochs = int(sys.argv[3])
+num_words = int(sys.argv[4])
+batch_size = int(sys.argv[5])
+pad_length = int(sys.argv[6])
 
 
-def tokenize(x, x_test):
-    global pad_length, num_words
+@ex.config
+def config():
+    build_number = build_number
+    data_path = data_path
+    epochs = epochs
+    num_words = num_words
+    batch_size = batch_size
+    pad_length = pad_length
+
+
+@ex.capture
+def tokenize(x, x_test, pad_length, num_words):
     tokenizer = Tokenizer(num_words=num_words)
     tokenizer.fit_on_texts(x)
     test_x = tokenizer.texts_to_sequences(x_test)
@@ -30,14 +44,17 @@ def tokenize(x, x_test):
     return test_x, vocabulary_length
 
 
-def evaluate_and_save(model, x, y, abs_path):
-    global build_number
+@ex.capture
+def evaluate_and_save(model, x, y, abs_path, build_number):
     loss, accuracy = model.evaluate(x, y, verbose=False)
     y_predicted = (model.predict(x) >= 0.5).astype(int)
     evaluation_file_path = os.path.join(abs_path, 'neural_network_evaluation.csv')
     with open(evaluation_file_path, 'a+') as f:
         result = f'{build_number},{accuracy},{loss},{precision_score(y, y_predicted)},{recall_score(y, y_predicted)},{f1_score(y, y_predicted)}'
         f.write(result + '\n')
+    ex.log_scalar("loss", loss)
+    ex.log_scalar("accuracy", accuracy)
+    ex.add_artifact(evaluation_file_path)
 
 
 def generate_and_save_comparison(abs_path):
@@ -56,6 +73,7 @@ def generate_and_save_comparison(abs_path):
         ax.plot(X, df[metrics], color=color, lw=1, label=f'{metrics}')
     ax.legend()
     plt.savefig(os.path.join(abs_path, 'evaluation.png'), format='png')
+    ex.add_artifact(os.path.join(abs_path, 'evaluation.png'))
     return ax
 
 
@@ -79,19 +97,8 @@ def load_data(data_path, filename) -> pd.DataFrame:
     return pd.read_csv(os.path.join(data_path, filename))
 
 
-def read_params():
-    global build_number, data_path, num_words, epochs, batch_size, pad_length
-    build_number = sys.argv[1]
-    data_path, num_words, epochs, batch_size, pad_length = sys.argv[2].split(',')
-    num_words = int(num_words)
-    epochs = int(epochs)
-    batch_size = int(batch_size)
-    pad_length = int(pad_length)
-
-
-def main():
-    read_params()
-    global data_path
+@ex.main
+def main(build_number, data_path, num_words, epochs, batch_size, pad_length, _run):
     abs_data_path = os.path.abspath(data_path)
     train_data = load_data(abs_data_path, 'train_data.csv')
     test_data = load_data(abs_data_path, 'test_data.csv')
@@ -103,5 +110,4 @@ def main():
     generate_and_save_comparison(abs_data_path)
 
 
-if __name__ == '__main__':
-    main()
+ex.run()
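Note: the hunks above replace the hand-rolled read_params()/globals plumbing with Sacred. Hyperparameters become a tracked config scope, tokenize() and evaluate_and_save() receive them by injection via @ex.capture, and metrics and files are recorded through ex.log_scalar()/ex.add_artifact() into the FileStorageObserver directory my_runs. One subtlety: assignments such as build_number = build_number inside config() would raise UnboundLocalError in plain Python, but Sacred re-executes config-scope bodies with the module's globals visible, so each right-hand side resolves to the value parsed from sys.argv at import time. A minimal, self-contained sketch of the same pattern follows; the experiment name, config values, and metric value are placeholders for illustration, not part of this patch:

    from sacred import Experiment
    from sacred.observers import FileStorageObserver

    ex = Experiment(name='sacred_pattern_demo')          # hypothetical name
    ex.observers.append(FileStorageObserver('my_runs'))  # same observer as the patch


    @ex.config
    def config():
        # Tracked hyperparameters; Sacred injects them by name below.
        batch_size = 64
        pad_length = 100


    @ex.capture
    def describe(batch_size, pad_length):
        # Called without arguments: both values come from the config scope.
        return f'batch_size={batch_size}, pad_length={pad_length}'


    @ex.main
    def main(_run):
        print(describe())
        _run.log_scalar('loss', 0.25)  # placeholder metric, like ex.log_scalar above


    if __name__ == '__main__':
        ex.run()

Running the sketch creates my_runs/<run_id>/ containing config.json, run.json, metrics.json, and cout.txt, which is where the logged scalars and artifacts from this patch end up as well.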
diff --git a/Scripts/train_neural_network.py b/Scripts/train_neural_network.py
index cf35ed3..e2c1c69 100644
--- a/Scripts/train_neural_network.py
+++ b/Scripts/train_neural_network.py
@@ -1,5 +1,4 @@
 #!/usr/bin/python
-import datetime
 import os
 import sys
 import pandas as pd
@@ -7,19 +6,32 @@ from keras.models import Sequential
 from keras import layers
 from keras.preprocessing.text import Tokenizer
 from keras.preprocessing.sequence import pad_sequences
-import logging
+from sacred.observers import MongoObserver
+from sacred.observers import FileStorageObserver
+from sacred import Experiment
 
-logging.getLogger("tensorflow").setLevel(logging.ERROR)
+ex = Experiment(name='fake_job_classification_training')
+# ex.observers.append(MongoObserver(url='mongodb://mongo_user:mongo_password_IUM_2021@localhost:27017'))
+ex.observers.append(FileStorageObserver('my_runs'))
 
-data_path = ''
-num_words = 0
-epochs = 0
-batch_size = 0
-pad_length = 0
+data_path = sys.argv[1]
+epochs = int(sys.argv[2])
+num_words = int(sys.argv[3])
+batch_size = int(sys.argv[4])
+pad_length = int(sys.argv[5])
 
 
-def tokenize(x, x_train):
-    global pad_length, num_words
+@ex.config
+def config():
+    data_path = data_path
+    epochs = epochs
+    num_words = num_words
+    batch_size = batch_size
+    pad_length = pad_length
+
+
+@ex.capture
+def tokenize(x, x_train, pad_length, num_words):
     tokenizer = Tokenizer(num_words=num_words)
     tokenizer.fit_on_texts(x)
     train_x = tokenizer.texts_to_sequences(x_train)
@@ -32,15 +44,16 @@ def save_model(model):
     # model_name = 'neural_net_' + datetime.datetime.today().strftime('%d-%b-%Y-%H:%M:%S')
     model_name = 'neural_net'
     model.save(os.path.join(os.getcwd(), 'model', model_name), save_format='h5', overwrite=True)
+    ex.add_artifact(os.path.join(os.getcwd(), 'model', model_name))
 
 
-def train_model(model, x_train, y_train):
-    global epochs, batch_size
+@ex.capture
+def train_model(model, x_train, y_train, epochs, batch_size):
     model.fit(x_train, y_train, epochs=epochs, verbose=False, batch_size=batch_size)
 
 
-def get_model(vocabulary_length):
-    global pad_length, batch_size
+@ex.capture
+def get_model(vocabulary_length, batch_size, pad_length):
     model = Sequential()
     model.add(layers.Embedding(input_dim=vocabulary_length,
                                output_dim=batch_size,
@@ -64,18 +77,8 @@ def load_data(data_path, filename) -> pd.DataFrame:
     return pd.read_csv(os.path.join(data_path, filename))
 
 
-def read_params():
-    global data_path, num_words, epochs, batch_size, pad_length
-    data_path, num_words, epochs, batch_size, pad_length = sys.argv[1].split(',')
-    num_words = int(num_words)
-    epochs = int(epochs)
-    batch_size = int(batch_size)
-    pad_length = int(pad_length)
-
-
-def main():
-    read_params()
-    global data_path
+@ex.main
+def main(data_path, num_words, epochs, batch_size, pad_length, _run):
     abs_data_path = os.path.abspath(data_path)
     train_data = load_data(abs_data_path, 'train_data.csv')
     test_data = load_data(abs_data_path, 'test_data.csv')
@@ -87,5 +90,4 @@ def main():
     save_model(model)
 
 
-if __name__ == '__main__':
-    main()
+ex.run()
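Note: both scripts keep positional sys.argv parsing at import time and call ex.run() at module level without a __main__ guard, presumably so existing automation that invokes them keeps the same call signature. Sacred's own entry point would instead be ex.run_commandline(), which parses `with key=value` overrides and makes the positional parsing unnecessary. A hypothetical sketch of that alternative, not what this patch does (the default config values here are invented for illustration):

    from sacred import Experiment
    from sacred.observers import FileStorageObserver

    ex = Experiment(name='fake_job_classification_training')
    ex.observers.append(FileStorageObserver('my_runs'))


    @ex.config
    def config():
        # Assumed defaults; each can be overridden from the command line.
        data_path = 'data'
        epochs = 10
        num_words = 10000
        batch_size = 64
        pad_length = 100


    @ex.main
    def main(data_path, epochs, num_words, batch_size, pad_length):
        print(data_path, epochs, num_words, batch_size, pad_length)


    if __name__ == '__main__':
        # Parses Sacred's CLI, e.g.:
        #   python train_neural_network.py with epochs=20 batch_size=32
        ex.run_commandline()

The guard also keeps the experiment from running if the module is ever imported, which the module-level ex.run() in the patch would not prevent.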