From 2f59e13059131578f33da3535160aaa8a38f7762 Mon Sep 17 00:00:00 2001
From: AdamOsiowy123
Date: Wed, 4 May 2022 18:08:52 +0200
Subject: [PATCH] extend evaluation job

---
 Jenkins/Jenkinsfile.evaluation     | 24 ++++++----
 Jenkins/Jenkinsfile.training       |  4 ++
 Scripts/evaluate_neural_network.py | 70 +++++++++++++++++++-----------
 requirements.txt                   |  5 +++
 4 files changed, 69 insertions(+), 34 deletions(-)

diff --git a/Jenkins/Jenkinsfile.evaluation b/Jenkins/Jenkinsfile.evaluation
index 6276cfb..0e28c06 100644
--- a/Jenkins/Jenkinsfile.evaluation
+++ b/Jenkins/Jenkinsfile.evaluation
@@ -4,12 +4,17 @@ node {
         docker.image('s444452/ium:1.3').inside {
             stage('Preparation') {
                 properties([
-                    pipelineTriggers([upstream(threshold: hudson.model.Result.SUCCESS, upstreamProjects: "s444452-training")]),
                     parameters([
+                        gitParameter(branchFilter: 'origin/(.*)', defaultValue: 'master', name: 'BRANCH', type: 'PT_BRANCH'),
+                        buildSelector(
+                            defaultSelector: upstream(),
+                            description: 'Which build to use for copying artifacts',
+                            name: 'BUILD_SELECTOR'
+                        ),
                         string(
-                            defaultValue: ".,14000,100",
-                            description: 'Test params: data_path,num_words,pad_length',
-                            name: 'TEST_PARAMS'
+                            defaultValue: ".,14000,1,50,100",
+                            description: 'Test params: data_path,num_words,epochs,batch_size,pad_length',
+                            name: 'TEST_PARAMS'
                         )
                     ])
                 ])
@@ -17,14 +22,17 @@ node {
             stage('Copy artifacts') {
                 copyArtifacts filter: 'train_data.csv', fingerprintArtifacts: true, projectName: 's444452-create-dataset'
                 copyArtifacts filter: 'test_data.csv', fingerprintArtifacts: true, projectName: 's444452-create-dataset'
+                git branch: "${params.BRANCH}", url: 'https://git.wmi.amu.edu.pl/s444452/ium_444452.git'
+                copyArtifacts filter: 'neural_network_evaluation.csv', projectName: "s444452-evaluation/${BRANCH}/", optional: true
+                copyArtifacts filter: 'model/neural_net', projectName: "s444452-training/${BRANCH}/", selector: buildParameter('BUILD_SELECTOR')
             }
             stage('Run script') {
-                withEnv(["TEST_PARAMS=${params.TEST_PARAMS}"]) {
-                    sh "python3 Scripts/evaluate_neural_network.py $TEST_PARAMS"
+                withEnv(["TEST_PARAMS=${params.TEST_PARAMS}", "BUILD_NR=${params.BUILD_SELECTOR}"]) {
+                    sh "python3 Scripts/evaluate_neural_network.py $BUILD_NR $TEST_PARAMS"
                 }
             }
             stage('Archive artifacts') {
-                archiveArtifacts "neural_network_evaluation.txt"
+                archiveArtifacts artifacts: "neural_network_evaluation.csv, evaluation.png", onlyIfSuccessful: true
             }
         }
     } catch (e) {
@@ -38,7 +46,7 @@ def notifyBuild(String buildStatus = 'STARTED') {
     buildStatus = buildStatus ?: 'SUCCESS'
 
     def subject = "Job: ${env.JOB_NAME}"
-    def details = "Build nr: ${env.BUILD_NUMBER}, status: ${buildStatus} \n url: ${env.BUILD_URL} \n build params: ${params.TRAIN_PARAMS}"
+    def details = "Build nr: ${env.BUILD_NUMBER}, status: ${buildStatus} \n url: ${env.BUILD_URL} \n build params: ${params.TEST_PARAMS}"
 
     emailext (
         subject: subject,
diff --git a/Jenkins/Jenkinsfile.training b/Jenkins/Jenkinsfile.training
index a9ed82e..561d9de 100644
--- a/Jenkins/Jenkinsfile.training
+++ b/Jenkins/Jenkinsfile.training
@@ -40,6 +40,10 @@ def notifyBuild(String buildStatus = 'STARTED') {
     def subject = "Job: ${env.JOB_NAME}"
     def details = "Build nr: ${env.BUILD_NUMBER}, status: ${buildStatus} \n url: ${env.BUILD_URL} \n build params: ${params.TRAIN_PARAMS}"
 
+    if (buildStatus == 'SUCCESS') {
+        build job: "s444452-evaluation/${env.BRANCH_NAME}/", parameters: [string(name: 'TEST_PARAMS', value: "${params.TRAIN_PARAMS}")]
+    }
+
     emailext (
         subject: subject,
         body: details,
diff --git a/Scripts/evaluate_neural_network.py b/Scripts/evaluate_neural_network.py
index 6d814a6..dd4a09f 100644
--- a/Scripts/evaluate_neural_network.py
+++ b/Scripts/evaluate_neural_network.py
@@ -1,21 +1,22 @@
 #!/usr/bin/python
-import datetime
 import glob
 import os
-import pprint
 import sys
 import pandas as pd
-from keras.models import Sequential, load_model
-from keras import layers
+from keras.models import load_model
 from keras.preprocessing.text import Tokenizer
 from keras.preprocessing.sequence import pad_sequences
-from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score
+from sklearn.metrics import precision_score, recall_score, f1_score
 import logging
+import matplotlib.pyplot as plt
 
 logging.getLogger("tensorflow").setLevel(logging.ERROR)
 
+build_number = ''
 data_path = ''
 num_words = 0
+epochs = 0
+batch_size = 0
 pad_length = 0
 
 
@@ -30,28 +31,41 @@ def tokenize(x, x_test):
 
 
 def evaluate_and_save(model, x, y, abs_path):
+    global build_number
     loss, accuracy = model.evaluate(x, y, verbose=False)
     y_predicted = (model.predict(x) >= 0.5).astype(int)
-
-    if os.path.exists(os.path.join(abs_path, 'neural_network_evaluation.txt')):
-        with open(os.path.join(abs_path, 'neural_network_evaluation.txt'), "a") as log_file:
-            for obj in (
-                    ('Accuracy: ', accuracy), ('Loss: ', loss), ('Precision: ', precision_score(y, y_predicted)),
-                    ('Recall: ', recall_score(y, y_predicted)), ('F1: ', f1_score(y, y_predicted)),
-                    ('Accuracy: ', accuracy_score(y, y_predicted))):
-                log_file.write(str(obj) + '\n')
-    else:
-        with open(os.path.join(abs_path, 'neural_network_evaluation.txt'), "w") as log_file:
-            for obj in (
-                    ('Accuracy: ', accuracy), ('Loss: ', loss), ('Precision: ', precision_score(y, y_predicted)),
-                    ('Recall: ', recall_score(y, y_predicted)), ('F1: ', f1_score(y, y_predicted)),
-                    ('Accuracy: ', accuracy_score(y, y_predicted))):
-                log_file.write(str(obj) + '\n')
+    evaluation_file_path = os.path.join(abs_path, 'neural_network_evaluation.csv')
+    with open(evaluation_file_path, 'a+') as f:
+        result = f'{build_number},{accuracy},{loss},{precision_score(y, y_predicted)},{recall_score(y, y_predicted)},{f1_score(y, y_predicted)}'
+        f.write(result + '\n')
 
 
-def load_trained_model(abs_path):
-    model_name = glob.glob('neural_net_*')[0]
-    return load_model(os.path.join(abs_path, model_name))
+def generate_and_save_comparison(abs_path):
+    evaluation_file_path = os.path.join(abs_path, 'neural_network_evaluation.csv')
+    df = pd.read_csv(evaluation_file_path, sep=',', header=None,
+                     names=['build_number', 'Accuracy', 'Loss', 'Precision', 'Recall', 'F1'])
+    fig = plt.figure(figsize=(16 * .6, 9 * .6))
+    ax = fig.add_subplot(111)
+    ax.set_title('Evaluation')
+    X = df['build_number']
+    ax.set_xlabel('build_number')
+    ax.set_xticks(df['build_number'])
+    for metrics, color in zip(['Accuracy', 'Loss', 'Precision', 'Recall', 'F1'],
+                              ['green', 'red', 'blue', 'brown', 'magenta']):
+        ax.plot(X, df[metrics], color=color, lw=1, label=f'{metrics}')
+    ax.legend()
+    plt.savefig(os.path.join(abs_path, 'evaluation.png'), format='png')
+    return ax
+
+
+def load_trained_model():
+    # glob_pattern = os.path.join(os.getcwd(), 'model', 'neural_net_*')
+    glob_pattern = os.path.join(os.getcwd(), 'model', 'neural_net')
+    models = glob.glob(glob_pattern)
+    models = [os.path.split(x)[1] for x in models]
+    # model_name = sorted(models, key=lambda x: datetime.datetime.strptime(x[11:], '%d-%b-%Y-%H:%M:%S'),
+    #                     reverse=True)[0]
+    return load_model(os.path.join(os.getcwd(), 'model', models[0]))
 
 
 def split_data(data):
@@ -65,9 +79,12 @@ def load_data(data_path, filename) -> pd.DataFrame:
 
 
 def read_params():
-    global data_path, num_words, pad_length
-    data_path, num_words, pad_length = sys.argv[1].split(',')
+    global build_number, data_path, num_words, epochs, batch_size, pad_length
+    build_number = sys.argv[1]
+    data_path, num_words, epochs, batch_size, pad_length = sys.argv[2].split(',')
     num_words = int(num_words)
+    epochs = int(epochs)
+    batch_size = int(batch_size)
     pad_length = int(pad_length)
 
 
@@ -80,8 +97,9 @@ def main():
     x_train, _ = split_data(train_data)
     x_test, y_test = split_data(test_data)
     x_test, _ = tokenize(pd.concat([x_train, x_test]), x_test)
-    model = load_trained_model(abs_data_path)
+    model = load_trained_model()
     evaluate_and_save(model, x_test, y_test, abs_data_path)
+    generate_and_save_comparison(abs_data_path)
 
 
 if __name__ == '__main__':
diff --git a/requirements.txt b/requirements.txt
index e74fa5b..04a3068 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -5,8 +5,10 @@ certifi==2021.10.8
 charset-normalizer==2.0.12
 click==8.1.2
 colorama==0.4.4
+cycler==0.11.0
 docopt==0.6.2
 flatbuffers==2.0
+fonttools==4.33.3
 gast==0.5.3
 gitdb==4.0.9
 GitPython==3.1.27
@@ -22,8 +24,10 @@ jsonpickle==1.5.2
 kaggle==1.5.12
 keras==2.8.0
 Keras-Preprocessing==1.1.2
+kiwisolver==1.4.2
 libclang==14.0.1
 Markdown==3.3.6
+matplotlib==3.5.2
 munch==2.5.0
 nltk==3.7
 numpy==1.22.3
@@ -31,6 +35,7 @@ oauthlib==3.2.0
 opt-einsum==3.3.0
 packaging==21.3
 pandas==1.4.2
+Pillow==9.1.0
 protobuf==3.20.1
 py-cpuinfo==8.0.0
 pyasn1==0.4.8