From 850263b3b88ef1b6fa3428938a3d7946a0488ac7 Mon Sep 17 00:00:00 2001
From: AWieczarek
Date: Mon, 6 May 2024 09:17:27 +0200
Subject: [PATCH] IUM_06

---
Reviewer notes (commentary placed after the "---" separator, so it is not
part of the commit message):

  * This patch was recovered from a copy in which line breaks and indentation
    had been collapsed. Added/removed lines and hunk sizes follow the visible
    +/- markers and the hunk headers. Indentation, and the position of the
    blank context lines in the IUM_05-predict.py hunk, are inferred.
    TODO: confirm against the repository before applying (or apply with
    whitespace-tolerant options).
  * IUM_06-plot.py: acc.png is now written before plt.show() is called.
    The original order could save an empty image once show() had released
    the figure. The "index" blob id recorded for this file was not
    recomputed and no longer matches the edited content.
  * IUM_06-metrics.py (unchanged): accuracy/precision/recall use the
    binarized target (Actual >= 3), while RMSE compares the raw 'Actual'
    column with 'Predictions'. If the model emits a 0..1 score these are on
    different scales. TODO: confirm which target the model was trained on.
  * IUM_06-metrics.py (unchanged): with average='micro' on a binary,
    single-label problem, precision, recall and F1 all equal accuracy.
    Presumably 'binary' or 'macro' was intended. TODO: confirm.
  * Jenkinsfile (unchanged): the last stage archives '*', i.e. every
    top-level workspace file, not only beer_metrics.txt and acc.png.
    The evaluation job is referenced as '_s464979-evaluation' (leading
    underscore) unlike the other job names. TODO: verify both are intended.

 IUM_05-predict.py | 14 ++++-----
 IUM_06-metrics.py | 24 +++++++++++++++
 IUM_06-plot.py    | 24 +++++++++++++++
 Jenkinsfile       | 76 +++++++++++++++++++++++++++--------------------
 Test.txt          |  1 -
 5 files changed, 98 insertions(+), 41 deletions(-)
 create mode 100644 IUM_06-metrics.py
 create mode 100644 IUM_06-plot.py
 delete mode 100644 Test.txt

diff --git a/IUM_05-predict.py b/IUM_05-predict.py
index 7f222a3..54038e4 100644
--- a/IUM_05-predict.py
+++ b/IUM_05-predict.py
@@ -1,18 +1,18 @@
 import pandas as pd
-import numpy as np
 import tensorflow as tf
 
 test_data = pd.read_csv('./beer_reviews_test.csv')
 
 X_test = test_data[['review_aroma', 'review_appearance', 'review_palate', 'review_taste']]
+y_test = test_data['review_overall']
 
 model = tf.keras.models.load_model('beer_review_sentiment_model.h5')
 
-tokenizer = tf.keras.preprocessing.text.Tokenizer(num_words=10000)
+predictions = model.predict(X_test)
+print(f'Predictions shape: {predictions.shape}')
 
-X_test_seq = tokenizer.texts_to_sequences(X_test)
-X_test_pad = tf.keras.preprocessing.sequence.pad_sequences(X_test_seq, maxlen=100)
+if len(predictions.shape) > 1:
+    predictions = predictions[:, 0]
 
-predictions = model.predict(X_test_pad)
-
-np.savetxt('beer_review_sentiment_predictions.csv', predictions, delimiter=',', fmt='%.10f')
+results = pd.DataFrame({'Predictions': predictions, 'Actual': y_test})
+results.to_csv('beer_review_sentiment_predictions.csv', index=False)
\ No newline at end of file
diff --git a/IUM_06-metrics.py b/IUM_06-metrics.py
new file mode 100644
index 0000000..e9189a9
--- /dev/null
+++ b/IUM_06-metrics.py
@@ -0,0 +1,24 @@
+import pandas as pd
+from sklearn.metrics import accuracy_score, precision_recall_fscore_support, mean_squared_error
+from math import sqrt
+import sys
+
+data = pd.read_csv('beer_review_sentiment_predictions.csv')
+y_pred = data['Predictions']
+y_test = data['Actual']
+y_test_binary = (y_test >= 3).astype(int)
+
+build_number = sys.argv[1]
+
+accuracy = accuracy_score(y_test_binary, y_pred.round())
+precision, recall, f1, _ = precision_recall_fscore_support(y_test_binary, y_pred.round(), average='micro')
+rmse = sqrt(mean_squared_error(y_test, y_pred))
+
+print(f'Accuracy: {accuracy}')
+print(f'Micro-avg Precision: {precision}')
+print(f'Micro-avg Recall: {recall}')
+print(f'F1 Score: {f1}')
+print(f'RMSE: {rmse}')
+
+with open(r"beer_metrics.txt", "a") as f:
+    f.write(f"{accuracy},{build_number}\n")
\ No newline at end of file
diff --git a/IUM_06-plot.py b/IUM_06-plot.py
new file mode 100644
index 0000000..cf7470f
--- /dev/null
+++ b/IUM_06-plot.py
@@ -0,0 +1,24 @@
+import matplotlib.pyplot as plt
+
+
+def main():
+    accuracy = []
+    build_numbers = []
+
+    with open("beer_metrics.txt") as f:
+        for line in f:
+            accuracy.append(float(line.split(",")[0]))
+            build_numbers.append(int(line.split(",")[1]))
+
+    plt.plot(build_numbers, accuracy)
+    plt.xlabel("Build Number")
+    plt.ylabel("Accuracy")
+    plt.title("Accuracy of the model over time")
+    plt.xticks(range(min(build_numbers), max(build_numbers) + 1))
+    # Save first: once a blocking show() returns, the figure is released and savefig() would write an empty image.
+    plt.savefig("acc.png")
+    plt.show()
+
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file
diff --git a/Jenkinsfile b/Jenkinsfile
index f9475e6..a569b63 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -1,52 +1,62 @@
 pipeline {
-    agent any
-
-    parameters {
-        string(name: 'CUTOFF', defaultValue: '10000', description: 'Liczba wierszy do obcięcia ze zbioru danych')
-        string(name: 'KAGGLE_USERNAME', defaultValue: '', description: 'Kaggle username')
-        password(name: 'KAGGLE_KEY', defaultValue: '', description: 'Kaggle API key')
+    agent {
+        dockerfile true
     }
 
+    triggers {
+        upstream(upstreamProjects: 's464979-training/training', threshold: hudson.model.Result.SUCCESS)
+    }
+
+    parameters {
+        buildSelector(defaultSelector: lastSuccessful(), description: 'Which build to use for copying artifacts', name: 'BUILD_SELECTOR')
+        gitParameter branchFilter: 'origin/(.*)', defaultValue: 'training', name: 'BRANCH', type: 'PT_BRANCH'
+    }
+
     stages {
         stage('Clone Repository') {
             steps {
-                git url: "https://git.wmi.amu.edu.pl/s464979/ium_464979"
+                git branch: 'evaluation', url: "https://git.wmi.amu.edu.pl/s464979/ium_464979"
             }
         }
-        stage('Download dataset') {
+        stage('Copy Dataset Artifacts') {
             steps {
-                withEnv(["KAGGLE_USERNAME=${env.KAGGLE_USERNAME}", "KAGGLE_KEY=${env.KAGGLE_KEY}"]) {
-                    sh "kaggle datasets download -d thedevastator/1-5-million-beer-reviews-from-beer-advocate --unzip"
-                }
+                copyArtifacts filter: 'beer_reviews.csv,beer_reviews_train.csv,beer_reviews_test.csv', projectName: 'z-s464979-create-dataset', selector: buildParameter('BUILD_SELECTOR')
             }
         }
-        stage('Process and Split Dataset') {
-            agent {
-                dockerfile {
-                    filename 'Dockerfile'
-                    reuseNode true
-                }
-            }
-            steps {
-                sh "chmod +x ./IUM_05-split.py"
-                sh "python3 ./IUM_05-split.py"
-                archiveArtifacts artifacts: 'beer_reviews.csv,beer_reviews_train.csv,beer_reviews_test.csv', onlyIfSuccessful: true
-            }
+        stage('Copy Training Artifacts') {
+            steps {
+                copyArtifacts filter: 'beer_review_sentiment_model.h5', projectName: 's464979-training/' + params.BRANCH, selector: buildParameter('BUILD_SELECTOR')
+            }
         }
-        stage("Run") {
-            agent {
-                dockerfile {
-                    filename 'Dockerfile'
-                    reuseNode true
-                }
-            }
+        stage('Copy Evaluation Artifacts') {
+            steps {
+                copyArtifacts filter: 'beer_metrics.txt', projectName: '_s464979-evaluation/evaluation', selector: buildParameter('BUILD_SELECTOR'), optional: true
+            }
+        }
+        stage("Run predictions") {
             steps {
-                sh "chmod +x ./IUM_05-model.py"
                 sh "chmod +x ./IUM_05-predict.py"
-                sh "python3 ./IUM_05-model.py"
                 sh "python3 ./IUM_05-predict.py"
-                archiveArtifacts artifacts: 'beer_review_sentiment_model.h5,beer_review_sentiment_predictions.csv', onlyIfSuccessful: true
+                archiveArtifacts artifacts: 'beer_review_sentiment_predictions.csv', onlyIfSuccessful: true
             }
         }
+        stage('Run metrics') {
+            steps {
+                sh 'chmod +x ./IUM_06-metrics.py'
+                sh "python3 ./IUM_06-metrics.py ${currentBuild.number}"
+            }
+        }
+
+        stage('Run plot') {
+            steps {
+                sh 'chmod +x ./IUM_06-plot.py'
+                sh 'python3 ./IUM_06-plot.py'
+            }
+        }
+        stage('Archive Artifacts') {
+            steps {
+                archiveArtifacts artifacts: '*', onlyIfSuccessful: true
+            }
+        }
     }
 }
diff --git a/Test.txt b/Test.txt
deleted file mode 100644
index 9118d6c..0000000
--- a/Test.txt
+++ /dev/null
@@ -1 +0,0 @@
-asdasd
\ No newline at end of file