Multipipeline #wip

2021-05-16 20:10:28 +02:00 · 2021-05-16 20:10:28 +02:00 · 4b2e314d1c
commit 4b2e314d1c
parent 2841a76304
6 changed files with 5745 additions and 5683 deletions
--- a/4
+++ b/4
@ -4,3 +4,7 @@ RUN apt update && apt install -y python3 python3-pip

 RUN pip3 install kaggle
 RUN pip3 install pandas
+# ML dependencies. `import sklearn` is provided by the scikit-learn distribution; the `sklearn` PyPI name is a deprecated placeholder.
+RUN pip3 install tensorflow numpy
+RUN pip3 install matplotlib
+RUN pip3 install scikit-learn
--- a/43
+++ b/43
@ -0,0 +1,43 @@
+// Training job: copy the dataset CSVs from the dataset job, train inside the project image, archive the model, notify by e-mail.
+pipeline {
+    agent any
+    parameters {
+        buildSelector(
+            defaultSelector: lastSuccessful(),
+            description: 'Which build to use for copying artifacts',
+            name: 'BUILD_SELECTOR')
+    }
+    stages {
+        stage('Copy artifact') {
+            steps {
+                copyArtifacts filter: 'dev.csv, train.csv, test.csv', fingerprintArtifacts: false, projectName: 's434780-create-dataset', selector: buildParameter('BUILD_SELECTOR')
+            }
+        }
+        stage('docker') {
+            steps {
+                script {
+                    def img = docker.build('s434780/ium:1.0')
+                    img.inside {
+                        sh 'chmod +x train-tensorflow.py'
+                        sh 'python3 ./train-tensorflow.py' // must match the script name used by chmod above
+                    }
+                }
+            }
+        }
+        // Match both a single model file and the contents of a model directory, whichever model.save() produced.
+        stage('archiveArtifacts') {
+            steps {
+                archiveArtifacts 'trained_model, trained_model/**'
+            }
+        }
+    }
+    post {
+        // Report the outcome of the run to the team channel address.
+        success {
+            emailext body: 'Success train', subject: 's434780 train', to: '26ab8f35.uam.onmicrosoft.com@emea.teams.ms'
+        }
+        failure {
+            emailext body: 'Failed train', subject: 's434780 train', to: '26ab8f35.uam.onmicrosoft.com@emea.teams.ms'
+        }
+    }
+}
--- a/eval-tensorflow.py
+++ b/eval-tensorflow.py
@ -0,0 +1,28 @@
+import pandas as pd
+import numpy as np
+from tensorflow import keras
+from sklearn.metrics import accuracy_score, f1_score
+import matplotlib.pyplot as plt
+
+
+# Load the model saved under the name 'trained_model' (the training script calls model.save('trained_model')).
+model = keras.models.load_model('trained_model')
+
+test_df = pd.read_csv('test.csv')
+test_x = test_df['reviews.text'].to_numpy()
+test_y = test_df['reviews.doRecommend'].to_numpy()
+
+# NOTE(review): assumes 'reviews.text' already holds model-ready inputs -- confirm against the training script.
+scores = model.predict(test_x)
+
+# Threshold the raw scores at 0.5 to obtain hard 0/1 labels for the metrics below.
+predictions = [1 if p > 0.5 else 0 for p in scores]
+
+accuracy = accuracy_score(test_y, predictions)
+f1 = f1_score(test_y, predictions)
+
+# The context manager closes the report even if the write raises.
+# Output format is unchanged: accuracy on line 1, F1 on line 2, no trailing newline.
+with open('evaluation.txt', 'w') as report:
+    report.write('\n'.join([str(accuracy), str(f1)]))
+
--- a/main.py
+++ b/main.py
@ -1,20 +1,6 @@
 import string
 import pandas as pd
 from sklearn.model_selection import train_test_split
-import nltk
-nltk.download('stopwords')
-from nltk.corpus import stopwords
-
-
-def remove_punct(text):
-    translator = str.maketrans("", "", string.punctuation)
-    return text.translate(translator)
-
-
-stop = set(stopwords.words("english"))
-def remove_stopwords(text):
-    filtered_words = [word.lower() for word in text.split() if word.lower() not in stop]
-    return " ".join(filtered_words)


 def main():
--- a/test.csv
+++ b/test.csv
--- a/train-tensorflow.py
+++ b/train-tensorflow.py
@ -1,7 +1,6 @@
 import pandas as pd
 from silence_tensorflow import silence_tensorflow
 from tensorflow import keras
-
 silence_tensorflow()
 from tensorflow.keras.preprocessing.text import Tokenizer
 from collections import Counter
@ -56,6 +55,9 @@ train_padded = pad_sequences(train_sequences, maxlen=max_length, padding="post",
 val_padded = pad_sequences(val_sequences, maxlen=max_length, padding="post", truncating="post")
 test_padded = pad_sequences(test_sequences, maxlen=max_length, padding="post", truncating="post")

+test_df['reviews.text'] = test_padded
+test_df.to_csv('test.csv')
+

 model = keras.models.Sequential()
 model.add(layers.Embedding(num_unique_words, 32, input_length=max_length))
@ -75,6 +77,8 @@ predictions = model.predict(test_padded)

 predictions = [1 if p > 0.5 else 0 for p in predictions]

+model.save('trained_model')
+
 file = open('results.txt', 'w')
 file.write(predictions.__str__())
 file.close()