This commit is contained in:
parent 2841a76304
commit 4b2e314d1c
@ -3,4 +3,8 @@ FROM ubuntu:20.04
 RUN apt update && apt install -y python3 python3-pip
 RUN pip3 install kaggle
 RUN pip3 install pandas
+RUN pip3 install tensorflow
+RUN pip3 install numpy
+RUN pip3 install matplotlib
+RUN pip3 install sklearn
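The four added RUN lines extend the image with the full training stack. One caveat: the sklearn package on PyPI is only a deprecated alias for scikit-learn and newer releases of it refuse to install, so pip3 install scikit-learn is the safer spelling. A minimal smoke test, offered as a sketch and assuming the image is tagged s434780/ium:1.0 as in the Jenkinsfile below, can be run inside the container to confirm the libraries import:

# smoke_test.py - hypothetical sanity check for the image; module names are assumed
# from the RUN lines above (the scikit-learn distribution imports as 'sklearn')
import numpy, pandas, matplotlib, sklearn, tensorflow
print('tensorflow', tensorflow.__version__)
print('numpy', numpy.__version__)
print('pandas', pandas.__version__)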
Jenkinsfile_train_tensorflow (new file, 43 lines)
@ -0,0 +1,43 @@
+pipeline {
+    agent any
+    parameters {
+        buildSelector(
+            defaultSelector: lastSuccessful(),
+            description: 'Which build to use for copying artifacts',
+            name: 'BUILD_SELECTOR')
+    }
+    stages {
+        stage('Copy artifact') {
+            steps {
+                copyArtifacts filter: 'dev.csv, train.csv, test.csv', fingerprintArtifacts: false, projectName: 's434780-create-dataset', selector: buildParameter('BUILD_SELECTOR')
+            }
+        }
+        stage('docker') {
+            steps {
+                script {
+                    def img = docker.build('s434780/ium:1.0')
+                    img.inside {
+                        sh 'chmod +x train-tensorflow.py'
+                        sh 'python3 ./train-tensorflow.py'
+                    }
+                }
+            }
+        }
+        stage('archiveArtifacts') {
+            steps {
+                archiveArtifacts 'trained_model'
+            }
+        }
+    }
+    post {
+        success {
+            emailext body: 'Success train', subject: 's434780 train', to: '26ab8f35.uam.onmicrosoft.com@emea.teams.ms'
+        }
+        failure {
+            emailext body: 'Failed train', subject: 's434780 train', to: '26ab8f35.uam.onmicrosoft.com@emea.teams.ms'
+        }
+    }
+}
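Pipeline flow in brief: the Copy artifact stage pulls dev.csv, train.csv and test.csv from the s434780-create-dataset job (build chosen through BUILD_SELECTOR), the docker stage builds s434780/ium:1.0 from the Dockerfile above and runs the training script inside it, and the resulting model directory is archived. One hedge: archiveArtifacts 'trained_model' may need the glob 'trained_model/**' to capture the files inside a SavedModel directory. For debugging outside Jenkins, a rough local equivalent of the docker stage could look like this sketch, which assumes the Docker SDK for Python (pip install docker) and that the csv splits are already in the working directory:

# local_train.py - hypothetical stand-in for the 'docker' stage when running outside
# Jenkins; mirrors docker.build(...) plus img.inside { sh ... } from the pipeline above
import os
import docker  # Docker SDK for Python

client = docker.from_env()
client.images.build(path='.', tag='s434780/ium:1.0')   # like docker.build('s434780/ium:1.0')
logs = client.containers.run(                           # like img.inside { sh 'python3 ...' }
    's434780/ium:1.0',
    'python3 ./train-tensorflow.py',
    volumes={os.getcwd(): {'bind': '/work', 'mode': 'rw'}},
    working_dir='/work',
    remove=True,
)
print(logs.decode())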
eval-tensorflow.py (new file, 28 lines)
@ -0,0 +1,28 @@
+import pandas as pd
+import numpy as np
+from tensorflow import keras
+from sklearn.metrics import accuracy_score, f1_score
+import matplotlib.pyplot as plt
+
+
+model = keras.models.load_model('trained_model')
+
+test_df = pd.read_csv('test.csv')
+test_x = test_df['reviews.text'].to_numpy()
+test_y = test_df['reviews.doRecommend'].to_numpy()
+
+# print(test_y.shape)
+# print(test_x.shape)
+
+predictions = model.predict(test_x)
+
+predictions = [1 if p > 0.5 else 0 for p in predictions]
+
+accuracy = accuracy_score(test_y, predictions)
+f1 = f1_score(test_y, predictions)
+
+file = open('evaluation.txt', 'w')
+file.writelines(accuracy.__str__() + '\n')
+file.writelines(f1.__str__())
+file.close()
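eval-tensorflow.py assumes the model ends in a single sigmoid unit, so thresholding the predicted probabilities at 0.5 yields the 0/1 labels that accuracy_score and f1_score expect; both metrics are then written to evaluation.txt. A slightly tidier ending, offered only as a sketch with the same output, uses a context manager so the file is closed even if writing fails:

# hypothetical replacement for the open()/writelines()/close() block above
with open('evaluation.txt', 'w') as out_file:
    out_file.write(f"{accuracy}\n")
    out_file.write(f"{f1}\n")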
main.py (14 lines changed)
@ -1,20 +1,6 @@
 import string
 import pandas as pd
 from sklearn.model_selection import train_test_split
-import nltk
-nltk.download('stopwords')
-from nltk.corpus import stopwords
-
-
-def remove_punct(text):
-    translator = str.maketrans("", "", string.punctuation)
-    return text.translate(translator)
-
-
-stop = set(stopwords.words("english"))
-def remove_stopwords(text):
-    filtered_words = [word.lower() for word in text.split() if word.lower() not in stop]
-    return " ".join(filtered_words)


 def main():
@ -1,7 +1,6 @@
 import pandas as pd
 from silence_tensorflow import silence_tensorflow
 from tensorflow import keras
-
 silence_tensorflow()
 from tensorflow.keras.preprocessing.text import Tokenizer
 from collections import Counter
@ -56,6 +55,9 @@ train_padded = pad_sequences(train_sequences, maxlen=max_length, padding="post", truncating="post")
 val_padded = pad_sequences(val_sequences, maxlen=max_length, padding="post", truncating="post")
 test_padded = pad_sequences(test_sequences, maxlen=max_length, padding="post", truncating="post")
+
+test_df['reviews.text'] = test_padded
+test_df.to_csv('test.csv')


 model = keras.models.Sequential()
 model.add(layers.Embedding(num_unique_words, 32, input_length=max_length))
@ -75,6 +77,8 @@ predictions = model.predict(test_padded)

 predictions = [1 if p > 0.5 else 0 for p in predictions]

+model.save('trained_model')
+
 file = open('results.txt', 'w')
 file.write(predictions.__str__())
 file.close()
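The file name is not shown in this view; it is presumably the training script that the Jenkinsfile runs as train-tensorflow.py. The two functional additions are writing the padded test split back to test.csv for the separate evaluation step and persisting the fitted network with model.save('trained_model'), which is what both the archiveArtifacts stage and eval-tensorflow.py rely on. Storing a 2-D array of padded sequences in a single DataFrame column and round-tripping it through CSV can be lossy; a hedged alternative, purely a sketch, is to hand the numeric arrays over as .npy files:

# hypothetical hand-off to the evaluation step: persist the padded inputs and labels
# directly instead of squeezing the 2-D sequence array into a CSV column
import numpy as np
np.save('test_padded.npy', test_padded)  # shape: (n_samples, max_length)
np.save('test_labels.npy', test_df['reviews.doRecommend'].to_numpy())
# the eval side would then use np.load('test_padded.npy') and np.load('test_labels.npy')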