Multipipeline #wip
All checks were successful
s434780-training/pipeline/head This commit looks good

This commit is contained in:
sadurska@trui.pl 2021-05-16 20:10:28 +02:00
parent 2841a76304
commit 4b2e314d1c
6 changed files with 5745 additions and 5683 deletions

View File

@ -4,3 +4,7 @@ RUN apt update && apt install -y python3 python3-pip
# Python dependencies used by the dataset, training and evaluation scripts.
RUN pip3 install kaggle
RUN pip3 install pandas
RUN pip3 install tensorflow
RUN pip3 install numpy
RUN pip3 install matplotlib
# The PyPI distribution is named scikit-learn; the "sklearn" alias is
# deprecated. The import name inside Python stays `sklearn`.
RUN pip3 install scikit-learn

View File

@ -0,0 +1,43 @@
// Training pipeline: pulls the dataset splits from the dataset-creation job,
// trains the model inside the project Docker image, archives the trained
// model and reports the outcome by e-mail.
pipeline {
    agent any
    parameters {
        // Lets the user pick which upstream build supplies the CSV artifacts.
        buildSelector(
            defaultSelector: lastSuccessful(),
            description: 'Which build to use for copying artifacts',
            name: 'BUILD_SELECTOR')
    }
    stages {
        stage('Copy artifact') {
            steps {
                copyArtifacts filter: 'dev.csv, train.csv, test.csv', fingerprintArtifacts: false, projectName: 's434780-create-dataset', selector: buildParameter('BUILD_SELECTOR')
            }
        }
        stage('docker') {
            steps {
                script {
                    def img = docker.build('s434780/ium:1.0')
                    img.inside {
                        sh 'chmod +x train-tensorflow.py'
                        // Must name the same file as the chmod above; the
                        // previous './train.tensorflow.py' (dot instead of
                        // hyphen) pointed at a file that does not exist.
                        sh 'python3 ./train-tensorflow.py'
                    }
                }
            }
        }
        stage('archiveArtifacts') {
            steps {
                // NOTE(review): if 'trained_model' is a directory, confirm
                // this pattern archives its contents (may need
                // 'trained_model/**').
                archiveArtifacts 'trained_model'
            }
        }
    }
    post {
        success {
            emailext body: 'Success train', subject: 's434780 train', to: '26ab8f35.uam.onmicrosoft.com@emea.teams.ms'
        }
        failure {
            emailext body: 'Failed train', subject: 's434780 train', to: '26ab8f35.uam.onmicrosoft.com@emea.teams.ms'
        }
    }
}

28
eval-tensorflow.py Normal file
View File

@ -0,0 +1,28 @@
import pandas as pd
import numpy as np
from tensorflow import keras
from sklearn.metrics import accuracy_score, f1_score
import matplotlib.pyplot as plt
# Load the model written to 'trained_model' and the test split from test.csv.
model = keras.models.load_model('trained_model')
test_df = pd.read_csv('test.csv')
test_x = test_df['reviews.text'].to_numpy()
test_y = test_df['reviews.doRecommend'].to_numpy()

# Threshold the raw model outputs at 0.5 to get binary class labels.
predictions = model.predict(test_x)
predictions = [1 if p > 0.5 else 0 for p in predictions]

accuracy = accuracy_score(test_y, predictions)
f1 = f1_score(test_y, predictions)

# The context manager closes the report even if a write fails.
# Output format: accuracy on the first line, F1 on the second (no trailing
# newline).
with open('evaluation.txt', 'w') as report:
    report.write(f'{accuracy}\n')
    report.write(f'{f1}')

14
main.py
View File

@ -1,20 +1,6 @@
import string
import pandas as pd
from sklearn.model_selection import train_test_split
import nltk
nltk.download('stopwords')
from nltk.corpus import stopwords
# Built once at import time; the table only depends on string.punctuation.
_PUNCT_TABLE = str.maketrans("", "", string.punctuation)


def remove_punct(text: str) -> str:
    """Return *text* with every ``string.punctuation`` character removed.

    All other characters, including whitespace, are left untouched.
    """
    return text.translate(_PUNCT_TABLE)
# English stop words from the NLTK corpus, as a set for O(1) membership tests.
stop = set(stopwords.words("english"))


def remove_stopwords(text: str) -> str:
    """Return *text* lower-cased, with stop words dropped.

    The text is split on whitespace, each word is lower-cased once, words
    found in ``stop`` are discarded and the rest are joined with single
    spaces.
    """
    lowered_words = (word.lower() for word in text.split())
    return " ".join(word for word in lowered_words if word not in stop)
def main():

11331
test.csv

File diff suppressed because it is too large Load Diff

View File

@ -1,7 +1,6 @@
import pandas as pd
from silence_tensorflow import silence_tensorflow
from tensorflow import keras
silence_tensorflow()
from tensorflow.keras.preprocessing.text import Tokenizer
from collections import Counter
@ -56,6 +55,9 @@ train_padded = pad_sequences(train_sequences, maxlen=max_length, padding="post",
# Validation and test sequences share the same length / post-padding /
# post-truncation settings.
_padding_options = dict(maxlen=max_length, padding="post", truncating="post")
val_padded = pad_sequences(val_sequences, **_padding_options)
test_padded = pad_sequences(test_sequences, **_padding_options)

# Store the padded test sequences in the frame and dump it to test.csv.
# NOTE(review): test_padded comes from pad_sequences; confirm pandas accepts
# it as a single column and that it round-trips through CSV as intended.
test_df['reviews.text'] = test_padded
test_df.to_csv('test.csv')

# Start the network: a 32-dimensional embedding over the vocabulary.
embedding_layer = layers.Embedding(num_unique_words, 32, input_length=max_length)
model = keras.models.Sequential()
model.add(embedding_layer)
@ -75,6 +77,8 @@ predictions = model.predict(test_padded)
# Threshold the raw model outputs at 0.5 to get binary class labels.
predictions = [1 if p > 0.5 else 0 for p in predictions]

# Save the trained model under 'trained_model'.
model.save('trained_model')

# Write the predicted labels as the textual form of the Python list; the
# context manager closes the file even if the write fails.
with open('results.txt', 'w') as results_file:
    results_file.write(str(predictions))