Multipipeline #wip
All checks were successful
s434780-training/pipeline/head This commit looks good

This commit is contained in:
sadurska@trui.pl 2021-05-16 20:10:28 +02:00
parent 2841a76304
commit 4b2e314d1c
6 changed files with 5745 additions and 5683 deletions

View File

@ -4,3 +4,7 @@ RUN apt update && apt install -y python3 python3-pip
RUN pip3 install kaggle RUN pip3 install kaggle
RUN pip3 install pandas RUN pip3 install pandas
# Python packages needed by the training and evaluation scripts.
RUN pip3 install tensorflow
RUN pip3 install numpy
RUN pip3 install matplotlib
# The PyPI project is named "scikit-learn"; the "sklearn" alias package is
# deprecated and no longer installs reliably. The import name stays `sklearn`.
RUN pip3 install scikit-learn

View File

@ -0,0 +1,43 @@
// Training pipeline: copies the dataset CSVs from the dataset-creation job,
// runs the training script inside the project's Docker image, archives the
// saved model, and reports the outcome by e-mail.
pipeline {
    agent any
    parameters {
        // Lets the user choose which upstream build the CSV artifacts come from.
        buildSelector(
            defaultSelector: lastSuccessful(),
            description: 'Which build to use for copying artifacts',
            name: 'BUILD_SELECTOR')
    }
    stages {
        stage('Copy artifact') {
            steps {
                copyArtifacts(
                    filter: 'dev.csv, train.csv, test.csv',
                    fingerprintArtifacts: false,
                    projectName: 's434780-create-dataset',
                    selector: buildParameter('BUILD_SELECTOR'))
            }
        }
        stage('docker') {
            steps {
                script {
                    def img = docker.build('s434780/ium:1.0')
                    img.inside {
                        sh 'chmod +x train-tensorflow.py'
                        // Must be the same file name as the chmod above; the
                        // previous 'train.tensorflow.py' (dot, not hyphen)
                        // pointed at a different file.
                        sh 'python3 ./train-tensorflow.py'
                    }
                }
            }
        }
        stage('archiveArtifacts') {
            steps {
                // NOTE(review): 'trained_model' is written by model.save() and is
                // presumably a directory; confirm this pattern archives its
                // contents (a 'trained_model/**' pattern may be needed).
                archiveArtifacts 'trained_model'
            }
        }
    }
    post {
        success {
            emailext body: 'Success train', subject: 's434780 train', to: '26ab8f35.uam.onmicrosoft.com@emea.teams.ms'
        }
        failure {
            emailext body: 'Failed train', subject: 's434780 train', to: '26ab8f35.uam.onmicrosoft.com@emea.teams.ms'
        }
    }
}

28
eval-tensorflow.py Normal file
View File

@ -0,0 +1,28 @@
import pandas as pd
import numpy as np
from tensorflow import keras
from sklearn.metrics import accuracy_score, f1_score
import matplotlib.pyplot as plt
# Load the model saved by the training step and score it on test.csv.
model = keras.models.load_model('trained_model')

test_df = pd.read_csv('test.csv')
test_x = test_df['reviews.text'].to_numpy()
test_y = test_df['reviews.doRecommend'].to_numpy()

# Threshold the raw model outputs at 0.5 to obtain hard 0/1 labels.
predictions = model.predict(test_x)
predictions = [1 if p > 0.5 else 0 for p in predictions]

accuracy = accuracy_score(test_y, predictions)
f1 = f1_score(test_y, predictions)

# evaluation.txt layout: accuracy on the first line, F1 on the second
# (no trailing newline). The context manager guarantees the file is closed
# even if a write fails.
with open('evaluation.txt', 'w') as out:
    out.write(str(accuracy) + '\n')
    out.write(str(f1))

14
main.py
View File

@ -1,20 +1,6 @@
import string import string
import pandas as pd import pandas as pd
from sklearn.model_selection import train_test_split from sklearn.model_selection import train_test_split
import nltk
nltk.download('stopwords')
from nltk.corpus import stopwords
# Built once at import time: maps every character in string.punctuation to None
# so str.translate() deletes it.
_PUNCT_TABLE = str.maketrans("", "", string.punctuation)


def remove_punct(text):
    """Return *text* with every character in ``string.punctuation`` removed.

    Args:
        text: The input string.

    Returns:
        A new string; characters outside ``string.punctuation`` are unchanged.
    """
    return text.translate(_PUNCT_TABLE)
# English stop-word list from NLTK, held in a set for O(1) membership tests.
stop = set(stopwords.words("english"))


def remove_stopwords(text):
    """Lower-case *text*, drop words found in ``stop``, and re-join with spaces.

    Args:
        text: The input string; it is split on whitespace.

    Returns:
        The remaining lower-cased words joined by single spaces.
    """
    # Lower-case each word once, then filter on the lowered form.
    lowered_words = (word.lower() for word in text.split())
    return " ".join(word for word in lowered_words if word not in stop)
def main(): def main():

11331
test.csv

File diff suppressed because it is too large Load Diff

View File

@ -1,7 +1,6 @@
import pandas as pd import pandas as pd
from silence_tensorflow import silence_tensorflow from silence_tensorflow import silence_tensorflow
from tensorflow import keras from tensorflow import keras
silence_tensorflow() silence_tensorflow()
from tensorflow.keras.preprocessing.text import Tokenizer from tensorflow.keras.preprocessing.text import Tokenizer
from collections import Counter from collections import Counter
@ -56,6 +55,9 @@ train_padded = pad_sequences(train_sequences, maxlen=max_length, padding="post",
val_padded = pad_sequences(val_sequences, maxlen=max_length, padding="post", truncating="post") val_padded = pad_sequences(val_sequences, maxlen=max_length, padding="post", truncating="post")
test_padded = pad_sequences(test_sequences, maxlen=max_length, padding="post", truncating="post") test_padded = pad_sequences(test_sequences, maxlen=max_length, padding="post", truncating="post")
# Replace the raw review text with its padded token sequence and overwrite
# test.csv so a later step can read the already-tokenized test set.
# NOTE(review): test_padded is the output of pad_sequences(..., maxlen=max_length),
# so it is presumably a 2-D array; confirm that assigning it to a single column
# works and that the values survive the CSV round-trip.
test_df['reviews.text'] = test_padded
# NOTE(review): to_csv() writes the index as an extra column by default — confirm
# that is intended for the consumer of test.csv.
test_df.to_csv('test.csv')
model = keras.models.Sequential() model = keras.models.Sequential()
model.add(layers.Embedding(num_unique_words, 32, input_length=max_length)) model.add(layers.Embedding(num_unique_words, 32, input_length=max_length))
@ -75,6 +77,8 @@ predictions = model.predict(test_padded)
predictions = [1 if p > 0.5 else 0 for p in predictions] predictions = [1 if p > 0.5 else 0 for p in predictions]
model.save('trained_model')
file = open('results.txt', 'w') file = open('results.txt', 'w')
file.write(predictions.__str__()) file.write(predictions.__str__())
file.close() file.close()