This commit is contained in:
parent
2841a76304
commit
4b2e314d1c
@ -3,4 +3,8 @@ FROM ubuntu:20.04
|
||||
# Install Python 3 and pip on top of the base image.
RUN apt update && apt install -y python3 python3-pip

# Python dependencies, one layer per package.
RUN pip3 install kaggle
RUN pip3 install pandas
RUN pip3 install tensorflow
RUN pip3 install numpy
RUN pip3 install matplotlib
# The PyPI distribution is named "scikit-learn"; "sklearn" is only a deprecated
# alias and should not be used as an install target.
RUN pip3 install scikit-learn
|
||||
|
43
Jenkinsfile_train_tensorflow
Normal file
43
Jenkinsfile_train_tensorflow
Normal file
@ -0,0 +1,43 @@
|
||||
// Declarative pipeline: copies the dataset CSV files from an upstream job,
// runs the training script inside the project's Docker image, archives the
// trained model and e-mails the build result.
pipeline {
    agent any
    parameters {
        // Selects which build of the upstream job supplies the CSV artifacts.
        buildSelector(
            defaultSelector: lastSuccessful(),
            description: 'Which build to use for copying artifacts',
            name: 'BUILD_SELECTOR')
    }
    stages {
        stage('Copy artifact') {
            steps {
                copyArtifacts filter: 'dev.csv, train.csv, test.csv', fingerprintArtifacts: false, projectName: 's434780-create-dataset', selector: buildParameter('BUILD_SELECTOR')
            }
        }
        stage('docker') {
            steps {
                script {
                    // Build the image from the workspace Dockerfile and run the
                    // training inside a container started from it.
                    def img = docker.build('s434780/ium:1.0')
                    img.inside {
                        sh 'chmod +x train-tensorflow.py'
                        // Must name the same file as the chmod above (hyphen,
                        // not dot), otherwise python3 cannot find the script.
                        sh 'python3 ./train-tensorflow.py'
                    }
                }
            }
        }

        stage('archiveArtifacts') {
            steps {
                archiveArtifacts 'trained_model'
            }
        }
    }
    post {
        success {
            emailext body: 'Success train', subject: 's434780 train', to: '26ab8f35.uam.onmicrosoft.com@emea.teams.ms'
        }

        failure {
            emailext body: 'Failed train', subject: 's434780 train', to: '26ab8f35.uam.onmicrosoft.com@emea.teams.ms'
        }

    }
}
|
28
eval-tensorflow.py
Normal file
28
eval-tensorflow.py
Normal file
@ -0,0 +1,28 @@
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
from tensorflow import keras
|
||||
from sklearn.metrics import accuracy_score, f1_score
|
||||
import matplotlib.pyplot as plt
|
||||
|
||||
|
||||
# Load the model from the 'trained_model' path.
model = keras.models.load_model('trained_model')

# Read the test split: model inputs and the reference labels.
test_df = pd.read_csv('test.csv')
test_x = test_df['reviews.text'].to_numpy()
test_y = test_df['reviews.doRecommend'].to_numpy()

predictions = model.predict(test_x)

# Threshold the raw model outputs at 0.5 to obtain hard 0/1 labels.
predictions = [1 if p > 0.5 else 0 for p in predictions]

accuracy = accuracy_score(test_y, predictions)
f1 = f1_score(test_y, predictions)

# Accuracy goes on the first line, F1 on the second. The context manager
# closes the file even when a write raises.
with open('evaluation.txt', 'w') as out:
    out.write(str(accuracy) + '\n')
    out.write(str(f1))
|
||||
|
14
main.py
14
main.py
@ -1,20 +1,6 @@
|
||||
import string
|
||||
import pandas as pd
|
||||
from sklearn.model_selection import train_test_split
|
||||
import nltk
|
||||
nltk.download('stopwords')
|
||||
from nltk.corpus import stopwords
|
||||
|
||||
|
||||
def remove_punct(text):
    """Return *text* with every ASCII punctuation character removed.

    Args:
        text: The string to clean.

    Returns:
        A copy of ``text`` without any character listed in
        ``string.punctuation``.
    """
    # The third argument of maketrans lists the characters to delete.
    translator = str.maketrans("", "", string.punctuation)
    return text.translate(translator)
|
||||
|
||||
|
||||
stop = set(stopwords.words("english"))
|
||||
def remove_stopwords(text, stop_words=None):
    """Lower-case *text* and drop its stop words.

    Args:
        text: String of whitespace-separated words.
        stop_words: Optional collection of lower-case words to drop. When
            omitted, the module-level ``stop`` set is used.

    Returns:
        The remaining words, lower-cased and joined by single spaces.
    """
    if stop_words is None:
        stop_words = stop
    # Lower-case each word once, then filter on the lower-cased form.
    lowered = (word.lower() for word in text.split())
    return " ".join(word for word in lowered if word not in stop_words)
|
||||
|
||||
|
||||
def main():
|
||||
|
@ -1,7 +1,6 @@
|
||||
import pandas as pd
|
||||
from silence_tensorflow import silence_tensorflow
|
||||
from tensorflow import keras
|
||||
|
||||
silence_tensorflow()
|
||||
from tensorflow.keras.preprocessing.text import Tokenizer
|
||||
from collections import Counter
|
||||
@ -56,6 +55,9 @@ train_padded = pad_sequences(train_sequences, maxlen=max_length, padding="post",
|
||||
val_padded = pad_sequences(val_sequences, maxlen=max_length, padding="post", truncating="post")
|
||||
test_padded = pad_sequences(test_sequences, maxlen=max_length, padding="post", truncating="post")
|
||||
|
||||
test_df['reviews.text'] = test_padded
|
||||
test_df.to_csv('test.csv')
|
||||
|
||||
|
||||
model = keras.models.Sequential()
|
||||
model.add(layers.Embedding(num_unique_words, 32, input_length=max_length))
|
||||
@ -75,6 +77,8 @@ predictions = model.predict(test_padded)
|
||||
|
||||
# Threshold the raw model outputs at 0.5 to obtain hard 0/1 labels.
predictions = [1 if p > 0.5 else 0 for p in predictions]

# Save the model under the 'trained_model' path.
model.save('trained_model')

# Dump the predicted labels as the string form of the list. The context
# manager closes the file even when the write raises.
with open('results.txt', 'w') as out:
    out.write(str(predictions))
|
Loading…
Reference in New Issue
Block a user