This commit is contained in:
parent 2841a76304
commit 4b2e314d1c
@ -3,4 +3,8 @@ FROM ubuntu:20.04
 RUN apt update && apt install -y python3 python3-pip
 RUN pip3 install kaggle
 RUN pip3 install pandas
+RUN pip3 install tensorflow
+RUN pip3 install numpy
+RUN pip3 install matplotlib
+RUN pip3 install sklearn
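The four added RUN lines extend the image with the full training stack. One caveat: the sklearn package on PyPI is only a deprecated alias for scikit-learn and newer releases of it refuse to install, so pip3 install scikit-learn is the safer spelling. A minimal smoke test, offered as a sketch and assuming the image is tagged s434780/ium:1.0 as in the Jenkinsfile below, can be run inside the container to confirm the libraries import:

# smoke_test.py - hypothetical sanity check for the image; module names are assumed
# from the RUN lines above (the scikit-learn distribution imports as 'sklearn')
import numpy, pandas, matplotlib, sklearn, tensorflow
print('tensorflow', tensorflow.__version__)
print('numpy', numpy.__version__)
print('pandas', pandas.__version__)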
Jenkinsfile_train_tensorflow (new file, 43 lines)
@ -0,0 +1,43 @@
+pipeline {
+    agent any
+    parameters {
+        buildSelector(
+            defaultSelector: lastSuccessful(),
+            description: 'Which build to use for copying artifacts',
+            name: 'BUILD_SELECTOR')
+    }
+    stages {
+        stage('Copy artifact') {
+            steps {
+                copyArtifacts filter: 'dev.csv, train.csv, test.csv', fingerprintArtifacts: false, projectName: 's434780-create-dataset', selector: buildParameter('BUILD_SELECTOR')
+            }
+        }
+        stage('docker') {
+            steps {
+                script {
+                    def img = docker.build('s434780/ium:1.0')
+                    img.inside {
+                        sh 'chmod +x train-tensorflow.py'
+                        sh 'python3 ./train-tensorflow.py'
+                    }
+                }
+            }
+        }
+        stage('archiveArtifacts') {
+            steps {
+                archiveArtifacts 'trained_model'
+            }
+        }
+    }
+    post {
+        success {
+            emailext body: 'Success train', subject: 's434780 train', to: '26ab8f35.uam.onmicrosoft.com@emea.teams.ms'
+        }
+        failure {
+            emailext body: 'Failed train', subject: 's434780 train', to: '26ab8f35.uam.onmicrosoft.com@emea.teams.ms'
+        }
+    }
+}
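Pipeline flow in brief: the Copy artifact stage pulls dev.csv, train.csv and test.csv from the s434780-create-dataset job (build chosen through BUILD_SELECTOR), the docker stage builds s434780/ium:1.0 from the Dockerfile above and runs the training script inside it, and the resulting model directory is archived. One hedge: archiveArtifacts 'trained_model' may need the glob 'trained_model/**' to capture the files inside a SavedModel directory. For debugging outside Jenkins, a rough local equivalent of the docker stage could look like this sketch, which assumes the Docker SDK for Python (pip install docker) and that the csv splits are already in the working directory:

# local_train.py - hypothetical stand-in for the 'docker' stage when running outside
# Jenkins; mirrors docker.build(...) plus img.inside { sh ... } from the pipeline above
import os
import docker  # Docker SDK for Python

client = docker.from_env()
client.images.build(path='.', tag='s434780/ium:1.0')   # like docker.build('s434780/ium:1.0')
logs = client.containers.run(                           # like img.inside { sh 'python3 ...' }
    's434780/ium:1.0',
    'python3 ./train-tensorflow.py',
    volumes={os.getcwd(): {'bind': '/work', 'mode': 'rw'}},
    working_dir='/work',
    remove=True,
)
print(logs.decode())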
eval-tensorflow.py (new file, 28 lines)
@ -0,0 +1,28 @@
+import pandas as pd
+import numpy as np
+from tensorflow import keras
+from sklearn.metrics import accuracy_score, f1_score
+import matplotlib.pyplot as plt
+
+
+model = keras.models.load_model('trained_model')
+
+test_df = pd.read_csv('test.csv')
+test_x = test_df['reviews.text'].to_numpy()
+test_y = test_df['reviews.doRecommend'].to_numpy()
+
+# print(test_y.shape)
+# print(test_x.shape)
+
+predictions = model.predict(test_x)
+
+predictions = [1 if p > 0.5 else 0 for p in predictions]
+
+accuracy = accuracy_score(test_y, predictions)
+f1 = f1_score(test_y, predictions)
+
+file = open('evaluation.txt', 'w')
+file.writelines(accuracy.__str__() + '\n')
+file.writelines(f1.__str__())
+file.close()
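eval-tensorflow.py assumes the model ends in a single sigmoid unit, so thresholding the predicted probabilities at 0.5 yields the 0/1 labels that accuracy_score and f1_score expect; both metrics are then written to evaluation.txt. A slightly tidier ending, offered only as a sketch with the same output, uses a context manager so the file is closed even if writing fails:

# hypothetical replacement for the open()/writelines()/close() block above
with open('evaluation.txt', 'w') as out_file:
    out_file.write(f"{accuracy}\n")
    out_file.write(f"{f1}\n")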
main.py (14 lines changed)
@ -1,20 +1,6 @@
 import string
 import pandas as pd
 from sklearn.model_selection import train_test_split
-import nltk
-nltk.download('stopwords')
-from nltk.corpus import stopwords
-
-
-def remove_punct(text):
-    translator = str.maketrans("", "", string.punctuation)
-    return text.translate(translator)
-
-
-stop = set(stopwords.words("english"))
-def remove_stopwords(text):
-    filtered_words = [word.lower() for word in text.split() if word.lower() not in stop]
-    return " ".join(filtered_words)


 def main():
@ -1,7 +1,6 @@
 import pandas as pd
 from silence_tensorflow import silence_tensorflow
 from tensorflow import keras
-
 silence_tensorflow()
 from tensorflow.keras.preprocessing.text import Tokenizer
 from collections import Counter
@ -56,6 +55,9 @@ train_padded = pad_sequences(train_sequences, maxlen=max_length, padding="post", truncating="post")
 val_padded = pad_sequences(val_sequences, maxlen=max_length, padding="post", truncating="post")
 test_padded = pad_sequences(test_sequences, maxlen=max_length, padding="post", truncating="post")
+
+test_df['reviews.text'] = test_padded
+test_df.to_csv('test.csv')


 model = keras.models.Sequential()
 model.add(layers.Embedding(num_unique_words, 32, input_length=max_length))
@ -75,6 +77,8 @@ predictions = model.predict(test_padded)

 predictions = [1 if p > 0.5 else 0 for p in predictions]

+model.save('trained_model')
+
 file = open('results.txt', 'w')
 file.write(predictions.__str__())
 file.close()
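The file name is not shown in this view; it is presumably the training script that the Jenkinsfile runs as train-tensorflow.py. The two functional additions are writing the padded test split back to test.csv for the separate evaluation step and persisting the fitted network with model.save('trained_model'), which is what both the archiveArtifacts stage and eval-tensorflow.py rely on. Storing a 2-D array of padded sequences in a single DataFrame column and round-tripping it through CSV can be lossy; a hedged alternative, purely a sketch, is to hand the numeric arrays over as .npy files:

# hypothetical hand-off to the evaluation step: persist the padded inputs and labels
# directly instead of squeezing the 2-D sequence array into a CSV column
import numpy as np
np.save('test_padded.npy', test_padded)  # shape: (n_samples, max_length)
np.save('test_labels.npy', test_df['reviews.doRecommend'].to_numpy())
# the eval side would then use np.load('test_padded.npy') and np.load('test_labels.npy')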