This commit is contained in:
AWieczarek 2024-04-16 18:58:43 +02:00
parent 50c62c859c
commit e067972a05
5 changed files with 87 additions and 15 deletions

View File

@ -10,12 +10,13 @@ ENV TZ=Etc/UTC
# Install the Python 3 interpreter, pip and unzip from the distribution packages.
RUN apt update && \
apt install -y python3 python3-pip unzip
# NOTE(review): the two consecutive pip lines below look like the before/after
# pair of this diff hunk; presumably only the variant with tensorflow is meant
# to remain in the real Dockerfile - confirm against the repository.
RUN pip install kaggle pandas seaborn scikit-learn
# Same package set plus tensorflow, which IUM_05-model.py and IUM_05-predict.py import.
RUN pip install kaggle pandas seaborn scikit-learn tensorflow
# Relative paths in the COPY/CMD instructions below resolve against /app.
WORKDIR /app
# Copy the pipeline scripts into the image (all end up in /app).
COPY dataset_stats.py /app/
COPY IUM_02.py /app/
COPY IUM_05-model.py ./
COPY IUM_05-predict.py ./
COPY IUM_05-split.py ./
# Default command when the container is started without arguments.
CMD ["python3", "IUM_02.py"]

27
IUM_05-model.py Normal file
View File

@ -0,0 +1,27 @@
import pandas as pd
import tensorflow as tf
# Train a small dense network that predicts the overall rating from the four
# sub-rating columns, then save it for the prediction script.
#
# The previous version pushed the feature DataFrame through a text Tokenizer.
# Iterating a DataFrame yields its column labels, so that produced exactly four
# "samples" regardless of how many reviews were in the file, and fit() could not
# pair them with the per-review targets. The features are used directly instead.
FEATURE_COLUMNS = ['review_aroma', 'review_appearance', 'review_palate', 'review_taste']
TARGET_COLUMN = 'review_overall'

train_data = pd.read_csv('./beer_reviews_train.csv')

# A single NaN in the inputs or target turns the loss into NaN for the whole
# batch, so incomplete rows are removed before training.
train_data = train_data.dropna(subset=FEATURE_COLUMNS + [TARGET_COLUMN])

# NOTE(review): assumes all five columns hold numeric ratings; to_numpy raises
# if any of them cannot be converted to float32.
X_train = train_data[FEATURE_COLUMNS].to_numpy(dtype='float32')
y_train = train_data[TARGET_COLUMN].to_numpy(dtype='float32')

model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(len(FEATURE_COLUMNS),)),
    tf.keras.layers.Dense(16, activation='relu'),
    # Linear output: the target is a rating value, not a 0/1 label, so this is
    # a regression head rather than a sigmoid classifier.
    tf.keras.layers.Dense(1)
])

# Loss/metric are passed as objects rather than short string aliases so that the
# compile configuration stored in the .h5 file can be resolved again on load.
model.compile(optimizer='adam',
              loss=tf.keras.losses.MeanSquaredError(),
              metrics=[tf.keras.metrics.MeanAbsoluteError()])

model.fit(X_train, y_train, epochs=40, batch_size=32, validation_split=0.1)

model.save('beer_review_sentiment_model.h5')

18
IUM_05-predict.py Normal file
View File

@ -0,0 +1,18 @@
import pandas as pd
import numpy as np
import tensorflow as tf
# Load the saved model, score every row of the test split and write one
# prediction per row to a CSV file.
#
# The previous version built a brand-new, never-fitted text Tokenizer and ran it
# over the feature DataFrame. Iterating a DataFrame yields its column labels, so
# the model only ever saw four all-zero padded sequences instead of the test
# rows. The feature values are passed to the model directly instead.
FEATURE_COLUMNS = ['review_aroma', 'review_appearance', 'review_palate', 'review_taste']

test_data = pd.read_csv('./beer_reviews_test.csv')

# Rows are deliberately not dropped here so that line i of the output file
# corresponds to row i of the test CSV (a row with missing inputs yields NaN).
# NOTE(review): assumes the saved model takes these four columns as an (n, 4)
# float input - confirm against the training script.
X_test = test_data[FEATURE_COLUMNS].to_numpy(dtype='float32')

# Only inference is needed, so the training configuration is not restored.
model = tf.keras.models.load_model('beer_review_sentiment_model.h5', compile=False)

predictions = model.predict(X_test)

np.savetxt('beer_review_sentiment_predictions.csv', predictions, delimiter=',', fmt='%.10f')

9
IUM_05-split.py Normal file
View File

@ -0,0 +1,9 @@
import pandas as pd
from sklearn.model_selection import train_test_split
# Read the full review table and write a fixed-seed 80/20 train/test partition
# to two CSV files in the working directory.
reviews = pd.read_csv('./beer_reviews.csv')

# random_state pins the shuffle so repeated runs give the same partition.
train_part, test_part = train_test_split(reviews, random_state=42, test_size=0.2)

# The DataFrame index is left out of both output files.
for frame, destination in (
    (train_part, 'beer_reviews_train.csv'),
    (test_part, 'beer_reviews_test.csv'),
):
    frame.to_csv(destination, index=False)

41
Jenkinsfile vendored
View File

@ -1,5 +1,5 @@
pipeline {
agent { dockerfile true }
agent any
parameters {
string(name: 'CUTOFF', defaultValue: '10000', description: 'Liczba wierszy do obcięcia ze zbioru danych')
@ -13,22 +13,39 @@ pipeline {
git url: "https://git.wmi.amu.edu.pl/s464979/ium_464979"
}
}
stage('Download, Process, and Split Dataset') {
stage('Download dataset') {
steps {
withEnv([
"KAGGLE_USERNAME=${env.KAGGLE_USERNAME}",
"KAGGLE_KEY=${env.KAGGLE_KEY}"
]) {
sh 'export KAGGLE_USERNAME=${env.KAGGLE_USERNAME}"'
sh 'export KAGGLE_KEY=${env.KAGGLE_KEY}"'
sh "python3 IUM_02.py"
withEnv(["KAGGLE_USERNAME=${env.KAGGLE_USERNAME}", "KAGGLE_KEY=${env.KAGGLE_KEY}"]) {
sh "kaggle datasets download -d thedevastator/1-5-million-beer-reviews-from-beer-advocate --unzip"
}
}
}
stage('Archive Results') {
stage('Process and Split Dataset') {
agent {
dockerfile {
filename 'Dockerfile'
reuseNode true
}
}
steps {
archiveArtifacts artifacts: 'data/*', onlyIfSuccessful: true
sh "chmod +x ./IUM_05-split.py"
sh "python ./IUM_05-split.py"
archiveArtifacts artifacts: 'beer_reviews.csv,beer_reviews_train.csv,beer_reviews_test.csv', onlyIfSuccessful: true
}
}
// Train the model and generate predictions inside a container built from the
// repository Dockerfile, then archive the model file and the predictions.
stage("Run") {
    agent {
        dockerfile {
            filename 'Dockerfile'
            // NOTE(review): presumably needed so this stage shares the workspace
            // that holds the train/test CSVs from the earlier stage - confirm.
            reuseNode true
        }
    }
    steps {
        sh "chmod +x ./IUM_05-model.py"
        sh "chmod +x ./IUM_05-predict.py"
        // The image installs only the `python3` package and its CMD uses
        // `python3`; a bare `python` executable is not guaranteed to exist there.
        sh "python3 ./IUM_05-model.py"
        sh "python3 ./IUM_05-predict.py"
        archiveArtifacts artifacts: 'beer_review_sentiment_model.h5,beer_review_sentiment_predictions.csv', onlyIfSuccessful: true
    }
}
}