IUM_05
This commit is contained in:
parent
50c62c859c
commit
e067972a05
@ -10,12 +10,13 @@ ENV TZ=Etc/UTC
|
|||||||
RUN apt update && \
|
RUN apt update && \
|
||||||
apt install -y python3 python3-pip unzip
|
apt install -y python3 python3-pip unzip
|
||||||
|
|
||||||
RUN pip install kaggle pandas seaborn scikit-learn
|
RUN pip install kaggle pandas seaborn scikit-learn tensorflow
|
||||||
|
|
||||||
WORKDIR /app
|
WORKDIR /app
|
||||||
|
|
||||||
COPY dataset_stats.py /app/
|
COPY IUM_05-model.py ./
|
||||||
COPY IUM_02.py /app/
|
COPY IUM_05-predict.py ./
|
||||||
|
COPY IUM_05-split.py ./
|
||||||
|
|
||||||
CMD ["python3", "IUM_02.py"]
|
CMD ["python3", "IUM_02.py"]
|
||||||
|
|
||||||
|
27
IUM_05-model.py
Normal file
27
IUM_05-model.py
Normal file
@ -0,0 +1,27 @@
|
|||||||
|
import pandas as pd
|
||||||
|
import tensorflow as tf
|
||||||
|
|
||||||
|
# Train a small dense regression network that predicts the overall beer
# rating from the four partial review scores, then save it to disk.

# Numeric score columns used as model inputs, and the column being predicted.
FEATURE_COLUMNS = ['review_aroma', 'review_appearance', 'review_palate', 'review_taste']
TARGET_COLUMN = 'review_overall'

train_data = pd.read_csv('./beer_reviews_train.csv')

# A single NaN score would turn the loss (and then every weight) into NaN,
# so incomplete rows are dropped before training.
train_data = train_data.dropna(subset=FEATURE_COLUMNS + [TARGET_COLUMN])

# The features are already numeric, so they go to the network as a plain
# (n_samples, 4) float matrix. Running them through a text Tokenizer would
# iterate the DataFrame's column labels instead of its rows and produce only
# four "samples" that do not line up with the targets.
X_train = train_data[FEATURE_COLUMNS].to_numpy(dtype='float32')
y_train = train_data[TARGET_COLUMN].to_numpy(dtype='float32')

model = tf.keras.Sequential([
    tf.keras.Input(shape=(len(FEATURE_COLUMNS),)),
    tf.keras.layers.Dense(16, activation='relu'),
    # Linear output: the target is treated as a numeric rating rather than a
    # 0/1 label, so a sigmoid would cap predictions at 1.
    # NOTE(review): assumes review_overall is a continuous score - confirm.
    tf.keras.layers.Dense(1),
])

# Regression objective; full metric/loss names are used so the saved HDF5
# file does not depend on short aliases when it is loaded again.
model.compile(optimizer='adam',
              loss='mean_squared_error',
              metrics=['mean_absolute_error'])

# 10% of the training rows are held out by Keras for validation.
model.fit(X_train, y_train, epochs=40, batch_size=32, validation_split=0.1)

model.save('beer_review_sentiment_model.h5')
|
18
IUM_05-predict.py
Normal file
18
IUM_05-predict.py
Normal file
@ -0,0 +1,18 @@
|
|||||||
|
import pandas as pd
|
||||||
|
import numpy as np
|
||||||
|
import tensorflow as tf
|
||||||
|
|
||||||
|
# Load the trained model, score every row of the test split and write the
# predictions to a CSV file (one prediction per line).

# Numeric score columns fed to the model, in the order used for training.
# NOTE(review): must match the feature list of the training script - confirm.
FEATURE_COLUMNS = ['review_aroma', 'review_appearance', 'review_palate', 'review_taste']

test_data = pd.read_csv('./beer_reviews_test.csv')

# The features are numeric, so they are passed as a (n_samples, 4) float
# matrix. A freshly constructed (unfitted) text Tokenizer has an empty
# vocabulary and, when given a DataFrame, iterates its column labels, which
# would yield four all-zero rows instead of one row per test review.
X_test = test_data[FEATURE_COLUMNS].to_numpy(dtype='float32')

model = tf.keras.models.load_model('beer_review_sentiment_model.h5')

predictions = model.predict(X_test)

# Fixed 10-decimal formatting keeps the output stable across runs.
np.savetxt('beer_review_sentiment_predictions.csv', predictions, delimiter=',', fmt='%.10f')
|
9
IUM_05-split.py
Normal file
9
IUM_05-split.py
Normal file
@ -0,0 +1,9 @@
|
|||||||
|
import pandas as pd
|
||||||
|
from sklearn.model_selection import train_test_split
|
||||||
|
|
||||||
|
# Split the full beer-review dataset into train and test CSV files.

reviews = pd.read_csv('./beer_reviews.csv')

# 80/20 split; the fixed random_state makes the partition reproducible.
train_part, test_part = train_test_split(reviews, test_size=0.2, random_state=42)

# Write each partition next to the source file, without the index column.
for frame, target_path in (
    (train_part, 'beer_reviews_train.csv'),
    (test_part, 'beer_reviews_test.csv'),
):
    frame.to_csv(target_path, index=False)
|
41
Jenkinsfile
vendored
41
Jenkinsfile
vendored
@ -1,5 +1,5 @@
|
|||||||
pipeline {
|
pipeline {
|
||||||
agent { dockerfile true }
|
agent any
|
||||||
|
|
||||||
parameters {
|
parameters {
|
||||||
string(name: 'CUTOFF', defaultValue: '10000', description: 'Liczba wierszy do obcięcia ze zbioru danych')
|
string(name: 'CUTOFF', defaultValue: '10000', description: 'Liczba wierszy do obcięcia ze zbioru danych')
|
||||||
@ -13,22 +13,39 @@ pipeline {
|
|||||||
git url: "https://git.wmi.amu.edu.pl/s464979/ium_464979"
|
git url: "https://git.wmi.amu.edu.pl/s464979/ium_464979"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
stage('Download dataset') {
|
||||||
stage('Download, Process, and Split Dataset') {
|
|
||||||
steps {
|
steps {
|
||||||
withEnv([
|
withEnv(["KAGGLE_USERNAME=${env.KAGGLE_USERNAME}", "KAGGLE_KEY=${env.KAGGLE_KEY}"]) {
|
||||||
"KAGGLE_USERNAME=${env.KAGGLE_USERNAME}",
|
sh "kaggle datasets download -d thedevastator/1-5-million-beer-reviews-from-beer-advocate --unzip"
|
||||||
"KAGGLE_KEY=${env.KAGGLE_KEY}"
|
|
||||||
]) {
|
|
||||||
sh 'export KAGGLE_USERNAME=${env.KAGGLE_USERNAME}"'
|
|
||||||
sh 'export KAGGLE_KEY=${env.KAGGLE_KEY}"'
|
|
||||||
sh "python3 IUM_02.py"
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
stage('Archive Results') {
|
stage('Process and Split Dataset') {
|
||||||
|
agent {
|
||||||
|
dockerfile {
|
||||||
|
filename 'Dockerfile'
|
||||||
|
reuseNode true
|
||||||
|
}
|
||||||
|
}
|
||||||
steps {
|
steps {
|
||||||
archiveArtifacts artifacts: 'data/*', onlyIfSuccessful: true
|
sh "chmod +x ./IUM_05-split.py"
// The Dockerfile installs the interpreter as python3 (apt install python3),
// so a bare `python` command is not guaranteed to exist in the container.
sh "python3 ./IUM_05-split.py"
// Keep the raw dataset and both splits as build artifacts.
archiveArtifacts artifacts: 'beer_reviews.csv,beer_reviews_train.csv,beer_reviews_test.csv', onlyIfSuccessful: true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
stage("Run") {
|
||||||
|
agent {
|
||||||
|
dockerfile {
|
||||||
|
filename 'Dockerfile'
|
||||||
|
reuseNode true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
steps {
|
||||||
|
sh "chmod +x ./IUM_05-model.py"
sh "chmod +x ./IUM_05-predict.py"
// The Dockerfile installs the interpreter as python3 (apt install python3),
// so a bare `python` command is not guaranteed to exist in the container.
// Training runs first because prediction loads the model file it writes.
sh "python3 ./IUM_05-model.py"
sh "python3 ./IUM_05-predict.py"
// Keep the trained model and its predictions as build artifacts.
archiveArtifacts artifacts: 'beer_review_sentiment_model.h5,beer_review_sentiment_predictions.csv', onlyIfSuccessful: true
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user