IUM_05
This commit is contained in:
parent
50c62c859c
commit
e067972a05
@ -10,12 +10,13 @@ ENV TZ=Etc/UTC
|
||||
# Install Python 3, pip and unzip. `apt-get` is used instead of `apt` because
# `apt` has no stable scripting interface; the package index is removed in the
# same layer so it does not end up in the image.
RUN apt-get update && \
    apt-get install -y python3 python3-pip unzip && \
    rm -rf /var/lib/apt/lists/*
|
||||
|
||||
RUN pip install kaggle pandas seaborn scikit-learn
|
||||
# Python packages used by the pipeline scripts; --no-cache-dir keeps pip's
# download cache out of the image layer.
RUN pip install --no-cache-dir kaggle pandas seaborn scikit-learn tensorflow
|
||||
|
||||
# Everything below runs relative to /app.
WORKDIR /app

# Copy all pipeline scripts into /app with a single instruction.
COPY dataset_stats.py \
     IUM_02.py \
     IUM_05-model.py \
     IUM_05-predict.py \
     IUM_05-split.py \
     /app/

# Default command when the container is started without arguments.
CMD ["python3", "IUM_02.py"]
|
||||
|
||||
|
27
IUM_05-model.py
Normal file
27
IUM_05-model.py
Normal file
@ -0,0 +1,27 @@
|
||||
import pandas as pd
|
||||
import tensorflow as tf
|
||||
|
||||
# Train a small dense network that predicts `review_overall` from the four
# numeric sub-scores and save it to disk.
#
# The feature columns are fed to the network directly. Running them through a
# Keras text Tokenizer does not work: iterating a DataFrame yields its column
# labels, so the tokenizer only sees the four column names and produces four
# "sequences" instead of one row per review, which cannot be fitted against
# the per-review target.
FEATURE_COLUMNS = ['review_aroma', 'review_appearance', 'review_palate', 'review_taste']
TARGET_COLUMN = 'review_overall'

train_data = pd.read_csv('./beer_reviews_train.csv')
# Rows with a missing feature or target cannot be used for training.
train_data = train_data.dropna(subset=FEATURE_COLUMNS + [TARGET_COLUMN])

X_train = train_data[FEATURE_COLUMNS].to_numpy(dtype='float32')
y_train = train_data[TARGET_COLUMN].to_numpy(dtype='float32')

model = tf.keras.Sequential([
    tf.keras.Input(shape=(len(FEATURE_COLUMNS),)),
    tf.keras.layers.Dense(16, activation='relu'),
    # Single linear unit: the target is treated as a continuous score.
    # NOTE(review): assumes `review_overall` is a numeric rating rather than a
    # 0/1 label - confirm; a sigmoid/binary-crossentropy head is only valid
    # for targets inside [0, 1].
    tf.keras.layers.Dense(1)
])

model.compile(optimizer='adam',
              loss='mean_squared_error',
              metrics=['mean_absolute_error'])

# 10% of the training rows are held out by Keras for validation.
model.fit(X_train, y_train, epochs=40, batch_size=32, validation_split=0.1)

model.save('beer_review_sentiment_model.h5')
|
18
IUM_05-predict.py
Normal file
18
IUM_05-predict.py
Normal file
@ -0,0 +1,18 @@
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
import tensorflow as tf
|
||||
|
||||
# Score the test split with the saved model and write one prediction per row.
#
# The numeric feature columns are passed to the model as-is. An unfitted text
# Tokenizer must not be used here: it has an empty vocabulary, and applied to
# a DataFrame it iterates over the column labels, so the model would always
# receive four all-zero rows regardless of the test set.
FEATURE_COLUMNS = ['review_aroma', 'review_appearance', 'review_palate', 'review_taste']

test_data = pd.read_csv('./beer_reviews_test.csv')
X_test = test_data[FEATURE_COLUMNS].to_numpy(dtype='float32')

# NOTE(review): the loaded model must accept an (n_rows, 4) numeric input in
# the column order above - confirm against the training script.
model = tf.keras.models.load_model('beer_review_sentiment_model.h5')

predictions = model.predict(X_test)

# One line per test row, written in the same order as the input CSV.
np.savetxt('beer_review_sentiment_predictions.csv', predictions, delimiter=',', fmt='%.10f')
|
9
IUM_05-split.py
Normal file
9
IUM_05-split.py
Normal file
@ -0,0 +1,9 @@
|
||||
import pandas as pd
|
||||
from sklearn.model_selection import train_test_split
|
||||
|
||||
# Split the full review file into a training and a test CSV.
reviews = pd.read_csv('./beer_reviews.csv')

# test_size=0.2 holds out 20% of the rows; random_state pins the shuffle so
# repeated runs produce the same split.
train_part, test_part = train_test_split(reviews, random_state=42, test_size=0.2)

# Write both parts without the pandas index column.
for frame, target_path in (
    (train_part, 'beer_reviews_train.csv'),
    (test_part, 'beer_reviews_test.csv'),
):
    frame.to_csv(target_path, index=False)
|
41
Jenkinsfile
vendored
41
Jenkinsfile
vendored
@ -1,5 +1,5 @@
|
||||
pipeline {
|
||||
agent { dockerfile true }
|
||||
agent any
|
||||
|
||||
parameters {
|
||||
string(name: 'CUTOFF', defaultValue: '10000', description: 'Liczba wierszy do obcięcia ze zbioru danych')
|
||||
@ -13,22 +13,39 @@ pipeline {
|
||||
git url: "https://git.wmi.amu.edu.pl/s464979/ium_464979"
|
||||
}
|
||||
}
|
||||
|
||||
stage('Download, Process, and Split Dataset') {
|
||||
stage('Download dataset') {
|
||||
steps {
|
||||
withEnv([
|
||||
"KAGGLE_USERNAME=${env.KAGGLE_USERNAME}",
|
||||
"KAGGLE_KEY=${env.KAGGLE_KEY}"
|
||||
]) {
|
||||
sh 'export KAGGLE_USERNAME=${env.KAGGLE_USERNAME}"'
|
||||
sh 'export KAGGLE_KEY=${env.KAGGLE_KEY}"'
|
||||
sh "python3 IUM_02.py"
|
||||
withEnv(["KAGGLE_USERNAME=${env.KAGGLE_USERNAME}", "KAGGLE_KEY=${env.KAGGLE_KEY}"]) {
|
||||
sh "kaggle datasets download -d thedevastator/1-5-million-beer-reviews-from-beer-advocate --unzip"
|
||||
}
|
||||
}
|
||||
}
|
||||
stage('Archive Results') {
|
||||
stage('Process and Split Dataset') {
|
||||
agent {
|
||||
dockerfile {
|
||||
filename 'Dockerfile'
|
||||
reuseNode true
|
||||
}
|
||||
}
|
||||
steps {
|
||||
archiveArtifacts artifacts: 'data/*', onlyIfSuccessful: true
|
||||
sh "chmod +x ./IUM_05-split.py"
|
||||
// Call python3 explicitly: the Dockerfile installs the python3 package and its CMD also uses python3.
sh "python3 ./IUM_05-split.py"
|
||||
archiveArtifacts artifacts: 'beer_reviews.csv,beer_reviews_train.csv,beer_reviews_test.csv', onlyIfSuccessful: true
|
||||
}
|
||||
}
|
||||
stage("Run") {
|
||||
agent {
|
||||
dockerfile {
|
||||
filename 'Dockerfile'
|
||||
reuseNode true
|
||||
}
|
||||
}
|
||||
steps {
|
||||
sh "chmod +x ./IUM_05-model.py"
|
||||
sh "chmod +x ./IUM_05-predict.py"
|
||||
// Call python3 explicitly: the Dockerfile installs the python3 package and its CMD also uses python3.
sh "python3 ./IUM_05-model.py"
|
||||
// Call python3 explicitly: the Dockerfile installs the python3 package and its CMD also uses python3.
sh "python3 ./IUM_05-predict.py"
|
||||
archiveArtifacts artifacts: 'beer_review_sentiment_model.h5,beer_review_sentiment_predictions.csv', onlyIfSuccessful: true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user