IUM_07
This commit is contained in:
parent
e9f53be954
commit
88ab8d9d4d
@ -10,7 +10,7 @@ ENV TZ=Etc/UTC
|
||||
RUN apt update && \
|
||||
apt install -y python3 python3-pip unzip
|
||||
|
||||
RUN pip install kaggle pandas seaborn scikit-learn tensorflow
|
||||
RUN pip install kaggle pandas seaborn scikit-learn tensorflow sacred pymongo --break-system-packages
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
|
14
Jenkinsfile
vendored
14
Jenkinsfile
vendored
@ -48,5 +48,19 @@ pipeline {
|
||||
archiveArtifacts artifacts: 'beer_review_sentiment_model.h5,beer_review_sentiment_predictions.csv', onlyIfSuccessful: true
|
||||
}
|
||||
}
|
||||
|
||||
// Runs the Sacred-tracked training script from the workspace root.
stage('Sacred') {
    steps {
        // Both commands run in one shell step; the script is made
        // executable first and then started with python3.
        sh '''
            chmod +x sacred/sacred_training_model.py
            python3 sacred/sacred_training_model.py
        '''
    }
}
|
||||
|
||||
// Archives every file with an extension found under sacred_runs/,
// but only when the build succeeded.
stage('Archive Artifacts from Experiments') {
    steps {
        archiveArtifacts(
            artifacts: 'sacred_runs/**/*.*',
            onlyIfSuccessful: true
        )
    }
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
812346
sacred/beer_reviews.csv
Normal file
812346
sacred/beer_reviews.csv
Normal file
File diff suppressed because it is too large
Load Diff
162470
sacred/beer_reviews_test.csv
Normal file
162470
sacred/beer_reviews_test.csv
Normal file
File diff suppressed because it is too large
Load Diff
649877
sacred/beer_reviews_train.csv
Normal file
649877
sacred/beer_reviews_train.csv
Normal file
File diff suppressed because it is too large
Load Diff
84
sacred/sacred_training_model.py
Normal file
84
sacred/sacred_training_model.py
Normal file
@ -0,0 +1,84 @@
|
||||
import os
from math import sqrt

import pandas as pd
from sacred import Experiment
from sacred.observers import MongoObserver, FileStorageObserver
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, mean_squared_error
from tensorflow.keras.layers import Embedding, GlobalAveragePooling1D, Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer
|
||||
|
||||
# Sacred experiment object; the config and main function below attach to it.
ex = Experiment('464979')

# NOTE(review): the default URL embeds a username and password in source
# control. Set SACRED_MONGO_URL to override it; the default is kept only so
# existing runs keep working and should be removed once the credential is
# rotated.
MONGO_URL = os.environ.get(
    'SACRED_MONGO_URL',
    'mongodb://admin:IUM_2021@tzietkiewicz.vm.wmi.amu.edu.pl:27017',
)

# Record every run both in MongoDB and in the local sacred_runs directory.
ex.observers.append(MongoObserver(url=MONGO_URL))
ex.observers.append(FileStorageObserver('sacred_runs'))
|
||||
|
||||
@ex.config
def my_config():
    """Default hyperparameters injected into the experiment's main function."""
    epochs = 10  # value passed to model.fit(epochs=...)
    batch_size = 32  # value passed to model.fit(batch_size=...)
|
||||
|
||||
def _reviews_to_text(features):
    """Join each row's columns into one space-separated string per row."""
    return features.astype(str).agg(' '.join, axis=1)


@ex.automain
def run_experiment(epochs, batch_size, _run):
    """Train and evaluate the beer-review sentiment model as a Sacred run.

    Reads ``beer_reviews_train.csv`` and ``beer_reviews_test.csv`` from the
    current working directory, trains a small embedding classifier, saves the
    model and the predictions, prints the metrics, and registers the model as
    a run artifact and the CSV files as run resources.

    A review counts as positive when ``review_overall >= 3``; the same
    threshold is used for the training target and for evaluation.

    Args:
        epochs: Number of training epochs (injected from the Sacred config).
        batch_size: Training batch size (injected from the Sacred config).
        _run: The Sacred run object, used to attach artifacts and resources.

    Returns:
        Accuracy of the rounded predictions on the binarized test labels.
    """
    feature_columns = ['review_aroma', 'review_appearance', 'review_palate', 'review_taste']
    vocab_size = 10000
    max_len = 100
    model_path = 'beer_review_sentiment_model.keras'

    train_data = pd.read_csv('beer_reviews_train.csv')
    # Iterating a DataFrame yields its column names, so the rows must be
    # turned into one text per sample before they reach the tokenizer.
    X_train_text = _reviews_to_text(train_data[feature_columns])
    # The network ends in a sigmoid trained with binary cross-entropy, so the
    # target has to be 0/1 rather than the raw rating.
    y_train = (train_data['review_overall'] >= 3).astype(int)

    tokenizer = Tokenizer(num_words=vocab_size)
    tokenizer.fit_on_texts(X_train_text)
    X_train_seq = tokenizer.texts_to_sequences(X_train_text)
    X_train_pad = pad_sequences(X_train_seq, maxlen=max_len)

    model = Sequential([
        Embedding(input_dim=vocab_size, output_dim=16, input_length=max_len),
        GlobalAveragePooling1D(),
        Dense(16, activation='relu'),
        Dense(1, activation='sigmoid'),
    ])

    model.compile(optimizer='adam',
                  loss='binary_crossentropy',
                  metrics=['accuracy'])

    model.fit(X_train_pad, y_train, epochs=epochs, batch_size=batch_size, validation_split=0.1)

    model.save(model_path)
    # Attach the file that was actually written above.
    _run.add_artifact(model_path)

    test_data = pd.read_csv('beer_reviews_test.csv')
    y_test = test_data['review_overall']

    # Reuse the tokenizer fitted on the training data so test tokens map to
    # the same indices the embedding layer was trained on.
    X_test_text = _reviews_to_text(test_data[feature_columns])
    X_test_seq = tokenizer.texts_to_sequences(X_test_text)
    X_test_pad = pad_sequences(X_test_seq, maxlen=max_len)

    predictions = model.predict(X_test_pad)

    # Reduce a 2-D prediction array to its first column.
    if len(predictions.shape) > 1:
        predictions = predictions[:, 0]

    results = pd.DataFrame({'Predictions': predictions, 'Actual': y_test})
    results.to_csv('beer_review_sentiment_predictions.csv', index=False)

    y_pred = results['Predictions']
    y_test_binary = (results['Actual'] >= 3).astype(int)
    y_pred_label = y_pred.round().astype(int)

    accuracy = accuracy_score(y_test_binary, y_pred_label)
    precision, recall, f1, _ = precision_recall_fscore_support(
        y_test_binary, y_pred_label, average='micro')
    # Compare probabilities with the 0/1 target so both sides of the error
    # are on the same scale.
    rmse = sqrt(mean_squared_error(y_test_binary, y_pred))

    print(f'Accuracy: {accuracy}')
    print(f'Micro-avg Precision: {precision}')
    print(f'Micro-avg Recall: {recall}')
    print(f'F1 Score: {f1}')
    print(f'RMSE: {rmse}')

    _run.add_resource('./beer_reviews_train.csv')
    _run.add_resource('./beer_reviews_test.csv')

    return accuracy
|
Loading…
Reference in New Issue
Block a user