"""Train and evaluate a beer-review sentiment model as a Sacred experiment.

The script reads ``beer_reviews_train.csv`` and ``beer_reviews_test.csv``,
turns the four sub-rating columns of each row into one text string, trains a
small Keras embedding classifier on binarized ``review_overall`` labels and
reports accuracy, micro-averaged precision/recall/F1 and RMSE on the test set.
"""
from math import sqrt

import pandas as pd
from sacred import Experiment
from sacred.observers import MongoObserver, FileStorageObserver
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, mean_squared_error
from tensorflow.keras.layers import Embedding, GlobalAveragePooling1D, Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer

FEATURE_COLUMNS = ['review_aroma', 'review_appearance', 'review_palate', 'review_taste']
TARGET_COLUMN = 'review_overall'
VOCAB_SIZE = 10000
MAX_SEQUENCE_LENGTH = 100
# Ratings at or above this value are treated as the positive class.
POSITIVE_RATING_THRESHOLD = 3
# Single source of truth for the saved model, so the file that is written is
# the same file that gets attached to the run as an artifact.
MODEL_PATH = 'beer_review_sentiment_model.keras'

ex = Experiment('464979')
# NOTE(review): the MongoDB credentials are hard-coded in the URL below.
# Consider loading them from the environment or a secrets store instead of
# keeping them in source control.
ex.observers.append(MongoObserver(url='mongodb://admin:IUM_2021@tzietkiewicz.vm.wmi.amu.edu.pl:27017'))
ex.observers.append(FileStorageObserver('sacred_runs'))


# Sacred configuration: training hyper-parameters injected into the run.
@ex.config
def my_config():
    epochs = 10
    batch_size = 32


def _rows_to_text(frame):
    """Join the feature columns of every row into one space-separated string."""
    return frame[FEATURE_COLUMNS].astype(str).agg(' '.join, axis=1).tolist()


def _binarize(ratings):
    """Map ratings to 1 when >= POSITIVE_RATING_THRESHOLD, otherwise 0."""
    return (ratings >= POSITIVE_RATING_THRESHOLD).astype(int)


def _vectorize(tokenizer, texts):
    """Convert texts to integer sequences padded to MAX_SEQUENCE_LENGTH."""
    sequences = tokenizer.texts_to_sequences(texts)
    return pad_sequences(sequences, maxlen=MAX_SEQUENCE_LENGTH)


def _build_model():
    """Create and compile the embedding + pooling binary classifier."""
    model = Sequential([
        Embedding(input_dim=VOCAB_SIZE, output_dim=16, input_length=MAX_SEQUENCE_LENGTH),
        GlobalAveragePooling1D(),
        Dense(16, activation='relu'),
        Dense(1, activation='sigmoid'),
    ])
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return model


# Must remain the last definition in the file: ``automain`` starts the
# experiment immediately when the script is executed.
@ex.automain
def run_experiment(epochs, batch_size, _run):
    """Train the model, evaluate it on the test set and return the accuracy.

    Args:
        epochs: Number of training epochs passed to ``model.fit``.
        batch_size: Mini-batch size passed to ``model.fit``.
        _run: Sacred run object used to attach the model artifact and the
            input CSV files as resources.

    Returns:
        Accuracy of the rounded predictions against the binarized
        ``review_overall`` labels of the test set.
    """
    train_data = pd.read_csv('beer_reviews_train.csv')
    # One text per row; passing the DataFrame itself to the tokenizer would
    # iterate over the column names instead of the rows.
    train_texts = _rows_to_text(train_data)
    # The sigmoid / binary cross-entropy head needs 0/1 targets, so the
    # training labels use the same threshold as the evaluation below.
    y_train = _binarize(train_data[TARGET_COLUMN]).to_numpy()

    tokenizer = Tokenizer(num_words=VOCAB_SIZE)
    tokenizer.fit_on_texts(train_texts)
    X_train_pad = _vectorize(tokenizer, train_texts)

    model = _build_model()
    model.fit(X_train_pad, y_train, epochs=epochs, batch_size=batch_size, validation_split=0.1)
    model.save(MODEL_PATH)
    _run.add_artifact(MODEL_PATH)

    test_data = pd.read_csv('beer_reviews_test.csv')
    y_test = test_data[TARGET_COLUMN]
    # Reuse the tokenizer fitted on the training data so token indices match
    # the ones the embedding layer was trained with.
    X_test_pad = _vectorize(tokenizer, _rows_to_text(test_data))

    predictions = model.predict(X_test_pad)
    if predictions.ndim > 1:
        # Keep only the single output column of the model.
        predictions = predictions[:, 0]

    results = pd.DataFrame({'Predictions': predictions, 'Actual': y_test})
    results.to_csv('beer_review_sentiment_predictions.csv', index=False)

    y_pred = results['Predictions']
    y_test_binary = _binarize(results['Actual'])
    y_pred_labels = y_pred.round().astype(int)

    accuracy = accuracy_score(y_test_binary, y_pred_labels)
    precision, recall, f1, _ = precision_recall_fscore_support(
        y_test_binary, y_pred_labels, average='micro'
    )
    # Predictions are probabilities in [0, 1]; compare them with the 0/1
    # labels rather than with the raw ratings, which are on another scale.
    rmse = sqrt(mean_squared_error(y_test_binary, y_pred))

    print(f'Accuracy: {accuracy}')
    print(f'Micro-avg Precision: {precision}')
    print(f'Micro-avg Recall: {recall}')
    print(f'F1 Score: {f1}')
    print(f'RMSE: {rmse}')

    _run.add_resource('./beer_reviews_train.csv')
    _run.add_resource('./beer_reviews_test.csv')

    return accuracy