This commit is contained in:
AWieczarek 2024-05-28 18:36:32 +02:00
parent b8ecd36d1a
commit e9f53be954
6 changed files with 83 additions and 9 deletions

4
.gitignore vendored Normal file
View File

@ -0,0 +1,4 @@
/beer_reviews_train.csv
/beer_reviews_test.csv
/beer_review_sentiment_model.h5
/beer_review_sentiment_predictions.csv

View File

@ -1,5 +1,6 @@
import pandas as pd
import tensorflow as tf
import sys
# Read the training split from the current working directory.
train_data = pd.read_csv('./beer_reviews_train.csv')
# Select the four review_* columns into X_train.
X_train = train_data[['review_aroma', 'review_appearance', 'review_palate', 'review_taste']]
@ -22,6 +23,6 @@ model.compile(optimizer='adam',
loss='binary_crossentropy',
metrics=['accuracy'])
# NOTE(review): this call hard-codes epochs=40 and batch_size=32 and is
# immediately followed by a second fit() that reads both values from the
# command line. It looks like the pre-change side of the diff -- confirm that
# only one of the two fit() calls is meant to remain.
model.fit(X_train_pad, y_train, epochs=40, batch_size=32, validation_split=0.1)
# Epochs come from sys.argv[1] and batch size from sys.argv[2]. Both go
# through int(), so a missing argument raises IndexError and a non-integer
# argument raises ValueError.
model.fit(X_train_pad, y_train, epochs=int(sys.argv[1]), batch_size=int(sys.argv[2]), validation_split=0.1)
# Write the model to the working directory under a fixed file name.
model.save('beer_review_sentiment_model.h5')

View File

@ -1,18 +1,18 @@
import pandas as pd
import numpy as np
import tensorflow as tf
# Load the test split, select the four review_* columns as inputs and keep
# 'review_overall' so it can be written out next to the predictions.
test_data = pd.read_csv('./beer_reviews_test.csv')
X_test = test_data[['review_aroma', 'review_appearance', 'review_palate', 'review_taste']]
y_test = test_data['review_overall']
# Restore the model from the .h5 file in the working directory.
model = tf.keras.models.load_model('beer_review_sentiment_model.h5')
# NOTE(review): this Tokenizer is created here and never fitted in the lines
# shown, and X_test is a DataFrame of review_* columns rather than a list of
# strings. The tokenizer / texts_to_sequences / pad_sequences lines look like
# the pre-change side of the diff -- confirm they are meant to be removed.
tokenizer = tf.keras.preprocessing.text.Tokenizer(num_words=10000)
# Predict directly on the selected feature columns and report the array shape.
predictions = model.predict(X_test)
print(f'Predictions shape: {predictions.shape}')
X_test_seq = tokenizer.texts_to_sequences(X_test)
X_test_pad = tf.keras.preprocessing.sequence.pad_sequences(X_test_seq, maxlen=100)
# When the prediction array has more than one axis, keep only column 0 so it
# can be used as a single DataFrame column below.
# NOTE(review): the body of this `if` has lost its indentation in this view.
if len(predictions.shape) > 1:
predictions = predictions[:, 0]
# NOTE(review): second predict() call, this time on the padded sequences; it
# overwrites the result computed above. Likely a pre-change line -- confirm.
predictions = model.predict(X_test_pad)
# NOTE(review): savetxt and the to_csv call below both write to
# 'beer_review_sentiment_predictions.csv'; the later to_csv replaces this
# output. savetxt appears to be the pre-change line -- confirm.
np.savetxt('beer_review_sentiment_predictions.csv', predictions, delimiter=',', fmt='%.10f')
# Pair each prediction with its 'review_overall' value and write both columns
# to CSV without the index column.
results = pd.DataFrame({'Predictions': predictions, 'Actual': y_test})
results.to_csv('beer_review_sentiment_predictions.csv', index=False)

View File

@ -1,7 +1,7 @@
import pandas as pd
from sklearn.model_selection import train_test_split
# NOTE(review): the CSV is read twice, first from './beer_reviews.csv' and
# then from 'data/beer_reviews.csv'; the second assignment replaces the first.
# The first read looks like the pre-change side of the diff -- confirm it is
# meant to be removed.
data = pd.read_csv('./beer_reviews.csv')
data = pd.read_csv('data/beer_reviews.csv')
# Hold out 20% of the rows as the test split; random_state=42 fixes the
# shuffle so repeated runs produce the same split.
train_data, test_data = train_test_split(data, test_size=0.2, random_state=42)

46
dvc.lock Normal file
View File

@ -0,0 +1,46 @@
schema: '2.0'
stages:
split_data:
cmd: python IUM_05-split.py
deps:
- path: data/beer_reviews.csv
hash: md5
md5: 50f6eec0d0fe78bc0f10e35edd271998
size: 201644905
outs:
- path: beer_reviews_test.csv
hash: md5
md5: edbd0a7f05c59a0c0e936917f60e9b96
size: 40632354
- path: beer_reviews_train.csv
hash: md5
md5: 8c6877a26fef1542369bfae6b39d163c
size: 162599343
train_model:
cmd: python IUM_05-model.py 10 32
deps:
- path: beer_reviews_train.csv
hash: md5
md5: 8c6877a26fef1542369bfae6b39d163c
size: 162599343
outs:
- path: beer_review_sentiment_model.h5
hash: md5
md5: c126bd5d332a905262c66894585450e3
size: 1950856
predict:
cmd: python IUM_05-predict.py
deps:
- path: beer_review_sentiment_model.h5
hash: md5
md5: c126bd5d332a905262c66894585450e3
size: 1950856
- path: beer_reviews_test.csv
hash: md5
md5: edbd0a7f05c59a0c0e936917f60e9b96
size: 40632354
outs:
- path: beer_review_sentiment_predictions.csv
hash: md5
md5: 12a66fafb7f4d7d19eb0c4a90cc7d3ad
size: 4814242

23
dvc.yaml Normal file
View File

@ -0,0 +1,23 @@
# DVC pipeline with three stages: split the raw reviews, train the model,
# then predict on the test split.
# Each stage lists its own script under `deps` so that editing the script
# invalidates the stage and triggers a re-run.
stages:
  split_data:
    cmd: python IUM_05-split.py
    deps:
      - IUM_05-split.py
      - data/beer_reviews.csv
    outs:
      - beer_reviews_train.csv
      - beer_reviews_test.csv
  train_model:
    # The two positional arguments are presumably epochs and batch size, in
    # that order -- confirm against IUM_05-model.py.
    cmd: python IUM_05-model.py 10 32
    deps:
      - IUM_05-model.py
      - beer_reviews_train.csv
    outs:
      - beer_review_sentiment_model.h5
  predict:
    cmd: python IUM_05-predict.py
    deps:
      - IUM_05-predict.py
      - beer_review_sentiment_model.h5
      - beer_reviews_test.csv
    outs:
      - beer_review_sentiment_predictions.csv