IUM_10
This commit is contained in:
parent
b8ecd36d1a
commit
e9f53be954
4
.gitignore
vendored
Normal file
4
.gitignore
vendored
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
/beer_reviews_train.csv
|
||||||
|
/beer_reviews_test.csv
|
||||||
|
/beer_review_sentiment_model.h5
|
||||||
|
/beer_review_sentiment_predictions.csv
|
@ -1,5 +1,6 @@
|
|||||||
import pandas as pd
|
import pandas as pd
|
||||||
import tensorflow as tf
|
import tensorflow as tf
|
||||||
|
import sys
|
||||||
|
|
||||||
train_data = pd.read_csv('./beer_reviews_train.csv')
|
train_data = pd.read_csv('./beer_reviews_train.csv')
|
||||||
X_train = train_data[['review_aroma', 'review_appearance', 'review_palate', 'review_taste']]
|
X_train = train_data[['review_aroma', 'review_appearance', 'review_palate', 'review_taste']]
|
||||||
@ -22,6 +23,6 @@ model.compile(optimizer='adam',
|
|||||||
loss='binary_crossentropy',
|
loss='binary_crossentropy',
|
||||||
metrics=['accuracy'])
|
metrics=['accuracy'])
|
||||||
|
|
||||||
model.fit(X_train_pad, y_train, epochs=40, batch_size=32, validation_split=0.1)
|
model.fit(X_train_pad, y_train, epochs=int(sys.argv[1]), batch_size=int(sys.argv[2]), validation_split=0.1)
|
||||||
|
|
||||||
model.save('beer_review_sentiment_model.h5')
|
model.save('beer_review_sentiment_model.h5')
|
||||||
|
@ -1,18 +1,18 @@
|
|||||||
import pandas as pd
|
import pandas as pd
|
||||||
import numpy as np
|
|
||||||
import tensorflow as tf
|
import tensorflow as tf
|
||||||
|
|
||||||
test_data = pd.read_csv('./beer_reviews_test.csv')
|
test_data = pd.read_csv('./beer_reviews_test.csv')
|
||||||
X_test = test_data[['review_aroma', 'review_appearance', 'review_palate', 'review_taste']]
|
X_test = test_data[['review_aroma', 'review_appearance', 'review_palate', 'review_taste']]
|
||||||
|
y_test = test_data['review_overall']
|
||||||
|
|
||||||
model = tf.keras.models.load_model('beer_review_sentiment_model.h5')
|
model = tf.keras.models.load_model('beer_review_sentiment_model.h5')
|
||||||
|
|
||||||
tokenizer = tf.keras.preprocessing.text.Tokenizer(num_words=10000)
|
predictions = model.predict(X_test)
|
||||||
|
|
||||||
|
print(f'Predictions shape: {predictions.shape}')
|
||||||
|
|
||||||
X_test_seq = tokenizer.texts_to_sequences(X_test)
|
if len(predictions.shape) > 1:
|
||||||
X_test_pad = tf.keras.preprocessing.sequence.pad_sequences(X_test_seq, maxlen=100)
|
predictions = predictions[:, 0]
|
||||||
|
|
||||||
predictions = model.predict(X_test_pad)
|
results = pd.DataFrame({'Predictions': predictions, 'Actual': y_test})
|
||||||
|
results.to_csv('beer_review_sentiment_predictions.csv', index=False)
|
||||||
np.savetxt('beer_review_sentiment_predictions.csv', predictions, delimiter=',', fmt='%.10f')
|
|
@ -1,7 +1,7 @@
|
|||||||
import pandas as pd
|
import pandas as pd
|
||||||
from sklearn.model_selection import train_test_split
|
from sklearn.model_selection import train_test_split
|
||||||
|
|
||||||
data = pd.read_csv('./beer_reviews.csv')
|
data = pd.read_csv('data/beer_reviews.csv')
|
||||||
|
|
||||||
train_data, test_data = train_test_split(data, test_size=0.2, random_state=42)
|
train_data, test_data = train_test_split(data, test_size=0.2, random_state=42)
|
||||||
|
|
||||||
|
46
dvc.lock
Normal file
46
dvc.lock
Normal file
@ -0,0 +1,46 @@
|
|||||||
|
schema: '2.0'
|
||||||
|
stages:
|
||||||
|
split_data:
|
||||||
|
cmd: python IUM_05-split.py
|
||||||
|
deps:
|
||||||
|
- path: data/beer_reviews.csv
|
||||||
|
hash: md5
|
||||||
|
md5: 50f6eec0d0fe78bc0f10e35edd271998
|
||||||
|
size: 201644905
|
||||||
|
outs:
|
||||||
|
- path: beer_reviews_test.csv
|
||||||
|
hash: md5
|
||||||
|
md5: edbd0a7f05c59a0c0e936917f60e9b96
|
||||||
|
size: 40632354
|
||||||
|
- path: beer_reviews_train.csv
|
||||||
|
hash: md5
|
||||||
|
md5: 8c6877a26fef1542369bfae6b39d163c
|
||||||
|
size: 162599343
|
||||||
|
train_model:
|
||||||
|
cmd: python IUM_05-model.py 10 32
|
||||||
|
deps:
|
||||||
|
- path: beer_reviews_train.csv
|
||||||
|
hash: md5
|
||||||
|
md5: 8c6877a26fef1542369bfae6b39d163c
|
||||||
|
size: 162599343
|
||||||
|
outs:
|
||||||
|
- path: beer_review_sentiment_model.h5
|
||||||
|
hash: md5
|
||||||
|
md5: c126bd5d332a905262c66894585450e3
|
||||||
|
size: 1950856
|
||||||
|
predict:
|
||||||
|
cmd: python IUM_05-predict.py
|
||||||
|
deps:
|
||||||
|
- path: beer_review_sentiment_model.h5
|
||||||
|
hash: md5
|
||||||
|
md5: c126bd5d332a905262c66894585450e3
|
||||||
|
size: 1950856
|
||||||
|
- path: beer_reviews_test.csv
|
||||||
|
hash: md5
|
||||||
|
md5: edbd0a7f05c59a0c0e936917f60e9b96
|
||||||
|
size: 40632354
|
||||||
|
outs:
|
||||||
|
- path: beer_review_sentiment_predictions.csv
|
||||||
|
hash: md5
|
||||||
|
md5: 12a66fafb7f4d7d19eb0c4a90cc7d3ad
|
||||||
|
size: 4814242
|
23
dvc.yaml
Normal file
23
dvc.yaml
Normal file
@ -0,0 +1,23 @@
|
|||||||
|
stages:
|
||||||
|
split_data:
|
||||||
|
cmd: python IUM_05-split.py
|
||||||
|
deps:
|
||||||
|
- data/beer_reviews.csv
|
||||||
|
outs:
|
||||||
|
- beer_reviews_train.csv
|
||||||
|
- beer_reviews_test.csv
|
||||||
|
|
||||||
|
train_model:
|
||||||
|
cmd: python IUM_05-model.py 10 32
|
||||||
|
deps:
|
||||||
|
- beer_reviews_train.csv
|
||||||
|
outs:
|
||||||
|
- beer_review_sentiment_model.h5
|
||||||
|
|
||||||
|
predict:
|
||||||
|
cmd: python IUM_05-predict.py
|
||||||
|
deps:
|
||||||
|
- beer_review_sentiment_model.h5
|
||||||
|
- beer_reviews_test.csv
|
||||||
|
outs:
|
||||||
|
- beer_review_sentiment_predictions.csv
|
Loading…
Reference in New Issue
Block a user