2024-04-16 18:58:43 +02:00
|
|
|
import pandas as pd
|
|
|
|
import tensorflow as tf
|
2024-05-28 18:36:32 +02:00
|
|
|
import sys
|
2024-04-16 18:58:43 +02:00
|
|
|
|
|
|
|
# Train a small Keras model that predicts `review_overall` from the four
# per-aspect review columns and save it to `beer_review_sentiment_model.h5`.
#
# Usage: <script> EPOCHS BATCH_SIZE

# Parse the command-line hyper-parameters first so that a missing or malformed
# argument fails immediately, before the CSV is read and the model is built.
if len(sys.argv) < 3:
    sys.exit(f'usage: {sys.argv[0]} EPOCHS BATCH_SIZE')
epochs = int(sys.argv[1])
batch_size = int(sys.argv[2])

# Shared between the tokenizer / padding step and the Embedding layer so the
# two cannot drift apart.
NUM_WORDS = 10000
MAX_LEN = 100

train_data = pd.read_csv('./beer_reviews_train.csv')
X_train = train_data[
    ['review_aroma', 'review_appearance', 'review_palate', 'review_taste']
]
y_train = train_data['review_overall']

# The Keras Tokenizer simply iterates over what it is given, and iterating a
# DataFrame yields its column labels, not its rows. Passing X_train directly
# would therefore tokenize the four column names and produce four sequences
# no matter how many reviews exist, which can never be paired with y_train.
# Build one string per row so there is exactly one sequence per target value.
# NOTE(review): if these columns hold numeric scores rather than free text,
# feeding them to Dense layers directly would likely be a better model; that
# changes the saved model's input shape, so confirm with downstream users.
X_train_text = X_train.astype(str).agg(' '.join, axis=1).tolist()

tokenizer = tf.keras.preprocessing.text.Tokenizer(num_words=NUM_WORDS)
tokenizer.fit_on_texts(X_train_text)
X_train_seq = tokenizer.texts_to_sequences(X_train_text)

# Pad / truncate every sequence to the fixed length the Embedding expects.
X_train_pad = tf.keras.preprocessing.sequence.pad_sequences(
    X_train_seq, maxlen=MAX_LEN
)

model = tf.keras.Sequential([
    tf.keras.layers.Embedding(
        input_dim=NUM_WORDS, output_dim=16, input_length=MAX_LEN
    ),
    tf.keras.layers.GlobalAveragePooling1D(),
    tf.keras.layers.Dense(16, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid'),
])

# NOTE(review): a sigmoid output with binary cross-entropy assumes the labels
# lie in [0, 1]. If `review_overall` is a rating on a wider scale, either
# binarize/rescale it or switch to a linear output with a regression loss.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# The last 10% of the samples are held out by Keras for validation.
model.fit(
    X_train_pad,
    y_train,
    epochs=epochs,
    batch_size=batch_size,
    validation_split=0.1,
)

model.save('beer_review_sentiment_model.h5')
|