"""Train a small Keras text classifier on Amazon consumer review data."""
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow.keras import layers
def normalization(label):
    """Map a boolean-like label onto an integer class.

    Anything that compares equal to ``False`` (including ``numpy.bool_``
    values coming out of the dataframe) becomes 0; every other value
    becomes 1.
    """
    # NOTE: the equality test (not `is False`) is deliberate — it also
    # matches numpy boolean scalars, which are not the `False` singleton.
    is_negative = (label == False)
    return 0 if is_negative else 1
def main():
    """Train and evaluate a review-title sentiment model, then save metrics.

    Reads 'Amazon_Consumer_Reviews.csv', learns to predict whether a
    reviewer recommends the product from the review title alone, and
    writes the test loss/accuracy to 'results.txt'.
    """
    data = pd.read_csv('Amazon_Consumer_Reviews.csv', header=0, sep=',')

    column_names = ['reviews.doRecommend', 'reviews.title']
    data = data[column_names]
    # Titles must be real strings for TextVectorization, and a NaN label
    # would silently map to class 1 via normalization() — drop incomplete rows.
    data = data.dropna()

    # 60% train / 20% test / 20% validation, deterministic split.
    data_train, data_test = train_test_split(data, train_size=0.6, random_state=1)
    data_test, data_val = train_test_split(data_test, test_size=0.5, random_state=1)

    # Keras expects array-like labels; plain Python lists of the raw
    # dataframe values are converted to numpy arrays up front.
    train_labels = np.array([normalization(x) for x in data_train['reviews.doRecommend']])
    test_labels = np.array([normalization(x) for x in data_test['reviews.doRecommend']])
    val_labels = np.array([normalization(x) for x in data_val['reviews.doRecommend']])

    train_examples = np.array(data_train['reviews.title'])
    test_examples = np.array(data_test['reviews.title'])
    val_examples = np.array(data_val['reviews.title'])

    # The examples are raw strings, so the model needs a text front end:
    # the original Input(shape=(12,)) + Dense stack cannot consume strings.
    vocab_size = 10000
    vectorizer = layers.TextVectorization(
        max_tokens=vocab_size,
        output_mode='int',
        output_sequence_length=32)
    vectorizer.adapt(train_examples)

    model = tf.keras.Sequential([
        layers.Input(shape=(1,), dtype=tf.string),
        vectorizer,
        layers.Embedding(vocab_size, 16),
        layers.GlobalAveragePooling1D(),
        # ReLU activations: without them the stacked Dense layers
        # collapse into a single linear map.
        layers.Dense(32, activation='relu'),
        layers.Dense(16, activation='relu'),
        # One sigmoid unit matches BinaryCrossentropy and the 0/1 labels;
        # the original Dense(2, softmax) head mismatched the binary loss.
        layers.Dense(1, activation='sigmoid'),
    ])

    model.summary()

    model.compile(
        loss=tf.losses.BinaryCrossentropy(),
        optimizer=tf.optimizers.Adam(),
        metrics=[tf.keras.metrics.BinaryAccuracy()])

    model.fit(train_examples, train_labels,
              epochs=40,
              batch_size=512,
              validation_data=(val_examples, val_labels),
              verbose=1)

    # results = [loss, binary_accuracy] in compile() order.
    results = model.evaluate(test_examples, test_labels)

    # Context manager guarantees the file is closed even if write() fails.
    with open('results.txt', 'w') as file:
        file.write(f'test loss: {results[0]}\ntest accuracy: {results[1]}')
# Script entry point: run training only when executed directly, not on import.
if __name__ == '__main__':
    main()