import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow.keras import layers


def normalization(label):
    # Map the boolean reviews.doRecommend flag to an integer class label (0/1).
    return 0 if label == False else 1


def main():
    # Keep only the label column and the review title used as the text input.
    data = pd.read_csv('Amazon_Consumer_Reviews.csv', header=0, sep=',')
    column_names = ['reviews.doRecommend', 'reviews.title']
    data = data[column_names].dropna()

    # Split into 60% train, 20% test, 20% validation.
    data_train, data_test = train_test_split(data, train_size=0.6, random_state=1)
    data_test, data_val = train_test_split(data_test, test_size=0.5, random_state=1)

    train_examples = np.array(data_train['reviews.title'])
    train_labels = np.array([normalization(x) for x in data_train['reviews.doRecommend']])
    test_examples = np.array(data_test['reviews.title'])
    test_labels = np.array([normalization(x) for x in data_test['reviews.doRecommend']])
    val_examples = np.array(data_val['reviews.title'])
    val_labels = np.array([normalization(x) for x in data_val['reviews.doRecommend']])

    # print("Training entries: {}, test entries: {}".format(len(data_train), len(data_test)))
    # print(train_examples)
    # print(train_labels)

    # The titles are raw strings, so they have to be turned into fixed-length
    # numeric vectors before the Dense layers. A multi-hot TextVectorization
    # layer is one straightforward way to do that inside the model; the
    # vocabulary size of 10,000 is an arbitrary default.
    vectorize_layer = layers.TextVectorization(max_tokens=10000, output_mode='multi_hot')
    vectorize_layer.adapt(train_examples)

    model = tf.keras.Sequential([
        # One raw string per example.
        layers.Input(shape=(), dtype=tf.string),
        vectorize_layer,
        # Two fully connected hidden layers with ReLU activations.
        layers.Dense(32, activation='relu'),
        layers.Dense(16, activation='relu'),
        # A single sigmoid unit matches the binary cross-entropy loss below.
        layers.Dense(1, activation='sigmoid')
    ])
    model.summary()

    model.compile(
        loss=tf.keras.losses.BinaryCrossentropy(),
        optimizer=tf.keras.optimizers.Adam(),
        metrics=[tf.keras.metrics.BinaryAccuracy()])

    history = model.fit(train_examples, train_labels,
                        epochs=40,
                        batch_size=512,
                        validation_data=(val_examples, val_labels),
                        verbose=1)

    # evaluate() returns [loss, binary_accuracy] for the compiled metrics.
    results = model.evaluate(test_examples, test_labels)
    with open('results.txt', 'w') as f:
        f.write('test loss: ' + str(results[0]) + '\n' +
                'test accuracy: ' + str(results[1]))


if __name__ == '__main__':
    main()