# ium_434780/main2.py
import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow.keras import layers
import numpy as np
import tensorflow as tf
def normalization(label):
    """Map a raw ``reviews.doRecommend`` value to a binary class label.

    Returns 0 when the value equals ``False`` and 1 for everything else
    (``True``, and NaN for missing entries — NaN compares unequal to
    ``False``, so it maps to 1, matching the original behavior).

    Note: equality (not ``is``) is used deliberately so that
    ``numpy.bool_`` values from the DataFrame are handled correctly.
    """
    return int(label != False)  # noqa: E712 — exact complement of the original `== False` test
def main():
    """Train a small dense classifier on Amazon review recommend flags.

    Reads ``Amazon_Consumer_Reviews.csv``, keeps the recommend flag and the
    review title, splits the data 60/20/20 into train/test/validation,
    trains a 2-way softmax classifier for 40 epochs, and writes the test
    loss and accuracy to ``results.txt``.
    """
    data = pd.read_csv('Amazon_Consumer_Reviews.csv', header=0, sep=',')
    column_names = ['reviews.doRecommend', 'reviews.title']
    data = data[column_names]

    # 60% train; the remaining 40% is split evenly into test and validation.
    data_train, data_test = train_test_split(data, train_size=0.6, random_state=1)
    data_test, data_val = train_test_split(data_test, test_size=0.5, random_state=1)

    # Labels as integer arrays (0 = not recommended, 1 = recommended/missing).
    train_labels = np.array([normalization(x) for x in data_train['reviews.doRecommend']])
    test_labels = np.array([normalization(x) for x in data_test['reviews.doRecommend']])
    val_labels = np.array([normalization(x) for x in data_val['reviews.doRecommend']])
    train_examples = np.array(data_train['reviews.title'])
    test_examples = np.array(data_test['reviews.title'])
    val_examples = np.array(data_val['reviews.title'])

    # NOTE(review): train_examples holds raw title strings, but the model
    # expects numeric vectors of length 12 — a vectorization step (e.g.
    # TextVectorization / embeddings) is missing upstream. TODO confirm
    # how the 12-feature input is meant to be produced.
    model = tf.keras.Sequential([
        layers.Input(shape=(12,)),
        layers.Dense(32),
        layers.Dense(16),
        layers.Dense(2, activation='softmax')
    ])
    model.summary()

    # Integer labels with a 2-unit softmax output require the sparse
    # categorical loss/metric (BinaryCrossentropy expects a single
    # probability per sample and would mismatch shapes here).
    model.compile(
        loss=tf.losses.SparseCategoricalCrossentropy(),
        optimizer=tf.optimizers.Adam(),
        metrics=[tf.keras.metrics.SparseCategoricalAccuracy()])

    history = model.fit(train_examples, train_labels,
                        epochs=40,
                        batch_size=512,
                        validation_data=(val_examples, val_labels),
                        verbose=1)

    results = model.evaluate(test_examples, test_labels)
    # Context manager guarantees the file is closed even if write() raises.
    with open('results.txt', 'w') as file:
        file.write('test loss: ' + str(results[0]) + '\n' + 'test accuracy: ' + str(results[1]))
# Run the training pipeline only when executed as a script, not on import.
if __name__ == '__main__':
    main()