task(ium_04) TensorFlow improvement

This commit is contained in:
Aleksandra Sadurska 2021-05-06 22:50:47 +02:00
parent f349101a1b
commit b5b9a795fc

152
main2.py
View File

@ -1,59 +1,107 @@
import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow.keras import layers
import numpy as np
import re
import string
from silence_tensorflow import silence_tensorflow
silence_tensorflow()
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras import losses
from tensorflow.keras.layers.experimental.preprocessing import TextVectorization
def normalization(label):
    """Map a recommendation flag to an integer class label.

    Returns 0 when ``label`` compares equal to ``False`` and 1 for any
    other value.
    """
    # Equality, not truthiness, decides the class: None or '' map to 1.
    if label == False:  # noqa: E712
        return 0
    return 1
def vectorize_text(text, label):
    """Turn a ``(text, label)`` pair into ``(vectorized text, label)``.

    A trailing axis is appended to ``text`` before it is passed through the
    module-level ``vectorize_layer``; ``label`` is returned unchanged.
    """
    text_with_extra_axis = tf.expand_dims(text, -1)
    vectorized = vectorize_layer(text_with_extra_axis)
    return vectorized, label
def _examples_and_labels(frame):
    """Return ``(titles, labels)`` as NumPy arrays for one data split.

    Labels come from ``normalization`` applied to ``reviews.doRecommend``.
    NOTE(review): if that column contains missing values (NaN), they compare
    unequal to ``False`` and are therefore labelled 1 -- confirm this is wanted.
    """
    examples = np.array(frame['reviews.title'])
    labels = np.array(
        [normalization(flag) for flag in frame['reviews.doRecommend']])
    return examples, labels


def main():
    """Train and evaluate a recommendation classifier on review titles.

    Reads ``Amazon_Consumer_Reviews.csv``, keeps the ``reviews.doRecommend``
    and ``reviews.title`` columns, splits the rows 60/20/20 into train, test
    and validation sets, fits a small dense network and writes the test loss
    and accuracy to ``results.txt``.
    """
    data = pd.read_csv('Amazon_Consumer_Reviews.csv', header=0, sep=',')
    column_names = ['reviews.doRecommend', 'reviews.title']
    data = data[column_names]

    # 60% for training; the remaining 40% is halved into test and validation.
    data_train, data_test = train_test_split(data, train_size=0.6, random_state=1)
    data_test, data_val = train_test_split(data_test, test_size=0.5, random_state=1)

    train_examples, train_labels = _examples_and_labels(data_train)
    test_examples, test_labels = _examples_and_labels(data_test)
    val_examples, val_labels = _examples_and_labels(data_val)

    # NOTE(review): the `reviews.title` column is passed to the model as-is,
    # while the Input layer declares 12 features per example -- confirm that
    # the titles are converted to numeric features before training.
    model = tf.keras.Sequential([
        layers.Input(shape=(12,)),
        layers.Dense(32),
        layers.Dense(16),
        # A single sigmoid unit matches BinaryCrossentropy / BinaryAccuracy,
        # which score one probability per example against the 0/1 label
        # (the previous two-unit softmax head did not).
        layers.Dense(1, activation='sigmoid'),
    ])
    model.summary()
    model.compile(
        loss=tf.losses.BinaryCrossentropy(),
        optimizer=tf.optimizers.Adam(),
        metrics=[tf.keras.metrics.BinaryAccuracy()])

    model.fit(train_examples, train_labels,
              epochs=40,
              batch_size=512,
              validation_data=(val_examples, val_labels),
              verbose=1)

    # evaluate() returns [loss, binary accuracy] for the compiled metrics.
    results = model.evaluate(test_examples, test_labels)

    # Context manager guarantees the file is closed even if the write fails.
    with open('results.txt', 'w') as results_file:
        results_file.write(
            'test loss: ' + str(results[0]) + '\n'
            + 'test accuracy: ' + str(results[1]))
def custom_standardization(input_data):
    """Lowercase the text, replace ``<br />`` with a space, strip punctuation.

    Every character listed in ``string.punctuation`` is removed after the
    ``<br />`` replacement.
    """
    punctuation_class = '[%s]' % re.escape(string.punctuation)
    text = tf.strings.lower(input_data)
    text = tf.strings.regex_replace(text, '<br />', ' ')
    text = tf.strings.regex_replace(text, punctuation_class, '')
    return text
# Script entry point: call main() only when this file is executed directly.
if __name__ == '__main__':
    main()
batch_size = 32
seed = 42

# Training and validation subsets are both read from 'aclImdb/train': 20% of
# it is held out for validation, and the same seed is passed to both calls.
raw_train_ds, raw_val_ds = [
    tf.keras.preprocessing.text_dataset_from_directory(
        'aclImdb/train',
        batch_size=batch_size,
        validation_split=0.2,
        subset=subset_name,
        seed=seed)
    for subset_name in ('training', 'validation')
]

# The test set is read from its own directory, without any split.
raw_test_ds = tf.keras.preprocessing.text_dataset_from_directory(
    'aclImdb/test', batch_size=batch_size)
# Vocabulary size cap and fixed output length used by the vectorizer.
max_features = 10000
sequence_length = 250
AUTOTUNE = tf.data.AUTOTUNE

vectorize_layer = TextVectorization(
    standardize=custom_standardization,
    max_tokens=max_features,
    output_mode='int',
    output_sequence_length=sequence_length)

# Adapt the vectorizer on the training text only (labels are dropped first).
train_text = raw_train_ds.map(lambda x, y: x)
vectorize_layer.adapt(train_text)

# Vectorize each split, then cache it and prefetch batches.
train_ds = raw_train_ds.map(vectorize_text).cache().prefetch(buffer_size=AUTOTUNE)
val_ds = raw_val_ds.map(vectorize_text).cache().prefetch(buffer_size=AUTOTUNE)
test_ds = raw_test_ds.map(vectorize_text).cache().prefetch(buffer_size=AUTOTUNE)
embedding_dim = 16

# Embedding -> dropout -> average pooling -> dropout -> one output unit.
model = tf.keras.Sequential()
model.add(layers.Embedding(max_features + 1, embedding_dim))
model.add(layers.Dropout(0.2))
model.add(layers.GlobalAveragePooling1D())
model.add(layers.Dropout(0.2))
model.add(layers.Dense(1))
model.summary()

# The final Dense layer has no activation, so the loss is configured with
# from_logits=True and the accuracy metric thresholds the raw output at 0.0.
model.compile(
    loss=losses.BinaryCrossentropy(from_logits=True),
    optimizer='adam',
    metrics=tf.metrics.BinaryAccuracy(threshold=0.0))

epochs = 10
history = model.fit(train_ds, validation_data=val_ds, epochs=epochs)

# Report loss and accuracy on the vectorized test split.
loss, accuracy = model.evaluate(test_ds)
print("Loss: ", loss)
print("Accuracy: ", accuracy)
# Wrap the trained model so it can be evaluated on raw text: the vectorizer
# runs first, and a sigmoid is applied to the model's output.
export_model = tf.keras.Sequential([
    vectorize_layer,
    model,
    layers.Activation('sigmoid')
])
# The sigmoid is part of the exported model, hence from_logits=False here.
export_model.compile(
    loss=losses.BinaryCrossentropy(from_logits=False), optimizer="adam", metrics=['accuracy']
)

# Evaluate directly on the raw (un-vectorized) test dataset.
loss, accuracy = export_model.evaluate(raw_test_ds)
print("Loss: ", loss)
print("Accuracy: ", accuracy)

# loss and accuracy are numeric, so convert them explicitly: concatenating
# them to a str directly raises TypeError. The context manager guarantees the
# file is closed even if the write fails.
with open('results.txt', 'w') as results_file:
    results_file.write(
        'test loss: ' + str(loss) + '\n' + 'test accuracy: ' + str(accuracy))