task(ium_04) Tensorflow improvement
This commit is contained in:
parent f349101a1b
commit b5b9a795fc
main2.py
@@ -1,59 +1,107 @@
import re
import string

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

# silence_tensorflow() must run before anything from tensorflow is imported,
# otherwise the startup log messages it suppresses have already been emitted.
from silence_tensorflow import silence_tensorflow
silence_tensorflow()

import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras import losses
# (This import path moved to tensorflow.keras.layers in later TF releases.)
from tensorflow.keras.layers.experimental.preprocessing import TextVectorization


def normalization(label):
    # Map the boolean 'reviews.doRecommend' flag to an integer class label.
    return 1 if label else 0


def vectorize_text(text, label):
    # Uses the module-level `vectorize_layer` defined in the IMDB pipeline
    # below; the extra dimension gives the layer a batch of one string.
    text = tf.expand_dims(text, -1)
    return vectorize_layer(text), label


def main():
    data = pd.read_csv('Amazon_Consumer_Reviews.csv', header=0, sep=',')
    column_names = ['reviews.doRecommend', 'reviews.title']
    data = data[column_names]
    # Drop rows with a missing title or recommendation so the label mapping
    # and text vectorization below do not fail on NaN values.
    data = data.dropna(subset=column_names)

    # 60/20/20 train/test/validation split.
    data_train, data_test = train_test_split(data, train_size=0.6, random_state=1)
    data_test, data_val = train_test_split(data_test, test_size=0.5, random_state=1)

    train_labels = np.array([normalization(x) for x in data_train['reviews.doRecommend']])
    train_examples = np.array(data_train['reviews.title'])
    test_examples = np.array(data_test['reviews.title'])
    test_labels = np.array([normalization(x) for x in data_test['reviews.doRecommend']])
    val_labels = np.array([normalization(x) for x in data_val['reviews.doRecommend']])
    val_examples = np.array(data_val['reviews.title'])

    # The dense stack below expects numeric input of shape (12,), but the
    # examples are raw strings, so turn each title into a fixed-length
    # sequence of token ids first.
    title_vectorizer = TextVectorization(
        max_tokens=10000, output_mode='int', output_sequence_length=12)
    title_vectorizer.adapt(train_examples)
    train_examples = title_vectorizer(train_examples)
    val_examples = title_vectorizer(val_examples)
    test_examples = title_vectorizer(test_examples)

    model = tf.keras.Sequential([
        layers.Input(shape=(12,)),
        layers.Dense(32),
        layers.Dense(16),
        layers.Dense(2, activation='softmax')
    ])

    model.summary()

    # The output is a two-way softmax and the labels are integer class ids,
    # so the matching loss is sparse categorical crossentropy (binary
    # crossentropy would require a single sigmoid output).
    model.compile(
        loss=tf.losses.SparseCategoricalCrossentropy(),
        optimizer=tf.optimizers.Adam(),
        metrics=[tf.keras.metrics.SparseCategoricalAccuracy()])

    history = model.fit(train_examples, train_labels,
                        epochs=40,
                        batch_size=512,
                        validation_data=(val_examples, val_labels),
                        verbose=1)

    results = model.evaluate(test_examples, test_labels)

    with open('results.txt', 'w') as file:
        file.write('test loss: ' + str(results[0]) + '\n'
                   + 'test accuracy: ' + str(results[1]))


def custom_standardization(input_data):
    # Lowercase the reviews, strip the <br /> HTML line breaks that appear
    # in the IMDB data, and drop punctuation.
    lowercase = tf.strings.lower(input_data)
    stripped_html = tf.strings.regex_replace(lowercase, '<br />', ' ')
    return tf.strings.regex_replace(
        stripped_html, '[%s]' % re.escape(string.punctuation), '')
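

# The pipeline below expects the IMDB review dataset in an aclImdb/ directory
# next to this script. A sketch of a one-off download, assuming the standard
# Stanford archive (the train/unsup folder has to be removed, or
# text_dataset_from_directory would pick it up as a third class):
#
#   import shutil
#   url = 'https://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz'
#   tf.keras.utils.get_file('aclImdb_v1', url, untar=True,
#                           cache_dir='.', cache_subdir='')
#   shutil.rmtree('aclImdb/train/unsup')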


if __name__ == '__main__':
    main()

    # --- IMDB sentiment model ---
    # Kept under the __main__ guard so importing this module does not
    # trigger training.
    batch_size = 32
    seed = 42

    raw_train_ds = tf.keras.preprocessing.text_dataset_from_directory(
        'aclImdb/train',
        batch_size=batch_size,
        validation_split=0.2,
        subset='training',
        seed=seed)

    raw_val_ds = tf.keras.preprocessing.text_dataset_from_directory(
        'aclImdb/train',
        batch_size=batch_size,
        validation_split=0.2,
        subset='validation',
        seed=seed)

    raw_test_ds = tf.keras.preprocessing.text_dataset_from_directory(
        'aclImdb/test',
        batch_size=batch_size)

    max_features = 10000
    sequence_length = 250

    # `vectorize_layer` is assigned at module scope here, which is what the
    # global lookup inside vectorize_text() relies on.
    vectorize_layer = TextVectorization(
        standardize=custom_standardization,
        max_tokens=max_features,
        output_mode='int',
        output_sequence_length=sequence_length)

    # Build the vocabulary from the training text only (labels stripped).
    train_text = raw_train_ds.map(lambda x, y: x)
    vectorize_layer.adapt(train_text)

    train_ds = raw_train_ds.map(vectorize_text)
    val_ds = raw_val_ds.map(vectorize_text)
    test_ds = raw_test_ds.map(vectorize_text)

    # Cache and prefetch so input processing overlaps with training.
    AUTOTUNE = tf.data.AUTOTUNE
    train_ds = train_ds.cache().prefetch(buffer_size=AUTOTUNE)
    val_ds = val_ds.cache().prefetch(buffer_size=AUTOTUNE)
    test_ds = test_ds.cache().prefetch(buffer_size=AUTOTUNE)

    embedding_dim = 16

    model = tf.keras.Sequential([
        layers.Embedding(max_features + 1, embedding_dim),
        layers.Dropout(0.2),
        layers.GlobalAveragePooling1D(),
        layers.Dropout(0.2),
        layers.Dense(1)])

    model.summary()

    # The final Dense layer emits logits, hence from_logits=True and an
    # accuracy threshold of 0.0 (the decision boundary in logit space).
    model.compile(loss=losses.BinaryCrossentropy(from_logits=True),
                  optimizer='adam',
                  metrics=[tf.metrics.BinaryAccuracy(threshold=0.0)])

    epochs = 10
    history = model.fit(
        train_ds,
        validation_data=val_ds,
        epochs=epochs)

    loss, accuracy = model.evaluate(test_ds)
    print("Loss: ", loss)
    print("Accuracy: ", accuracy)

    # Bundle vectorization and a sigmoid with the trained model so the
    # exported model maps raw strings straight to probabilities.
    export_model = tf.keras.Sequential([
        vectorize_layer,
        model,
        layers.Activation('sigmoid')
    ])

    export_model.compile(
        loss=losses.BinaryCrossentropy(from_logits=False), optimizer="adam",
        metrics=['accuracy']
    )

    loss, accuracy = export_model.evaluate(raw_test_ds)
    print("Loss: ", loss)
    print("Accuracy: ", accuracy)

    # loss and accuracy are floats, so convert them to strings before
    # concatenation. Note this overwrites the results.txt written by main().
    with open('results.txt', 'w') as file:
        file.write('test loss: ' + str(loss) + '\n'
                   + 'test accuracy: ' + str(accuracy))
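
    # Quick sanity check: the exported model consumes raw strings directly,
    # since the bundled TextVectorization layer tokenizes internally.
    # A minimal sketch with hypothetical example sentences:
    examples = tf.constant([
        'The movie was great!',
        'The movie was terrible...'
    ])
    print(export_model.predict(examples))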