# UMA-projekt/train_bert.py
import os

import numpy as np
import tensorflow as tf
from keras import Input
from keras.layers import Dense
from keras.losses import CategoricalCrossentropy
from keras.metrics import CategoricalAccuracy
from keras.models import load_model
from keras.optimizers import Adam
from keras.utils import to_categorical
from sklearn.metrics import classification_report
from termcolor import colored
from transformers import AutoTokenizer, TFBertModel


def bert(df_train, df_test):
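    """Fine-tune bert-base-cased for 6-class sentiment classification.

    Expects DataFrames with an ``Input`` text column and an integer
    ``Sentiment`` label column. Returns the sklearn classification
    report for the test set as a dict.
    """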
    tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)
    print("Number of GPUs available: ", len(tf.config.list_physical_devices('GPU')))

    # Initialize the BERT tokenizer and model
    tokenizer = AutoTokenizer.from_pretrained('bert-base-cased')
    bert = TFBertModel.from_pretrained('bert-base-cased')

    # Tokenize the input data
    x_train = tokenizer(
        text=df_train.Input.tolist(),
        add_special_tokens=True,
        max_length=68,
        truncation=True,
        padding='max_length',  # pad every sequence to max_length so shapes match the model input
        return_tensors='tf',
        return_token_type_ids=False,
        return_attention_mask=True,
        verbose=True)
    x_test = tokenizer(
        text=df_test.Input.tolist(),
        add_special_tokens=True,
        max_length=68,  # must match the training max_length and the model's input width
        truncation=True,
        padding='max_length',
        return_tensors='tf',
        return_token_type_ids=False,
        return_attention_mask=True,
        verbose=True)
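    # The tokenizer returns a BatchEncoding holding tf tensors under the
    # 'input_ids' and 'attention_mask' keys; those are fed to the model below.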

    # Convert the target labels to one-hot vectors
    y_test = df_test.Sentiment
    y_train_cat = to_categorical(df_train.Sentiment)
    y_test_cat = to_categorical(df_test.Sentiment)

    # Build the model: define the architecture, compile, train and save it to disk;
    # reuse a previously saved model if one exists
    if os.path.isdir('bert_model'):
        model = load_model('bert_model')
        model.summary()
    else:
        max_len = 68
        input_ids = Input(shape=(max_len,), dtype=tf.int32, name="input_ids")
        input_mask = Input(shape=(max_len,), dtype=tf.int32, name="attention_mask")
        # [0] selects the last hidden state, shape (batch, seq_len, hidden)
        embeddings = bert.bert(input_ids, attention_mask=input_mask)[0]
        out = tf.keras.layers.GlobalMaxPool1D()(embeddings)
        out = Dense(128, activation='relu')(out)
        out = tf.keras.layers.Dropout(0.1)(out)
        out = Dense(32, activation='relu')(out)
        # Note: softmax is the conventional pairing with categorical cross-entropy;
        # sigmoid still works here because argmax is taken at prediction time
        y = Dense(6, activation='sigmoid')(out)
        model = tf.keras.Model(inputs=[input_ids, input_mask], outputs=y)
        model.layers[2].trainable = True  # layers[2] is the BERT backbone: fine-tune it
        model.summary()
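        # A low learning rate with gradient clipping is the usual regime for
        # fine-tuning BERT; larger rates tend to destroy the pretrained weights.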
        optimizer = Adam(
            learning_rate=5e-05,
            epsilon=1e-08,
            decay=0.01,
            clipnorm=1.0)
        loss = CategoricalCrossentropy()
        # Plain categorical accuracy, merely reported under the name 'balanced_accuracy'
        metric = CategoricalAccuracy('balanced_accuracy')
        model.compile(
            optimizer=optimizer,
            loss=loss,
            metrics=metric)

    train_history = model.fit(
        x={'input_ids': x_train['input_ids'], 'attention_mask': x_train['attention_mask']},
        y=y_train_cat,
        validation_data=(
            {'input_ids': x_test['input_ids'], 'attention_mask': x_test['attention_mask']}, y_test_cat
        ),
        epochs=1,
        batch_size=16
    )
    if not os.path.isdir('bert_model'):
        model.save('bert_model')  # saved in TensorFlow SavedModel format (a directory)

    # Predict on the test set
    predicted_raw = model.predict({'input_ids': x_test['input_ids'], 'attention_mask': x_test['attention_mask']})
    y_pred = np.argmax(predicted_raw, axis=1)

    # Evaluate the results
    results_text = classification_report(y_test, y_pred)
    results_dict = classification_report(y_test, y_pred, output_dict=True)
    print(colored('---------- MODEL 3: BERT ----------', 'blue'))
    print(colored(results_text, 'blue'))
    return results_dict
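

# Minimal usage sketch (assumption: this module is normally driven from elsewhere
# in the project). The CSV path and the train/test split below are hypothetical,
# but the 'Input'/'Sentiment' column names match what bert() expects.
if __name__ == '__main__':
    import pandas as pd
    from sklearn.model_selection import train_test_split

    df = pd.read_csv('data.csv')  # hypothetical dataset with Input and Sentiment columns
    df_train, df_test = train_test_split(df, test_size=0.2, random_state=42)
    results = bert(df_train, df_test)
    print('Test accuracy:', results['accuracy'])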