import os

import numpy as np
import pandas as pd
import tensorflow as tf
from keras import Sequential
from keras.layers import Dropout, Embedding, Bidirectional, LSTM, Dense
from keras.models import load_model  # public API path (was keras.saving.save)
from keras.utils import pad_sequences
from keras_preprocessing.text import Tokenizer
from sklearn.metrics import classification_report
from termcolor import colored

from stopwords_filter import filter_stopwords


def lstm(df_train, df_test):
    """Train (or load a cached) bidirectional-LSTM sentiment classifier and evaluate it.

    Parameters
    ----------
    df_train, df_test : pandas.DataFrame
        Must contain an 'Input' column (texts) and a 'Sentiment' column
        with integer class labels.
        # assumes exactly 6 sentiment classes -- see Dense(6) below; TODO confirm

    Returns
    -------
    dict
        sklearn ``classification_report`` of the test-set predictions
        (``output_dict=True`` form).

    Side effects
    ------------
    On first run, trains a model and saves it to ``./lstm_model``;
    subsequent runs reload that directory instead of retraining.
    """
    print("Number of GPUs available: ", len(tf.config.list_physical_devices('GPU')))

    # Stop-word filtering
    df_train = filter_stopwords(df_train)
    df_test = filter_stopwords(df_test)

    # Split into inputs and labels
    x_train = df_train['Input']
    x_test = df_test['Input']
    y_train = df_train['Sentiment']
    y_test = df_test['Sentiment']

    # Model hyperparameters
    vocab_size = 10000
    embedding_dim = 128
    epochs = 5

    # Pad/truncate every sequence to the mean text length over train+test.
    # Series.append was removed in pandas 2.0 -- use pd.concat, and build
    # the combined series only once instead of twice.
    all_texts = pd.concat([x_train, x_test], ignore_index=True)
    max_length = int(sum(len(text) for text in all_texts) / len(all_texts))

    # Word embeddings: fit the tokenizer on the training texts only, then
    # convert both splits to padded integer sequences.
    # NOTE(review): oov_token='' maps unknown words to an empty-string token;
    # '<OOV>' is the conventional choice -- confirm this is intentional.
    tokenizer = Tokenizer(num_words=vocab_size, oov_token='')
    tokenizer.fit_on_texts(x_train)
    x_train_padded = pad_sequences(
        tokenizer.texts_to_sequences(x_train),
        maxlen=max_length, padding='post', truncating='post')
    x_test_padded = pad_sequences(
        tokenizer.texts_to_sequences(x_test),
        maxlen=max_length, padding='post', truncating='post')

    # Build, compile, train and cache the model on first run; reuse the
    # cached model afterwards.
    # NOTE(review): the tokenizer is re-fit on every run but is NOT saved
    # with the model, so a reloaded model is only consistent if the
    # training data (and hence the vocabulary) is unchanged -- consider
    # persisting the tokenizer alongside the model.
    if os.path.isdir('lstm_model'):
        model = load_model('lstm_model')
        model.summary()
    else:
        model = Sequential()
        model.add(Embedding(vocab_size, embedding_dim))
        model.add(Dropout(0.5))
        model.add(Bidirectional(LSTM(embedding_dim)))
        model.add(Dense(6, activation='softmax'))  # 6 output classes
        model.summary()
        model.compile(
            loss='sparse_categorical_crossentropy',
            optimizer='adam',
            metrics=['accuracy'],
        )
        model.fit(x_train_padded, y_train, epochs=epochs)
        model.save('lstm_model')

    # Predict on the test set; arg-max over the softmax output gives the
    # predicted class per sample (vectorized instead of a Python loop).
    y_pred = model.predict(x_test_padded)
    y_pred_cat = np.argmax(y_pred, axis=1)

    # Evaluation.  zero_division=1 (the original passed True, which sklearn
    # coerces to 1) reports 1.0 instead of warning when a label has no
    # predicted samples.
    results_text = classification_report(y_test, y_pred_cat, zero_division=1)
    results_dict = classification_report(y_test, y_pred_cat, zero_division=1,
                                         output_dict=True)

    print(colored('---------- MODEL 2: LSTM ----------', 'blue'))
    print(colored(results_text, 'blue'))
    return results_dict