from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import classification_report
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import make_pipeline
from termcolor import colored

from stopwords_filter import filter_stopwords


def naive_bayes(df_train, df_test):
    """Train a TF-IDF + multinomial Naive Bayes model and report its scores.

    Both inputs are first passed through ``filter_stopwords``. The ``'Input'``
    column of each filtered result supplies the texts and the ``'Sentiment'``
    column supplies the labels. The model is fitted on the training data and
    evaluated on the test data; a classification report is printed in blue.

    Args:
        df_train: Training data; after stopword filtering it must expose
            ``'Input'`` and ``'Sentiment'`` columns.
        df_test: Test data with the same two columns.

    Returns:
        The result of ``classification_report(..., output_dict=True)`` for
        the test-set predictions.
    """
    # Stopword filtering (training set first, then test set).
    train_frame = filter_stopwords(df_train)
    test_frame = filter_stopwords(df_test)

    # Separate the texts from the labels for each split.
    train_texts, test_texts = train_frame['Input'], test_frame['Input']
    train_labels, test_labels = train_frame['Sentiment'], test_frame['Sentiment']

    # Build the pipeline, fit it on the training split and predict the test
    # split in one chain (``fit`` returns the fitted pipeline itself).
    predicted_labels = (
        make_pipeline(TfidfVectorizer(), MultinomialNB())
        .fit(train_texts, train_labels)
        .predict(test_texts)
    )

    # Evaluation: the same report is produced twice, once as printable text
    # and once as a dict for the caller.
    # NOTE(review): zero_division=True is presumably treated like 1 by
    # scikit-learn -- confirm against the installed version.
    report_options = {'zero_division': True}
    report_text = classification_report(
        test_labels, predicted_labels, **report_options
    )
    report_dict = classification_report(
        test_labels, predicted_labels, output_dict=True, **report_options
    )

    for chunk in ('---------- MODEL 1: NAIVE BAYES ----------', report_text):
        print(colored(chunk, 'blue'))

    return report_dict