UMA-projekt/train_naive_bayes.py

34 lines
1.1 KiB
Python
Raw Normal View History

2022-06-19 13:16:05 +02:00
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import classification_report
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import make_pipeline
from stopwords_filter import filter_stopwords
from termcolor import colored
def naive_bayes(df_train, df_test):
    """Fit a TF-IDF + multinomial Naive Bayes pipeline and report its quality.

    Both frames are first passed through ``filter_stopwords``. The ``'Input'``
    column of each result is used as the model input and the ``'Sentiment'``
    column as the target. The pipeline is fitted on the training data and
    evaluated on the test data; the textual classification report is printed
    in blue under a model banner.

    Args:
        df_train: Training data; after filtering it must expose ``'Input'``
            and ``'Sentiment'`` entries.
        df_test: Test data with the same two entries.

    Returns:
        The classification report for the test predictions in its
        ``output_dict=True`` form.
    """
    # Stop-word filtering (delegated to the project helper).
    cleaned_train = filter_stopwords(df_train)
    cleaned_test = filter_stopwords(df_test)

    # Pull the model inputs and the targets out of both sets up front, so a
    # missing column fails before any training work is done.
    texts_train = cleaned_train['Input']
    texts_test = cleaned_test['Input']
    labels_train = cleaned_train['Sentiment']
    labels_test = cleaned_test['Sentiment']

    # Build the pipeline, train it, and predict on the test set.
    classifier = make_pipeline(TfidfVectorizer(), MultinomialNB())
    predictions = classifier.fit(texts_train, labels_train).predict(texts_test)

    # Evaluate: the same report is produced twice, once as printable text and
    # once as a dict for the caller. Both calls share the same options.
    report_options = {'zero_division': True}
    report_text = classification_report(
        labels_test, predictions, **report_options
    )
    report_dict = classification_report(
        labels_test, predictions, output_dict=True, **report_options
    )

    for chunk in ('---------- MODEL 1: NAIVE BAYES ----------', report_text):
        print(colored(chunk, 'blue'))

    return report_dict