34 lines
1.1 KiB
Python
34 lines
1.1 KiB
Python
|
from sklearn.feature_extraction.text import TfidfVectorizer
|
||
|
from sklearn.metrics import classification_report
|
||
|
from sklearn.naive_bayes import MultinomialNB
|
||
|
from sklearn.pipeline import make_pipeline
|
||
|
from stopwords_filter import filter_stopwords
|
||
|
from termcolor import colored
|
||
|
|
||
|
|
||
|
def naive_bayes(df_train, df_test):
    """Train a TF-IDF + multinomial naive Bayes model and report its quality.

    Both inputs are first passed through ``filter_stopwords``. A pipeline of
    ``TfidfVectorizer`` and ``MultinomialNB`` is fitted on the ``'Input'``
    entries of the training data against its ``'Sentiment'`` entries, and then
    used to predict labels for the ``'Input'`` entries of the test data. The
    textual classification report is printed in blue beneath a banner line.

    Args:
        df_train: Training data, indexable by ``'Input'`` and ``'Sentiment'``
            (presumably a pandas DataFrame -- confirm against the caller).
        df_test: Test data with the same two keys.

    Returns:
        The classification report for the test data as produced by
        ``classification_report(..., output_dict=True)``.
    """
    # Stopword filtering (training data first, then test data).
    train_frame = filter_stopwords(df_train)
    test_frame = filter_stopwords(df_test)

    # Split the data into model inputs and target labels.
    train_texts, test_texts = train_frame['Input'], test_frame['Input']
    train_labels, test_labels = train_frame['Sentiment'], test_frame['Sentiment']

    # Build and train the model, then predict on the test set.
    classifier = make_pipeline(TfidfVectorizer(), MultinomialNB())
    predicted = classifier.fit(train_texts, train_labels).predict(test_texts)

    # Model evaluation: one human-readable report, one machine-readable.
    report_text = classification_report(
        test_labels, predicted, zero_division=True
    )
    report_dict = classification_report(
        test_labels, predicted, zero_division=True, output_dict=True
    )

    banner = '---------- MODEL 1: NAIVE BAYES ----------'
    for chunk in (banner, report_text):
        print(colored(chunk, 'blue'))

    return report_dict
|