first commit
This commit is contained in:
commit
883032dac6
5452
dev-0/expected.tsv
Normal file
5452
dev-0/expected.tsv
Normal file
File diff suppressed because it is too large
Load Diff
5452
dev-0/in.tsv
Normal file
5452
dev-0/in.tsv
Normal file
File diff suppressed because it is too large
Load Diff
5452
dev-0/out.tsv
Normal file
5452
dev-0/out.tsv
Normal file
File diff suppressed because it is too large
Load Diff
61
main.py
Normal file
61
main.py
Normal file
@ -0,0 +1,61 @@
|
||||
from sklearn.naive_bayes import MultinomialNB
|
||||
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
|
||||
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
|
||||
from sklearn.linear_model import LogisticRegression
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
from stop_words import get_stop_words
|
||||
from numpy import random
|
||||
|
||||
def _read_tsv(path):
    """Load a headerless, tab-separated file into a DataFrame.

    Malformed rows are skipped with a warning instead of aborting the load.
    ``error_bad_lines`` was deprecated in pandas 1.3 and removed in 2.0, so
    the modern ``on_bad_lines`` keyword is tried first and the legacy keyword
    is used only when the installed pandas does not know the new one.
    """
    try:
        return pd.read_csv(path, sep='\t', on_bad_lines='warn', header=None)
    except TypeError:
        # Older pandas (< 1.3) rejects the unknown ``on_bad_lines`` keyword.
        return pd.read_csv(path, sep='\t', error_bad_lines=False, header=None)


# Polish stop-word list. NOTE(review): it is loaded but not handed to the
# vectorizer below (stop_words=None) -- confirm whether that is intentional.
stop_words = get_stop_words('polish')

v = TfidfVectorizer(stop_words=None)
naive_bayes = MultinomialNB()

ball_train = _read_tsv('train/train.tsv')
ball_dev_expected = _read_tsv('dev-0/expected.tsv')

# Column 0 of the training file is used as the label, column 1 as the text.
y_train = pd.DataFrame(ball_train[0])
x_train = pd.DataFrame(ball_train[1])

# Each row is turned into text via str() of its numpy row array; the same
# conversion is applied to the dev set so both share one representation.
x_np = [str(row) for row in x_train.to_numpy()]

# Fit the TF-IDF vocabulary on the training texts only.
x_train = v.fit_transform(x_np)

# scikit-learn expects a 1-D target; passing the (n, 1) frame directly
# triggers a DataConversionWarning, so flatten it for fitting.
y_train_flat = y_train.to_numpy().ravel()

naive_bayes.fit(x_train, y_train_flat)

ball_dev_in = _read_tsv('dev-0/in.tsv')

X_dev = pd.DataFrame(ball_dev_in)
X_dev_np = [str(row) for row in X_dev.to_numpy()]
# Reuse the vocabulary learned on the training set.
X_dev = v.transform(X_dev_np)

model = LogisticRegression()  # model definition
model.fit(x_train, y_train_flat)  # model fitting

Y_dev_predictedNB = naive_bayes.predict(X_dev)
# Constant baseline: every example is predicted as 1.
Y_dev_predicted_baseline = np.ones_like(Y_dev_predictedNB)
# Random baseline; unseeded, so its scores differ from run to run.
Y_dev_predicted_random = random.choice([0, 1], size=len(Y_dev_predictedNB))
Y_dev_predictedLogReg = model.predict(X_dev)

# Report order within each metric group: baseline, logistic regression,
# random, naive Bayes.
_predictions = (
    Y_dev_predicted_baseline,
    Y_dev_predictedLogReg,
    Y_dev_predicted_random,
    Y_dev_predictedNB,
)
_metrics = (f1_score, accuracy_score, precision_score, recall_score)

for _index, _metric in enumerate(_metrics):
    if _index:
        # Blank line separating consecutive metric groups.
        print()
    for _predicted in _predictions:
        print(_metric(ball_dev_expected, _predicted))
|
98132
train/train.tsv
Normal file
98132
train/train.tsv
Normal file
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user