This commit is contained in:
pietrzakkuba 2021-05-01 23:00:11 +02:00
parent 9cb2fb2612
commit 926700232a
4 changed files with 109076 additions and 0 deletions

40
bayes.py Normal file
View File

@ -0,0 +1,40 @@
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
import pandas as pd
# Load the training data: a headerless, tab-separated file in which
# column 0 is used as the label and column 1 as the text to classify.
# NOTE(review): `error_bad_lines` was deprecated in later pandas releases in
# favour of `on_bad_lines` - confirm against the pandas version in use.
train_data = pd.read_csv(
    'train/train.tsv',
    sep='\t',
    header=None,
    error_bad_lines=False,
)
x_train = train_data.iloc[:, 1]
y_train = train_data.iloc[:, 0]

# TF-IDF vectorisation: fit the vectorizer on the training texts and
# transform them in one step. The fitted `tfidf` is reused for other inputs.
tfidf = TfidfVectorizer()
tfidf_train = tfidf.fit_transform(x_train)

# Naive Bayes classifier - training (fit() returns the fitted estimator).
bayes = MultinomialNB().fit(tfidf_train, y_train)
# Naive Bayes classifier - predictions for the dev set.
# The input file is read with the newline as separator and no header, so the
# whole of each line lands in the single column 0.
# NOTE(review): newer pandas releases appear to reject '\n' as a separator -
# confirm against the pandas version in use.
dev_data = pd.read_csv('dev-0/in.tsv', sep='\n', header=None)
x_dev = dev_data.iloc[:, 0]
# Reuse the vectorizer fitted on the training data (transform only, no refit).
tfidf_dev = tfidf.transform(x_dev)
dev_predictions = bayes.predict(tfidf_dev)
# Write one prediction per line. The context manager guarantees the file is
# flushed and closed, which the original bare open() never did.
with open('dev-0/out.tsv', 'w') as dev_out_file:
    dev_out_file.writelines(
        str(prediction) + '\n' for prediction in dev_predictions
    )
# Naive Bayes classifier - predictions for the test set.
# The input file is read with the newline as separator and no header, so the
# whole of each line lands in the single column 0.
# NOTE(review): newer pandas releases appear to reject '\n' as a separator -
# confirm against the pandas version in use.
test_data = pd.read_csv('test-A/in.tsv', sep='\n', header=None)
# Fix: take the texts from the test frame. The original read them from
# `dev_data`, so the test output contained predictions for the dev set.
x_test = test_data.iloc[:, 0]
# Reuse the vectorizer fitted on the training data (transform only, no refit).
tfidf_test = tfidf.transform(x_test)
test_predictions = bayes.predict(tfidf_test)
# Write one prediction per line. The context manager guarantees the file is
# flushed and closed, which the original bare open() never did.
with open('test-A/out.tsv', 'w') as test_out_file:
    test_out_file.writelines(
        str(prediction) + '\n' for prediction in test_predictions
    )

5452
dev-0/out.tsv Normal file

File diff suppressed because it is too large Load Diff

5452
test-A/out.tsv Normal file

File diff suppressed because it is too large Load Diff

98132
train/train.tsv Normal file

File diff suppressed because it is too large Load Diff