bayes
This commit is contained in:
parent
9cb2fb2612
commit
926700232a
40
bayes.py
Normal file
40
bayes.py
Normal file
@ -0,0 +1,40 @@
|
|||||||
|
from sklearn.feature_extraction.text import TfidfVectorizer
|
||||||
|
from sklearn.naive_bayes import MultinomialNB
|
||||||
|
import pandas as pd
|
||||||
|
|
||||||
|
# Load the training data: a headerless TSV in which column 0 is used as the
# label and column 1 as the text to classify.
train_data = pd.read_csv(
    'train/train.tsv',
    sep='\t',
    header=None,
    # NOTE(review): error_bad_lines is deprecated in newer pandas releases in
    # favour of on_bad_lines -- confirm against the pinned pandas version.
    error_bad_lines=False,
)
y_train, x_train = train_data.iloc[:, 0], train_data.iloc[:, 1]

# TF-IDF vectorization: the vocabulary is fitted on the training texts only
# and the same fitted vectorizer is reused for the other data sets.
tfidf = TfidfVectorizer()
tfidf_train = tfidf.fit_transform(x_train)

# Naive Bayes classifier - training (fit() returns the fitted estimator).
bayes = MultinomialNB().fit(tfidf_train, y_train)
|
||||||
|
|
||||||
|
# Naive Bayes classifier - predictions for the dev set.
# The input is read with '\n' as the separator and no header; column 0 is
# taken as the text of each example.
dev_data = pd.read_csv('dev-0/in.tsv', sep='\n', header=None)
x_dev = dev_data.iloc[:, 0]

# Reuse the vectorizer fitted on the training data (transform, not fit).
tfidf_dev = tfidf.transform(x_dev)
dev_predictions = bayes.predict(tfidf_dev)

# Write one prediction per line; the context manager guarantees the file is
# flushed and closed even if writing fails part-way through.
with open('dev-0/out.tsv', 'w') as dev_out_file:
    dev_out_file.writelines(
        str(prediction) + '\n' for prediction in dev_predictions
    )
|
||||||
|
|
||||||
|
# Naive Bayes classifier - predictions for the test set.
# The input is read with '\n' as the separator and no header; column 0 is
# taken as the text of each example.
test_data = pd.read_csv('test-A/in.tsv', sep='\n', header=None)
# Take the texts from test_data (the original read them from dev_data, so the
# test output silently contained the dev-set predictions).
x_test = test_data.iloc[:, 0]

# Reuse the vectorizer fitted on the training data (transform, not fit).
tfidf_test = tfidf.transform(x_test)
test_predictions = bayes.predict(tfidf_test)

# Write one prediction per line; the context manager guarantees the file is
# flushed and closed even if writing fails part-way through.
with open('test-A/out.tsv', 'w') as test_out_file:
    test_out_file.writelines(
        str(prediction) + '\n' for prediction in test_predictions
    )
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
5452
dev-0/out.tsv
Normal file
5452
dev-0/out.tsv
Normal file
File diff suppressed because it is too large
Load Diff
5452
test-A/out.tsv
Normal file
5452
test-A/out.tsv
Normal file
File diff suppressed because it is too large
Load Diff
98132
train/train.tsv
Normal file
98132
train/train.tsv
Normal file
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user