Compare commits
1 Commits
Author | SHA1 | Date | |
---|---|---|---|
|
926700232a |
40
bayes.py
Normal file
40
bayes.py
Normal file
@ -0,0 +1,40 @@
|
||||
from sklearn.feature_extraction.text import TfidfVectorizer
|
||||
from sklearn.naive_bayes import MultinomialNB
|
||||
import pandas as pd
|
||||
|
||||
# Load the training set from a headerless TSV file: the first column is used
# as the label and the second column as the document text.
# NOTE(review): `error_bad_lines` was deprecated in pandas 1.3 and removed in
# pandas 2.0 (successor: `on_bad_lines`). It is kept unchanged here to preserve
# behavior on the pandas version this script targets -- confirm the pinned
# version before upgrading.
train_data = pd.read_csv(
    'train/train.tsv',
    sep='\t',
    header=None,
    error_bad_lines=False,
)
y_train, x_train = train_data.iloc[:, 0], train_data.iloc[:, 1]
|
||||
|
||||
# TF-IDF vectorization: fit the vectorizer on the training texts and turn
# them into the training feature matrix in a single step.
tfidf = TfidfVectorizer()
tfidf_train = tfidf.fit_transform(x_train)

# Naive Bayes classifier - training. fit() returns the fitted estimator, so
# construction and training are chained.
bayes = MultinomialNB().fit(tfidf_train, y_train)
|
||||
|
||||
# Naive Bayes classifier - predictions for the dev set.
# NOTE(review): sep='\n' is used to load each input line as a single-column
# row; newer pandas releases appear to reject a line terminator as separator
# -- confirm against the pandas version in use.
dev_data = pd.read_csv('dev-0/in.tsv', sep='\n', header=None)
x_dev = dev_data.iloc[:, 0]

# Reuse the vectorizer fitted on the training data (transform only, no refit)
# so the dev features share the training vocabulary.
tfidf_dev = tfidf.transform(x_dev)
dev_predictions = bayes.predict(tfidf_dev)

# Write one prediction per line. The context manager guarantees the output
# file is flushed and closed even if writing fails part-way.
with open('dev-0/out.tsv', 'w') as dev_out_file:
    for prediction in dev_predictions:
        dev_out_file.write(str(prediction) + '\n')
|
||||
|
||||
# Naive Bayes classifier - predictions for the test set.
# NOTE(review): sep='\n' is used to load each input line as a single-column
# row; newer pandas releases appear to reject a line terminator as separator
# -- confirm against the pandas version in use.
test_data = pd.read_csv('test-A/in.tsv', sep='\n', header=None)

# Take the texts from test_data (not dev_data) so that the predictions written
# below actually correspond to test-A/in.tsv.
x_test = test_data.iloc[:, 0]

# Reuse the vectorizer fitted on the training data (transform only, no refit)
# so the test features share the training vocabulary.
tfidf_test = tfidf.transform(x_test)
test_predictions = bayes.predict(tfidf_test)

# Write one prediction per line. The context manager guarantees the output
# file is flushed and closed even if writing fails part-way.
with open('test-A/out.tsv', 'w') as test_out_file:
    for prediction in test_predictions:
        test_out_file.write(str(prediction) + '\n')
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
5452
dev-0/out.tsv
Normal file
5452
dev-0/out.tsv
Normal file
File diff suppressed because it is too large
Load Diff
5452
test-A/out.tsv
Normal file
5452
test-A/out.tsv
Normal file
File diff suppressed because it is too large
Load Diff
98132
train/train.tsv
Normal file
98132
train/train.tsv
Normal file
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user