# 33 lines
# 1.0 KiB

from sklearn.naive_bayes import MultinomialNB
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import LabelEncoder
from sklearn.pipeline import Pipeline
# Train a TF-IDF + multinomial naive Bayes text classifier on train/ and
# write its predictions for the train, dev-0 and test-A inputs.


def _read_lines(path):
    """Return all lines of the text file at *path*, line endings included."""
    with open(path, encoding='utf-8') as f:
        return f.readlines()


def _write_predictions(classifier, samples, path):
    """Write ``classifier.predict(samples)`` to *path*, one prediction per line."""
    with open(path, 'w', encoding='utf-8') as writer:
        writer.writelines(f'{result}\n' for result in classifier.predict(samples))


data_train_X = _read_lines('train/in.tsv')
data_train_Y = _read_lines('train/expected.tsv')
data_dev_X = _read_lines('dev-0/in.tsv')
data_test_X = _read_lines('test-A/in.tsv')

# Each distinct raw line of expected.tsv becomes one integer class id.
# NOTE(review): the encoder instance is not kept, so the predictions written
# below are these encoded ids rather than the original label strings --
# confirm that this is the expected output format.
data_train_Y = LabelEncoder().fit_transform(data_train_Y)

model = Pipeline(steps=[('tfidf', TfidfVectorizer()), ('bayes', MultinomialNB())])
# Pipeline.fit returns the fitted pipeline itself, so ``clf`` and ``model``
# refer to the same object.
clf = model.fit(data_train_X, data_train_Y)

_write_predictions(clf, data_train_X, 'train/out.tsv')
_write_predictions(clf, data_dev_X, 'dev-0/out.tsv')
_write_predictions(clf, data_test_X, 'test-A/out.tsv')