35 lines
944 B
Python
35 lines
944 B
Python
|
|
||
|
from sklearn.feature_extraction.text import CountVectorizer
|
||
|
from sklearn.naive_bayes import MultinomialNB
|
||
|
import pandas as pd
|
||
|
|
||
|
# Train a bag-of-words Multinomial Naive Bayes text classifier on
# train/train.tsv, then write one predicted label per input line for the
# dev-0 and test-A sets.


def _read_training_table(path):
    """Load the headerless, tab-separated training file as a DataFrame.

    Malformed rows are skipped (with a warning) instead of aborting the load.
    """
    try:
        # `on_bad_lines` superseded `error_bad_lines` in pandas 1.3, and the
        # old flag was removed in pandas 2.0. 'warn' mirrors the old
        # `error_bad_lines=False` behaviour (skip the row, report it).
        return pd.read_csv(path, sep='\t', header=None, on_bad_lines='warn')
    except TypeError:
        # pandas < 1.3 rejects the unknown keyword; fall back to the legacy flag.
        return pd.read_csv(path, sep='\t', header=None, error_bad_lines=False)


def _read_documents(path):
    """Return every line of *path* as one document, trailing newline removed.

    The file is read directly rather than via ``pd.read_csv(sep='\\n')``:
    recent pandas raises ValueError for a newline separator, and older
    versions drop blank lines and interpret quote characters, which would
    make the number of predictions differ from the number of input lines.
    """
    with open(path, encoding='utf-8') as handle:
        return [line.rstrip('\n') for line in handle]


def _write_predictions(path, labels):
    """Write each predicted label on its own line and close the file."""
    with open(path, 'w', encoding='utf-8') as handle:
        handle.writelines(label + '\n' for label in labels)


train = _read_training_table('train/train.tsv')
# NOTE: the X_/Y_ names are the reverse of the usual convention. Column 0 is
# used as the classification target and column 1 as the text to vectorize.
X_train = train[0].astype(str).tolist()
Y_train = train[1].astype(str).tolist()

naive_b = MultinomialNB()
count_vec = CountVectorizer()

# Learn the vocabulary from the training text and turn it into count features.
Y_train = count_vec.fit_transform(Y_train)
naive_b.fit(Y_train, X_train)

X_dev = _read_documents('dev-0/in.tsv')
# Reuse the fitted vocabulary; transform() never refits on evaluation data.
Y_dev = count_vec.transform(X_dev)
dev_predict = naive_b.predict(Y_dev)
_write_predictions('dev-0/out.tsv', dev_predict)

X_test = _read_documents('test-A/in.tsv')
Y_test = count_vec.transform(X_test)
test_predict = naive_b.predict(Y_test)
_write_predictions('test-A/out.tsv', test_predict)
|
||
|
|
||
|
|