predictions

This commit is contained in:
s406917 2021-05-31 15:05:35 +02:00
parent 9cb2fb2612
commit de84a1a2e7
3 changed files with 10923 additions and 0 deletions

5442
dev-0/out.tsv Normal file

File diff suppressed because one or more lines are too long

34
main.py Normal file
View File

@ -0,0 +1,34 @@
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
import pandas as pd
def _read_train(path):
    """Load the tab-separated, header-less training file.

    Malformed rows are skipped with a warning, which is what the former
    ``error_bad_lines=False`` call did.
    """
    try:
        # `on_bad_lines` exists since pandas 1.3; `error_bad_lines` was
        # removed in pandas 2.0, so the modern spelling is tried first.
        return pd.read_csv(path, sep='\t', header=None, on_bad_lines='warn')
    except TypeError:
        # Older pandas rejects the unknown `on_bad_lines` keyword.
        return pd.read_csv(path, sep='\t', header=None, error_bad_lines=False)


def _read_lines(path):
    """Return every line of *path* as one document, without the newline.

    The file is read directly instead of through ``pd.read_csv(sep='\\n')``:
    newer pandas refuses a newline separator, and the CSV parser drops blank
    lines and interprets quote characters, so the number of documents could
    differ from the number of input lines.
    """
    with open(path, encoding='utf-8') as handle:
        return [line.rstrip('\n') for line in handle]


def _write_predictions(model, vectorizer, in_path, out_path):
    """Predict a label for each line of *in_path*; write one per line to *out_path*."""
    documents = _read_lines(in_path)
    if not documents:
        # Nothing to classify; still produce an (empty) output file.
        predictions = []
    else:
        predictions = model.predict(vectorizer.transform(documents))
    # The context manager guarantees the output is flushed and closed.
    with open(out_path, 'w', encoding='utf-8') as out:
        out.writelines(label + '\n' for label in predictions)


train = _read_train('train/train.tsv')
# Column 0 is used as the target label, column 1 as the text to vectorize.
train_labels = train[0].astype(str).tolist()
train_texts = train[1].astype(str).tolist()

count_vec = CountVectorizer()
naive_b = MultinomialNB()
naive_b.fit(count_vec.fit_transform(train_texts), train_labels)

_write_predictions(naive_b, count_vec, 'dev-0/in.tsv', 'dev-0/out.tsv')
_write_predictions(naive_b, count_vec, 'test-A/in.tsv', 'test-A/out.tsv')

5447
test-A/out.tsv Normal file

File diff suppressed because it is too large Load Diff