solution
This commit is contained in:
parent
9cb2fb2612
commit
463cbd81cd
5452
dev-0/out.tsv
Normal file
5452
dev-0/out.tsv
Normal file
File diff suppressed because it is too large
Load Diff
39
skrypt.py
Executable file
39
skrypt.py
Executable file
@ -0,0 +1,39 @@
|
||||
from sklearn.feature_extraction.text import TfidfVectorizer
|
||||
from sklearn.naive_bayes import MultinomialNB
|
||||
import string
|
||||
import csv
|
||||
import re
|
||||
|
||||
# Classifier and text vectoriser shared by the training and prediction steps.
MNB = MultinomialNB()
vectorizer = TfidfVectorizer()


def _read_training_rows(path):
    """Read a two-column TSV and return ``(labels, texts)``.

    The first column of each row is used as the classification target and
    the second column as the document text. Completely empty rows are
    skipped; a non-empty row with fewer than two columns raises
    ``IndexError``.
    """
    labels = []
    texts = []
    with open(path, 'r', encoding="utf-8", newline="") as train:
        # QUOTE_NONE: the data is plain tab-separated text. With the default
        # quoting a field that starts with '"' would be parsed as a quoted
        # field and could swallow tabs/newlines, silently merging rows.
        reader = csv.reader(train, delimiter="\t", quoting=csv.QUOTE_NONE)
        for row in reader:
            if not row:
                continue
            labels.append(row[0])
            texts.append(row[1])
    return labels, texts


def _write_predictions(in_path, out_path):
    """Predict a label for every line of ``in_path`` and write one per line.

    Each line of the input file is treated as one document. The output file
    is only opened once prediction has finished, so a failure while
    vectorising or predicting does not leave a truncated output file behind.
    """
    with open(in_path, 'r', encoding="utf-8") as source:
        documents = source.readlines()

    predictions = MNB.predict(vectorizer.transform(documents))

    with open(out_path, 'w', encoding="utf-8") as sink:
        sink.writelines(str(label) + '\n' for label in predictions)


# Train: fit the TF-IDF vocabulary on the texts, then the classifier on the
# resulting feature matrix with the first column as target.
labels, texts = _read_training_rows("train/train.tsv")
features = vectorizer.fit_transform(texts)
MNB.fit(features, labels)

# Write dev
_write_predictions('dev-0/in.tsv', 'dev-0/out.tsv')

# Write test
_write_predictions('test-A/in.tsv', 'test-A/out.tsv')
|
||||
|
5447
test-A/out.tsv
Normal file
5447
test-A/out.tsv
Normal file
File diff suppressed because it is too large
Load Diff
98132
train/train.tsv
Normal file
98132
train/train.tsv
Normal file
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user