solution
This commit is contained in:
parent
9cb2fb2612
commit
463cbd81cd
5452
dev-0/out.tsv
Normal file
5452
dev-0/out.tsv
Normal file
File diff suppressed because it is too large
Load Diff
39
skrypt.py
Executable file
39
skrypt.py
Executable file
@ -0,0 +1,39 @@
|
|||||||
|
from sklearn.feature_extraction.text import TfidfVectorizer
|
||||||
|
from sklearn.naive_bayes import MultinomialNB
|
||||||
|
import string
|
||||||
|
import csv
|
||||||
|
import re
|
||||||
|
|
||||||
|
# Classifier and feature extractor live at module level because the
# prediction sections further down reuse both objects.
MNB = MultinomialNB()
vectorizer = TfidfVectorizer()

# NOTE: the names are swapped relative to the usual convention. X collects
# column 0 of the training file (passed to fit() as the target) and Y collects
# column 1 (vectorized and passed to fit() as the features).
X = []
Y = []
# newline="" is what the csv module asks for when it is handed a file object.
with open("train/train.tsv", 'r', encoding="utf-8", newline="") as train:
    # QUOTE_NONE: treat '"' as an ordinary character. With the default
    # quoting, a field that starts with a double quote is parsed as a quoted
    # field and can swallow tabs and following lines, silently merging rows.
    # The prediction inputs are read as raw lines, so this also keeps the
    # training text consistent with what the model sees at prediction time.
    for line in csv.reader(train, delimiter="\t", quoting=csv.QUOTE_NONE):
        X.append(line[0])
        Y.append(line[1])

# Y is rebound from the list of raw strings to the fitted TF-IDF matrix.
Y = vectorizer.fit_transform(Y)
MNB.fit(Y, X)
|
||||||
|
|
||||||
|
## Write dev
# Read the input inside a context manager so the handle is closed even when
# vectorizing raises; each line of the file is treated as one document.
with open('dev-0/in.tsv', 'r', encoding="utf-8") as dev_in:
    dev = vectorizer.transform(dev_in.readlines())

dev_predict = MNB.predict(dev)

# Open (and truncate) the output only after prediction has succeeded, so a
# failure above does not leave an empty out.tsv behind.
with open('dev-0/out.tsv', 'w', encoding="utf-8") as dev_out:
    dev_out.writelines(str(s) + '\n' for s in dev_predict)
|
||||||
|
|
||||||
|
## Write test
# Read the input inside a context manager so the handle is closed even when
# vectorizing raises; each line of the file is treated as one document.
with open('test-A/in.tsv', 'r', encoding="utf-8") as test_in:
    test = vectorizer.transform(test_in.readlines())

test_predict = MNB.predict(test)

# Open (and truncate) the output only after prediction has succeeded, so a
# failure above does not leave an empty out.tsv behind.
with open('test-A/out.tsv', 'w', encoding="utf-8") as test_out:
    test_out.writelines(str(s) + '\n' for s in test_predict)
|
||||||
|
|
5447
test-A/out.tsv
Normal file
5447
test-A/out.tsv
Normal file
File diff suppressed because it is too large
Load Diff
98132
train/train.tsv
Normal file
98132
train/train.tsv
Normal file
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user