This commit is contained in:
Piotrek96 2021-05-04 13:07:13 +02:00
parent 9cb2fb2612
commit 463cbd81cd
4 changed files with 109070 additions and 0 deletions

5452
dev-0/out.tsv Normal file

File diff suppressed because it is too large Load Diff

39
skrypt.py Executable file
View File

@ -0,0 +1,39 @@
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
import string
import csv
import re
# Train a multinomial naive Bayes classifier on TF-IDF features.
# In train/train.tsv column 0 is used as the class label and column 1 as the
# text to vectorize.
MNB = MultinomialNB()
vectorizer = TfidfVectorizer()
labels = []
texts = []
# newline="" is what the csv module expects for files it parses itself.
# QUOTE_NONE keeps a stray '"' in the text from being treated as the start of
# a quoted field, which would otherwise swallow tabs/newlines and merge rows.
with open("train/train.tsv", 'r', encoding="utf-8", newline="") as train:
    for row in csv.reader(train, delimiter="\t", quoting=csv.QUOTE_NONE):
        if not row:
            # Blank line: csv.reader yields an empty list, nothing to learn from.
            continue
        labels.append(row[0])
        texts.append(row[1])
# Fit the vocabulary/IDF weights on the training texts; the same fitted
# vectorizer is reused below to transform the dev and test inputs.
train_features = vectorizer.fit_transform(texts)
MNB.fit(train_features, labels)
## Write dev
# Each line of dev-0/in.tsv is one document; one predicted label per line is
# written to dev-0/out.tsv.
# The input is read and predictions are computed BEFORE the output file is
# opened, so a failure does not leave a truncated out.tsv behind, and both
# handles are closed by their context managers even when an exception is raised.
with open('dev-0/in.tsv', 'r', encoding="utf-8") as dev_in:
    dev = vectorizer.transform(dev_in.readlines())
dev_predict = MNB.predict(dev)
with open('dev-0/out.tsv', 'w', encoding="utf-8") as dev_out:
    dev_out.writelines(str(s) + '\n' for s in dev_predict)
## Write test
# Each line of test-A/in.tsv is one document; one predicted label per line is
# written to test-A/out.tsv.
# The input is read and predictions are computed BEFORE the output file is
# opened, so a failure does not leave a truncated out.tsv behind, and both
# handles are closed by their context managers even when an exception is raised.
with open('test-A/in.tsv', 'r', encoding="utf-8") as test_in:
    test = vectorizer.transform(test_in.readlines())
test_predict = MNB.predict(test)
with open('test-A/out.tsv', 'w', encoding="utf-8") as test_out:
    test_out.writelines(str(s) + '\n' for s in test_predict)

5447
test-A/out.tsv Normal file

File diff suppressed because it is too large Load Diff

98132
train/train.tsv Normal file

File diff suppressed because it is too large Load Diff