This commit is contained in:
test 2022-05-10 16:08:54 +02:00
parent ecfafbf86c
commit 3ee0e85a36
3 changed files with 10461 additions and 0 deletions

5272
dev-0/out.tsv Normal file

File diff suppressed because it is too large Load Diff

37
run.py Normal file
View File

@ -0,0 +1,37 @@
import csv
from naivebayes import NaiveBayesTextClassifier
from spacy.lang.en.stop_words import STOP_WORDS
# Input data was unpacked manually (the .tsv files must already exist on disk).


def _read_first_column(path):
    """Return the first tab-separated field of every line in *path*.

    Exactly one item is produced per line of the file, so the result stays
    aligned with any parallel file (labels, expected outputs).
    """
    # newline='' is what the csv module asks for when it is handed a file.
    with open(path, 'r', encoding='utf8', newline='') as file:
        # QUOTE_NONE: the data is raw tab-separated text, not quoted CSV.
        # With the default quoting a field that starts with a double quote
        # is read as a quoted field and may swallow tabs and newlines,
        # merging several documents into one row and shifting the inputs
        # against their labels.
        reader = csv.reader(file, delimiter='\t', quoting=csv.QUOTE_NONE)
        # csv.reader yields [] for a blank line; treat it as an empty
        # document instead of failing with IndexError or dropping the row.
        return [row[0] if row else '' for row in reader]


def _write_predictions(path, predictions):
    """Write one prediction per line to *path*."""
    with open(path, 'w', encoding='utf8') as file:
        file.writelines(str(label) + '\n' for label in predictions)


train_in = _read_first_column('train/in.tsv')
train_exp = [int(label) for label in _read_first_column('train/expected.tsv')]
dev_in = _read_first_column('dev-0/in.tsv')
test_in = _read_first_column('test-A/in.tsv')

# Training on misaligned documents/labels would silently produce a useless
# model, so fail loudly instead.
if len(train_in) != len(train_exp):
    raise ValueError(
        'train/in.tsv has %d rows but train/expected.tsv has %d'
        % (len(train_in), len(train_exp))
    )
# -------------------------------------------------------------------------------------------------------------------- #
# Only the first CUTOFF training examples are used for fitting.
#CUTOFF = 289541 // 12
CUTOFF = 16000
classifier = NaiveBayesTextClassifier([0, 1], stop_words=STOP_WORDS)
classifier.train(train_in[:CUTOFF], train_exp[:CUTOFF])

dev_out = classifier.classify(dev_in)
_write_predictions('dev-0/out.tsv', dev_out)

test_out = classifier.classify(test_in)
_write_predictions('test-A/out.tsv', test_out)

5152
test-A/out.tsv Normal file

File diff suppressed because it is too large Load Diff