s444501
This commit is contained in:
parent
ecfafbf86c
commit
3ee0e85a36
5272
dev-0/out.tsv
Normal file
5272
dev-0/out.tsv
Normal file
File diff suppressed because it is too large
Load Diff
37
run.py
Normal file
37
run.py
Normal file
@ -0,0 +1,37 @@
|
||||
import csv
|
||||
from naivebayes import NaiveBayesTextClassifier
|
||||
from spacy.lang.en.stop_words import STOP_WORDS
|
||||
|
||||
# Input data was unpacked manually beforehand.
def _read_first_column(path):
    """Return the first tab-separated field of every line in *path*.

    The file is parsed with quote handling disabled (``csv.QUOTE_NONE``).
    With the csv module's default dialect a field that starts with a
    double quote is treated as a quoted field: the quotes are stripped
    and an unbalanced quote makes the reader swallow the following lines
    into a single row, so the number of rows no longer matches the number
    of lines in the file.
    NOTE(review): this assumes the .tsv files are plain, unquoted
    tab-separated text -- confirm against the data set.

    A blank line (for which ``csv.reader`` yields an empty row) is
    returned as an empty string instead of raising ``IndexError``, so the
    result keeps exactly one entry per row.
    """
    # newline='' is what the csv module documentation asks for when a file
    # object is handed to csv.reader.
    with open(path, 'r', encoding='utf8', newline='') as tsv_file:
        reader = csv.reader(tsv_file, delimiter='\t', quoting=csv.QUOTE_NONE)
        return [row[0] if row else '' for row in reader]


train_in = _read_first_column('train/in.tsv')

# Labels are parsed as integers; a blank or non-numeric label line raises
# ValueError rather than being skipped silently.
train_exp = [int(label) for label in _read_first_column('train/expected.tsv')]

dev_in = _read_first_column('dev-0/in.tsv')

test_in = _read_first_column('test-A/in.tsv')
|
||||
|
||||
# -------------------------------------------------------------------------------------------------------------------- #
|
||||
|
||||
# Number of leading training examples used for fitting.
# Previously tried value: CUTOFF = 289541 // 12
CUTOFF = 16000


def _write_predictions(path, predictions):
    """Write each prediction to *path* as text, one per line."""
    with open(path, 'w') as out_file:
        out_file.writelines(f'{label!s}\n' for label in predictions)


# Classifier over the labels 0 and 1, configured with the imported stop words.
classifier = NaiveBayesTextClassifier([0, 1], stop_words=STOP_WORDS)

# Train only on the first CUTOFF documents and their labels.
train_docs = train_in[:CUTOFF]
train_labels = train_exp[:CUTOFF]
classifier.train(train_docs, train_labels)

# The dev-0 predictions are written out before the test-A set is classified.
dev_out = classifier.classify(dev_in)
_write_predictions('dev-0/out.tsv', dev_out)

test_out = classifier.classify(test_in)
_write_predictions('test-A/out.tsv', test_out)
|
5152
test-A/out.tsv
Normal file
5152
test-A/out.tsv
Normal file
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user