Compare commits
1 Commits
Author | SHA1 | Date | |
---|---|---|---|
|
3a91a4f3f7 |
5272
dev-0/out.tsv
Normal file
5272
dev-0/out.tsv
Normal file
File diff suppressed because it is too large
Load Diff
57
run.py
Normal file
57
run.py
Normal file
@ -0,0 +1,57 @@
|
||||
from naivebayes import NaiveBayesTextClassifier
|
||||
import lzma
|
||||
from spacy.lang.en.stop_words import STOP_WORDS as en_stop
|
||||
|
||||
# Class labels handed to the classifier.
categories_list = [0, 1]

# Only the first TRAIN_LIMIT training examples are used, fed to
# ``classifier.train`` in consecutive slices of TRAIN_BATCH_SIZE.
# Raise TRAIN_LIMIT to train on more of the data.
TRAIN_LIMIT = 30000
TRAIN_BATCH_SIZE = 15000


def read_xz_lines(path):
    """Return the lines of the xz-compressed file *path* as a list of str.

    Each line is stripped of leading/trailing whitespace while still raw
    bytes and then decoded as UTF-8. Blank lines are kept as empty strings
    so that positions stay aligned with the corresponding label file.
    """
    with lzma.open(path, "rb") as file:
        return [raw.strip().decode("utf-8") for raw in file]


def read_labels(path):
    """Return the integer found on each line of the text file *path*.

    Raises:
        ValueError: if a line is not a valid integer (including blank lines).
    """
    with open(path, "r") as file:
        return [int(line.strip()) for line in file]


def write_predictions(path, predictions):
    """Write ``str(p)`` for every prediction to *path*, one per line.

    The file is opened in write mode, so earlier content is replaced.
    Appending would stack a second copy of the predictions onto the file
    on every re-run and break the one-line-per-input alignment.
    """
    with open(path, "w") as file:
        file.writelines(str(p) + "\n" for p in predictions)


def predict_split(classifier, directory):
    """Classify ``<directory>/in.tsv.xz`` and write ``<directory>/out.tsv``.

    *classifier* only needs a ``classify(list_of_str)`` method returning an
    iterable of predictions.
    """
    texts = read_xz_lines(directory + "/in.tsv.xz")
    write_predictions(directory + "/out.tsv", classifier.classify(texts))


def main():
    """Train on the training set, then predict the dev-0 and test-A sets."""
    classifier = NaiveBayesTextClassifier(
        categories=categories_list,
        stop_words=en_stop,
    )

    texts = read_xz_lines("train/in.tsv.xz")
    labels = read_labels("train/expected.tsv")
    # Quick visual check that inputs and labels line up.
    print(len(texts), len(labels))

    # NOTE(review): this relies on repeated ``train`` calls accumulating
    # rather than resetting the model - confirm against the naivebayes docs.
    for start in range(0, TRAIN_LIMIT, TRAIN_BATCH_SIZE):
        stop = min(start + TRAIN_BATCH_SIZE, TRAIN_LIMIT)
        classifier.train(texts[start:stop], labels[start:stop])

    predict_split(classifier, "dev-0")
    predict_split(classifier, "test-A")


if __name__ == "__main__":
    main()
|
5152
test-A/out.tsv
Normal file
5152
test-A/out.tsv
Normal file
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user