Compare commits
No commits in common. "master" and "master" have entirely different histories.
5272
dev-0/out.tsv
5272
dev-0/out.tsv
File diff suppressed because it is too large
Load Diff
47
run.py
47
run.py
@ -1,47 +0,0 @@
|
|||||||
import lzma
|
|
||||||
from naivebayes import NaiveBayesTextClassifier
|
|
||||||
from spacy.lang.en.stop_words import STOP_WORDS
|
|
||||||
import numpy as np
|
|
||||||
import pandas as pd
|
|
||||||
|
|
||||||
# NOTE(review): this attaches an ad-hoc `max_length` attribute to the numpy
# module object; nothing in the visible part of this script reads it back.
# Presumably it was meant for a spaCy pipeline's `nlp.max_length` limit —
# confirm before relying on it or removing it.
np.max_length = 1200000
|
|
||||||
|
|
||||||
def get_data(fname):
    """Return the lines of the UTF-8 text file ``fname`` as a list of ``str``.

    Each element keeps its trailing newline, exactly as produced by iterating
    over the open file.
    """
    with open(fname, mode='r', encoding='utf8') as handle:
        lines = list(handle)
    return lines
|
|
||||||
|
|
||||||
def get_data_zipped(fname):
    """Return the lines of the LZMA/XZ-compressed file ``fname``.

    The archive is opened with mode ``'r'``, which ``lzma.open`` treats as
    binary, so the result is a list of ``bytes`` objects (not ``str``), each
    keeping its trailing newline.
    """
    with lzma.open(fname, mode='r') as archive:
        lines = list(archive)
    return lines
|
|
||||||
|
|
||||||
def train_bayes(model, x, y, step=10000):
    """Feed ``x``/``y`` to ``model.train`` in consecutive batches.

    Args:
        model: Object exposing ``train(samples, labels)``; it is called once
            per batch, in order.
        x: Sliceable sequence of training samples; its length determines the
            number of batches.
        y: Sliceable sequence of labels, sliced with the same bounds as ``x``.
        step: Maximum number of samples per batch.

    Raises:
        ValueError: If ``step`` is 0 (raised by ``range``).

    Every batch is clamped to ``len(x)``, including the first one, so labels
    beyond the last sample are never passed to the model. An empty ``x``
    results in no ``train`` calls.
    """
    total = len(x)
    for start in range(0, total, step):
        # Clamp so the final (possibly short) batch never reaches past the
        # samples, keeping the label slice aligned with the sample slice.
        end = min(start + step, total)
        model.train(x[start:end], y[start:end])
|
|
||||||
|
|
||||||
# Load the training samples and parse each expected label as an integer.
train_x = get_data_zipped('train/in.tsv.xz')
train_y = list(map(int, get_data('train/expected.tsv')))

# Load the two sets that will be classified.
test_x = get_data_zipped('test-A/in.tsv.xz')
dev_x = get_data_zipped('dev-0/in.tsv.xz')

# Two-category classifier (labels 0 and 1) using spaCy's English stop words.
model = NaiveBayesTextClassifier(categories=[0, 1], stop_words=STOP_WORDS)
train_bayes(model, train_x, train_y)

predicted = model.classify(dev_x)
predicted2 = model.classify(test_x)

# Both prediction files are written the same way: tab-separated, no header
# row, no index column.
csv_options = dict(sep='\t', header=None, encoding="utf-8", index=False)
pd.DataFrame(predicted).to_csv('dev-0/out.tsv', **csv_options)
pd.DataFrame(predicted2).to_csv('test-A/out.tsv', **csv_options)
|
|
5152
test-A/out.tsv
5152
test-A/out.tsv
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user