task done
This commit is contained in:
parent
756ef4277a
commit
eb6976bcd9
5272
dev-0/out.tsv
Normal file
5272
dev-0/out.tsv
Normal file
File diff suppressed because it is too large
Load Diff
51
solution.py
Normal file
51
solution.py
Normal file
@ -0,0 +1,51 @@
|
|||||||
|
import numpy
|
||||||
|
import lzma
|
||||||
|
from sklearn.naive_bayes import MultinomialNB
|
||||||
|
from sklearn import preprocessing
|
||||||
|
from sklearn.pipeline import make_pipeline
|
||||||
|
from sklearn.feature_extraction.text import TfidfVectorizer
|
||||||
|
|
||||||
|
|
||||||
|
# Directories of the two evaluation splits; each is combined with
# '/in.tsv.xz' (input) and '/out.tsv' (predictions) elsewhere in this file.
TEST_A = "test-A"
DEV_0 = "dev-0"

# Training data: xz-compressed input texts and the plain-text expected labels.
TRAIN_IN = "./train/in.tsv.xz"
TRAIN_EXPECTED = "./train/expected.tsv"
|
||||||
|
|
||||||
|
|
||||||
|
def open_file(path):
    """Read a plain-text file and return its lines.

    Args:
        path: Path of the text file to read.

    Returns:
        A list with one string per line; line terminators are kept, exactly
        as produced by ``readlines()``.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # locale's default encoding of the machine running the script.
    with open(path, encoding="utf-8") as file:
        return file.readlines()
|
||||||
|
|
||||||
|
|
||||||
|
def open_xz(path):
    """Read an xz-compressed text file and return its lines.

    Args:
        path: Path of the ``.xz`` file to read.

    Returns:
        A list with one decoded string per line; line terminators are kept,
        exactly as produced by ``readlines()``.
    """
    # Text mode ('rt') with an explicit encoding: without it the decoding
    # would follow the locale's default encoding.
    with lzma.open(path, 'rt', encoding="utf-8") as file:
        return file.readlines()
|
||||||
|
|
||||||
|
|
||||||
|
def get_model(train_in, train_expected):
    """Fit a TF-IDF + multinomial naive Bayes pipeline on the training data.

    Args:
        train_in: Sequence of raw training texts, one document per element.
        train_expected: Sequence of labels aligned with ``train_in``.

    Returns:
        The fitted scikit-learn pipeline. It is trained on the integer codes
        produced by ``LabelEncoder``, so ``predict`` returns those codes
        rather than the original label values.
    """
    # Labels read with readlines() carry their line terminator; if the last
    # line of the file lacks one, "1" and "1\n" would be encoded as two
    # different classes. Strip terminators so each label maps to one class.
    labels = [
        label.rstrip("\r\n") if isinstance(label, str) else label
        for label in train_expected
    ]
    encoded_labels = preprocessing.LabelEncoder().fit_transform(labels)

    pipeline = make_pipeline(TfidfVectorizer(), MultinomialNB())
    return pipeline.fit(train_in, encoded_labels)
|
||||||
|
|
||||||
|
|
||||||
|
def predict(train_test_in_path, train_in_path, train_expected_path):
    """Train a model on the training files and predict for one data split.

    Note that a new model is fitted on every call.

    Args:
        train_test_in_path: Directory of the split to predict; its input is
            read from ``<directory>/in.tsv.xz``.
        train_in_path: Path of the xz-compressed training texts.
        train_expected_path: Path of the plain-text training labels.

    Returns:
        Whatever the fitted model's ``predict`` returns for the split's
        input lines.
    """
    # Load every input up front so a missing file fails before training.
    train_in = open_xz(train_in_path)
    train_expected = open_file(train_expected_path)
    split_in = open_xz(train_test_in_path + '/in.tsv.xz')

    model = get_model(train_in, train_expected)
    return model.predict(split_in)
|
||||||
|
|
||||||
|
|
||||||
|
def save_result(path, prediction):
    """Write predictions to ``<path>/out.tsv`` as integers.

    Args:
        path: Directory that receives the ``out.tsv`` file.
        prediction: Array-like of numeric predictions accepted by
            ``numpy.savetxt``.
    """
    # '%d' formats every value as an integer; passed by keyword so the
    # intent does not rely on savetxt's positional parameter order.
    numpy.savetxt(path + "/out.tsv", prediction, fmt='%d')
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Both splits are predicted from the same training data, so fit the
    # model once and reuse it instead of retraining for every split.
    model = get_model(open_xz(TRAIN_IN), open_file(TRAIN_EXPECTED))

    # Compute every prediction before writing anything, so a failure on one
    # split does not leave a partially updated set of output files.
    predictions = [
        (split_dir, model.predict(open_xz(split_dir + '/in.tsv.xz')))
        for split_dir in (DEV_0, TEST_A)
    ]

    for split_dir, prediction in predictions:
        save_result(split_dir, prediction)
|
5152
test-A/out.tsv
Normal file
5152
test-A/out.tsv
Normal file
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user