task done
This commit is contained in:
parent
756ef4277a
commit
eb6976bcd9
5272
dev-0/out.tsv
Normal file
5272
dev-0/out.tsv
Normal file
File diff suppressed because it is too large
Load Diff
51
solution.py
Normal file
51
solution.py
Normal file
@ -0,0 +1,51 @@
|
||||
import numpy
|
||||
import lzma
|
||||
from sklearn.naive_bayes import MultinomialNB
|
||||
from sklearn import preprocessing
|
||||
from sklearn.pipeline import make_pipeline
|
||||
from sklearn.feature_extraction.text import TfidfVectorizer
|
||||
|
||||
|
||||
# Directories of the evaluation splits; each is expected to hold an
# ``in.tsv.xz`` input file and receives the generated ``out.tsv``.
DEV_0 = "dev-0"
TEST_A = "test-A"

# Training corpus (xz-compressed text) and its line-aligned labels.
TRAIN_IN = "./train/in.tsv.xz"
TRAIN_EXPECTED = "./train/expected.tsv"
|
||||
|
||||
|
||||
def open_file(path):
    """Read a plain-text file and return its lines.

    Args:
        path: Path of the text file to read.

    Returns:
        A list with one string per line; line terminators are kept.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # locale's default encoding of the machine running the script.
    with open(path, encoding="utf-8") as file:
        return file.readlines()
|
||||
|
||||
|
||||
def open_xz(path):
    """Read an xz-compressed text file and return its lines.

    Args:
        path: Path of the ``.xz`` file to decompress and read.

    Returns:
        A list with one string per line; line terminators are kept.
    """
    # Text mode ('rt') decodes the decompressed bytes; the encoding is
    # pinned to UTF-8 instead of relying on the locale default.
    with lzma.open(path, 'rt', encoding='utf-8') as file:
        return file.readlines()
|
||||
|
||||
|
||||
def get_model(train_in, train_expected):
    """Fit a TF-IDF + multinomial naive Bayes text classifier.

    Args:
        train_in: Training documents, one string per sample.
        train_expected: Labels aligned with ``train_in``.

    Returns:
        The fitted scikit-learn pipeline.

    Note:
        The labels are integer-encoded with a ``LabelEncoder`` that is not
        returned, so the pipeline predicts the encoded class indices rather
        than the original label values.
    """
    encoded_labels = preprocessing.LabelEncoder().fit_transform(train_expected)
    classifier = make_pipeline(TfidfVectorizer(), MultinomialNB())
    # Pipeline.fit returns the pipeline itself, so fitting in place and
    # returning the same object is equivalent to returning fit()'s result.
    classifier.fit(train_in, encoded_labels)
    return classifier
|
||||
|
||||
|
||||
def predict(train_test_in_path, train_in_path, train_expected_path):
    """Train a model on the training data and classify one data split.

    Args:
        train_test_in_path: Directory containing the ``in.tsv.xz`` file
            to classify.
        train_in_path: Path of the xz-compressed training documents.
        train_expected_path: Path of the training labels file.

    Returns:
        The predictions produced by the fitted model for every line of the
        split's input file.
    """
    documents = open_xz(train_in_path)
    labels = open_file(train_expected_path)
    unseen = open_xz(train_test_in_path + '/in.tsv.xz')
    return get_model(documents, labels).predict(unseen)
|
||||
|
||||
|
||||
def save_result(path, prediction):
    """Write predictions to ``out.tsv`` inside the given directory.

    Args:
        path: Directory in which ``out.tsv`` is created.
        prediction: Array-like of predictions, written one per line
            formatted as integers.
    """
    target = path + "/out.tsv"
    numpy.savetxt(target, prediction, fmt='%d')
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Train once and reuse the fitted model for every split: the training
    # data is the same for both, so decompressing it and refitting per
    # split would only repeat identical work.
    model = get_model(open_xz(TRAIN_IN), open_file(TRAIN_EXPECTED))

    # Compute all predictions before writing anything, so no output file
    # is touched if prediction fails for either split.
    predictions = {
        split: model.predict(open_xz(split + '/in.tsv.xz'))
        for split in (DEV_0, TEST_A)
    }

    for split, prediction in predictions.items():
        save_result(split, prediction)
|
5152
test-A/out.tsv
Normal file
5152
test-A/out.tsv
Normal file
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user