task done

This commit is contained in:
Hubert Tylkowski 2021-05-24 14:50:18 +02:00
parent 756ef4277a
commit eb6976bcd9
3 changed files with 10475 additions and 0 deletions

5272
dev-0/out.tsv Normal file

File diff suppressed because it is too large Load Diff

51
solution.py Normal file
View File

@ -0,0 +1,51 @@
import numpy
import lzma
from sklearn.naive_bayes import MultinomialNB
from sklearn import preprocessing
from sklearn.pipeline import make_pipeline
from sklearn.feature_extraction.text import TfidfVectorizer
# Dataset directories: each is read for its in.tsv.xz and receives the
# generated out.tsv.
TEST_A = "test-A"
DEV_0 = "dev-0"
# Training documents (xz-compressed) and their labels; both files are read
# line by line.
TRAIN_IN = "./train/in.tsv.xz"
TRAIN_EXPECTED = "./train/expected.tsv"
def open_file(path):
    """Read a plain-text file and return its lines.

    Args:
        path: Path of the file to read.

    Returns:
        A list with one string per line; line terminators are kept.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's locale encoding.
    with open(path, encoding="utf-8") as file:
        return file.readlines()
def open_xz(path):
    """Read an xz-compressed text file and return its lines.

    Args:
        path: Path of the ``.xz`` file to read.

    Returns:
        A list with one string per decompressed line; line terminators are
        kept.
    """
    # Text mode defaults to the locale encoding; pin UTF-8 so the same
    # archive decodes identically on every platform.
    with lzma.open(path, 'rt', encoding="utf-8") as file:
        return file.readlines()
def get_model(train_in, train_expected):
    """Fit a TF-IDF + multinomial naive Bayes pipeline on the training data.

    The labels are first mapped to integer codes with a ``LabelEncoder``, so
    the fitted model predicts those codes rather than the raw label values.

    Args:
        train_in: Training documents (strings).
        train_expected: Labels aligned with ``train_in``.

    Returns:
        The fitted scikit-learn pipeline.
    """
    encoded_labels = preprocessing.LabelEncoder().fit_transform(train_expected)
    classifier = make_pipeline(TfidfVectorizer(), MultinomialNB())
    # ``fit`` returns the pipeline itself, so it can be returned directly.
    return classifier.fit(train_in, encoded_labels)
def predict(train_test_in_path, train_in_path, train_expected_path):
    """Train a model on the training set and classify one dataset.

    A fresh model is fitted on every call.

    Args:
        train_test_in_path: Directory containing the ``in.tsv.xz`` file to
            classify (string or ``os.PathLike``).
        train_in_path: Path of the xz-compressed training documents.
        train_expected_path: Path of the training labels.

    Returns:
        The output of the fitted model's ``predict`` for the lines of the
        dataset, in file order.
    """
    import os  # local import: ``os`` is not imported at module level

    train_in = open_xz(train_in_path)
    train_expected = open_file(train_expected_path)
    # Join path components instead of concatenating strings so that
    # path-like directory arguments are accepted as well.
    dataset_in = open_xz(os.path.join(train_test_in_path, 'in.tsv.xz'))
    model = get_model(train_in, train_expected)
    return model.predict(dataset_in)
def save_result(path, prediction):
    """Write predictions to ``out.tsv`` inside the given directory.

    Args:
        path: Directory that receives ``out.tsv`` (string or
            ``os.PathLike``).
        prediction: Array-like of numeric predictions; each value is written
            on its own line formatted as an integer.
    """
    import os  # local import: ``os`` is not imported at module level

    # Join path components instead of concatenating strings so that
    # path-like directory arguments are accepted as well.
    numpy.savetxt(os.path.join(path, "out.tsv"), prediction, fmt='%d')
if __name__ == '__main__':
    # Compute every prediction first and write the files afterwards, so no
    # output is written unless both datasets were classified.
    results = [
        (dataset, predict(dataset, TRAIN_IN, TRAIN_EXPECTED))
        for dataset in (DEV_0, TEST_A)
    ]
    for dataset, labels in results:
        save_result(dataset, labels)

5152
test-A/out.tsv Normal file

File diff suppressed because it is too large Load Diff