Demo Version

This commit is contained in:
Dominik Strzako 2021-05-08 19:02:05 +02:00
parent 756ef4277a
commit 3aefd799a6
7 changed files with 310464 additions and 0 deletions

5272
dev-0/in.tsv Normal file

File diff suppressed because one or more lines are too long

5272
dev-0/out.tsv Normal file

File diff suppressed because it is too large Load Diff

BIN
geval Normal file

Binary file not shown.

37
main.py Normal file
View File

@@ -0,0 +1,37 @@
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import Pipeline
from sklearn.feature_extraction.text import TfidfVectorizer
def _read_lines(path):
    """Return the lines of the UTF-8 text file at *path*, newline removed."""
    with open(path, 'r', encoding='utf-8') as handle:
        # Iterate the file instead of calling str.splitlines(): only real
        # line breaks may separate records, otherwise characters such as
        # U+2028 inside a document would shift the line alignment.
        return [line.rstrip('\n') for line in handle]


def train_model(train_in, train_expected):
    """Fit a TF-IDF + multinomial naive Bayes pipeline on line-based data.

    Args:
        train_in: Path to a UTF-8 text file; every line is one training
            document.
        train_expected: Path to a UTF-8 text file; every line is the label
            of the document on the same line of ``train_in``.

    Returns:
        The fitted ``Pipeline``. It is trained on the integer codes produced
        by ``LabelEncoder``, so its predictions are those codes rather than
        the original label strings (the encoder itself is not kept).
    """
    # Line terminators are stripped so that a final label without a trailing
    # newline ("1" vs "1\n") is not treated as a separate class.
    labels = _read_lines(train_expected)
    documents = _read_lines(train_in)

    encoded_labels = LabelEncoder().fit_transform(labels)
    pipeline = Pipeline(steps=[
        ('tfidf', TfidfVectorizer()),
        ('naive-bayes', MultinomialNB()),
    ])
    return pipeline.fit(documents, encoded_labels)
def predict(model, in_file, out_file):
    """Run *model* over every line of ``in_file`` and save the predictions.

    Args:
        model: Object exposing ``predict(samples)``; it receives the list of
            raw lines (line terminators included).
        in_file: Path of the UTF-8 text file to read, one sample per line.
        out_file: Path of the file to write, one prediction per line.
    """
    with open(in_file, mode='r', encoding='utf-8') as source:
        samples = list(source)

    predicted = model.predict(samples)
    # '%d' formats every prediction as a plain integer on its own row.
    np.savetxt(out_file, predicted, fmt='%d')
def main():
    """Train on the ``train`` split, then write predictions for both test sets."""
    classifier = train_model("train/in.tsv", "train/expected.tsv")

    # Each pair is (input file, file the predictions are written to).
    for source, target in (
        ("dev-0/in.tsv", "dev-0/out.tsv"),
        ("test-A/in.tsv", "test-A/out.tsv"),
    ):
        predict(classifier, source, target)
# Run the training/prediction workflow only when executed as a script,
# not when this module is imported.
if __name__ == "__main__":
    main()

5152
test-A/in.tsv Normal file

File diff suppressed because one or more lines are too long

5152
test-A/out.tsv Normal file

File diff suppressed because it is too large Load Diff

289579
train/in.tsv Normal file

File diff suppressed because one or more lines are too long