From f9172f10a0ef7f6085f995a59724c710f383c1a1 Mon Sep 17 00:00:00 2001 From: Dominik Date: Sat, 8 May 2021 22:45:55 +0200 Subject: [PATCH] Pre-final Version --- geval | Bin main.py | 38 ++++++++++++++++++++------------------ 2 files changed, 20 insertions(+), 18 deletions(-) mode change 100644 => 100755 geval diff --git a/geval b/geval old mode 100644 new mode 100755 diff --git a/main.py b/main.py index 545f5a4..e0ba702 100644 --- a/main.py +++ b/main.py @@ -1,34 +1,36 @@ import numpy as np from sklearn.preprocessing import LabelEncoder from sklearn.naive_bayes import MultinomialNB -from sklearn.pipeline import Pipeline +from sklearn.pipeline import make_pipeline from sklearn.feature_extraction.text import TfidfVectorizer -def train_model(train_in, train_expected): - with open(train_expected, 'r', encoding='utf-8') as f: - exp = f.readlines() +def Create_model(X_tsv, Y_tsv): - with open(train_in, 'r', encoding='utf-8') as f: - train_data = f.readlines() + with open(X_tsv) as f: + X = f.readlines() - exp_encoded = LabelEncoder().fit_transform(exp) - pipeline = Pipeline(steps=[ - ('tfidf', TfidfVectorizer()), - ('naive-bayes', MultinomialNB()) - ]) + with open(Y_tsv) as f: + Y = f.readlines() - return pipeline.fit(train_data, exp_encoded) + Y = LabelEncoder().fit_transform(Y) + pipeline = make_pipeline(TfidfVectorizer(),MultinomialNB()) + + return pipeline.fit(X, Y) -def predict(model, in_file, out_file): - with open(in_file, 'r', encoding='utf-8') as f: - lines = f.readlines() - prediction = model.predict(lines) - np.savetxt(out_file, prediction, fmt='%d') +def predict(model, X_tsv, file_name): + + with open(X_tsv) as f: + X = f.readlines() + + prediction = model.predict(X) + np.savetxt(file_name, prediction, fmt='%d') def main(): - model = train_model("train/in.tsv", "train/expected.tsv") + + model = Create_model("train/in.tsv", "train/expected.tsv") + predict(model, "dev-0/in.tsv", "dev-0/out.tsv") predict(model, "test-A/in.tsv", "test-A/out.tsv")