Pre-final Version

This commit is contained in:
Dominik 2021-05-08 22:45:55 +02:00
parent 3aefd799a6
commit f9172f10a0
2 changed files with 20 additions and 18 deletions

0
geval Normal file → Executable file
View File

38
main.py
View File

@@ -1,34 +1,36 @@
import numpy as np import numpy as np
from sklearn.preprocessing import LabelEncoder from sklearn.preprocessing import LabelEncoder
from sklearn.naive_bayes import MultinomialNB from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import Pipeline from sklearn.pipeline import make_pipeline
from sklearn.feature_extraction.text import TfidfVectorizer from sklearn.feature_extraction.text import TfidfVectorizer
def train_model(train_in, train_expected): def Create_model(X_tsv, Y_tsv):
with open(train_expected, 'r', encoding='utf-8') as f:
exp = f.readlines()
with open(train_in, 'r', encoding='utf-8') as f: with open(X_tsv) as f:
train_data = f.readlines() X = f.readlines()
exp_encoded = LabelEncoder().fit_transform(exp) with open(Y_tsv) as f:
pipeline = Pipeline(steps=[ Y = f.readlines()
('tfidf', TfidfVectorizer()),
('naive-bayes', MultinomialNB())
])
return pipeline.fit(train_data, exp_encoded) Y = LabelEncoder().fit_transform(Y)
pipeline = make_pipeline(TfidfVectorizer(),MultinomialNB())
return pipeline.fit(X, Y)
def predict(model, in_file, out_file): def predict(model, X_tsv, file_name):
with open(in_file, 'r', encoding='utf-8') as f:
lines = f.readlines() with open(X_tsv) as f:
prediction = model.predict(lines) X = f.readlines()
np.savetxt(out_file, prediction, fmt='%d')
prediction = model.predict(X)
np.savetxt(file_name, prediction, fmt='%d')
def main(): def main():
model = train_model("train/in.tsv", "train/expected.tsv")
model = Create_model("train/in.tsv", "train/expected.tsv")
predict(model, "dev-0/in.tsv", "dev-0/out.tsv") predict(model, "dev-0/in.tsv", "dev-0/out.tsv")
predict(model, "test-A/in.tsv", "test-A/out.tsv") predict(model, "test-A/in.tsv", "test-A/out.tsv")