Pre-final Version
This commit is contained in:
parent
3aefd799a6
commit
f9172f10a0
38
main.py
38
main.py
@ -1,34 +1,36 @@
|
|||||||
import numpy as np
|
import numpy as np
|
||||||
from sklearn.preprocessing import LabelEncoder
|
from sklearn.preprocessing import LabelEncoder
|
||||||
from sklearn.naive_bayes import MultinomialNB
|
from sklearn.naive_bayes import MultinomialNB
|
||||||
from sklearn.pipeline import Pipeline
|
from sklearn.pipeline import make_pipeline
|
||||||
from sklearn.feature_extraction.text import TfidfVectorizer
|
from sklearn.feature_extraction.text import TfidfVectorizer
|
||||||
|
|
||||||
def train_model(train_in, train_expected):
|
def Create_model(X_tsv, Y_tsv):
|
||||||
with open(train_expected, 'r', encoding='utf-8') as f:
|
|
||||||
exp = f.readlines()
|
|
||||||
|
|
||||||
with open(train_in, 'r', encoding='utf-8') as f:
|
with open(X_tsv) as f:
|
||||||
train_data = f.readlines()
|
X = f.readlines()
|
||||||
|
|
||||||
exp_encoded = LabelEncoder().fit_transform(exp)
|
with open(Y_tsv) as f:
|
||||||
pipeline = Pipeline(steps=[
|
Y = f.readlines()
|
||||||
('tfidf', TfidfVectorizer()),
|
|
||||||
('naive-bayes', MultinomialNB())
|
|
||||||
])
|
|
||||||
|
|
||||||
return pipeline.fit(train_data, exp_encoded)
|
Y = LabelEncoder().fit_transform(Y)
|
||||||
|
pipeline = make_pipeline(TfidfVectorizer(),MultinomialNB())
|
||||||
|
|
||||||
|
return pipeline.fit(X, Y)
|
||||||
|
|
||||||
|
|
||||||
def predict(model, in_file, out_file):
|
def predict(model, X_tsv, file_name):
|
||||||
with open(in_file, 'r', encoding='utf-8') as f:
|
|
||||||
lines = f.readlines()
|
with open(X_tsv) as f:
|
||||||
prediction = model.predict(lines)
|
X = f.readlines()
|
||||||
np.savetxt(out_file, prediction, fmt='%d')
|
|
||||||
|
prediction = model.predict(X)
|
||||||
|
np.savetxt(file_name, prediction, fmt='%d')
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
model = train_model("train/in.tsv", "train/expected.tsv")
|
|
||||||
|
model = Create_model("train/in.tsv", "train/expected.tsv")
|
||||||
|
|
||||||
predict(model, "dev-0/in.tsv", "dev-0/out.tsv")
|
predict(model, "dev-0/in.tsv", "dev-0/out.tsv")
|
||||||
predict(model, "test-A/in.tsv", "test-A/out.tsv")
|
predict(model, "test-A/in.tsv", "test-A/out.tsv")
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user