25 lines
941 B
25 lines
941 B
from sklearn import preprocessing
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import make_pipeline
import numpy as np
# Train a TF-IDF + multinomial naive Bayes text classifier on the files under
# train/ and render the predictions for test-A/in.tsv as a single string.

# Encodes the raw label lines as integer class ids.
eng = preprocessing.LabelEncoder()

# The encoding is pinned so decoding does not depend on the platform locale.
# NOTE(review): assumes the TSV files are UTF-8 -- confirm against the data.
with open("train/in.tsv", encoding="utf-8") as myFile:
    # Each line becomes one element (one document for the vectorizer).
    tmp = myFile.readlines()
with open("train/expected.tsv", encoding="utf-8") as finFile:
    # readlines() keeps the trailing newline, so the encoder's classes are
    # the label lines including their line terminator.
    fin = finFile.readlines()
fin = eng.fit_transform(fin)
with open("test-A/in.tsv", encoding="utf-8") as tFile:
    fic = tFile.readlines()

# Pipeline: TF-IDF features fed into a multinomial naive Bayes classifier.
gnb = make_pipeline(TfidfVectorizer(), MultinomialNB())
model = gnb.fit(tmp, fin)

# Predictions are the integer ids produced by `eng`; they are not mapped back
# to the original label text in this section.
fin_pred = model.predict(fic)
fin_pred = np.array(fin_pred)

# Lift NumPy's summarisation threshold so no prediction is elided with "...".
np.set_printoptions(threshold=np.inf)
# NOTE(review): np.array2string encloses its result in "[" and "]", so eFile
# is not a bare one-value-per-line listing. If eFile is written verbatim to
# the output file, consider "\n".join(map(str, fin_pred)) instead -- left
# unchanged here because the code consuming eFile is not visible.
eFile = np.array2string(fin_pred.flatten(), precision = 2, separator = '\n',suppress_small = True)

# NOTE(review): opened in append mode and without a context manager; repeated
# runs will accumulate output, and the handle must be closed by the code that
# follows (not visible in this section).
myFile = open("test-A/out.tsv", "a")