# Text classification script: TF-IDF features + multinomial naive Bayes.
import numpy as np
from sklearn import preprocessing
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import make_pipeline

# Data locations, relative to the working directory the script is run from.
TRAIN_TEXTS_PATH = "train/in.tsv"
TRAIN_LABELS_PATH = "train/expected.tsv"
TEST_TEXTS_PATH = "test-A/in.tsv"
PREDICTIONS_PATH = "test-A/out.tsv"


def read_lines(path):
    """Return the lines of the text file at *path* without trailing newlines.

    Stripping the newline keeps a label on the last line of a file (which may
    lack a final newline) identical to the same label elsewhere in the file.
    """
    # NOTE(review): UTF-8 is assumed for the data files - confirm for your data.
    with open(path, encoding="utf-8") as handle:
        return [line.rstrip("\n") for line in handle]


def write_lines(path, values):
    """Write each item of *values* on its own line, replacing any old file."""
    # "w" rather than "a": re-running the script must not stack a second set
    # of predictions onto the previous run's output.
    with open(path, "w", encoding="utf-8") as handle:
        handle.writelines(f"{value}\n" for value in values)


def main():
    """Train TF-IDF + multinomial naive Bayes and write test predictions.

    Reads the training texts and labels, fits the pipeline, predicts a label
    for every line of the test input and writes one predicted label per line
    to PREDICTIONS_PATH.
    """
    train_texts = read_lines(TRAIN_TEXTS_PATH)
    train_labels = read_lines(TRAIN_LABELS_PATH)
    test_texts = read_lines(TEST_TEXTS_PATH)

    encoder = preprocessing.LabelEncoder()
    encoded_labels = encoder.fit_transform(train_labels)

    classifier = make_pipeline(TfidfVectorizer(), MultinomialNB())
    classifier.fit(train_texts, encoded_labels)

    # Map the integer class codes back to the original label strings so the
    # output uses the same label vocabulary as the training labels.
    predicted_labels = encoder.inverse_transform(classifier.predict(test_texts))
    write_lines(PREDICTIONS_PATH, predicted_labels)


if __name__ == "__main__":
    main()