"""Train a TF-IDF + multinomial naive Bayes text classifier and label test-A.

Reads one document per line from ``train/in.tsv`` and the matching label per
line from ``train/expected.tsv``, fits the pipeline, predicts a label for each
line of ``test-A/in.tsv`` and writes the predictions, one per line, to
``test-A/out.tsv``.
"""

from sklearn import preprocessing
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import make_pipeline
import numpy as np  # noqa: F401  (kept: code outside this block may rely on it)

TRAIN_INPUT_PATH = "train/in.tsv"
TRAIN_LABELS_PATH = "train/expected.tsv"
TEST_INPUT_PATH = "test-A/in.tsv"
TEST_OUTPUT_PATH = "test-A/out.tsv"


def _read_lines(path):
    """Return the lines of the text file at *path* without trailing newlines."""
    with open(path, encoding="utf-8") as handle:
        # Strip only the line terminator so a final line lacking "\n" is
        # treated exactly like every other line (matters for label classes).
        return [line.rstrip("\n") for line in handle]


train_documents = _read_lines(TRAIN_INPUT_PATH)
train_labels = _read_lines(TRAIN_LABELS_PATH)
test_documents = _read_lines(TEST_INPUT_PATH)

label_encoder = preprocessing.LabelEncoder()
encoded_labels = label_encoder.fit_transform(train_labels)

classifier = make_pipeline(TfidfVectorizer(), MultinomialNB())
classifier.fit(train_documents, encoded_labels)

# Map encoder indices back to the original label strings so the output uses
# the same vocabulary as train/expected.tsv.
predicted_labels = label_encoder.inverse_transform(
    classifier.predict(test_documents)
)

# Overwrite ("w") rather than append so re-running the script does not
# accumulate duplicate predictions; the context manager flushes and closes.
with open(TEST_OUTPUT_PATH, "w", encoding="utf-8") as out_file:
    out_file.writelines(f"{label}\n" for label in predicted_labels)