"""Train a TF-IDF + multinomial naive Bayes classifier and label test-A.

Reads training documents from ``train/in.tsv`` and their labels from
``train/expected.tsv`` (one entry per line in each file), fits the
pipeline, predicts a label for every line of ``test-A/in.tsv`` and writes
the predictions to ``test-A/out.tsv``, one label per line.
"""
import numpy as np
from sklearn import preprocessing
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import make_pipeline

le = preprocessing.LabelEncoder()

with open("train/in.tsv", encoding="utf-8") as f:
    X = f.readlines()

with open("train/expected.tsv", encoding="utf-8") as ff:
    # Drop the line terminator so that a final line without a trailing
    # newline is not treated as a different class from the same label
    # followed by "\n".
    Y = [line.rstrip("\n") for line in ff]
Y = le.fit_transform(Y)

with open("test-A/in.tsv", encoding="utf-8") as d:
    r = d.readlines()

gnb = make_pipeline(TfidfVectorizer(), MultinomialNB())
model = gnb.fit(X, Y)

# predict() yields encoder indices; map them back to the label text that
# appeared in train/expected.tsv before writing them out.
y_pred = np.asarray(model.predict(r)).ravel()
labels = le.inverse_transform(y_pred)

# "w" rather than "a": re-running the script must replace the previous
# predictions instead of appending a second copy after them. The context
# manager guarantees the file is flushed and closed.
with open("test-A/out.tsv", "w", encoding="utf-8") as out:
    out.writelines(f"{label}\n" for label in labels)