"""Train a Gaussian naive Bayes text classifier and label the dev-0 set.

Reads the training documents and their labels from ``train/``, fits a
TF-IDF vectorizer and a ``GaussianNB`` model on them, predicts a label for
every document in ``dev-0/in.tsv`` and writes one label per line to
``dev-0/out.tsv``.
"""
import numpy as np
from sklearn import preprocessing
from sklearn.naive_bayes import GaussianNB
from sklearn.feature_extraction.text import TfidfVectorizer

TRAIN_INPUT_PATH = "train/in.tsv"
TRAIN_LABELS_PATH = "train/expected.tsv"
DEV_INPUT_PATH = "dev-0/in.tsv"
DEV_OUTPUT_PATH = "dev-0/out.tsv"


def read_lines(path):
    """Return the lines of the text file at *path* without line endings."""
    with open(path, encoding="utf-8") as handle:
        return [line.rstrip("\r\n") for line in handle]


def predict_labels(train_texts, train_labels, dev_texts):
    """Fit on the training data and return predicted labels for *dev_texts*.

    The returned labels are the original label strings (decoded back from
    the integer classes used internally), one per entry of *dev_texts*.
    """
    if not dev_texts:
        # Nothing to classify; the estimator rejects an empty sample matrix.
        return []

    label_encoder = preprocessing.LabelEncoder()
    encoded_labels = label_encoder.fit_transform(train_labels)

    vectorizer = TfidfVectorizer()
    # GaussianNB does not accept sparse input, hence the dense conversion.
    train_features = vectorizer.fit_transform(train_texts).toarray()
    # Reuse the vocabulary learned on the training set: transform (not
    # fit_transform) keeps the dev feature columns aligned with the model.
    dev_features = vectorizer.transform(dev_texts).toarray()

    model = GaussianNB().fit(train_features, encoded_labels)
    encoded_predictions = model.predict(dev_features)

    # Map the integer classes back to the labels found in the training file.
    return list(label_encoder.inverse_transform(encoded_predictions))


def main():
    """Run the full train / predict / write pipeline."""
    train_texts = read_lines(TRAIN_INPUT_PATH)
    train_labels = read_lines(TRAIN_LABELS_PATH)
    dev_texts = read_lines(DEV_INPUT_PATH)

    predictions = predict_labels(train_texts, train_labels, dev_texts)
    print(predictions)

    # Overwrite rather than append so reruns do not pile up stale output,
    # and write every prediction explicitly (no array abbreviation).
    with open(DEV_OUTPUT_PATH, "w", encoding="utf-8") as out_file:
        out_file.writelines(f"{label}\n" for label in predictions)


if __name__ == "__main__":
    main()