"""Train a TF-IDF + multinomial naive Bayes text classifier and write predictions.

Reads one document per line from ``train/in.tsv`` and one label per line from
``train/expected.tsv``, fits the pipeline, then writes one predicted label per
line to ``out.tsv`` in each of the ``dev-0`` and ``test-A`` directories.
"""
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import LabelEncoder

TRAIN_DIR = "train"
# Directories that contain an ``in.tsv`` to classify; ``out.tsv`` is written
# next to it.
EVAL_DIRS = ("dev-0", "test-A")


def _read_lines(path: str) -> list:
    """Return the lines of the text file at *path* without their newline.

    Only the line terminator is stripped, so empty lines are kept and the
    number of returned items equals the number of lines in the file.
    """
    # NOTE(review): UTF-8 is assumed for the data files; the original relied on
    # the platform default encoding -- confirm against the actual data.
    with open(path, encoding="utf-8") as f:
        return [line.rstrip("\n") for line in f]


def _predict_to_file(model, label_encoder, directory: str) -> None:
    """Classify ``<directory>/in.tsv`` and write labels to ``<directory>/out.tsv``.

    Args:
        model: Fitted pipeline whose ``predict`` returns encoded class indices.
        label_encoder: The ``LabelEncoder`` fitted on the training labels.
        directory: Directory holding ``in.tsv``; ``out.tsv`` is created there.
    """
    documents = _read_lines(f"{directory}/in.tsv")
    encoded = model.predict(documents)
    # Map class indices back to the original label strings so the output uses
    # the same label space as ``expected.tsv`` (indices only coincide with the
    # labels when those are exactly 0..k-1 in lexicographic order).
    labels = label_encoder.inverse_transform(encoded)
    np.savetxt(f"{directory}/out.tsv", labels, fmt="%s", encoding="utf-8")


def main() -> None:
    """Fit the classifier on the training set and predict every eval split."""
    x_train = _read_lines(f"{TRAIN_DIR}/in.tsv")
    # Newlines are stripped from labels so that a final line without a trailing
    # newline is not treated as a separate class.
    y_train = _read_lines(f"{TRAIN_DIR}/expected.tsv")

    label_encoder = LabelEncoder()
    y_encoded = label_encoder.fit_transform(y_train)

    pipeline = make_pipeline(TfidfVectorizer(), MultinomialNB())
    model = pipeline.fit(x_train, y_encoded)

    for directory in EVAL_DIRS:
        _predict_to_file(model, label_encoder, directory)


if __name__ == "__main__":
    main()