"""Train a TF-IDF + multinomial Naive Bayes classifier and write predictions.

Reads ``train/train.tsv`` (column 0 is used as the label, column 1 as the
text), fits the model, then predicts a label for column 0 of
``dev-0/in.tsv`` and ``test-A/in.tsv`` and writes one prediction per line to
the matching ``out.tsv`` file.
"""
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import make_pipeline
from sklearn.metrics import accuracy_score

TRAIN_PATH = "train/train.tsv"
# (input file, output file) pairs to generate predictions for.
PREDICTION_TARGETS = (
    ("dev-0/in.tsv", "dev-0/out.tsv"),
    ("test-A/in.tsv", "test-A/out.tsv"),
)


def _read_tsv(path):
    """Load a header-less, tab-separated file, skipping malformed rows.

    ``error_bad_lines`` was deprecated in pandas 1.3 and removed in 2.0, where
    passing it raises ``TypeError``.  ``on_bad_lines="warn"`` is the current
    spelling: bad rows are skipped and a warning is emitted for each, so that
    dropped rows do not go unnoticed.
    """
    try:
        return pd.read_csv(path, sep="\t", header=None, on_bad_lines="warn")
    except TypeError:
        # pandas < 1.3 does not know ``on_bad_lines``; use the legacy keyword.
        return pd.read_csv(path, sep="\t", header=None, error_bad_lines=False)


def _as_text(column):
    """Return *column* as strings, with missing values replaced by ``""``.

    TfidfVectorizer calls string methods on every document, so a NaN (empty
    cell) or a cell pandas parsed as a number would otherwise raise.
    """
    return column.fillna("").astype(str)


def _write_predictions(path, predictions):
    """Write each prediction on its own line to *path*."""
    with open(path, "wt") as f:
        f.writelines(f"{pred}\n" for pred in predictions)


def main():
    """Fit the classifier on the training set and write all prediction files."""
    train = _read_tsv(TRAIN_PATH)
    texts = _as_text(train[1])
    labels = train[0]

    model = make_pipeline(TfidfVectorizer(), MultinomialNB())
    model.fit(texts, labels)

    for in_path, out_path in PREDICTION_TARGETS:
        inputs = _read_tsv(in_path)
        _write_predictions(out_path, model.predict(_as_text(inputs[0])))


if __name__ == "__main__":
    main()