"""Train a TF-IDF + multinomial naive Bayes text classifier and write predictions.

Reads the tab-separated, header-less training file ``train/train.tsv``
(column 0 is used as the label, column 1 as the text), fits the model, and
writes one prediction per row for ``dev-0/in.tsv`` and ``test-A/in.tsv`` to
the matching ``out.tsv`` files.
"""
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import make_pipeline

# `error_bad_lines=False` was deprecated in pandas 1.3 and removed in 2.0.
# `on_bad_lines="warn"` is its replacement and keeps the old behaviour:
# malformed rows are dropped and a warning is emitted for each one.
df = pd.read_csv("train/train.tsv", on_bad_lines="warn", header=None, sep="\t")
dev0 = pd.read_csv("dev-0/in.tsv", header=None, sep="\t")
testA = pd.read_csv("test-A/in.tsv", header=None, sep="\t")

# The evaluation inputs are taken from the first column of each file.
dev0X = dev0.iloc[:, 0].tolist()
testAX = testA.iloc[:, 0].tolist()

# Training file layout: column 0 supplies the targets, column 1 the documents.
Y = df.iloc[:, 0].tolist()
X = df.iloc[:, 1].tolist()

# The pipeline vectorises the raw strings with TF-IDF and feeds the resulting
# features to the naive Bayes classifier, for both fitting and prediction.
model = make_pipeline(TfidfVectorizer(), MultinomialNB())
model.fit(X, Y)

dev0_predicted = model.predict(dev0X)
testA_predicted = model.predict(testAX)

# One prediction per line, with no index column and no header row.
pd.Series(dev0_predicted).to_csv("dev-0/out.tsv", sep="\t", index=False, header=False)
pd.Series(testA_predicted).to_csv("test-A/out.tsv", sep="\t", index=False, header=False)