"""Train a TF-IDF + multinomial naive Bayes text classifier and predict dev labels.

Reads the training set from ``./train/train.tsv`` (column 0 is used as the
label, column 1 as the text), fits the model, predicts a label for every row
of ``./dev-0/in.tsv`` (column 0 is used as the text) and writes the
predictions, one per line, to ``./dev-0/out.tsv``.
"""

import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import GaussianNB, MultinomialNB

r_in = './train/train.tsv'
r_ind_ev = './dev-0/in.tsv'

# `error_bad_lines=False` was deprecated in pandas 1.3 and removed in 2.0.
# `on_bad_lines="warn"` (pandas >= 1.3) keeps the previous behaviour:
# malformed lines are dropped and a warning is emitted for each of them.
tsv_read = pd.read_table(r_in, on_bad_lines="warn", sep='\t', header=None)
# NOTE(review): any malformed line dropped from the dev file makes out.tsv
# shorter than in.tsv, so predictions would no longer line up row-for-row.
# Confirm this is acceptable for the evaluation tooling.
tsv_read_dev = pd.read_table(
    r_ind_ev, on_bad_lines="warn", sep='\t', header=None
)

y_train = tsv_read[0].values  # labels
X_train = tsv_read[1].values  # training documents
X_dev = tsv_read_dev[0].values  # documents to classify

# Learn the vocabulary and IDF weights on the training documents only; the
# dev documents are later projected into the same feature space.
vectorizer = TfidfVectorizer()
counts = vectorizer.fit_transform(X_train)

classifier = MultinomialNB()
classifier.fit(counts, y_train)

counts2 = vectorizer.transform(X_dev)
predictions = classifier.predict(counts2)

# A non-empty `sep` makes `tofile` write text: one prediction per line,
# with no trailing newline after the last one.
predictions.tofile("./dev-0/out.tsv", sep='\n')