"""Train a TF-IDF + multinomial naive Bayes text classifier and write predictions.

Reads labelled examples from ``train/train.tsv`` (label in column 0, text in
column 1), fits the pipeline, and writes one predicted label per line to
``dev-0/out.tsv`` and ``test-A/out.tsv`` for the inputs found in the matching
``in.tsv`` files.
"""

import os
from gzip import open as open_gz

import pandas as pd
import sklearn
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import accuracy_score
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import make_pipeline


def evaluation(x, path_out, model):
    """Predict labels for *x* with *model* and write them to *path_out*.

    Args:
        x: Samples handed unchanged to ``model.predict``.
        path_out: Path of the text file to create or overwrite; it receives
            one prediction per line.
        model: Fitted estimator exposing a ``predict`` method.
    """
    results = model.predict(x)
    with open(path_out, "wt", encoding="utf-8") as file:
        file.writelines(f"{r}\n" for r in results)


def _read_first_column(path):
    """Return column 0 of the header-less, tab-separated file at *path*."""
    # The separator must be the tab escape "\t". The previous value "/t" was
    # interpreted by the python engine as a regex for the literal characters
    # "/t", so lines containing that sequence were split and truncated.
    frame = pd.read_csv(path, header=None, sep="\t", engine="python")
    return frame[0]


def main():
    """Fit the classifier on the training set and write dev/test predictions."""
    # ``error_bad_lines=False`` was removed in pandas 2.0; ``on_bad_lines="warn"``
    # (pandas >= 1.3) is its replacement and likewise skips malformed rows
    # while reporting them.
    train = pd.read_csv(
        "train/train.tsv",
        header=None,
        sep="\t",
        on_bad_lines="warn",
    )
    x_train = train[1]
    y_train = train[0]

    x_dev = _read_first_column("dev-0/in.tsv")
    x_test = _read_first_column("test-A/in.tsv")

    model = make_pipeline(TfidfVectorizer(), MultinomialNB())
    model.fit(x_train, y_train)

    evaluation(x_dev, "dev-0/out.tsv", model)
    evaluation(x_test, "test-A/out.tsv", model)


if __name__ == "__main__":
    main()