"""Train a TF-IDF + multinomial naive Bayes classifier and write predictions.

The script fits the model on ``train/train.tsv`` (column 0 holds the labels,
column 1 the texts) and writes one predicted label per line to
``dev-0/out.tsv`` and ``test-A/out.tsv`` for the texts found in the first
column of the matching ``in.tsv`` files.
"""
import os
from gzip import open as open_gz

import pandas as pd
import sklearn
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import accuracy_score
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import make_pipeline


def evaluation(x, path_out, model):
    """Predict labels for ``x`` and write them to ``path_out``, one per line.

    Args:
        x: Inputs passed unchanged to ``model.predict``.
        path_out: Path of the text file to create or overwrite.
        model: Any object exposing a ``predict(x)`` method that returns an
            iterable of predictions.
    """
    results = model.predict(x)

    with open(path_out, 'wt') as file:
        # A single writelines call replaces one write() call per prediction.
        file.writelines(str(r) + '\n' for r in results)


def main():
    """Fit the pipeline on the training set and predict the dev and test sets."""
    # ``error_bad_lines=False`` was removed in pandas 2.0; ``on_bad_lines='warn'``
    # is its replacement (malformed rows are skipped and reported).
    train = pd.read_csv(
        'train/train.tsv', header=None, sep='\t', on_bad_lines='warn'
    )
    x_train = train[1]
    y_train = train[0]

    # The separator used to be the literal two-character string '/t', which
    # never split on tabs; the files are tab-separated, so use '\t'. The slower
    # python engine was only required for that multi-character separator.
    x_dev = pd.read_csv('dev-0/in.tsv', header=None, sep='\t')[0]
    x_test = pd.read_csv('test-A/in.tsv', header=None, sep='\t')[0]

    model = make_pipeline(TfidfVectorizer(), MultinomialNB())
    model.fit(x_train, y_train)

    evaluation(x_dev, 'dev-0/out.tsv', model)
    evaluation(x_test, 'test-A/out.tsv', model)


if __name__ == '__main__':
    main()