#!/usr/bin/env python
# coding: utf-8
"""Train a TF-IDF + multinomial naive Bayes text classifier and write predictions.

The script reads ``./train/in.tsv`` and ``./train/expected.tsv``, fits the
pipeline, predicts labels for ``./dev-0/in.tsv`` and ``./test-A/in.tsv``, and
writes one predicted label per line to ``out.tsv`` in each of those two
directories.  Each line of an input file is treated as one document.
"""

import numpy as np
import sklearn
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import LabelEncoder


def getInput(path):
    """Return all lines of the UTF-8 text file at *path*.

    Line endings are kept, exactly as ``file.readlines()`` returns them.

    Raises:
        OSError: if the file cannot be opened.
    """
    with open(path, encoding='utf-8') as f:
        return f.readlines()


def _run_in_ipython(command):
    """Run *command* through IPython's shell escape; do nothing outside IPython.

    ``get_ipython`` only exists when the code runs inside IPython/Jupyter.
    Calling it unguarded makes the exported script die with ``NameError``
    under a plain ``python`` interpreter.
    """
    try:
        shell = get_ipython()
    except NameError:
        return
    shell.system(command)


def _write_predictions(path, labels):
    """Write each item of *labels* on its own line to the UTF-8 file *path*."""
    with open(path, "w", encoding='utf-8') as result:
        result.writelines(str(label) + '\n' for label in labels)


_run_in_ipython('pwd')

train_in = getInput('./train/in.tsv')
train_expected = getInput('./train/expected.tsv')
test_in = getInput('./test-A/in.tsv')
dev_in = getInput('./dev-0/in.tsv')
dev_expected = getInput('./dev-0/expected.tsv')

# Strip the line terminator so that "x\n" and a final unterminated "x" are the
# same class, and so that decoded predictions can be written back verbatim.
train_labels = [label.rstrip('\n') for label in train_expected]

pipeline = make_pipeline(TfidfVectorizer(), MultinomialNB())

# Keep the fitted encoder: the classifier predicts encoder indices, which must
# be mapped back to the original labels before they are written out.
label_encoder = LabelEncoder()
encTransform = label_encoder.fit_transform(train_labels)

model = pipeline.fit(train_in, encTransform)
dev_predicted = label_encoder.inverse_transform(model.predict(dev_in))
test_predicted = label_encoder.inverse_transform(model.predict(test_in))

_write_predictions('./dev-0/out.tsv', dev_predicted)
_write_predictions('./test-A/out.tsv', test_predicted)

# Notebook housekeeping: re-export the notebook as a script (IPython only).
_run_in_ipython('jupyter nbconvert --to script Naiwny_bayes.ipynb')