77 lines
1.3 KiB
Python
77 lines
1.3 KiB
Python
#!/usr/bin/env python
|
|
# coding: utf-8
|
|
|
|
# In[46]:
|
|
|
|
|
|
import sklearn
|
|
from sklearn.pipeline import make_pipeline
|
|
from sklearn.feature_extraction.text import TfidfVectorizer
|
|
import numpy as np
|
|
from sklearn.naive_bayes import MultinomialNB
|
|
from sklearn.preprocessing import LabelEncoder
|
|
|
|
|
|
# In[47]:
|
|
|
|
|
|
def getInput(path, *, encoding='utf-8'):
    """Read a text file and return its lines.

    Args:
        path: Location of the file to read.
        encoding: Text encoding used to decode the file. Defaults to
            UTF-8, which is what every existing caller relies on.

    Returns:
        A list with one string per line of the file. Line terminators are
        kept, exactly as ``readlines()`` returns them; an empty file
        yields an empty list.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the content is not valid in ``encoding``.
    """
    # The context manager closes the handle even if decoding fails midway.
    with open(path, encoding=encoding) as f:
        return f.readlines()
|
|
|
|
|
|
# In[48]:
|
|
|
|
|
|
# Show the current working directory so the relative data paths used later in
# this script can be checked. `get_ipython` only exists inside an
# IPython/Jupyter session; when the exported script is run with plain Python
# the lookup raises NameError, so fall back to the standard library instead
# of aborting before any work is done.
try:
    _ipython_shell = get_ipython()
except NameError:
    _ipython_shell = None

if _ipython_shell is not None:
    _ipython_shell.system('pwd')
else:
    import os

    print(os.getcwd())
|
|
|
|
|
|
# In[49]:
|
|
|
|
|
|
# Read the raw lines of every dataset file with getInput. The files are read
# in the order listed, and each result is bound to the name in the matching
# position on the left-hand side.
train_in, train_expected, test_in, dev_in, dev_expected = map(
    getInput,
    (
        './train/in.tsv',
        './train/expected.tsv',
        './test-A/in.tsv',
        './dev-0/in.tsv',
        './dev-0/expected.tsv',
    ),
)
|
|
|
|
|
|
# In[50]:
|
|
|
|
|
|
# TF-IDF features feeding a multinomial Naive Bayes classifier.
pipeline = make_pipeline(TfidfVectorizer(), MultinomialNB())

# Keep the encoder instance: it is the only way to map the integer class
# indices produced by the model back to the labels found in the training
# file. Line terminators are removed first so that the same label with and
# without a trailing newline (e.g. the last line of the file) is one class,
# and so that decoded predictions can be written out one per line.
label_encoder = LabelEncoder()
encTransform = label_encoder.fit_transform(
    [label.rstrip('\n') for label in train_expected]
)

model = pipeline.fit(train_in, encTransform)

# Decode predictions to the original label values rather than exposing the
# encoder's internal indices. When the labels are already 0..n-1 the decoded
# values print exactly the same as the indices did.
dev_predicted = label_encoder.inverse_transform(model.predict(dev_in))
test_predicted = label_encoder.inverse_transform(model.predict(test_in))
|
|
|
|
|
|
# In[ ]:
|
|
|
|
|
|
|
|
|
|
|
|
# In[54]:
|
|
|
|
|
|
# Save the predictions for each evaluated split, one value per line. The
# dev-0 file is written and closed before the test-A file is opened.
for destination, predictions in (
    ('./dev-0/out.tsv', dev_predicted),
    ('./test-A/out.tsv', test_predicted),
):
    with open(destination, "w") as sink:
        sink.writelines(str(value) + '\n' for value in predictions)
|
|
|
|
|
|
# In[55]:
|
|
|
|
|
|
# Re-export the notebook to a script. This only makes sense from inside a
# running IPython/Jupyter session, where `get_ipython` is defined. When the
# exported script itself is executed with plain Python the step is skipped,
# instead of ending an otherwise successful run with NameError.
try:
    _ipython_shell = get_ipython()
except NameError:
    _ipython_shell = None

if _ipython_shell is not None:
    _ipython_shell.system('jupyter nbconvert --to script Naiwny_bayes.ipynb')
|
|
|
|
|
|
# In[ ]:
|
|
|
|
|
|
|
|
|