Compare commits
1 Commit
Author | SHA1 | Date | |
---|---|---|---|
|
ad6d4f7f28 |
5272
dev-0/out.tsv
Normal file
5272
dev-0/out.tsv
Normal file
File diff suppressed because it is too large
Load Diff
86
run.py
Normal file
86
run.py
Normal file
@ -0,0 +1,86 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
# coding: utf-8
|
||||||
|
|
||||||
|
# In[171]:
|
||||||
|
|
||||||
|
|
||||||
|
import pandas as pd
|
||||||
|
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
|
||||||
|
from sklearn.naive_bayes import MultinomialNB
|
||||||
|
|
||||||
|
|
||||||
|
# In[172]:
|
||||||
|
|
||||||
|
|
||||||
|
# Load the training documents: every line of train/in.tsv becomes one row of
# a single-column DataFrame whose column is named 'text'.
# The path uses a forward slash because "train\in.tsv" depends on an invalid
# escape sequence ("\i") and only resolves on Windows. The context manager
# closes the file as soon as the lines have been read.
with open("train/in.tsv", "r", encoding='utf-8') as f:
    lines = f.readlines()
x_train = pd.DataFrame(lines)
x_train.rename(columns = {0 : 'text'}, inplace = True)
|
||||||
|
|
||||||
|
|
||||||
|
# In[173]:
|
||||||
|
|
||||||
|
|
||||||
|
# Training labels, one per line, read into a single 'paranormal' column.
# Forward slash keeps the path portable ('train\expected.tsv' relied on the
# invalid escape "\e" and a Windows-only separator).
y_train = pd.read_csv('train/expected.tsv', sep='\t', names=['paranormal'], encoding='utf-8')
|
||||||
|
|
||||||
|
|
||||||
|
# In[174]:
|
||||||
|
|
||||||
|
|
||||||
|
# TF-IDF features over the training texts: terms appearing in more than 95%
# of documents are dropped and the vocabulary is capped at 500 terms.
tfidf_vectorizer = TfidfVectorizer(max_df=0.95, max_features=500)
# fit_transform already returns the transformed training matrix, so use it
# directly instead of discarding it and transforming the same corpus again.
x_train_prepared = tfidf_vectorizer.fit_transform(x_train['text'].values).toarray()
|
||||||
|
|
||||||
|
|
||||||
|
# In[175]:
|
||||||
|
|
||||||
|
|
||||||
|
# Train a multinomial Naive Bayes classifier on the TF-IDF training matrix.
# The single-column label frame is flattened to a 1-D array before fitting.
train_labels = y_train.values.ravel()
mnb = MultinomialNB()
model_mnb = mnb.fit(x_train_prepared, train_labels)
|
||||||
|
|
||||||
|
|
||||||
|
# In[176]:
|
||||||
|
|
||||||
|
|
||||||
|
# Load the dev-0 documents, one per line, into a single 'text' column.
# Forward slashes keep the path portable ("dev-0\in.tsv" only resolves on
# Windows), and the context manager closes the handle once the lines are read.
with open("dev-0/in.tsv", "r", encoding='utf-8') as f:
    lines = f.readlines()
x_dev = pd.DataFrame(lines)
x_dev.rename(columns = {0 : 'text'}, inplace = True)

# Vectorize with the already-fitted TF-IDF vectorizer (transform only, no refit).
x_dev_prepared = tfidf_vectorizer.transform(x_dev['text'].values).toarray()

# Gold labels for dev-0; loaded here but not consumed by the prediction below.
y_dev = pd.read_csv('dev-0/expected.tsv', sep='\t', names=['paranormal'], encoding='utf-8')

y_dev_pred = model_mnb.predict(x_dev_prepared)
|
||||||
|
|
||||||
|
|
||||||
|
# In[177]:
|
||||||
|
|
||||||
|
|
||||||
|
# Write the dev-0 predictions, one label per line.
# The with-block guarantees the file is closed even if a write fails, and the
# forward-slash path works on every OS ('dev-0\out.tsv' was Windows-only).
with open('dev-0/out.tsv', 'w') as file:
    file.writelines(f'{y}\n' for y in y_dev_pred)
|
||||||
|
|
||||||
|
|
||||||
|
# In[179]:
|
||||||
|
|
||||||
|
|
||||||
|
# Load the test-A documents, one per line, into a single 'text' column.
# Forward slash keeps the path portable ("test-A\in.tsv" only resolves on
# Windows), and the context manager closes the handle once the lines are read.
with open("test-A/in.tsv", "r", encoding='utf-8') as f:
    lines = f.readlines()
x_test = pd.DataFrame(lines)
x_test.rename(columns = {0 : 'text'}, inplace = True)

# Vectorize with the already-fitted TF-IDF vectorizer (transform only, no refit).
x_test_prepared = tfidf_vectorizer.transform(x_test['text'].values).toarray()

y_test_pred = model_mnb.predict(x_test_prepared)
|
||||||
|
|
||||||
|
|
||||||
|
# In[180]:
|
||||||
|
|
||||||
|
|
||||||
|
# Write the test-A predictions, one label per line.
# The with-block guarantees the file is closed even if a write fails, and the
# forward-slash path works on every OS ('test-A\out.tsv' was Windows-only).
with open('test-A/out.tsv', 'w') as file:
    file.writelines(f'{y}\n' for y in y_test_pred)
|
||||||
|
|
5152
test-A/out.tsv
Normal file
5152
test-A/out.tsv
Normal file
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user