Naive-Bayes using library
This commit is contained in:
parent
756ef4277a
commit
a489085007
3
.gitignore
vendored
3
.gitignore
vendored
@ -6,3 +6,6 @@
|
|||||||
*.o
|
*.o
|
||||||
.DS_Store
|
.DS_Store
|
||||||
.token
|
.token
|
||||||
|
|
||||||
|
env/
|
||||||
|
geval/
|
33
bayes.py
Normal file
33
bayes.py
Normal file
@ -0,0 +1,33 @@
|
|||||||
|
from sklearn.naive_bayes import MultinomialNB
|
||||||
|
from sklearn.feature_extraction.text import TfidfVectorizer
|
||||||
|
from sklearn.preprocessing import LabelEncoder
|
||||||
|
from sklearn.pipeline import Pipeline
|
||||||
|
|
||||||
|
# Dataset splits, each a directory holding an `in.tsv` with one document per
# line. Predictions for every split are written to `<split>/out.tsv`.
SPLITS = ('train', 'dev-0', 'test-A')


def read_lines(path):
    """Return the lines of the UTF-8 text file at *path*, without newlines.

    Blank lines are kept (as empty strings) so that line N of the result
    always corresponds to line N of the file.
    """
    # Explicit encoding: the platform default differs between systems.
    with open(path, encoding='utf-8') as f:
        return [line.rstrip('\n') for line in f]


def write_lines(path, values):
    """Write each item of *values* to *path* on its own line (UTF-8)."""
    with open(path, 'w', encoding='utf-8') as writer:
        writer.writelines(f'{value}\n' for value in values)


def main():
    """Train TF-IDF + multinomial Naive Bayes and write predictions per split."""
    # Read every input up front so a missing file fails before training starts.
    inputs = {split: read_lines(f'{split}/in.tsv') for split in SPLITS}

    # Labels are stripped of their line ending by read_lines; otherwise a final
    # line lacking a newline ("1" vs "1\n") would be treated as its own class.
    train_labels = read_lines('train/expected.tsv')

    # Keep the fitted encoder so predictions can be mapped back to the
    # original label text instead of writing the encoder's class indices.
    encoder = LabelEncoder()
    train_targets = encoder.fit_transform(train_labels)

    model = Pipeline(steps=[('tfidf', TfidfVectorizer()), ('bayes', MultinomialNB())])
    clf = model.fit(inputs['train'], train_targets)

    for split in SPLITS:
        predicted = encoder.inverse_transform(clf.predict(inputs[split]))
        write_lines(f'{split}/out.tsv', predicted)


if __name__ == '__main__':
    main()
|
5272
dev-0/in.tsv
Normal file
5272
dev-0/in.tsv
Normal file
File diff suppressed because one or more lines are too long
BIN
dev-0/in.tsv.xz
BIN
dev-0/in.tsv.xz
Binary file not shown.
5272
dev-0/out.tsv
Normal file
5272
dev-0/out.tsv
Normal file
File diff suppressed because it is too large
Load Diff
10
requirements.txt
Normal file
10
requirements.txt
Normal file
@ -0,0 +1,10 @@
|
|||||||
|
joblib==1.0.1
|
||||||
|
numpy==1.20.2
|
||||||
|
pandas==1.2.4
|
||||||
|
python-dateutil==2.8.1
|
||||||
|
pytz==2021.1
|
||||||
|
scikit-learn==0.24.2
|
||||||
|
scipy==1.6.3
|
||||||
|
six==1.16.0
|
||||||
|
sklearn==0.0
|
||||||
|
threadpoolctl==2.1.0
|
5152
test-A/in.tsv
Normal file
5152
test-A/in.tsv
Normal file
File diff suppressed because one or more lines are too long
BIN
test-A/in.tsv.xz
BIN
test-A/in.tsv.xz
Binary file not shown.
5152
test-A/out.tsv
Normal file
5152
test-A/out.tsv
Normal file
File diff suppressed because it is too large
Load Diff
289579
train/in.tsv
Normal file
289579
train/in.tsv
Normal file
File diff suppressed because one or more lines are too long
BIN
train/in.tsv.xz
BIN
train/in.tsv.xz
Binary file not shown.
289579
train/out.tsv
Normal file
289579
train/out.tsv
Normal file
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user