Naive-Bayes using library
This commit is contained in:
parent
756ef4277a
commit
a489085007
3
.gitignore
vendored
3
.gitignore
vendored
@ -6,3 +6,6 @@
|
||||
*.o
|
||||
.DS_Store
|
||||
.token
|
||||
|
||||
env/
|
||||
geval/
|
33
bayes.py
Normal file
33
bayes.py
Normal file
@ -0,0 +1,33 @@
|
||||
from sklearn.naive_bayes import MultinomialNB
|
||||
from sklearn.feature_extraction.text import TfidfVectorizer
|
||||
from sklearn.preprocessing import LabelEncoder
|
||||
from sklearn.pipeline import Pipeline
|
||||
|
||||
# Train a TF-IDF + multinomial Naive Bayes text classifier on the training
# split and write one predicted label per line to out.tsv for each split.


def _read_lines(path, strip_newline=False):
    """Return the lines of the text file at *path* as a list of strings.

    When *strip_newline* is true the trailing newline is removed from every
    line; otherwise lines are returned exactly as ``readlines()`` yields them.
    """
    # NOTE(review): assumes the TSV files are UTF-8 encoded -- confirm.
    with open(path, encoding='utf-8') as f:
        if strip_newline:
            return [line.rstrip('\n') for line in f]
        return f.readlines()


data_train_X = _read_lines('train/in.tsv')
# Labels are stripped so that a final line lacking a trailing newline is not
# treated as a different class from the same label followed by '\n'.
data_train_Y = _read_lines('train/expected.tsv', strip_newline=True)
data_dev_X = _read_lines('dev-0/in.tsv')
data_test_X = _read_lines('test-A/in.tsv')

# Keep the fitted encoder: it is needed later to map the integer codes the
# classifier predicts back to the original label strings.
label_encoder = LabelEncoder()
data_train_Y = label_encoder.fit_transform(data_train_Y)
model = Pipeline(steps=[('tfidf', TfidfVectorizer()), ('bayes', MultinomialNB())])

clf = model.fit(data_train_X, data_train_Y)

for out_path, features in (
    ('train/out.tsv', data_train_X),
    ('dev-0/out.tsv', data_dev_X),
    ('test-A/out.tsv', data_test_X),
):
    # Predict before opening the output file so that a failure in predict()
    # does not leave a truncated out.tsv behind.
    predictions = label_encoder.inverse_transform(clf.predict(features))
    with open(out_path, 'w', encoding='utf-8') as writer:
        writer.writelines(f'{label}\n' for label in predictions)
|
5272
dev-0/in.tsv
Normal file
5272
dev-0/in.tsv
Normal file
File diff suppressed because one or more lines are too long
BIN
dev-0/in.tsv.xz
BIN
dev-0/in.tsv.xz
Binary file not shown.
5272
dev-0/out.tsv
Normal file
5272
dev-0/out.tsv
Normal file
File diff suppressed because it is too large
Load Diff
10
requirements.txt
Normal file
10
requirements.txt
Normal file
@ -0,0 +1,10 @@
|
||||
joblib==1.0.1
|
||||
numpy==1.20.2
|
||||
pandas==1.2.4
|
||||
python-dateutil==2.8.1
|
||||
pytz==2021.1
|
||||
scikit-learn==0.24.2
|
||||
scipy==1.6.3
|
||||
six==1.16.0
|
||||
sklearn==0.0
|
||||
threadpoolctl==2.1.0
|
5152
test-A/in.tsv
Normal file
5152
test-A/in.tsv
Normal file
File diff suppressed because one or more lines are too long
BIN
test-A/in.tsv.xz
BIN
test-A/in.tsv.xz
Binary file not shown.
5152
test-A/out.tsv
Normal file
5152
test-A/out.tsv
Normal file
File diff suppressed because it is too large
Load Diff
289579
train/in.tsv
Normal file
289579
train/in.tsv
Normal file
File diff suppressed because one or more lines are too long
BIN
train/in.tsv.xz
BIN
train/in.tsv.xz
Binary file not shown.
289579
train/out.tsv
Normal file
289579
train/out.tsv
Normal file
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user