Naive Bayes ready-made
This commit is contained in:
parent
34e7730db1
commit
754fd76874
1272
dev-0/out.tsv
1272
dev-0/out.tsv
File diff suppressed because it is too large
Load Diff
30
predict_rm.py
Normal file
30
predict_rm.py
Normal file
@ -0,0 +1,30 @@
|
||||
#!/usr/bin/python3
|
||||
|
||||
import pandas as pd
|
||||
import csv
|
||||
import pickle
|
||||
|
||||
def predict():
    """Predict labels for the dev-0 and test-A sets with the pickled model.

    Reads ``dev-0/in.tsv`` and ``test-A/in.tsv`` (tab-separated, no header
    row, columns named ``document`` and ``date``), loads the classifier from
    ``clf.model`` and the vectorizer from ``vectorizer.model``, and writes one
    predicted label per line to ``dev-0/out.tsv`` and ``test-A/out.tsv``.
    All paths are relative to the current working directory.
    """
    columns = ["document", "date"]
    dev0 = pd.read_csv("dev-0/in.tsv", delimiter="\t", header=None, names=columns, quoting=csv.QUOTE_NONE)
    testA = pd.read_csv("test-A/in.tsv", delimiter="\t", header=None, names=columns, quoting=csv.QUOTE_NONE)

    # Bug fix: the dev documents live in `dev0`; the previous code indexed an
    # undefined name `dev`, which raised NameError before anything was predicted.
    devdoc = dev0["document"]
    testdoc = testA["document"]

    # NOTE(security): pickle.load can execute arbitrary code; only load model
    # files that were produced by a trusted training run.
    # Context managers make sure the model files are closed after loading.
    with open("clf.model", "rb") as clf_file:
        clf = pickle.load(clf_file)
    with open("vectorizer.model", "rb") as vectorizer_file:
        vectorizer = pickle.load(vectorizer_file)

    # Only transform here: the vectorizer was already fitted when it was pickled.
    dev0_vectorizer = vectorizer.transform(devdoc)
    testA_vectorizer = vectorizer.transform(testdoc)

    y_dev = clf.predict(dev0_vectorizer)
    y_test = clf.predict(testA_vectorizer)

    # str() keeps the output identical for string labels and also supports
    # numeric labels, for which `label + "\n"` would raise TypeError.
    with open("dev-0/out.tsv", "w") as devout:
        devout.writelines(str(label) + "\n" for label in y_dev)

    with open("test-A/out.tsv", "w") as testaout:
        testaout.writelines(str(label) + "\n" for label in y_test)
|
||||
|
||||
if __name__ == "__main__":
    # Run the prediction only when executed as a script, so that importing
    # this module does not read or write any files as a side effect.
    predict()
|
1198
test-A/out.tsv
1198
test-A/out.tsv
File diff suppressed because it is too large
Load Diff
22
train_rm.py
Normal file
22
train_rm.py
Normal file
@ -0,0 +1,22 @@
|
||||
#!/usr/bin/python3
|
||||
|
||||
import pandas as pd
|
||||
import csv
|
||||
import pickle
|
||||
from sklearn.naive_bayes import MultinomialNB
|
||||
from sklearn.feature_extraction.text import CountVectorizer
|
||||
# NOTE(review): train() creates its own local CountVectorizer, so this
# module-level instance is never fitted or pickled by the code visible here.
vectorizer = CountVectorizer()
|
||||
|
||||
def train():
    """Fit a bag-of-words Multinomial Naive Bayes model and pickle it.

    Reads the training documents from ``train/in.tsv`` (tab-separated, no
    header row, columns named ``document`` and ``date``) and the labels from
    ``train/expected.tsv`` (no header row). Fits a ``CountVectorizer`` on the
    documents and a ``MultinomialNB`` on the resulting counts, then writes the
    classifier to ``clf.model`` and the fitted vectorizer to
    ``vectorizer.model``. All paths are relative to the current working
    directory.
    """
    # Named `train_frame` so the local does not shadow this function's name.
    train_frame = pd.read_csv("train/in.tsv", delimiter="\t", header=None, names=["document","date"], quoting=csv.QUOTE_NONE)
    document = train_frame["document"]

    expected = pd.read_csv("train/expected.tsv", header=None)
    # scikit-learn expects a 1-D label array; passing the one-column DataFrame
    # directly relies on an implicit conversion that emits a
    # DataConversionWarning. Flatten it explicitly instead.
    y = expected.values.ravel()

    vectorizer = CountVectorizer()
    x = vectorizer.fit_transform(document)
    clf = MultinomialNB().fit(x, y)

    # Context managers guarantee the model files are flushed and closed.
    with open("clf.model", "wb") as clf_file:
        pickle.dump(clf, clf_file)
    with open("vectorizer.model", "wb") as vectorizer_file:
        pickle.dump(vectorizer, vectorizer_file)
|
||||
|
||||
if __name__ == "__main__":
    # Run the training only when executed as a script, so that importing
    # this module does not read data or overwrite the model files.
    train()
|
||||
|
Loading…
Reference in New Issue
Block a user