Naive Bayes ready-made

This commit is contained in:
Łukasz Szymula 2020-03-29 21:57:33 +02:00
parent 34e7730db1
commit 754fd76874
4 changed files with 10476 additions and 10424 deletions

File diff suppressed because it is too large Load Diff

30
predict_rm.py Normal file
View File

@ -0,0 +1,30 @@
#!/usr/bin/python3
import pandas as pd
import csv
import pickle
def _write_predictions(directory, clf, vectorizer):
    """Predict a label for every document in ``<directory>/in.tsv``.

    The input is read as a header-less, tab-separated file whose two columns
    are named ``document`` and ``date``; only ``document`` is used. One
    predicted label per line is written to ``<directory>/out.tsv``.
    """
    data = pd.read_csv(
        directory + "/in.tsv",
        delimiter="\t",
        header=None,
        names=["document", "date"],
        quoting=csv.QUOTE_NONE,
    )
    features = vectorizer.transform(data["document"])
    labels = clf.predict(features)
    with open(directory + "/out.tsv", "w") as out:
        # str() keeps string labels unchanged and also copes with numeric
        # labels, which plain ``label + "\n"`` would reject with a TypeError.
        out.writelines(str(label) + "\n" for label in labels)


def predict():
    """Label the ``dev-0`` and ``test-A`` documents with the pickled model.

    Loads the classifier from ``clf.model`` and the vectorizer from
    ``vectorizer.model`` (both in the current working directory), then writes
    ``dev-0/out.tsv`` and ``test-A/out.tsv``.

    Raises:
        FileNotFoundError: if a model file or an input file is missing.
    """
    # NOTE: unpickling can execute arbitrary code; only load model files that
    # were produced by a trusted training run.
    with open("clf.model", "rb") as model_file:
        clf = pickle.load(model_file)
    with open("vectorizer.model", "rb") as vectorizer_file:
        vectorizer = pickle.load(vectorizer_file)

    for directory in ("dev-0", "test-A"):
        _write_predictions(directory, clf, vectorizer)
# Module-level call: the predictions are produced as soon as this file runs.
predict()

File diff suppressed because it is too large Load Diff

22
train_rm.py Normal file
View File

@ -0,0 +1,22 @@
#!/usr/bin/python3
import pandas as pd
import csv
import pickle
from sklearn.naive_bayes import MultinomialNB
from sklearn.feature_extraction.text import CountVectorizer
# NOTE(review): train() below builds its own local CountVectorizer, so this
# module-level instance is never the one that gets fitted or pickled.
vectorizer = CountVectorizer()
def train():
    """Fit a bag-of-words Multinomial Naive Bayes model and pickle it.

    Documents come from ``train/in.tsv`` (header-less, tab-separated, columns
    named ``document`` and ``date``; only ``document`` is used) and labels
    from ``train/expected.tsv`` (header-less). A ``CountVectorizer`` is fitted
    on the documents and a ``MultinomialNB`` on the resulting counts. The
    classifier is written to ``clf.model`` and the fitted vectorizer to
    ``vectorizer.model`` in the current working directory.

    Raises:
        FileNotFoundError: if either training file is missing.
    """
    data = pd.read_csv(
        "train/in.tsv",
        delimiter="\t",
        header=None,
        names=["document", "date"],
        quoting=csv.QUOTE_NONE,
    )
    # A single-column frame is squeezed to a 1-D Series so that fit() gets the
    # label shape it expects instead of an (n, 1) column vector.
    labels = pd.read_csv("train/expected.tsv", header=None).squeeze("columns")

    vectorizer = CountVectorizer()
    features = vectorizer.fit_transform(data["document"])
    clf = MultinomialNB().fit(features, labels)

    # Context managers make sure both model files are flushed and closed.
    with open("clf.model", "wb") as model_file:
        pickle.dump(clf, model_file)
    with open("vectorizer.model", "wb") as vectorizer_file:
        pickle.dump(vectorizer, vectorizer_file)
# Module-level call: training runs as soon as this file is executed.
train()