paranormal-or-skeptic/train_rm.py
2020-03-29 21:57:33 +02:00

23 lines
687 B
Python

#!/usr/bin/python3
import pandas as pd
import csv
import pickle
from sklearn.naive_bayes import MultinomialNB
from sklearn.feature_extraction.text import CountVectorizer
# Module-level CountVectorizer instance. Note that train() creates and fits
# its own local vectorizer, so this instance is not the one that gets fitted
# or pickled by train().
vectorizer = CountVectorizer()
def train():
    """Fit a bag-of-words Naive Bayes classifier and pickle it to disk.

    Reads documents from ``train/in.tsv`` (tab-separated, no header row; the
    two columns are named ``document`` and ``date``) and labels from
    ``train/expected.tsv`` (no header row). A ``CountVectorizer`` is fitted on
    the documents, a ``MultinomialNB`` is fitted on the resulting counts, and
    both fitted objects are pickled: the classifier to ``clf.model`` and the
    vectorizer to ``vectorizer.model``.

    Returns:
        None. The results are the two pickle files, written to paths relative
        to the current working directory.
    """
    # QUOTE_NONE: quote characters inside the text are ordinary data, not
    # field delimiters.
    train_df = pd.read_csv(
        "train/in.tsv",
        delimiter="\t",
        header=None,
        names=["document", "date"],
        quoting=csv.QUOTE_NONE,
    )
    # pandas parses empty/NA-like fields as NaN, which CountVectorizer cannot
    # tokenize; treat such rows as empty documents instead of crashing.
    documents = train_df["document"].fillna("")

    labels = pd.read_csv("train/expected.tsv", header=None)
    # Flatten the single-column frame to a 1-D array: the classifier expects
    # 1-D targets and otherwise warns about a column vector.
    y = labels.values.ravel()

    # Local name chosen so it does not shadow the module-level `vectorizer`.
    count_vectorizer = CountVectorizer()
    x = count_vectorizer.fit_transform(documents)
    clf = MultinomialNB().fit(x, y)

    # Context managers guarantee the files are flushed and closed.
    with open("clf.model", "wb") as clf_file:
        pickle.dump(clf, clf_file)
    with open("vectorizer.model", "wb") as vectorizer_file:
        pickle.dump(count_vectorizer, vectorizer_file)
# Run training as soon as this module is executed; because the call is at
# module level, it also runs if the module is imported.
train()