paranormal-or-skeptic-ISI-p.../runNB.py
2022-05-25 22:54:49 +02:00

25 lines
1008 B
Python

#!/usr/bin/env python
# coding: utf-8
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score
from sklearn.feature_extraction.text import CountVectorizer
import lzma
def _read_xz_lines(path):
    """Return every line of the xz-compressed UTF-8 text file at *path*."""
    with lzma.open(path, mode='rt', encoding='utf-8') as handle:
        return handle.readlines()


def _read_lines(path):
    """Return every line of the plain-text file at *path*."""
    with open(path, mode='r', encoding='utf-8') as handle:
        return handle.readlines()


def _write_lines(path, lines):
    """Write *lines* to *path* and close the file so the data is flushed."""
    with open(path, mode='w', encoding='utf-8') as handle:
        handle.writelines(lines)


# Load the raw data. readlines() keeps the trailing line terminator on every
# entry, so the labels in y_train are strings such as "<label>\n".
X_train = _read_xz_lines("train/in.tsv.xz")
y_train = _read_lines('train/expected.tsv')
X_dev0 = _read_xz_lines("dev-0/in.tsv.xz")
# Gold dev-0 labels: loaded but not used by the statements visible here; kept
# so the module-level name stays available.
y_expected_dev0 = _read_lines("dev-0/expected.tsv")
X_test = _read_xz_lines("test-A/in.tsv.xz")

# Bag-of-words features: the vocabulary is fitted on the training lines only
# and then reused unchanged for the dev-0 and test-A lines.
count_vect = CountVectorizer()
X_train_counts = count_vect.fit_transform(X_train)
X_dev0_counts = count_vect.transform(X_dev0)
X_test_counts = count_vect.transform(X_test)

# Train a multinomial Naive Bayes classifier and predict both evaluation sets.
clf = MultinomialNB().fit(X_train_counts, y_train)
y_predicted_dev0_MNB = clf.predict(X_dev0_counts)
y_predicted_test_MNB = clf.predict(X_test_counts)

# Predicted labels are taken from y_train and therefore already end with a
# line terminator, so writelines() emits one prediction per line.
_write_lines("dev-0/out.tsv", y_predicted_dev0_MNB)
_write_lines("test-A/out.tsv", y_predicted_test_MNB)