# paranormal-or-skeptic-isi-p.../main.py
# 2021-09-30 18:56:11 +02:00
#
# 31 lines
# 1.1 KiB
# Python

from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
import pandas as pd
def _read_documents(path):
    """Return the first tab-separated field of every line in *path*.

    The file is read strictly line by line, without any quote handling, so
    the result always contains exactly one document per physical line.
    ``pd.read_csv`` with its default quoting treats ``"`` as a quote
    character and can merge several physical lines into a single row when a
    text contains an unbalanced quote, which silently shifts every later
    document relative to its label / output line.
    """
    # newline='\n' keeps stray '\r' characters inside a text from being
    # interpreted as additional line breaks.
    with open(path, encoding='utf-8', newline='\n') as handle:
        return [line.rstrip('\n').split('\t', 1)[0] for line in handle]


def _write_predictions(predictions, path):
    """Write *predictions* to *path*, one value per line, without a header."""
    pd.DataFrame(predictions).to_csv(path, header=False, sep='\t', index=False)


# --- Load data ------------------------------------------------------------
train_X = _read_documents('./train/in.tsv')
train_Y = pd.read_csv('./train/expected.tsv', sep='\t', header=None)[0]

# NOTE(review): the previous version truncated the labels to a hard-coded
# 289541 rows, presumably to paper over rows lost while parsing in.tsv.
# Truncating the tail cannot restore the text/label pairing, so a mismatch is
# now reported instead of being hidden.
if len(train_X) != len(train_Y):
    raise ValueError(
        f'train/in.tsv has {len(train_X)} lines but train/expected.tsv has '
        f'{len(train_Y)} labels; inputs and labels must align one-to-one'
    )

test_X_dev0 = _read_documents('./dev-0/in.tsv')
test_X_A = _read_documents('./test-A/in.tsv')

# --- Vectorize ------------------------------------------------------------
# The vocabulary is learned from the training texts only; the evaluation
# sets are projected onto that same vocabulary.
vectorizer = CountVectorizer()
features_train = vectorizer.fit_transform(train_X)
features_test_dev0 = vectorizer.transform(test_X_dev0)
features_test_testA = vectorizer.transform(test_X_A)

# --- Train and predict ----------------------------------------------------
model = MultinomialNB()
model.fit(features_train, train_Y)
y_dev0_pred = model.predict(features_test_dev0)
y_testA_pred = model.predict(features_test_testA)

# --- Save predictions (one line per input line) -----------------------------
_write_predictions(y_dev0_pred, './dev-0/out.tsv')
_write_predictions(y_testA_pred, './test-A/out.tsv')