This commit is contained in:
s434732 2021-05-11 17:41:32 +02:00
parent 9cb2fb2612
commit 8dc7739563
4 changed files with 103611 additions and 0 deletions

5452
dev-0/out.tsv Normal file

File diff suppressed because it is too large Load Diff

BIN
geval Normal file

Binary file not shown.

27
naiwny bayes2.py Normal file
View File

@ -0,0 +1,27 @@
from sklearn.naive_bayes import GaussianNB
import pandas as pd
from sklearn.naive_bayes import MultinomialNB
from sklearn.feature_extraction.text import TfidfVectorizer
# Train a TF-IDF + multinomial naive Bayes text classifier on the training
# TSV and write one predicted label per line of the dev input file.
r_in = './train/train.tsv'
r_ind_ev = './dev-0/in.tsv'
r_out = './dev-0/out.tsv'

# Training data: column 0 is used as the label, column 1 as the text.
# `on_bad_lines='skip'` (pandas >= 1.3) replaces `error_bad_lines=False`,
# which was deprecated in pandas 1.3 and removed in pandas 2.0.  Skipping
# malformed rows is tolerable here because it only shrinks the training set.
tsv_read = pd.read_table(r_in, on_bad_lines='skip', sep='\t', header=None)
# A row with a missing label or text would otherwise reach the vectorizer or
# the classifier as NaN and abort the run.
tsv_read = tsv_read.dropna(subset=[0, 1])
y_train = tsv_read[0].values
X_train = tsv_read[1].astype(str).values

# The dev input is read line by line instead of through the CSV parser: the
# parser skips blank and over-long lines and treats '"' as a quote character,
# any of which would leave fewer predictions than there are input lines.
# As before, the first tab-separated field of each line is the document.
# newline='\n' keeps a stray '\r' inside a line from splitting it in two.
with open(r_ind_ev, encoding='utf-8', newline='\n') as dev_file:
    X_dev = [line.rstrip('\r\n').split('\t', 1)[0] for line in dev_file]

# Fit the vocabulary/IDF weights on the training text only, then reuse the
# fitted vectorizer for the dev set so both share the same feature space.
vectorizer = TfidfVectorizer()
counts = vectorizer.fit_transform(X_train)
classifier = MultinomialNB()
classifier.fit(counts, y_train)
counts2 = vectorizer.transform(X_dev)
predictions = classifier.predict(counts2)

# One label per line, each terminated by '\n' (ndarray.tofile(sep='\n') omits
# the final newline).  tolist() converts NumPy scalars to plain Python values
# so labels are rendered the same way tofile's text mode rendered them.
with open(r_out, 'w', encoding='utf-8', newline='\n') as out_file:
    out_file.writelines(f'{label}\n' for label in predictions.tolist())

98132
train/train.tsv Normal file

File diff suppressed because it is too large Load Diff