Compare commits

...

1 Commits

Author SHA1 Message Date
87aafc9ea9 Bayes2 solution 2021-05-06 10:35:19 +02:00
5 changed files with 109057 additions and 2 deletions

24
bayes.py Normal file
View File

@ -0,0 +1,24 @@
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import make_pipeline
# Train a TF-IDF + multinomial naive Bayes text classifier on train/train.tsv
# and write its predictions for the dev-0 and test-A inputs next to them.

# Malformed rows in the training file are skipped (with a warning) instead of
# aborting the run. `error_bad_lines` was deprecated in pandas 1.3 and removed
# in pandas 2.0 in favour of `on_bad_lines`; "warn" reproduces the behaviour of
# the old `error_bad_lines=False` call. The legacy keyword is only used as a
# fallback on pandas versions that do not know `on_bad_lines` yet.
try:
    df = pd.read_csv("train/train.tsv", on_bad_lines="warn", header=None, sep="\t")
except TypeError:
    df = pd.read_csv("train/train.tsv", error_bad_lines=False, header=None, sep="\t")

dev0 = pd.read_csv("dev-0/in.tsv", header=None, sep="\t")
testA = pd.read_csv("test-A/in.tsv", header=None, sep="\t")

# The evaluation files are fed to the model from their first column.
dev0X = dev0.iloc[:, 0].tolist()
testAX = testA.iloc[:, 0].tolist()

# In the training file the first column is the target and the second column
# is the input passed to the vectorizer.
Y = df.iloc[:, 0].tolist()
X = df.iloc[:, 1].tolist()

# Vectorization and classification are chained so the same fitted TF-IDF
# vocabulary is applied automatically at prediction time.
model = make_pipeline(TfidfVectorizer(), MultinomialNB())
model.fit(X, Y)

dev0_predicted = model.predict(dev0X)
testA_predicted = model.predict(testAX)

# One prediction per line, without index or header row.
pd.Series(dev0_predicted).to_csv("dev-0/out.tsv", sep="\t", index=False, header=False)
pd.Series(testA_predicted).to_csv("test-A/out.tsv", sep="\t", index=False, header=False)

5452
dev-0/out.tsv Normal file

File diff suppressed because it is too large Load Diff

5447
test-A/out.tsv Normal file

File diff suppressed because it is too large Load Diff

98132
train/train.tsv Normal file

File diff suppressed because it is too large Load Diff