Compare commits
1 Commits
Author | SHA1 | Date | |
---|---|---|---|
87aafc9ea9 |
24
bayes.py
Normal file
24
bayes.py
Normal file
@ -0,0 +1,24 @@
|
||||
import pandas as pd
|
||||
from sklearn.feature_extraction.text import TfidfVectorizer
|
||||
from sklearn.naive_bayes import MultinomialNB
|
||||
from sklearn.pipeline import make_pipeline
|
||||
|
||||
# Train a TF-IDF + multinomial naive Bayes text classifier on train/train.tsv
# and write its predictions for the dev-0 and test-A inputs.

# --- Load data -------------------------------------------------------------
# All files are tab-separated and have no header row.
# `error_bad_lines=False` was deprecated in pandas 1.3 and removed in 2.0.
# `on_bad_lines="warn"` is the equivalent: malformed rows are skipped and a
# warning is emitted (the old call left `warn_bad_lines` at its default True).
df = pd.read_csv("train/train.tsv", on_bad_lines="warn", header=None, sep="\t")
# NOTE(review): the input files are parsed with pandas' default quoting; if
# the raw text can contain '"' characters, rows may be merged and the number
# of predictions may not match the number of input lines — confirm.
dev0 = pd.read_csv("dev-0/in.tsv", header=None, sep="\t")
testA = pd.read_csv("test-A/in.tsv", header=None, sep="\t")

# --- Select columns --------------------------------------------------------
# For the prediction inputs, the first column is used as the document text.
dev0X = dev0.iloc[:, 0].tolist()
testAX = testA.iloc[:, 0].tolist()

# For the training file, column 0 is used as the target and column 1 as the
# document text.
Y = df.iloc[:, 0].tolist()
X = df.iloc[:, 1].tolist()

# --- Fit -------------------------------------------------------------------
# The pipeline vectorizes raw strings with TF-IDF and feeds them to the
# naive Bayes classifier, so fit/predict take lists of strings directly.
model = make_pipeline(TfidfVectorizer(), MultinomialNB())
model.fit(X, Y)

# --- Predict and save ------------------------------------------------------
dev0_predicted = model.predict(dev0X)
testA_predicted = model.predict(testAX)

# One prediction per line, without index or header.
pd.Series(dev0_predicted).to_csv("dev-0/out.tsv", sep="\t", index=False, header=False)
pd.Series(testA_predicted).to_csv("test-A/out.tsv", sep="\t", index=False, header=False)
|
5452
dev-0/out.tsv
Normal file
5452
dev-0/out.tsv
Normal file
File diff suppressed because it is too large
Load Diff
5447
test-A/out.tsv
Normal file
5447
test-A/out.tsv
Normal file
File diff suppressed because it is too large
Load Diff
98132
train/train.tsv
Normal file
98132
train/train.tsv
Normal file
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user