bayes
This commit is contained in:
parent
9cb2fb2612
commit
ee86e0f656
5452
dev-0/out.tsv
Normal file
5452
dev-0/out.tsv
Normal file
File diff suppressed because it is too large
Load Diff
1
dev0_accuracy.txt
Normal file
1
dev0_accuracy.txt
Normal file
@ -0,0 +1 @@
|
||||
0.9889948642699926
|
27
main.py
Normal file
27
main.py
Normal file
@ -0,0 +1,27 @@
|
||||
import pandas as pd
|
||||
from sklearn.feature_extraction.text import TfidfVectorizer
|
||||
from sklearn.metrics import accuracy_score
|
||||
from sklearn.naive_bayes import MultinomialNB
|
||||
from sklearn.pipeline import make_pipeline
|
||||
|
||||
def _read_tsv(path):
    """Read a header-less, tab-separated file into a DataFrame, skipping malformed rows.

    ``error_bad_lines`` was deprecated in pandas 1.3 and removed in 2.0; its
    replacement is ``on_bad_lines="skip"``.  The new keyword is tried first and
    the old one is used only when the installed pandas does not know it.
    """
    try:
        return pd.read_csv(path, header=None, sep="\t", on_bad_lines="skip")
    except TypeError:
        # Older pandas: ``on_bad_lines`` is not a recognised keyword.
        return pd.read_csv(path, header=None, sep="\t", error_bad_lines=False)


# --- Load data -------------------------------------------------------------
df = _read_tsv("train/train.tsv.gz")
dev0 = _read_tsv("dev-0/in.tsv")
testA = _read_tsv("test-A/in.tsv")
expected = _read_tsv("dev-0/expected.tsv")
# NOTE(review): malformed rows are skipped independently per file, so if any
# row of dev-0/in.tsv is dropped the predictions would no longer line up with
# dev-0/expected.tsv -- confirm the inputs contain no bad lines.

# Evaluation / test inputs: the text is taken from the first column.
dev0_X = dev0.iloc[:, 0].tolist()
testA_X = testA.iloc[:, 0].tolist()

# Training data: column 0 is used as the label, column 1 as the text.
Y = df.iloc[:, 0].tolist()
X = df.iloc[:, 1].tolist()

# --- Train -----------------------------------------------------------------
# TF-IDF features fed into a multinomial Naive Bayes classifier.
model = make_pipeline(TfidfVectorizer(), MultinomialNB())
model.fit(X, Y)

# --- Predict ---------------------------------------------------------------
predicted_dev0 = model.predict(dev0_X)
predicted_testA = model.predict(testA_X)

# One prediction per line, no header and no index column.
pd.Series(predicted_dev0).to_csv("dev-0/out.tsv", header=False, sep="\t", index=False)
pd.Series(predicted_testA).to_csv("test-A/out.tsv", header=False, sep="\t", index=False)

# --- Evaluate --------------------------------------------------------------
# Pass the label column itself (1-D) rather than the whole DataFrame so that
# it has the same shape as the 1-D prediction array.
dev0_accuracy = accuracy_score(expected.iloc[:, 0], predicted_dev0)

# ``with`` guarantees the file is closed even if the write raises.
with open("dev0_accuracy.txt", "w") as f:
    f.write(str(dev0_accuracy))
|
5445
test-A/out.tsv
Normal file
5445
test-A/out.tsv
Normal file
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user