bayes solution one
This commit is contained in:
parent
756ef4277a
commit
19a5c79255
5272
dev-0/out.tsv
Normal file
5272
dev-0/out.tsv
Normal file
File diff suppressed because it is too large
Load Diff
34
main.py
Normal file
34
main.py
Normal file
@ -0,0 +1,34 @@
|
|||||||
|
import numpy
|
||||||
|
from sklearn.preprocessing import LabelEncoder
|
||||||
|
from sklearn.naive_bayes import GaussianNB, MultinomialNB
|
||||||
|
from sklearn.pipeline import Pipeline
|
||||||
|
from sklearn.feature_extraction.text import TfidfVectorizer
|
||||||
|
|
||||||
|
|
||||||
|
def trainModel(trainFileIn, trainFileExpected):
    """Fit a TF-IDF + multinomial naive Bayes pipeline on line-aligned files.

    Args:
        trainFileIn: Path to a UTF-8 text file holding one training document
            per line.
        trainFileExpected: Path to a UTF-8 text file holding one label per
            line; the label on line ``i`` is paired with the document on
            line ``i`` of ``trainFileIn``.

    Returns:
        The fitted ``Pipeline``. It is trained on the integer codes produced
        by ``LabelEncoder`` (codes follow the sorted distinct labels), so its
        predictions are those integer codes, not the original label strings.
    """
    # Explicit UTF-8 so decoding does not depend on the platform locale.
    with open(trainFileExpected, 'r', encoding='utf-8') as f:
        # Strip the line terminator: otherwise a final line without a
        # trailing newline ("1") would become a class distinct from "1\n".
        expectedData = [line.rstrip('\n') for line in f]

    with open(trainFileIn, 'r', encoding='utf-8') as f:
        inData = f.readlines()

    # The encoder is only used to turn labels into integers for training;
    # it is not kept, so predictions cannot be mapped back to label strings.
    expectedDataEncoded = LabelEncoder().fit_transform(expectedData)

    pipeline = Pipeline(steps=[
        ('tfidf', TfidfVectorizer()),
        ('naive-bayes', MultinomialNB()),
    ])

    return pipeline.fit(inData, expectedDataEncoded)
|
||||||
|
|
||||||
|
def evaluateModel(model, inFile, outFile):
    """Predict one value per line of ``inFile`` and write them to ``outFile``.

    Args:
        model: Object with a ``predict`` method that accepts the list of
            input lines (each still ending in its newline) and returns one
            numeric prediction per line.
        inFile: Path to a UTF-8 text file holding one document per line.
        outFile: Destination path; receives one integer (``%d``) per line.
    """
    # Explicit UTF-8 so decoding does not depend on the platform locale.
    with open(inFile, 'r', encoding='utf-8') as f:
        inData = f.readlines()

    prediction = model.predict(inData)

    # For a 1-D prediction array savetxt writes one value per row, so each
    # prediction lands on its own line.
    numpy.savetxt(outFile, prediction, fmt='%d', delimiter='\n')
|
||||||
|
|
||||||
|
# Train once on the training split, then write predictions for each
# evaluation split next to its input file.
model = trainModel("train/in.tsv", "train/expected.tsv")

for _inPath, _outPath in (
    ("dev-0/in.tsv", "dev-0/out.tsv"),
    ("test-A/in.tsv", "test-A/out.tsv"),
):
    evaluateModel(model, _inPath, _outPath)
|
||||||
|
|
||||||
|
|
5152
test-A/out.tsv
Normal file
5152
test-A/out.tsv
Normal file
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user