bayes solution one

This commit is contained in:
Mariusz B 2021-05-01 15:25:14 +00:00
parent 756ef4277a
commit 19a5c79255
3 changed files with 10458 additions and 0 deletions

5272
dev-0/out.tsv Normal file

File diff suppressed because it is too large Load Diff

34
main.py Normal file
View File

@ -0,0 +1,34 @@
import numpy
from sklearn.preprocessing import LabelEncoder
from sklearn.naive_bayes import GaussianNB, MultinomialNB
from sklearn.pipeline import Pipeline
from sklearn.feature_extraction.text import TfidfVectorizer
def trainModel(trainFileIn, trainFileExpected):
    """Fit a TF-IDF + multinomial naive Bayes pipeline on line-aligned files.

    Args:
        trainFileIn: Path to a text file with one training document per line.
        trainFileExpected: Path to a text file with one label per line; line
            i is the label of line i in ``trainFileIn``.

    Returns:
        The fitted ``Pipeline``. It predicts the integer codes produced by
        ``LabelEncoder`` for the labels, not the original label strings
        (the encoder itself is not kept).
    """
    # Drop the line terminator from every label. With readlines() a final
    # line lacking a trailing newline ("1") would otherwise be treated as a
    # different class than the same label elsewhere in the file ("1\n").
    with open(trainFileExpected, 'r', encoding='utf-8') as f:
        expectedData = [line.rstrip('\n') for line in f]
    # Explicit encoding so decoding does not depend on the platform locale.
    with open(trainFileIn, 'r', encoding='utf-8') as f:
        inData = f.readlines()

    expectedDataEncoded = LabelEncoder().fit_transform(expectedData)
    pipeline = Pipeline(steps=[
        ('tfidf', TfidfVectorizer()),
        ('naive-bayes', MultinomialNB()),
    ])
    return pipeline.fit(inData, expectedDataEncoded)
def evaluateModel(model, inFile, outFile):
    """Predict a label code for every line of ``inFile`` and save the result.

    Args:
        model: Object with a ``predict`` method that accepts a list of text
            lines and returns one integer-formattable value per line.
        inFile: Path to a text file with one document per line.
        outFile: Path the predictions are written to, one integer per line.
    """
    # Explicit encoding so decoding does not depend on the platform locale.
    with open(inFile, 'r', encoding='utf-8') as f:
        inData = f.readlines()
    prediction = model.predict(inData)
    # A 1-D array is written one value per row, formatted as an integer.
    numpy.savetxt(outFile, prediction, fmt='%d', delimiter='\n')
# Train once on the training split, then write predictions for every
# evaluation split next to its input file.
model = trainModel("train/in.tsv", "train/expected.tsv")
for inPath, outPath in (
    ("dev-0/in.tsv", "dev-0/out.tsv"),
    ("test-A/in.tsv", "test-A/out.tsv"),
):
    evaluateModel(model, inPath, outPath)

5152
test-A/out.tsv Normal file

File diff suppressed because it is too large Load Diff