Demo Version
This commit is contained in:
parent
756ef4277a
commit
3aefd799a6
5272
dev-0/in.tsv
Normal file
5272
dev-0/in.tsv
Normal file
File diff suppressed because one or more lines are too long
5272
dev-0/out.tsv
Normal file
5272
dev-0/out.tsv
Normal file
File diff suppressed because it is too large
Load Diff
37
main.py
Normal file
37
main.py
Normal file
@@ -0,0 +1,37 @@
|
||||
import numpy as np
|
||||
from sklearn.preprocessing import LabelEncoder
|
||||
from sklearn.naive_bayes import MultinomialNB
|
||||
from sklearn.pipeline import Pipeline
|
||||
from sklearn.feature_extraction.text import TfidfVectorizer
|
||||
|
||||
def train_model(train_in, train_expected):
    """Fit a TF-IDF + multinomial naive Bayes text classifier.

    Args:
        train_in: Path to a UTF-8 text file; every line is used as one
            training document.
        train_expected: Path to a UTF-8 text file; every line is used as one
            label. Presumably aligned line-by-line with ``train_in`` --
            verify against the data set.

    Returns:
        The fitted ``Pipeline``. It is trained on the integer codes produced
        by ``LabelEncoder``, so its predictions are those codes rather than
        the original label strings.
    """
    with open(train_expected, 'r', encoding='utf-8') as f:
        # Strip the line terminator so that a final line lacking a trailing
        # newline is not encoded as a different class from the same label
        # followed by a newline.
        exp = [line.rstrip('\n') for line in f]

    with open(train_in, 'r', encoding='utf-8') as f:
        train_data = f.readlines()

    exp_encoded = LabelEncoder().fit_transform(exp)
    pipeline = Pipeline(steps=[
        ('tfidf', TfidfVectorizer()),
        ('naive-bayes', MultinomialNB())
    ])

    return pipeline.fit(train_data, exp_encoded)
|
||||
|
||||
|
||||
def predict(model, in_file, out_file):
    """Write the model's prediction for every line of ``in_file``.

    Args:
        model: Object exposing ``predict``; it is called once with the list
            of lines read from ``in_file`` (line terminators retained).
        in_file: Path of the UTF-8 text file to read.
        out_file: Destination passed to ``np.savetxt``; the predictions are
            written with the ``%d`` format.
    """
    with open(in_file, mode='r', encoding='utf-8') as source:
        samples = list(source)

    np.savetxt(out_file, model.predict(samples), fmt='%d')
|
||||
|
||||
|
||||
def main():
    """Train on the ``train`` files, then write predictions for both splits.

    The model is fitted once from ``train/in.tsv`` and ``train/expected.tsv``
    and reused for the ``dev-0`` and ``test-A`` inputs, in that order.
    """
    classifier = train_model("train/in.tsv", "train/expected.tsv")

    # (input path, output path) pairs, processed in the listed order.
    targets = (
        ("dev-0/in.tsv", "dev-0/out.tsv"),
        ("test-A/in.tsv", "test-A/out.tsv"),
    )
    for source_path, result_path in targets:
        predict(classifier, source_path, result_path)
|
||||
|
||||
|
||||
# Run the train-and-predict workflow only when this file is executed as a
# script, not when it is imported.
if __name__ == '__main__':
    main()
|
5152
test-A/in.tsv
Normal file
5152
test-A/in.tsv
Normal file
File diff suppressed because one or more lines are too long
5152
test-A/out.tsv
Normal file
5152
test-A/out.tsv
Normal file
File diff suppressed because it is too large
Load Diff
289579
train/in.tsv
Normal file
289579
train/in.tsv
Normal file
File diff suppressed because one or more lines are too long
Loading…
Reference in New Issue
Block a user