first solution
This commit is contained in:
parent
a24c7aaa3f
commit
88d6ee2200
39
run.py
39
run.py
@ -0,0 +1,39 @@
|
|||||||
|
import re
|
||||||
|
|
||||||
|
# Names of the 50 US states in alphabetical order.  The order matters to
# callers that break ties by taking the first entry.
states = [
    'Alabama', 'Alaska', 'Arizona', 'Arkansas', 'California',
    'Colorado', 'Connecticut', 'Delaware', 'Florida', 'Georgia',
    'Hawaii', 'Idaho', 'Illinois', 'Indiana', 'Iowa',
    'Kansas', 'Kentucky', 'Louisiana', 'Maine', 'Maryland',
    'Massachusetts', 'Michigan', 'Minnesota', 'Mississippi', 'Missouri',
    'Montana', 'Nebraska', 'Nevada', 'New Hampshire', 'New Jersey',
    'New Mexico', 'New York', 'North Carolina', 'North Dakota', 'Ohio',
    'Oklahoma', 'Oregon', 'Pennsylvania', 'Rhode Island', 'South Carolina',
    'South Dakota', 'Tennessee', 'Texas', 'Utah', 'Vermont',
    'Virginia', 'Washington', 'West Virginia', 'Wisconsin', 'Wyoming',
]
|
||||||
|
|
||||||
|
def count_strings(text_in, search_str):
    """Count the case-insensitive occurrences of a pattern in a text.

    Args:
        text_in: Text to scan.
        search_str: Regular-expression pattern to look for.  A plain word or
            phrase works as-is; regex metacharacters keep their meaning.

    Returns:
        The number of non-overlapping matches of ``search_str`` in
        ``text_in``.
    """
    # The flag must be given to re.compile(): the second positional argument
    # of Pattern.findall() is the start position, not a flags value, so
    # passing re.IGNORECASE there skipped the first characters of the text
    # and left the match case-sensitive.
    pattern = re.compile(search_str, re.IGNORECASE)
    return len(pattern.findall(text_in))
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def predict_state(text):
    """Guess which US state a text is about from how often each is named.

    Every name in ``states`` is counted with ``count_strings``; the state
    with the highest count wins.

    Args:
        text: The text to classify.

    Returns:
        The winning state name with spaces replaced by underscores
        (e.g. ``"New_York"``).  Ties, including the case where no state is
        mentioned at all, go to the state listed first in ``states``.
    """
    tallies = {}
    for state in states:
        # Anchor at a word start so a name is not found inside a longer
        # word (e.g. "Kansas" inside "Arkansas" when case is ignored).
        tallies[state] = count_strings(text, r'\b' + re.escape(state))

    # A mention of "West Virginia" is also a hit for "Virginia".  Discount
    # the hits that really belong to a longer state name, otherwise the
    # shorter name always wins or ties with the longer one.
    for state in states:
        embedded = ' ' + state
        for longer in states:
            if embedded in longer:
                tallies[state] -= tallies[longer]

    # max() returns the first key holding the maximum, which keeps the
    # tie-breaking order of the ``states`` list.
    winner = max(tallies, key=tallies.get)
    return winner.replace(' ', '_')
|
||||||
|
|
||||||
|
|
||||||
|
def get_jurisdiction(file_in, file_out):
    """Write one ``jurisdiction=<State>`` line per line of an input file.

    Args:
        file_in: Path of the UTF-8 text file to read.  Each line is
            classified independently with ``predict_state``.
        file_out: Path of the file to create or overwrite with the
            predictions, one per input line and in the same order.
    """
    # Classify everything before the output is opened: an error while
    # classifying then cannot leave a truncated output file behind, and
    # only the predictions (not the raw lines) are kept in memory.
    with open(file_in, 'r', encoding='utf8') as f_in:
        predictions = [predict_state(line) for line in f_in]

    # No explicit close() is needed; the with-statement closes the file.
    with open(file_out, 'wt', encoding='utf8') as f_out:
        f_out.writelines(
            "jurisdiction=" + state + '\n' for state in predictions
        )
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# Produce the prediction file for each dataset split, in this order.
for _source_path, _target_path in (
    ('dev-0/in.tsv', 'dev-0/out.tsv'),
    ('train/in.tsv', 'train/out.tsv'),
    ('test-A/in.tsv', 'test-A/out.tsv'),
):
    get_jurisdiction(_source_path, _target_path)
|
||||||
|
|
Loading…
Reference in New Issue
Block a user