first solution

This commit is contained in:
Adi 2022-05-03 21:54:27 +02:00
parent a24c7aaa3f
commit 88d6ee2200

39
run.py
View File

@ -0,0 +1,39 @@
import re
# Names of the 50 U.S. states in alphabetical order, grouped by initial letter.
# The order matters to callers that break ties by position in this list.
states = [
    "Alabama", "Alaska", "Arizona", "Arkansas",
    "California", "Colorado", "Connecticut",
    "Delaware",
    "Florida",
    "Georgia",
    "Hawaii",
    "Idaho", "Illinois", "Indiana", "Iowa",
    "Kansas", "Kentucky",
    "Louisiana",
    "Maine", "Maryland", "Massachusetts", "Michigan", "Minnesota",
    "Mississippi", "Missouri", "Montana",
    "Nebraska", "Nevada", "New Hampshire", "New Jersey", "New Mexico",
    "New York", "North Carolina", "North Dakota",
    "Ohio", "Oklahoma", "Oregon",
    "Pennsylvania",
    "Rhode Island",
    "South Carolina", "South Dakota",
    "Tennessee", "Texas",
    "Utah",
    "Vermont", "Virginia",
    "Washington", "West Virginia", "Wisconsin", "Wyoming",
]
def count_strings(text_in, search_str):
    """Count case-insensitive, non-overlapping matches of a pattern in a text.

    Args:
        text_in: Text to search.
        search_str: Regular-expression pattern. It is compiled as given, so
            regex metacharacters keep their special meaning.

    Returns:
        The number of non-overlapping matches of ``search_str`` in
        ``text_in``, ignoring letter case.
    """
    # Flags must be given to re.compile(): the second positional argument of
    # Pattern.findall() is the start position, not a flags value, so passing
    # re.IGNORECASE there would skip the first characters of the text and
    # leave the match case-sensitive.
    pattern = re.compile(search_str, re.IGNORECASE)
    return len(pattern.findall(text_in))
def predict_state(text):
    """Return the state whose name is counted most often in ``text``.

    Every entry of the module-level ``states`` list is counted with
    ``count_strings``; the winner is returned with spaces replaced by
    underscores (e.g. ``New_York``).

    Ties go to the entry that appears earliest in ``states``, so a text in
    which no state is counted yields the first entry of that list.
    """
    occurrences = {
        name.replace(" ", "_"): count_strings(text, name) for name in states
    }
    # max() keeps the first maximal key it meets, and the dict iterates in
    # insertion order, i.e. the order of ``states``.
    return max(occurrences, key=occurrences.get)
def get_jurisdiction(file_in, file_out):
    """Write one ``jurisdiction=<state>`` line to ``file_out`` per input line.

    Args:
        file_in: Path of the UTF-8 text file to read. Each line is passed
            to ``predict_state`` on its own.
        file_out: Path of the text file to create or overwrite with the
            predictions, one per line, in input order.
    """
    # Read the whole input before the output is opened, so the input has been
    # consumed by the time the output path is truncated.
    with open(file_in, 'r', encoding='utf8') as f_in:
        lines = f_in.readlines()
    # The with-statement closes the file on exit; no explicit close() needed.
    # The encoding is spelled out to match the input instead of relying on
    # the platform default.
    with open(file_out, 'wt', encoding='utf8') as f_out:
        for line in lines:
            f_out.write("jurisdiction=" + str(predict_state(line)) + '\n')
# Produce predictions for each data split, in this order:
# (input path, output path).
for path_in, path_out in (
    ('dev-0/in.tsv', 'dev-0/out.tsv'),
    ('train/in.tsv', 'train/out.tsv'),
    ('test-A/in.tsv', 'test-A/out.tsv'),
):
    get_jurisdiction(path_in, path_out)