kleister-nda/run.ipynb
2022-05-04 00:23:23 +02:00

6.7 KiB
Raw Blame History

import re
# Names of the 50 U.S. states, in alphabetical order. Multi-word names keep
# their internal space here.
states = [
    'Alabama', 'Alaska', 'Arizona', 'Arkansas', 'California',
    'Colorado', 'Connecticut', 'Delaware', 'Florida', 'Georgia',
    'Hawaii', 'Idaho', 'Illinois', 'Indiana', 'Iowa',
    'Kansas', 'Kentucky', 'Louisiana', 'Maine', 'Maryland',
    'Massachusetts', 'Michigan', 'Minnesota', 'Mississippi', 'Missouri',
    'Montana', 'Nebraska', 'Nevada', 'New Hampshire', 'New Jersey',
    'New Mexico', 'New York', 'North Carolina', 'North Dakota', 'Ohio',
    'Oklahoma', 'Oregon', 'Pennsylvania', 'Rhode Island', 'South Carolina',
    'South Dakota', 'Tennessee', 'Texas', 'Utah', 'Vermont',
    'Virginia', 'Washington', 'West Virginia', 'Wisconsin', 'Wyoming',
]
def counter(text_in: str, query: str) -> int:
    """Count the case-insensitive matches of ``query`` in ``text_in``.

    Args:
        text_in: Text to search.
        query: Regular-expression pattern to look for. It is compiled as-is,
            so regex metacharacters keep their special meaning.

    Returns:
        Number of non-overlapping matches found in the whole of ``text_in``.
    """
    # The flag has to be given to re.compile(): the second positional
    # argument of Pattern.findall() is the start position, not a flags value.
    pattern = re.compile(query, re.IGNORECASE)
    return len(pattern.findall(text_in))
def state_prediction(text_in):
    """Build a per-state tally for ``text_in``.

    Every entry of ``states`` becomes one key of the result, with spaces in
    the name replaced by underscores; the value is whatever
    ``counter(text_in, state)`` returns for the unmodified state name.
    Keys appear in the same order as ``states``.
    """
    return {name.replace(" ", "_"): counter(text_in, name) for name in states}
def jurisdiction(path_in, path_out):
    """Write one ``jurisdiction=...`` line to ``path_out`` per line of ``path_in``.

    Each input line is normalised (``.`` and ``,`` replaced by spaces, then
    lower-cased) and passed to ``state_prediction``; the ``str()`` of its
    result is written after the ``jurisdiction=`` prefix.

    Args:
        path_in: Path of the UTF-8 text file to read.
        path_out: Path of the file to create or overwrite.
    """
    # Iterating the file yields one string per line; the normalisation must
    # be applied to each of them, since a list of lines has no ``replace``.
    with open(path_in, 'r', encoding='utf8') as file:
        lines = [
            line.replace('.', ' ').replace(',', ' ').lower()
            for line in file
        ]
    # The input is fully read before the output is opened, so the two paths
    # may safely refer to the same file. The ``with`` block closes the file;
    # no explicit close() is needed.
    with open(path_out, 'wt', encoding='utf8') as file_out:
        for line in lines:
            file_out.write("jurisdiction=" + str(state_prediction(line)) + '\n')
# Process the three dataset splits in turn: each in.tsv is read and the
# matching out.tsv is written next to it.
for source_path, target_path in (
    ('dev-0/in.tsv', 'dev-0/out.tsv'),
    ('train/in.tsv', 'train/out.tsv'),
    ('test-A/in.tsv', 'test-A/out.tsv'),
):
    jurisdiction(source_path, target_path)
---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
C:\Users\SEBAST~1\AppData\Local\Temp/ipykernel_3180/658503462.py in <module>
----> 1 jurisdiction('dev-0/in.tsv', 'dev-0/out.tsv')
      2 jurisdiction('train/in.tsv', 'train/out.tsv')
      3 jurisdiction('test-A/in.tsv', 'test-A/out.tsv')

C:\Users\SEBAST~1\AppData\Local\Temp/ipykernel_3180/2629600059.py in jurisdiction(path_in, path_out)
      2     with open(path_in, 'r', encoding='utf8') as file:
      3         lines = file.readlines()
----> 4         lines = lines.replace('.', ' ').replace(',', ' ').lower()
      5     with open(path_out, 'wt')as file_out:
      6         for i in lines:

AttributeError: 'list' object has no attribute 'replace'
!jupyter nbconvert --to script run.ipynb
[NbConvertApp] Converting notebook run.ipynb to script
[NbConvertApp] Writing 1697 bytes to run.py