This commit is contained in:
Maciej Czajka 2022-04-29 02:03:58 +02:00
parent 1a9a6c9fdc
commit a27bb54bed

1
run.py
View File

@ -112,7 +112,6 @@ def clean_term(t):
return term return term
def find_part(l): def find_part(l):
# regex = r'[A-Z][a-z]+\.*'
# regex = r'\b[A-Z]\w+(?:[ -]+?[A-Z]\w+?){0,2}[,\s]+(?i:inc|holding)\b' # regex = r'\b[A-Z]\w+(?:[ -]+?[A-Z]\w+?){0,2}[,\s]+(?i:inc|holding)\b'
regex = r'\b[A-Z]\w+(?:[ -]+?[A-Z]\w+?){0,2}[,\s]+(?i:inc|holding|corporation|corp|llc)\b' regex = r'\b[A-Z]\w+(?:[ -]+?[A-Z]\w+?){0,2}[,\s]+(?i:inc|holding|corporation|corp|llc)\b'
s = re.findall(regex, l) s = re.findall(regex, l)