forked from kubapok/kleister-nda-clone
Add solution
This commit is contained in:
parent
72f787c21b
commit
99d7fabb68
83
dev-0/out.tsv
Normal file
83
dev-0/out.tsv
Normal file
@ -0,0 +1,83 @@
|
||||
effective_date=2014-05-20 jurisdiction=New_York party=LIQUIDMETAL_TECHNOLOGIES_Inc.
|
||||
effective_date=2012-06-21 jurisdiction=New_York
|
||||
jurisdiction=Delaware party=JDA_Software_Group_Inc.
|
||||
effective_date=2009-02-23 jurisdiction=Massachusetts party=Kenneth_M._Bate_and_NitroMed_Inc.
|
||||
jurisdiction=Delaware
|
||||
jurisdiction=Washington
|
||||
|
||||
jurisdiction=New
|
||||
effective_date=2008-03-21 jurisdiction=New_York
|
||||
effective_date=2015-12-11
|
||||
|
||||
|
||||
effective_date=2012-12-17 jurisdiction=Delaware
|
||||
effective_date=2000-05-23 party=uDate.com_Inc.
|
||||
jurisdiction=New_York party=Virgin_Mobile_USA_LLC
|
||||
effective_date=2012-01-25 jurisdiction=Massachusetts
|
||||
effective_date=2008-07-31 jurisdiction=Minnesota party=Cogent_Inc.
|
||||
jurisdiction=California party=Penumbra_Inc.
|
||||
effective_date=2020-11-25 jurisdiction=New_York party=AOL_Inc.
|
||||
effective_date=2017-07-11 jurisdiction=Delaware
|
||||
effective_date=2018-12-28 party=Flexsteel_Industries_Inc.
|
||||
effective_date=2012-03-31 jurisdiction=California
|
||||
jurisdiction=Virginia
|
||||
effective_date=2004-10-11 jurisdiction=North_Carolina
|
||||
jurisdiction=Arizona
|
||||
jurisdiction=Indiana
|
||||
jurisdiction=New_Jersey
|
||||
effective_date=2004-02-27 jurisdiction=California
|
||||
effective_date=2018-03-30 jurisdiction=Delaware party=Jamba_Inc.
|
||||
jurisdiction=Georgia
|
||||
jurisdiction=New_York
|
||||
effective_date=2015-06-23 jurisdiction=New_York
|
||||
jurisdiction=California
|
||||
|
||||
jurisdiction=California
|
||||
party=CafePress_Inc.
|
||||
effective_date=2017-01-13
|
||||
jurisdiction=Ohio
|
||||
|
||||
party=GigPeak_Inc.
|
||||
effective_date=2000-02-10 jurisdiction=Minnesota
|
||||
jurisdiction=California
|
||||
party=Opsware_Inc.
|
||||
|
||||
jurisdiction=Minnesota party=Flexsteel_Industries_Inc.
|
||||
effective_date=2010-06-23 jurisdiction=Texas
|
||||
jurisdiction=New_Jersey party=AlgoRx_Pharmaceuticals_Inc.
|
||||
|
||||
effective_date=2001-01-26 jurisdiction=Washington party=Corus_Pharma_Inc.
|
||||
effective_date=2005-02-16
|
||||
effective_date=2014-11-26
|
||||
jurisdiction=Oregon
|
||||
jurisdiction=Delaware
|
||||
effective_date=2012-06-11 jurisdiction=Delaware party=Lightwave_Logic_Inc.
|
||||
jurisdiction=Delaware party=Webex_Communications_Inc.
|
||||
effective_date=2007-04-30 jurisdiction=Massachusetts
|
||||
effective_date=2011-10-25
|
||||
effective_date=2012-10-15
|
||||
effective_date=2016-03-15 jurisdiction=Delaware
|
||||
effective_date=2005-09-15 jurisdiction=Illinois
|
||||
jurisdiction=Idaho
|
||||
jurisdiction=Washington
|
||||
effective_date=2016-07-15 jurisdiction=New_York party=Wizard_World_Inc.
|
||||
jurisdiction=New_York
|
||||
effective_date=2005-12-31
|
||||
effective_date=2015-03-16 jurisdiction=Utah
|
||||
jurisdiction=Delaware
|
||||
|
||||
effective_date=2004-02-29 jurisdiction=Virginia
|
||||
effective_date=2006-12-19 jurisdiction=New_York
|
||||
effective_date=2010-07-13
|
||||
effective_date=2000-12-11 jurisdiction=Illinois party=Motorola_Inc.
|
||||
effective_date=2011-12-31 party=GigOptix_LLC
|
||||
|
||||
|
||||
effective_date=2011-03-29 jurisdiction=Texas
|
||||
effective_date=2011-05-26 jurisdiction=California party=Skyworks_Solutions_Inc.
|
||||
|
||||
jurisdiction=Delaware
|
||||
jurisdiction=Washington
|
||||
effective_date=2011-01-18 jurisdiction=New_York
|
||||
jurisdiction=Washington
|
||||
jurisdiction=Illinois
|
Can't render this file because it has a wrong number of fields in line 2.
|
118
main.py
118
main.py
@ -1,15 +1,123 @@
|
||||
import csv
|
||||
import sys
|
||||
import lzma
|
||||
import os
|
||||
import regex as re
|
||||
|
||||
# File name of the predictions file; main() joins it onto the input's directory.
out_file_name = "out.tsv"
# Separator used between the key=value pairs of one output row.
out_sep = "\t"

# Capitalised English month name -> zero-padded two-digit month number.
months = {
    name: f"{number:02d}"
    for number, name in enumerate(
        (
            "January",
            "February",
            "March",
            "April",
            "May",
            "June",
            "July",
            "August",
            "September",
            "October",
            "November",
            "December",
        ),
        start=1,
    )
}
|
||||
|
||||
|
||||
def main(fname):
|
||||
print(fname)
|
||||
with lzma.open(fname, mode="rt", encoding="utf-8") as tsvfile:
|
||||
reader = csv.reader(tsvfile, delimiter='\t', quoting=csv.QUOTE_NONE)
|
||||
for item in reader:
|
||||
def main(fname: str):
    """Extract the requested fields from every row of an xz-compressed TSV.

    Writes one line per input row to a file named ``out_file_name`` in the
    same directory as ``fname``.

    Args:
        fname: Path to the xz-compressed, tab-separated input file.
    """
    out_path = os.path.join(os.path.dirname(fname), out_file_name)

    with lzma.open(fname, mode="rt", encoding="utf-8") as tsv_file, \
            open(out_path, "w", encoding="utf-8") as out_file:
        # QUOTE_NONE: treat quote characters as plain text. With the csv
        # default, a stray '"' in a document would start a quoted field and
        # could swallow the following tabs/newlines, merging rows.
        reader = csv.reader(tsv_file, delimiter='\t', quoting=csv.QUOTE_NONE)
        out_file.writelines(process_entry(item) + '\n' for item in reader)
|
||||
|
||||
|
||||
def process_entry(entry: list) -> str:
    """Build the output row for one input record.

    ``entry[1]`` is read as a space-separated list of field names to extract
    and ``entry[2]`` as the text to extract them from. The names ``party``,
    ``jurisdiction`` and ``effective_date`` are handled; any other name is
    ignored.

    Args:
        entry: One parsed input row.

    Returns:
        The found ``name=value`` pairs joined with ``out_sep`` in the order
        the names were requested, without a leading separator. An empty
        string when nothing was found.
    """
    text = entry[2]
    pairs = []

    for name in entry[1].split(" "):
        value = None
        if name == "party":
            value = get_party(text)
        elif name == "jurisdiction":
            value = get_jurisdiction(text)
        elif name == "effective_date":
            # get_date yields a regex match (or None); format_date turns the
            # match into the final date string.
            found = get_date(text)
            if found is not None:
                value = format_date(found)

        if value is not None:
            pairs.append(f"{name}={value}")

    # Joining (instead of prefixing every pair with the separator) keeps the
    # row from starting with an empty field.
    return out_sep.join(pairs)
|
||||
|
||||
|
||||
def get_party(text: str):
    """Extract a company name that follows the word "between".

    Looks for "between", then a run of letters, whitespace, periods and
    commas, then one of the suffixes inc./Inc./INC./Llc/LLC/llc followed by a
    comma or period. The ``\\p{L}`` class relies on the third-party ``regex``
    module that this file imports under the name ``re``.

    Args:
        text: Document text to search.

    Returns:
        The name with commas removed, words joined by underscores and the
        suffix normalised to ``Inc.`` or ``LLC`` (for example
        ``Acme_Widgets_Inc.``), or ``None`` when nothing matches.
    """
    m = re.search(
        # The dots of the "inc." suffixes are escaped so they only match a
        # literal period rather than any character.
        r"between([\p{L}\s.,]+)(inc\.|Inc\.|INC\.|Llc|LLC|llc)[\,\.]", text)
    if m is None:
        return None

    # split()/join collapses any run of whitespace (including tabs and line
    # breaks) so the result never contains doubled or trailing underscores.
    words = m.group(1).replace(",", "").split()
    suffix = "Inc." if m.group(2).lower() == "inc." else "LLC"
    return "_".join(words + [suffix])
|
||||
|
||||
|
||||
def get_jurisdiction(text: str):
    """Extract the state named in a "laws of the State of ..." clause.

    Matches "law"/"laws" followed by "of the State of" or "of the
    Commonwealth of" and then one or two capitalised words.

    Args:
        text: Document text to search.

    Returns:
        The state name with spaces replaced by underscores (for example
        ``New_York``), or ``None`` when no such clause is found.
    """
    m = re.search(
        # The name ends at a word boundary. Requiring ".+" plus a delimiter
        # after it made the engine backtrack into the name whenever the
        # delimiter followed directly ("New York," -> "New",
        # "Delaware." -> "Delawar").
        r"laws? of the (?:State|Commonwealth) of "
        r"([A-Z][a-z]+(?: [A-Z][a-z]+)?)\b",
        text)
    if m is None:
        return None
    return m.group(1).replace(" ", "_")
|
||||
|
||||
|
||||
def get_date(text: str):
    """Find the first date-like expression in ``text``.

    Three notations are tried in this order, and the first notation that
    matches anywhere in the text wins:

    1. ``MM/DD/YYYY`` or ``MM-DD-YYYY`` with a 19xx/20xx year,
    2. ``MM/DD/YY`` or ``MM-DD-YY``,
    3. an English month name (capitalised or lower-case), a two-digit day and
       a 19xx/20xx year, separated by commas and/or whitespace.

    Args:
        text: Document text to search.

    Returns:
        A match object whose groups 1, 2 and 3 are month, day and year as
        written in the text, or ``None`` when no notation matches.
    """
    month = r"(0[1-9]|1[0-2])"
    day = r"(0[1-9]|[12][0-9]|3[01])"
    long_year = r"((?:19|20)[0-9]{2})"
    month_name = (
        r"([Jj]anuary|[Ff]ebruary|[Mm]arch|[Aa]pril|[Mm]ay|[Jj]une|[Jj]uly"
        r"|[Aa]ugust|[Ss]eptember|[Oo]ctober|[Nn]ovember|[Dd]ecember)"
    )

    patterns = (
        # Four-digit years go first: otherwise the two-digit form would match
        # the prefix of e.g. "11/25/2013" and report the year as "20".
        r"(?<!\d)" + month + r"[/-]" + day + r"[/-]" + long_year + r"(?!\d)",
        # The digit guards keep this form from matching inside longer numbers.
        r"(?<!\d)" + month + r"[/-]" + day + r"[/-]([0-9]{2})(?!\d)",
        month_name + r"[,\s]+" + day + r"[,\s]+" + long_year,
    )

    for pattern in patterns:
        found = re.search(pattern, text)
        if found is not None:
            return found
    return None
|
||||
|
||||
|
||||
def format_date(date):
    """Render a date match as ``YYYY-MM-DD``.

    Args:
        date: A match object (or other object indexable by 1, 2 and 3) whose
            items 1, 2 and 3 are month, day and year strings, or ``None``.
            The month may be numeric or an English month name.

    Returns:
        The formatted date string, or ``None`` when ``date`` is ``None``.
    """
    if date is None:
        return None

    month, day, year = date[1], date[2], date[3]

    # Month names are looked up case-insensitively, so "december" is mapped
    # just like "December". Numeric months are not in the table and pass
    # through unchanged.
    month = months.get(month.capitalize(), month)

    if len(year) == 2:
        # Two-digit years: a first digit above 2 (30-99) is read as 19xx,
        # everything else (00-29) as 20xx.
        century = "19" if int(year[0]) > 2 else "20"
        year = century + year

    # zfill keeps month and day two digits wide even if a single digit
    # reaches this function.
    return f"{year}-{month.zfill(2)}-{day.zfill(2)}"
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
if len(sys.argv) < 2:
|
||||
|
Loading…
Reference in New Issue
Block a user