diff --git a/dev-0/out.tsv b/dev-0/out.tsv new file mode 100644 index 0000000..694fbea --- /dev/null +++ b/dev-0/out.tsv @@ -0,0 +1,83 @@ +effective_date=2014-05-20 jurisdiction=New_York party=Liquidmetal_Technology_Inc. term=3_years +effective_date=2008-04-08 jurisdiction=Georgia party=Nuclear_Operating_Company_Inc. +effective_date=2012-09-04 jurisdiction=Delaware party=Jda_Software_Group_Inc. +effective_date=2006-03-23 jurisdiction=Massachusetts party=Among_Nitromed_Inc. +effective_date=2015-03-01 jurisdiction=Delaware +effective_date=2012-01-11 jurisdiction=Washington party=First_Financial_Northwest_Inc. +effective_date=2009-04-30 jurisdiction=Delaware term=3_years +effective_date=2016-06-22 jurisdiction=New_Jersey party=Interpace_Diagnostics_Group_Inc. term=2_years +effective_date=2008-03-21 jurisdiction=New_York party=Intercept_Pharmaceuticals_Inc. +effective_date=2015-12-11 party=Anadigics_Inc. +jurisdiction=Delaware +effective_date=2018-11-15 jurisdiction=Delaware party=Among_Tesaro_Inc. term=5_years +effective_date=2012-12-17 jurisdiction=Delaware party=2012_Allergan_Inc. term=2_years +effective_date=2000-05-23 jurisdiction=Delaware +jurisdiction=New_Jersey term=2_years +effective_date=2012-01-25 jurisdiction=Massachusetts term=2_years +effective_date=2008-07-31 jurisdiction=Delaware party=Cogent_Inc. +jurisdiction=California party=Penumbra_Inc. term=2_years +effective_date=2020-11-25 jurisdiction=New_York term=3_years +effective_date=2017-07-11 jurisdiction=California +effective_date=2018-12-28 jurisdiction=Iowa party=Flexsteel_Industries_Inc. term=12_months +effective_date=2012-03-31 jurisdiction=Delaware party=Integrated_Device_Technology_Inc. term=12_months +effective_date=2004-10-01 jurisdiction=Virginia party=Maximus_Inc. term=3_years +effective_date=2004-10-11 jurisdiction=North_Carolina party=Inspire_Pharmaceuticals_Inc. +jurisdiction=Arizona party=Jda_Software_Inc. +jurisdiction=Indiana +jurisdiction=New_Jersey term=12_months +effective_date=2005-05-31 jurisdiction=California +effective_date=2018-03-30 jurisdiction=Delaware party=Jamba_Inc. +effective_date=2013-05-01 jurisdiction=Georgia party=Citi_Trends_Inc. term=2_years +jurisdiction=New_York party=Synova_Healthcare_Inc. term=5_years +effective_date=2015-06-23 jurisdiction=New_York party=By_Wynnefield_Capital_Inc. +effective_date=2008-01-01 jurisdiction=California party=Biolargo_Inc. +jurisdiction=Minnesota party=Paul_Travelers_Companies_Inc. +jurisdiction=Delaware party=Investment_Trust_Iii_Inc. term=2_years +effective_date=2018-08-16 jurisdiction=Kentucky party=District_Photo_Inc. term=3_years +effective_date=2017-01-13 jurisdiction=Minnesota party=Target_Enterprise_Inc. term=3_years +jurisdiction=Ohio term=18_months +jurisdiction=Massachusetts +effective_date=2017-01-11 jurisdiction=California party=Gigpeak_Inc. +effective_date=2000-02-10 jurisdiction=Minnesota party=England_Business_Service_Inc. +effective_date=2008-11-06 jurisdiction=California +effective_date=2007-05-09 jurisdiction=Delaware term=12_months +jurisdiction=Illinois term=12_months +effective_date=2003-05-14 jurisdiction=Minnesota party=Flexsteel_Industries_Inc. term=2_years +effective_date=2010-06-23 jurisdiction=Texas party=American_Well_Control_Inc. term=2_years +jurisdiction=New_Jersey party=Algorx_Pharmaceuticals_Inc. +effective_date=2009-11-06 jurisdiction=Delaware term=2_years +effective_date=2001-01-26 jurisdiction=Washington party=Corus_Pharma_Inc. term=12_months +effective_date=2005-02-16 jurisdiction=Georgia term=2_years +effective_date=2014-11-26 jurisdiction=Delaware party=E2Open_Inc. +effective_date=1999-03-15 jurisdiction=Oregon party=1999_By_Acumed_Inc. +effective_date=2008-07-02 jurisdiction=California +effective_date=2012-06-11 jurisdiction=Delaware party=Lightwave_Logic_Inc. +effective_date=2007-03-04 jurisdiction=Delaware party=Webex_Communications_Inc. term=12_months +effective_date=2007-04-30 jurisdiction=Massachusetts term=5_years +effective_date=2011-10-25 jurisdiction=California party=Applied_Biosystems_Inc. term=2_months +effective_date=2012-10-15 jurisdiction=Delaware +effective_date=2016-03-15 jurisdiction=Delaware +effective_date=2005-09-15 jurisdiction=Illinois +effective_date=2005-05-04 jurisdiction=Idaho party=Kimberly_Gold_Mines_Inc. +effective_date=2020-08-14 jurisdiction=Ohio +effective_date=2016-07-15 jurisdiction=California party=Wizard_World_Inc. +effective_date=2008-08-01 jurisdiction=New_York party=Omrix_Biopharmaceuticals_Inc. term=12_months +effective_date=2006-01-01 jurisdiction=California party=Beverly_Hills_Bancorp_Inc. term=12_months +effective_date=2015-03-16 jurisdiction=Utah party=Galil_Medical_Inc. +effective_date=2012-07-05 jurisdiction=Delaware party=Confidential_Williams_Controls_Inc. term=3_years +effective_date=2015-12-08 jurisdiction=Washington +effective_date=2004-02-29 jurisdiction=Virginia +effective_date=2006-12-19 jurisdiction=Connecticut term=18_months +effective_date=2010-07-13 jurisdiction=New_York +effective_date=2000-12-11 jurisdiction=Illinois party=Motorola_Inc. term=10_years +effective_date=2008-03-26 jurisdiction=California term=5_years +effective_date=2017-10-02 jurisdiction=Delaware term=2_years +party=Ideal_Restaurant_Group_Inc. +effective_date=2011-03-29 jurisdiction=Texas party=Newgistics_Inc. +effective_date=2011-05-26 jurisdiction=California party=Skyworks_Solutions_Inc. +effective_date=2006-09-01 jurisdiction=Washington party=Digital_River_Inc. term=5_years +effective_date=2015-04-02 jurisdiction=Delaware party=Involving_Magnetek_Inc. term=2_years +effective_date=2017-07-21 jurisdiction=Washington +effective_date=2011-01-18 jurisdiction=New_York party=Information_Regarding_Orthovita_Inc. +jurisdiction=Washington party=Alder_Biopharmaceuticals_Inc. term=5_years +effective_date=2020-07-21 jurisdiction=Illinois party=Agreement_Us_Foods_Inc. term=2_years diff --git a/kleister-nda.ipynb b/kleister-nda.ipynb new file mode 100644 index 0000000..3f03a4c --- /dev/null +++ b/kleister-nda.ipynb @@ -0,0 +1,466 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Extract key information from Edgar NDA documents" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import pathlib\n", + "from collections import Counter\n", + "from sklearn.metrics import *" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "KLEISTER_PATH = pathlib.Path('C:/Users/Fijka/Documents/kleister-nda-clone')\n", + "file_name = 'train'" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Read expected train data" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "def get_expected_data(filepath, data_key):\n", + " dataset_expected_key = []\n", + " with open(filepath,'r') as train_expected_file:\n", + " for line in train_expected_file:\n", + " key_values = line.rstrip('\\n').split(' ')\n", + " data_value = None\n", + " for key_value in key_values:\n", + " key, value = key_value.split('=')\n", + " if key == data_key:\n", + " data_value = value\n", + " if data_value is None:\n", + " data_value = 'NONE'\n", + " dataset_expected_key.append(data_value)\n", + " return dataset_expected_key" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "KEYS = ['effective_date', 'jurisdiction', 'party', 'term']" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "def read_expected_data(filepath):\n", + " data = []\n", + " for key in KEYS:\n", + " data.append(get_expected_data(filepath, key))\n", + " return data\n", + "\n", + "if file_name != 'test-A':\n", + " train_expected_data = read_expected_data(KLEISTER_PATH/file_name/'expected.tsv')" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "if file_name != 'test-A':\n", + " [i[:1] for i in train_expected_data]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Read train dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "import lzma\n", + "import csv\n", + "\n", + "def read_data(filename):\n", + " all_data = lzma.open(filename).read().decode('UTF-8').split('\\n')\n", + " return [line.split('\\t') for line in all_data][:-1]\n", + "\n", + "train_data = read_data(KLEISTER_PATH/file_name/'in.tsv.xz')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## JURISDICTION" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "STATES = ['Alabama', 'Alaska', 'Arizona', 'Arkansas', 'California', 'Colorado', 'Connecticut', 'Delaware','Florida',\n", + " 'Georgia', 'Hawaii', 'Idaho', 'Illinois', 'Indiana', 'Iowa', 'Kansas', 'Kentucky', 'Louisiana', 'Maine',\n", + " 'Maryland', 'Massachusetts', 'Michigan', 'Minnesota', 'Mississippi', 'Missouri', 'Montana', 'Nebraska', 'Nevada',\n", + " 'New Hampshire', 'New Jersey', 'New Mexico', 'New York', 'North Carolina', 'North Dakota', 'Ohio', 'Oklahoma',\n", + " 'Oregon', 'Pennsylvania', 'Rhode Island', 'South Carolina', 'South Dakota', 'Tennessee', 'Texas', 'Utah',\n", + " 'Vermont', 'Virginia', 'Washington', 'West Virginia', 'Wisconsin', 'Wyoming']" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "false jurisdiction: 22\n" + ] + } + ], + "source": [ + "import spacy\n", + "nlp = spacy.load(\"en_core_web_sm\")\n", + "from operator import itemgetter\n", + "\n", + "jurisdiction = []\n", + "\n", + "def normalize(text):\n", + " return text.replace('\\\\n', ' ').lower()\n", + " # nlp(text) -> tokenizacja\n", + "\n", + "def check_jurisdiction(document):\n", + " states = {}\n", + " for text in document[2:]:\n", + " text = normalize(text)\n", + " for state in STATES:\n", + " if state.lower() in text:\n", + " if state in states:\n", + " states[state][0] += text.count(state.lower())\n", + " else:\n", + " states[state] = [text.count(state.lower()), text.index(state.lower())]\n", + " if states != {}:\n", + " states = sorted(states.items(), key=itemgetter(1), reverse=True)\n", + " jurisdiction.append(states[0][0].replace(' ', '_'))\n", + " return states[0][0], states\n", + " else:\n", + " jurisdiction.append(None)\n", + " return None\n", + " \n", + "tmp = 0\n", + "for i in range(len(train_data)):\n", + " tt = check_jurisdiction(train_data[i])\n", + " if file_name != 'test-A':\n", + " if tt == None:\n", + " if train_expected_data[1][i] != None:\n", + " # print(i, train_expected_data[1][i], tt)\n", + " tmp += 1\n", + " else:\n", + " if tt[0] != train_expected_data[1][i].replace('_', ' '):\n", + " # print(i, train_expected_data[1][i], tt[0])\n", + " tmp += 1\n", + "print('false jurisdiction:', tmp)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## EFFECTIVE DATE" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "false effective date 42\n" + ] + } + ], + "source": [ + "import re\n", + "import datetime\n", + "from datetime import date\n", + "\n", + "effective_date = []\n", + "\n", + "def parse_date(date):\n", + " month = str(date.month)\n", + " if len(month) == 1:\n", + " month = '0' + str(date.month)\n", + " day = str(date.day)\n", + " if len(day) == 1:\n", + " day = '0' + str(date.day)\n", + " return str(date.year) + '-' + month + '-' + day\n", + "\n", + "def find_dates(text):\n", + " \n", + " MONTHS = {'January' : 1, 'February' : 2, 'March' : 3, 'April' : 4, 'May' : 5, 'June' : 6,\n", + " 'July' : 7, 'August' : 8, 'September' : 9, 'October' : 10, 'November' : 11, 'December' : 12}\n", + " \n", + " all_dates = []\n", + " \n", + " text = text.replace('\\\\n', ' ')\n", + " \n", + " dic = {'\\d{1,2}\\/\\d{1,2}\\/\\d{2}' : '%m/%d/%y',\n", + " '[01]*[0-9]\\/[01]*[0-9]\\/\\d{4}' : '%m/%d/%Y',\n", + " '\\w{3,9}?\\s\\d{1,2}?,\\s\\d{4}?' : '%B %d, %Y',\n", + " '\\w{3,9}?\\s\\d{1,2}?,\\d{4}?' : '%B %d,%Y',\n", + " '\\d{1,2}?th\\sday\\sof\\s\\w{3,9}?\\s\\d{4}?' : '%dth day of %B %Y',\n", + " '\\d{1,2}?th\\sday\\sof\\s\\w{3,9}?,\\s\\d{4}?' : '%dth day of %B, %Y',\n", + " '\\d{1,2}?ND\\sday\\sof\\s\\w{3,9}?\\s\\d{4}?' : '%dND day of %B %Y',\n", + " '\\w{3,9}?\\s\\d{1,2}?th\\s,\\s\\d{4}?' : '%B %dth , %Y',\n", + " '\\w{3,9}?\\s\\d{1,2}?th,\\s\\d{4}?' : '%B %dth, %Y',\n", + " '\\d{1,2}?\\sday\\sof\\s\\w{3,9}?,\\s\\d{4}?' : '%d day of %B, %Y',\n", + " '\\w{3,9}?\\.\\s\\d{1,2}?,\\s\\d{4}?' : '%b. %d, %Y',\n", + " '\\d{1,2}?\\s\\w{3,9}?,\\s\\d{4}?' : '%d %B, %Y',\n", + " '\\d{1,2}?st\\sday\\sof\\s\\w{3,9}?\\s,\\s\\d{4}?' : '%dst day of %B , %Y',\n", + " '\\d{1,2}?st\\sday\\sof\\s\\w{3,9}?,\\s\\d{4}?' : '%dst day of %B, %Y',\n", + " '\\d{1,2}?nd\\sday\\sof\\s\\w{3,9}?,\\s\\d{4}?' : '%dnd day of %B, %Y',\n", + " '\\d{1,2}\\.\\d{1,2}\\.\\d{2,4}' : '%m.%d.%y'\n", + " }\n", + " \n", + " for d in dic:\n", + " match = re.search(r'' + d, text)\n", + " if match != None:\n", + " try:\n", + " date = datetime.datetime.strptime(match.group(), dic[d]).date()\n", + " all_dates.append(parse_date(date))\n", + " except:\n", + " pass\n", + " \n", + " return all_dates\n", + "\n", + "def check_effective_date(text):\n", + " dates = []\n", + " x = find_dates(text)\n", + " if x != []:\n", + " dates.append(x)\n", + " return(dates)\n", + "\n", + "test = 0\n", + "for i in range(len(train_data)):\n", + " xx = check_effective_date(train_data[i][2])\n", + " if file_name != 'test-A':\n", + " if train_expected_data[0][i] == 'NONE':\n", + " if xx != []:\n", + " # print(i, train_expected_data[0][i], xx[-1][0])\n", + " test += 1\n", + " else:\n", + " if xx != []:\n", + " if xx[0][-1] != train_expected_data[0][i]:\n", + " # print(i, train_expected_data[0][i], xx[-1][0])\n", + " test +=1\n", + " else:\n", + " # print(i, train_expected_data[0][i], xx)\n", + " test += 1\n", + " if xx != []:\n", + " effective_date.append(xx[-1][0])\n", + " else:\n", + " effective_date.append(None)\n", + "print('false effective date', test)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## PARTY" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "false party: 202\n" + ] + } + ], + "source": [ + "party = []\n", + "\n", + "def check_party(document):\n", + " dic = {'And_' : 4,\n", + " 'From_' : 5,\n", + " 'For' : 4,\n", + " 'Between' : 8,\n", + " 'With' : 5,\n", + " 'Ceo' : 4,\n", + " 'To' : 3,\n", + " }\n", + " \n", + " for text in document[2:]:\n", + " text = text.replace('\\\\n', ' ')\n", + " \n", + " result = None\n", + " match = re.search(r'\\w*\\s\\w*\\s\\w*,\\sInc\\.', text)\n", + " if match == None:\n", + " match = re.search(r'\\w*\\s\\w*\\s\\w*,\\sINC\\.', text)\n", + " if match != None:\n", + " result = match.group().title()\n", + " result = result.replace(',', '').replace(' ', '_')\n", + " for d in dic:\n", + " if d in result:\n", + " result = result[result.index(d) + dic[d]:]\n", + " if result.startswith('_'):\n", + " result = result[1:]\n", + " return result\n", + " \n", + "tmp = 0\n", + "for i in range(len(train_data)):\n", + " tt = check_party(train_data[i])\n", + " party.append(tt)\n", + " if file_name != 'test-A':\n", + " if train_expected_data[2][i] != tt:\n", + " tmp += 1\n", + " # print(i, train_expected_data[2][i], tt)\n", + "print('false party:', tmp)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## TERM" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "false term: 144\n" + ] + } + ], + "source": [ + "term = []\n", + "\n", + "def check_term(document):\n", + " \n", + " result = None\n", + " for text in document[2:]:\n", + " text = text.replace('\\\\n', ' ')\n", + " \n", + " \n", + " match = re.search(r'\\(\\d*\\)\\syears', text)\n", + " if match == None:\n", + " match = re.search(r'\\(\\d*\\)\\smonths', text)\n", + " if match != None:\n", + " result = match.group().replace('(', '').replace(')', '').replace(' ', '_')\n", + " return result\n", + " return result\n", + " \n", + "tmp = 0\n", + "for i in range(len(train_data)):\n", + " tt = check_term(train_data[i])\n", + " term.append(tt)\n", + " if file_name != 'test-A':\n", + " if train_expected_data[3][i] != tt:\n", + " if train_expected_data[3][i] == 'NONE' and tt == None:\n", + " pass\n", + " else:\n", + " # print(i, train_expected_data[3][i], tt)\n", + " tmp += 1\n", + "print('false term:', tmp)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "\n", + "def write_output(effective_date, jurisdiction, party, term):\n", + " if os.path.exists(KLEISTER_PATH/file_name/'out.tsv'):\n", + " os.remove(KLEISTER_PATH/file_name/'out.tsv')\n", + " file = open(KLEISTER_PATH/file_name/'out.tsv', 'w')\n", + " for doc in range(len(effective_date)):\n", + " result = ''\n", + " if effective_date[doc] != None:\n", + " result += 'effective_date=' + effective_date[doc] + '\\t'\n", + " if jurisdiction[doc] != None:\n", + " result += 'jurisdiction=' + jurisdiction[doc] + '\\t'\n", + " if party[doc] != None:\n", + " result += 'party=' + party[doc] + '\\t'\n", + " if term[doc] != None:\n", + " result += 'term=' + term[doc] + '\\t'\n", + " if len(result) > 1:\n", + " result = result[:-1]\n", + " result += '\\n'\n", + " file.write(result)\n", + " file.close()\n", + " \n", + "write_output(effective_date, jurisdiction, party, term)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.3" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/kleister-nda.py b/kleister-nda.py new file mode 100644 index 0000000..7577ae7 --- /dev/null +++ b/kleister-nda.py @@ -0,0 +1,337 @@ +#!/usr/bin/env python +# coding: utf-8 + +# # Extract key information from Edgar NDA documents + +# In[1]: + + +import pathlib +from collections import Counter +from sklearn.metrics import * + + +# In[2]: + + +KLEISTER_PATH = pathlib.Path('C:/Users/Fijka/Documents/kleister-nda-clone') +file_name = 'train' + + +# ## Read expected train data + +# In[3]: + + +def get_expected_data(filepath, data_key): + dataset_expected_key = [] + with open(filepath,'r') as train_expected_file: + for line in train_expected_file: + key_values = line.rstrip('\n').split(' ') + data_value = None + for key_value in key_values: + key, value = key_value.split('=') + if key == data_key: + data_value = value + if data_value is None: + data_value = 'NONE' + dataset_expected_key.append(data_value) + return dataset_expected_key + + +# In[4]: + + +KEYS = ['effective_date', 'jurisdiction', 'party', 'term'] + + +# In[5]: + + +def read_expected_data(filepath): + data = [] + for key in KEYS: + data.append(get_expected_data(filepath, key)) + return data + +if file_name != 'test-A': + train_expected_data = read_expected_data(KLEISTER_PATH/file_name/'expected.tsv') + + +# In[6]: + + +if file_name != 'test-A': + [i[:1] for i in train_expected_data] + + +# ## Read train dataset + +# In[7]: + + +import lzma +import csv + +def read_data(filename): + all_data = lzma.open(filename).read().decode('UTF-8').split('\n') + return [line.split('\t') for line in all_data][:-1] + +train_data = read_data(KLEISTER_PATH/file_name/'in.tsv.xz') + + +# ## JURISDICTION + +# In[8]: + + +STATES = ['Alabama', 'Alaska', 'Arizona', 'Arkansas', 'California', 'Colorado', 'Connecticut', 'Delaware','Florida', + 'Georgia', 'Hawaii', 'Idaho', 'Illinois', 'Indiana', 'Iowa', 'Kansas', 'Kentucky', 'Louisiana', 'Maine', + 'Maryland', 'Massachusetts', 'Michigan', 'Minnesota', 'Mississippi', 'Missouri', 'Montana', 'Nebraska', 'Nevada', + 'New Hampshire', 'New Jersey', 'New Mexico', 'New York', 'North Carolina', 'North Dakota', 'Ohio', 'Oklahoma', + 'Oregon', 'Pennsylvania', 'Rhode Island', 'South Carolina', 'South Dakota', 'Tennessee', 'Texas', 'Utah', + 'Vermont', 'Virginia', 'Washington', 'West Virginia', 'Wisconsin', 'Wyoming'] + + +# In[9]: + + +import spacy +nlp = spacy.load("en_core_web_sm") +from operator import itemgetter + +jurisdiction = [] + +def normalize(text): + return text.replace('\\n', ' ').lower() + # nlp(text) -> tokenizacja + +def check_jurisdiction(document): + states = {} + for text in document[2:]: + text = normalize(text) + for state in STATES: + if state.lower() in text: + if state in states: + states[state][0] += text.count(state.lower()) + else: + states[state] = [text.count(state.lower()), text.index(state.lower())] + if states != {}: + states = sorted(states.items(), key=itemgetter(1), reverse=True) + jurisdiction.append(states[0][0].replace(' ', '_')) + return states[0][0], states + else: + jurisdiction.append(None) + return None + +tmp = 0 +for i in range(len(train_data)): + tt = check_jurisdiction(train_data[i]) + if file_name != 'test-A': + if tt == None: + if train_expected_data[1][i] != None: + # print(i, train_expected_data[1][i], tt) + tmp += 1 + else: + if tt[0] != train_expected_data[1][i].replace('_', ' '): + # print(i, train_expected_data[1][i], tt[0]) + tmp += 1 +print('false jurisdiction:', tmp) + + +# ## EFFECTIVE DATE + +# In[10]: + + +import re +import datetime +from datetime import date + +effective_date = [] + +def parse_date(date): + month = str(date.month) + if len(month) == 1: + month = '0' + str(date.month) + day = str(date.day) + if len(day) == 1: + day = '0' + str(date.day) + return str(date.year) + '-' + month + '-' + day + +def find_dates(text): + + MONTHS = {'January' : 1, 'February' : 2, 'March' : 3, 'April' : 4, 'May' : 5, 'June' : 6, + 'July' : 7, 'August' : 8, 'September' : 9, 'October' : 10, 'November' : 11, 'December' : 12} + + all_dates = [] + + text = text.replace('\\n', ' ') + + dic = {'\d{1,2}\/\d{1,2}\/\d{2}' : '%m/%d/%y', + '[01]*[0-9]\/[01]*[0-9]\/\d{4}' : '%m/%d/%Y', + '\w{3,9}?\s\d{1,2}?,\s\d{4}?' : '%B %d, %Y', + '\w{3,9}?\s\d{1,2}?,\d{4}?' : '%B %d,%Y', + '\d{1,2}?th\sday\sof\s\w{3,9}?\s\d{4}?' : '%dth day of %B %Y', + '\d{1,2}?th\sday\sof\s\w{3,9}?,\s\d{4}?' : '%dth day of %B, %Y', + '\d{1,2}?ND\sday\sof\s\w{3,9}?\s\d{4}?' : '%dND day of %B %Y', + '\w{3,9}?\s\d{1,2}?th\s,\s\d{4}?' : '%B %dth , %Y', + '\w{3,9}?\s\d{1,2}?th,\s\d{4}?' : '%B %dth, %Y', + '\d{1,2}?\sday\sof\s\w{3,9}?,\s\d{4}?' : '%d day of %B, %Y', + '\w{3,9}?\.\s\d{1,2}?,\s\d{4}?' : '%b. %d, %Y', + '\d{1,2}?\s\w{3,9}?,\s\d{4}?' : '%d %B, %Y', + '\d{1,2}?st\sday\sof\s\w{3,9}?\s,\s\d{4}?' : '%dst day of %B , %Y', + '\d{1,2}?st\sday\sof\s\w{3,9}?,\s\d{4}?' : '%dst day of %B, %Y', + '\d{1,2}?nd\sday\sof\s\w{3,9}?,\s\d{4}?' : '%dnd day of %B, %Y', + '\d{1,2}\.\d{1,2}\.\d{2,4}' : '%m.%d.%y' + } + + for d in dic: + match = re.search(r'' + d, text) + if match != None: + try: + date = datetime.datetime.strptime(match.group(), dic[d]).date() + all_dates.append(parse_date(date)) + except: + pass + + return all_dates + +def check_effective_date(text): + dates = [] + x = find_dates(text) + if x != []: + dates.append(x) + return(dates) + +test = 0 +for i in range(len(train_data)): + xx = check_effective_date(train_data[i][2]) + if file_name != 'test-A': + if train_expected_data[0][i] == 'NONE': + if xx != []: + # print(i, train_expected_data[0][i], xx[-1][0]) + test += 1 + else: + if xx != []: + if xx[0][-1] != train_expected_data[0][i]: + # print(i, train_expected_data[0][i], xx[-1][0]) + test +=1 + else: + # print(i, train_expected_data[0][i], xx) + test += 1 + if xx != []: + effective_date.append(xx[-1][0]) + else: + effective_date.append(None) +print('false effective date', test) + + +# ## PARTY + +# In[11]: + + +party = [] + +def check_party(document): + dic = {'And_' : 4, + 'From_' : 5, + 'For' : 4, + 'Between' : 8, + 'With' : 5, + 'Ceo' : 4, + 'To' : 3, + } + + for text in document[2:]: + text = text.replace('\\n', ' ') + + result = None + match = re.search(r'\w*\s\w*\s\w*,\sInc\.', text) + if match == None: + match = re.search(r'\w*\s\w*\s\w*,\sINC\.', text) + if match != None: + result = match.group().title() + result = result.replace(',', '').replace(' ', '_') + for d in dic: + if d in result: + result = result[result.index(d) + dic[d]:] + if result.startswith('_'): + result = result[1:] + return result + +tmp = 0 +for i in range(len(train_data)): + tt = check_party(train_data[i]) + party.append(tt) + if file_name != 'test-A': + if train_expected_data[2][i] != tt: + tmp += 1 + # print(i, train_expected_data[2][i], tt) +print('false party:', tmp) + + +# ## TERM + +# In[12]: + + +term = [] + +def check_term(document): + + result = None + for text in document[2:]: + text = text.replace('\\n', ' ') + + + match = re.search(r'\(\d*\)\syears', text) + if match == None: + match = re.search(r'\(\d*\)\smonths', text) + if match != None: + result = match.group().replace('(', '').replace(')', '').replace(' ', '_') + return result + return result + +tmp = 0 +for i in range(len(train_data)): + tt = check_term(train_data[i]) + term.append(tt) + if file_name != 'test-A': + if train_expected_data[3][i] != tt: + if train_expected_data[3][i] == 'NONE' and tt == None: + pass + else: + # print(i, train_expected_data[3][i], tt) + tmp += 1 +print('false term:', tmp) + + +# In[13]: + + +import os + +def write_output(effective_date, jurisdiction, party, term): + if os.path.exists(KLEISTER_PATH/file_name/'out.tsv'): + os.remove(KLEISTER_PATH/file_name/'out.tsv') + file = open(KLEISTER_PATH/file_name/'out.tsv', 'w') + for doc in range(len(effective_date)): + result = '' + if effective_date[doc] != None: + result += 'effective_date=' + effective_date[doc] + '\t' + if jurisdiction[doc] != None: + result += 'jurisdiction=' + jurisdiction[doc] + '\t' + if party[doc] != None: + result += 'party=' + party[doc] + '\t' + if term[doc] != None: + result += 'term=' + term[doc] + '\t' + if len(result) > 1: + result = result[:-1] + result += '\n' + file.write(result) + file.close() + +write_output(effective_date, jurisdiction, party, term) + diff --git a/test-A/out.tsv b/test-A/out.tsv new file mode 100644 index 0000000..73e0a84 --- /dev/null +++ b/test-A/out.tsv @@ -0,0 +1,203 @@ +effective_date=2017-02-24 jurisdiction=Ohio term=12_months +effective_date=2013-05-08 jurisdiction=California party=Veramark_Technologies_Inc. term=2_years +jurisdiction=Indiana party=Zimmer_Inc. term=2_years +effective_date=2007-03-28 jurisdiction=New_York party=Motive_Inc. term=2_years +effective_date=2015-05-21 jurisdiction=Nevada party=Lumicon_Exhibition_Services_Inc. +effective_date=2010-07-16 jurisdiction=Colorado term=20_years +jurisdiction=Colorado party=Array_Biopharma_Inc. +jurisdiction=California party=Sunstone_Hotel_Investors_Inc. +effective_date=2012-01-01 jurisdiction=Delaware +effective_date=2008-06-25 jurisdiction=New_York +effective_date=2016-04-19 jurisdiction=California party=2016_Cavium_Inc. +effective_date=2011-02-28 jurisdiction=Delaware party=Of_Lacrosse_Footwear_Inc. +effective_date=2011-10-07 jurisdiction=New_York +effective_date=2017-07-20 jurisdiction=Georgia party=Global_Project_Services_Inc. +effective_date=2015-08-10 jurisdiction=Texas party=Daegis_Inc. term=2_years +effective_date=2008-03-17 jurisdiction=California +effective_date=2012-12-19 jurisdiction=Washington party=First_Financial_Northwest_Inc. +term=10_years +effective_date=2009-03-30 jurisdiction=Florida term=18_months +effective_date=2008-07-16 jurisdiction=Delaware party=American_Land_Lease_Inc. +jurisdiction=Florida party=Flow_Capital_Advisors_Inc. +effective_date=2009-12-31 jurisdiction=New_York party=Intercept_Pharmaceuticals_Inc. +effective_date=2019-04-08 jurisdiction=California party=Aerohive_Networks_Inc. term=2_years +effective_date=2007-10-11 jurisdiction=Florida party=By_Suncoast_Holdings_Inc. term=12_months +effective_date=2009-11-19 jurisdiction=New_York party=Comsys_It_Partners_Inc. term=18_months +effective_date=2005-07-15 jurisdiction=Washington term=5_years +effective_date=2020-07-23 jurisdiction=Texas +effective_date=2012-10-18 jurisdiction=Delaware term=12_months +effective_date=2004-01-01 jurisdiction=Delaware party=er_International_Inc. +effective_date=2020-05-19 jurisdiction=Washington +effective_date=2012-01-11 jurisdiction=Delaware party=Opnet_Technologies_Inc. term=3_years +effective_date=2007-10-25 jurisdiction=Pennsylvania party=Kks_Venture_Management_Inc. term=12_months +effective_date=2010-11-02 jurisdiction=California +jurisdiction=Utah +effective_date=2018-11-08 jurisdiction=New_York +effective_date=2008-07-08 jurisdiction=New_York +effective_date=2003-08-05 jurisdiction=New_York party=Information_Spectrum_Inc. term=5_years +effective_date=2014-01-28 jurisdiction=Delaware party=Vapotherm_Inc. term=5_years +effective_date=2006-06-29 jurisdiction=Florida party=Sun_Energy_Solar_Inc. term=5_years +effective_date=2007-01-25 jurisdiction=California +jurisdiction=Virginia party=Circuit_City_Stores_Inc. term=10_years +effective_date=2010-07-23 jurisdiction=Pennsylvania term=5_years +effective_date=2010-01-25 jurisdiction=Delaware party=Protection_One_Inc. +effective_date=2008-04-03 jurisdiction=Washington party=Performing_Arts_Center_Inc. term=3_years +effective_date=2012-02-24 jurisdiction=Virginia party=Unicom_Systems_Inc. term=2_years +jurisdiction=California party=Supermicro_Computer_Inc. term=7_years +effective_date=2012-07-30 jurisdiction=Delaware +effective_date=2000-03-28 jurisdiction=Pennsylvania party=Piercing_Pagoda_Inc. +effective_date=2012-05-11 jurisdiction=New_Jersey party=Sun_Pharmaceutical_Industries_Inc. term=2_years +effective_date=2014-11-07 jurisdiction=Illinois party=The_Adept_Technology_Inc. term=5_years +effective_date=2018-03-01 jurisdiction=Texas party=Mountain_High_Brands_Inc. +jurisdiction=Massachusetts party=Restorbio_Inc. +effective_date=2010-07-27 jurisdiction=Washington term=3_years +effective_date=2011-02-23 jurisdiction=Delaware party=Regarding_Inspire_Pharmaceuticals_Inc. +effective_date=2017-10-09 jurisdiction=Delaware term=12_months +effective_date=2004-06-08 jurisdiction=New_York party=Jones_Apparel_Group_Inc. +effective_date=2020-04-20 jurisdiction=Pennsylvania term=12_months +effective_date=2020-01-01 jurisdiction=California party=Autodesk_Inc. term=2_years +effective_date=2001-09-17 jurisdiction=Georgia party=Earthlink_Inc. +effective_date=2011-02-25 jurisdiction=Delaware party=Arch_Coal_Inc. +effective_date=2018-04-10 jurisdiction=Delaware party=Among_Hopfed_Bancorp_Inc. +effective_date=2017-10-31 jurisdiction=Massachusetts party=Sonus_Networks_Inc. +jurisdiction=Connecticut +effective_date=2006-05-11 jurisdiction=California party=2006_Beckman_Coulter_Inc. +effective_date=2006-08-08 jurisdiction=Texas party=Genesis_Energy_Inc. +effective_date=2009-05-06 jurisdiction=Delaware party=Etrials_Worldwide_Inc. +effective_date=2006-03-01 jurisdiction=Virginia term=12_months +effective_date=2013-11-18 jurisdiction=Delaware term=2_years +effective_date=2010-06-08 jurisdiction=Washington term=2_years +effective_date=2011-10-18 jurisdiction=Minnesota party=Electromed_Inc. term=12_months +effective_date=2020-08-27 jurisdiction=Oregon party=Quest_Solution_Inc. term=2_years +effective_date=2002-06-21 jurisdiction=Texas party=Mannatech_Inc. term=5_years +effective_date=2016-01-01 jurisdiction=Colorado party=Evolving_Systems_Inc. term=3_years +effective_date=2007-10-31 jurisdiction=Colorado term=12_months +effective_date=2017-05-24 jurisdiction=Nevada term=3_years +effective_date=2017-12-18 jurisdiction=New_York +effective_date=2005-07-05 jurisdiction=Delaware +jurisdiction=Illinois party=Walgreens_Boots_Alliance_Inc. term=2_years +effective_date=2014-12-22 jurisdiction=New_York party=Celator_Pharmaceuticals_Inc. +effective_date=2012-01-27 jurisdiction=New_York party=The_Talbots_Inc. +effective_date=2006-07-10 jurisdiction=Florida party=Sun_Energy_Solar_Inc. term=5_years +effective_date=2004-01-07 jurisdiction=Virginia party=Waechter_Caci_International_Inc. +effective_date=2017-01-20 jurisdiction=Delaware party=Ocera_Therapeutics_Inc. term=5_years +effective_date=2005-09-19 jurisdiction=Nevada term=2_years +effective_date=2016-11-17 jurisdiction=Delaware party=Nimble_Storage_Inc. term=18_months +jurisdiction=New_Jersey party=Amber_Road_Inc. +effective_date=2001-12-12 jurisdiction=North_Carolina term=3_years +effective_date=2019-08-05 jurisdiction=Minnesota party=Apogee_Enterprises_Inc. term=18_months +jurisdiction=Connecticut +jurisdiction=North_Carolina term=12_months +effective_date=2009-03-09 jurisdiction=Kansas party=Bats_Exchange_Inc. term=2_years +effective_date=2016-07-25 jurisdiction=Florida party=Carolco_Pictures_Inc. term=3_years +effective_date=2015-08-26 jurisdiction=Washington party=Among_Riverview_Bancorp_Inc. term=2_years +effective_date=2001-03-21 jurisdiction=Oregon +effective_date=2004-03-17 jurisdiction=New_York party=The_Nymex_Holdings_Inc. +jurisdiction=Georgia party=Creative_Insurance_Managers_Inc. +effective_date=2003-09-04 jurisdiction=Delaware +effective_date=2015-12-08 jurisdiction=New_York +jurisdiction=Colorado party=Integrated_Wellness_Systems_Inc. term=5_years +effective_date=2016-07-26 jurisdiction=California term=3_years +effective_date=2020-09-28 jurisdiction=Tennessee party=Comverge_Inc. +effective_date=2012-09-01 jurisdiction=Ohio party=Myers_Industries_Inc. term=3_years +effective_date=2019-02-08 jurisdiction=Minnesota party=1_Apogee_Enterprises_Inc. +effective_date=2003-12-02 jurisdiction=Delaware party=Compass_Group_Usa_Inc. term=2_years +effective_date=2006-02-28 jurisdiction=California party=American_Way_Importing_Inc. +effective_date=2003-07-02 jurisdiction=Connecticut party=13_Lydall_Inc. term=2_years +effective_date=2019-02-14 jurisdiction=Maryland term=2_years +effective_date=2016-11-17 jurisdiction=Delaware party=Nimble_Storage_Inc. term=18_months +effective_date=2019-01-16 jurisdiction=Utah party=Among_Glacier_Bancorp_Inc. term=2_years +effective_date=2010-10-19 jurisdiction=California party=Applied_Signal_Technology_Inc. term=18_months +effective_date=2012-02-12 jurisdiction=Utah term=5_years +effective_date=2017-10-01 jurisdiction=Colorado party=Evolving_Systems_Inc. term=3_years +effective_date=2009-12-08 jurisdiction=New_York term=2_years +effective_date=2017-12-15 jurisdiction=New_York party=Delta_Air_Lines_Inc. +effective_date=2002-02-28 jurisdiction=California term=3_years +effective_date=2013-07-29 jurisdiction=New_York party=Among_Cytori_Therapeutics_Inc. term=7_years +effective_date=2018-04-01 jurisdiction=Virginia party=Maximus_Inc. term=12_months +effective_date=2002-05-08 jurisdiction=Massachusetts party=Opta_Food_Ingredients_Inc. +effective_date=2007-01-31 jurisdiction=Texas party=Texas_United_Bancshares_Inc. term=24_months +effective_date=2001-10-05 jurisdiction=Oregon party=Innoveda_Inc. +effective_date=2007-04-23 jurisdiction=Colorado party=Industrial_Furnace_Company_Inc. term=2_years +effective_date=2018-04-26 jurisdiction=New_York party=Giant_Beverage_Inc. +jurisdiction=Michigan party=Flagstar_Bancorp_Inc. term=6_months +effective_date=2013-01-31 jurisdiction=Pennsylvania +effective_date=2006-09-22 jurisdiction=New_York term=5_years +effective_date=2012-07-13 jurisdiction=New_Jersey party=Heartland_Payment_Systems_Inc. term=12_months +effective_date=2011-05-27 jurisdiction=New_York party=Immucor_Inc. term=18_months +jurisdiction=Minnesota party=Choicetel_Communications_Inc. +effective_date=2018-11-01 jurisdiction=Ohio term=12_months +effective_date=1997-04-28 jurisdiction=Oregon party=1996_By_Acumed_Inc. +effective_date=2014-07-15 jurisdiction=New_Jersey party=Among_Ziprealty_Inc. +effective_date=2002-09-05 jurisdiction=Florida party=Hte_Inc. term=18_months +jurisdiction=Oregon party=Nike_Inc. term=2_years +jurisdiction=Massachusetts +effective_date=1998-04-01 jurisdiction=Ohio party=Athersys_Inc. term=18_months +effective_date=2008-02-19 jurisdiction=Minnesota party=Behalf_Of_Buca_Inc. +jurisdiction=Maryland party=Among_Sunshine_Financial_Inc. +effective_date=1999-11-15 jurisdiction=Indiana party=Of_Evergreen_Holdings_Inc. term=2_years +effective_date=2015-10-19 jurisdiction=California party=Social_Reality_Inc. +effective_date=2012-05-21 jurisdiction=Virginia term=24_months +effective_date=2017-12-31 jurisdiction=Tennessee party=Lappin_Corecivic_Inc. +effective_date=2009-02-16 jurisdiction=Delaware party=Chordiant_Software_Inc. +effective_date=2007-09-10 jurisdiction=Maryland party=Welocalize_Inc. term=3_years +effective_date=2013-03-11 jurisdiction=Minnesota +effective_date=2015-12-02 jurisdiction=Ohio party=Myers_Industries_Inc. term=2_years +effective_date=2000-08-02 jurisdiction=California party=Nomac_Energy_Systems_Inc. term=10_years +effective_date=2006-10-31 jurisdiction=New_Jersey party=Agreement_Doubletake_Software_Inc. term=2_years +jurisdiction=Colorado party=Aerogrow_International_Inc. term=3_years +jurisdiction=Indiana +effective_date=2006-10-01 jurisdiction=Virginia party=Maximus_Inc. term=4_years +jurisdiction=Minnesota party=Sps_Commerce_Inc. term=2_years +jurisdiction=New_York term=2_years +effective_date=2018-10-09 jurisdiction=Delaware party=Spark_Therapeutics_Inc. term=5_years +effective_date=2005-08-01 jurisdiction=New_York +jurisdiction=California party=Gigpeak_Inc. term=2_years +effective_date=2008-03-28 jurisdiction=Delaware party=Strictly_Confidential_Hologic_Inc. term=12_months +effective_date=2011-05-01 jurisdiction=Louisiana +effective_date=2007-11-30 jurisdiction=Georgia party=al_System_Services_Inc. term=5_years +effective_date=1998-09-22 jurisdiction=New_York +effective_date=2012-01-27 jurisdiction=New_York party=The_Talbots_Inc. +effective_date=1994-08-31 jurisdiction=Oregon party=Employee_Nike__Inc. +jurisdiction=Nevada party=Resorts_International_Marketing_Inc. term=2_years +jurisdiction=California party=Of_Violin_Memory_Inc. +effective_date=2008-06-16 jurisdiction=North_Carolina +effective_date=2008-03-12 jurisdiction=California party=Allergy_Research_Group_Inc. term=3_years +effective_date=2008-07-09 jurisdiction=Georgia party=Dynamic_Response_Group_Inc. term=2_years +effective_date=2010-01-07 jurisdiction=California party=Techwell_Inc. +effective_date=2012-06-22 jurisdiction=New_York party=Ksw_Inc. + +effective_date=2010-06-06 jurisdiction=New_York party=Clarient_Inc. +effective_date=2017-04-24 jurisdiction=Indiana party=Redhawk_Supreme_Industries_Inc. +jurisdiction=Georgia party=Nuclear_Operating_Company_Inc. +jurisdiction=New_Jersey party=Aegerion_Pharmaceuticals_Inc. +effective_date=2016-10-06 jurisdiction=Minnesota +effective_date=2007-04-18 jurisdiction=North_Carolina +effective_date=2011-11-15 jurisdiction=Georgia party=Comverge_Inc. term=2_years +effective_date=1996-03-04 jurisdiction=Kentucky term=2_years +effective_date=2019-07-19 jurisdiction=Nevada party=Cosmos_Group_Holdings_Inc. term=2_years +effective_date=2007-11-30 jurisdiction=Missouri +effective_date=2007-06-14 jurisdiction=Colorado party=By_Global_Casinos_Inc. term=5_years +effective_date=2010-04-27 jurisdiction=Connecticut party=Praxair_Inc. term=2_years +jurisdiction=Delaware party=Of_Leapfrog_Enterprises_Inc. term=2_years +effective_date=2020-11-24 jurisdiction=North_Carolina party=Red_Hat_Inc. term=12_months +effective_date=2017-02-10 jurisdiction=Delaware party=Kite_Pharma_Inc. term=5_years +effective_date=2018-12-17 jurisdiction=Georgia party=Colony_Bankcorp_Inc. term=2_years +jurisdiction=Delaware party=Strategic_Opportunity_Reit_Inc. term=2_years +effective_date=2013-05-29 jurisdiction=New_York +effective_date=2020-05-18 jurisdiction=Pennsylvania party=26_Neubase_Therapeutics_Inc. term=6_months +effective_date=2005-02-25 jurisdiction=California party=Specialized_Marketing_Services_Inc. term=2_years +jurisdiction=Maryland party=Vapotherm_Inc. +effective_date=2014-06-03 jurisdiction=California party=Ziprealty_Inc. +effective_date=2015-02-09 jurisdiction=Delaware party=Root9B_Technologies_Inc. +effective_date=2015-04-06 jurisdiction=Pennsylvania term=12_months +effective_date=2003-07-30 jurisdiction=New_York party=Alloy_Inc. +effective_date=2003-03-31 jurisdiction=Virginia party=Usa_Deck_Inc. term=2_years +effective_date=2001-03-02 jurisdiction=Virginia term=12_months +effective_date=2018-03-19 jurisdiction=Delaware party=Armo_Biosciences_Inc. +effective_date=2010-11-01 jurisdiction=Nevada +effective_date=2019-06-04 jurisdiction=Nevada party=Renovacare_Inc. term=48_months +effective_date=2006-11-01 jurisdiction=Delaware +effective_date=2008-09-10 jurisdiction=California party=Apollo_Medical_Holdings_Inc. +effective_date=2002-07-29 jurisdiction=Colorado term=6_months +effective_date=2014-07-15 party=The_World_Air_Inc. term=2_years diff --git a/train/out.tsv b/train/out.tsv new file mode 100644 index 0000000..d2998d7 --- /dev/null +++ b/train/out.tsv @@ -0,0 +1,254 @@ +effective_date=2001-04-18 jurisdiction=Oregon party=Nike_Inc. term=2_years +effective_date=2017-02-10 jurisdiction=Delaware party=Kite_Pharma_Inc. term=5_years +effective_date=2020-01-06 jurisdiction=Florida party=Neogenomics_Laboratories_Inc. +effective_date=1999-02-09 jurisdiction=Pennsylvania term=2_years +effective_date=2011-07-13 jurisdiction=California party=Lgp_Management_Inc. +effective_date=2004-11-19 jurisdiction=California party=Dolby_Laboratories_Inc. +effective_date=2009-09-23 jurisdiction=New_York term=2_years +effective_date=2011-02-28 jurisdiction=Delaware party=Of_Lacrosse_Footwear_Inc. +jurisdiction=Illinois term=2_years +effective_date=2006-12-28 jurisdiction=New_York +effective_date=2014-12-11 jurisdiction=Delaware party=Norcraft_Companies_Inc. term=2_years +jurisdiction=Iowa party=Renewable_Energy_Group_Inc. term=24_months +effective_date=2009-08-03 jurisdiction=Delaware party=Aspect_Medical_Systems_Inc. +jurisdiction=Indiana +effective_date=2013-02-18 jurisdiction=New_York +effective_date=2005-05-04 jurisdiction=Massachusetts party=Neon_Systems_Inc. term=2_years +effective_date=2008-08-11 jurisdiction=Maine party=Of_Ann_Taylor_Inc. +effective_date=2006-01-30 jurisdiction=Michigan +effective_date=2004-10-28 jurisdiction=Indiana party=Carolina_Industries_Inc. term=7_years +effective_date=2009-05-12 jurisdiction=Colorado party=Clovis_Oncology_Inc. term=3_years +effective_date=2013-05-01 jurisdiction=Florida term=2_years +jurisdiction=New_York term=2_years +effective_date=2019-10-17 jurisdiction=Oregon party=Donahoe_Ii_Nike_Inc. term=2_years +effective_date=2016-02-12 jurisdiction=Pennsylvania term=2_years +effective_date=2008-01-01 jurisdiction=Delaware term=12_months +effective_date=2008-09-10 jurisdiction=Florida party=A_Discover_Screens_Inc. +jurisdiction=Delaware +effective_date=2005-04-06 jurisdiction=Illinois party=By_Rubicon_Technology_Inc. term=36_months +effective_date=2011-05-16 jurisdiction=Illinois term=5_years +effective_date=2010-09-07 jurisdiction=Delaware party=Biodelivery_Sciences_International_Inc. +jurisdiction=California party=Cisco_Systems_Inc. term=5_years +effective_date=2014-03-21 jurisdiction=Delaware party=Cloudera_Inc. +effective_date=2013-11-10 jurisdiction=Missouri +effective_date=2001-03-26 jurisdiction=Oregon party=Nike_Inc. term=2_years +effective_date=2013-01-30 jurisdiction=Delaware +effective_date=2013-12-20 jurisdiction=Delaware party=Vocus_Inc. term=18_months +effective_date=2014-11-14 jurisdiction=Connecticut +effective_date=2015-11-28 jurisdiction=Nevada term=12_months +effective_date=2007-03-24 jurisdiction=New_York term=18_months +jurisdiction=Illinois party=Oak_Brook_Bancshares_Inc. +effective_date=2005-05-04 jurisdiction=Idaho party=Kimberly_Gold_Mines_Inc. +effective_date=2012-08-01 jurisdiction=Florida term=10_years +effective_date=2008-07-22 jurisdiction=Delaware term=2_years +effective_date=2011-05-05 jurisdiction=Delaware +effective_date=2007-03-15 jurisdiction=Minnesota party=Greatbatch_Inc. +jurisdiction=Virginia term=3_years +effective_date=2009-11-19 jurisdiction=California party=Diedrich_Coffee_Inc. +effective_date=2014-12-09 jurisdiction=California party=Digipath_Inc. +effective_date=2018-04-20 jurisdiction=Nevada +effective_date=2015-06-23 jurisdiction=New_York party=Higher_One_Holdings_Inc. +effective_date=2020-05-05 jurisdiction=Washington +effective_date=2004-06-08 jurisdiction=New_York party=Jones_Apparel_Group_Inc. +effective_date=2008-09-29 jurisdiction=Ohio party=Cardinal_Health_Inc. +effective_date=2019-04-03 jurisdiction=Nevada party=Among_Glacier_Bancorp_Inc. +effective_date=2011-06-13 jurisdiction=Florida party=Tz_Inc. +effective_date=2014-04-15 jurisdiction=Delaware party=World_Energy_Solutions_Inc. term=2_years +effective_date=2015-02-10 jurisdiction=Texas party=Pier_1_Imports_Inc. +jurisdiction=New_York +effective_date=2012-04-11 jurisdiction=New_York party=As_Medquist_Holdings_Inc. +effective_date=2005-01-11 jurisdiction=Virginia party=Clinical_Development_Consultants_Inc. term=5_years +jurisdiction=Wisconsin party=Schneider_National_Inc. term=18_months +effective_date=2010-05-17 jurisdiction=Colorado party=Victoria_Industries_Inc. +effective_date=2002-12-09 jurisdiction=Oregon term=5_years +effective_date=2015-11-16 jurisdiction=Delaware party=Anadigics_Inc. term=2_years +effective_date=2001-05-22 jurisdiction=Ohio party=Aep_Energy_Services_Inc. +effective_date=2009-01-09 jurisdiction=Missouri party=Under_The_Savvis_Inc. term=2_years +effective_date=2020-01-09 jurisdiction=South_Dakota term=2_years +effective_date=2008-06-09 jurisdiction=Maine party=Of_Ann_Taylor_Inc. +effective_date=2006-01-05 jurisdiction=Indiana party=Techniscan_Medical_Systems_Inc. term=3_years +effective_date=2012-12-07 jurisdiction=Minnesota party=Caribou_Coffee_Company_Inc. +jurisdiction=Maine +jurisdiction=Kansas party=Among_Epiq_Systems_Inc. term=2_years +effective_date=2011-02-08 jurisdiction=Delaware party=Coleman_Cable_Inc. +effective_date=2005-05-31 jurisdiction=Illinois party=Promark_Technology_Inc. term=5_years +effective_date=1999-08-24 jurisdiction=Indiana party=Of_Clean_Harbors_Inc. +jurisdiction=Massachusetts +effective_date=2007-02-14 jurisdiction=Illinois party=Option_Care_Inc. term=2_years +effective_date=2005-01-01 jurisdiction=New_Jersey term=22_months +effective_date=2008-03-05 jurisdiction=California party=Cv_Therapeutics_Inc. term=5_years +effective_date=2009-03-25 jurisdiction=California term=3_years +effective_date=2007-12-31 jurisdiction=Maine +jurisdiction=North_Carolina +effective_date=2007-01-10 jurisdiction=Missouri term=12_months +jurisdiction=Georgia party=Gentiva_Health_Services_Inc. term=2_years +effective_date=2018-01-01 jurisdiction=Missouri term=3_years +effective_date=2010-03-17 jurisdiction=New_York party=2010_Flir_Systems_Inc. +effective_date=2020-03-15 jurisdiction=Georgia party=Citi_Trends_Inc. term=2_years +effective_date=2008-04-22 jurisdiction=New_York +effective_date=2010-01-02 jurisdiction=Kansas +effective_date=2007-05-17 jurisdiction=California party=Inca_Acquisition_Inc. term=2_years +jurisdiction=Oregon party=Nike_Inc. term=2_years +effective_date=2012-10-15 jurisdiction=Delaware party=Jll_Partners_Inc. +effective_date=2007-02-15 jurisdiction=Delaware party=Global_Imaging_Systems_Inc. term=2_years +jurisdiction=Connecticut party=Fsc_Ct_Inc. term=3_months +effective_date=2003-03-27 jurisdiction=Utah term=12_months +jurisdiction=Texas party=Mavenir_Systems_Inc. term=3_years +effective_date=2020-02-02 jurisdiction=Delaware party=Caddystats_Inc. +effective_date=2011-02-01 jurisdiction=Ohio +effective_date=2009-01-07 jurisdiction=California party=Logo_Cost_Plus_Inc. +jurisdiction=California party=Boingo_Wireless_Inc. term=12_months +effective_date=2020-11-16 jurisdiction=New_York party=Crew_Group_Inc. term=3_years +effective_date=2000-12-31 jurisdiction=North_Carolina term=2_years +effective_date=2011-03-22 jurisdiction=Texas party=Amerisourcebergen_Specialty_Group_Inc. +effective_date=2011-07-14 jurisdiction=New_York party=S_Restaurant_Group_Inc. +effective_date=2017-10-25 jurisdiction=New_Jersey term=2_years +effective_date=2007-02-28 jurisdiction=Georgia +effective_date=2010-03-16 jurisdiction=Massachusetts term=2_years +effective_date=2011-01-12 jurisdiction=Texas party=Confidential_Exco_Resources_Inc. term=15_months +effective_date=2004-08-03 jurisdiction=Delaware party=Md_21017_Datakey_Inc. term=3_years +jurisdiction=New_York party=2_Delcath_Systems_Inc. term=12_months +effective_date=2007-10-05 jurisdiction=Pennsylvania term=10_years +effective_date=2016-05-05 jurisdiction=Pennsylvania party=Among_Endo_Pharmaceuticals_Inc. +effective_date=2010-11-04 jurisdiction=Massachusetts +effective_date=2012-03-02 jurisdiction=Delaware party=S_China_Bistro_Inc. +effective_date=2014-09-18 jurisdiction=Florida party=Neogenomics_Inc. term=2_years +effective_date=2013-05-13 jurisdiction=California party=Avanir_Pharmaceuticals_Inc. term=2_years +effective_date=2011-10-20 jurisdiction=New_York party=Successfactors_Inc. term=12_months +effective_date=2012-10-30 jurisdiction=Delaware party=Version_Westway_Group_Inc. +effective_date=1994-10-06 jurisdiction=Oregon party=Employee_Nike_Inc. +effective_date=2004-10-11 jurisdiction=North_Carolina party=Inspire_Pharmaceuticals_Inc. +effective_date=2007-09-03 jurisdiction=Delaware party=Tektronix_Inc. +effective_date=2014-12-16 jurisdiction=New_York party=Realty_Capital_Properties_Inc. +effective_date=2007-03-19 jurisdiction=Delaware term=12_months +effective_date=2008-02-07 jurisdiction=Delaware party=Kintera_Inc. term=3_years +jurisdiction=Maine party=Of_Ann_Taylor_Inc. +jurisdiction=Massachusetts party=Restorbio_Inc. +effective_date=2003-02-07 jurisdiction=Massachusetts term=12_months +effective_date=2007-10-12 jurisdiction=New_York party=Adams_Respiratory_Therapeutics_Inc. +effective_date=2005-01-25 jurisdiction=Missouri party=Include_Energizer_Holdings_Inc. term=5_years +effective_date=2017-03-02 jurisdiction=Virginia term=12_months +effective_date=2008-09-01 jurisdiction=California party=Thomas_Properties_Group_Inc. +effective_date=2020-11-30 jurisdiction=Virginia party=Shire_Pharmaceuticals_Inc. term=5_years +effective_date=2006-11-22 jurisdiction=California + +jurisdiction=Wisconsin party=Petsmart_Inc. +jurisdiction=Washington +effective_date=2006-04-13 jurisdiction=New_York party=Coat_Factory_Holdings_Inc. term=3_years +effective_date=2014-07-24 jurisdiction=California +jurisdiction=Illinois party=Federated_Department_Stores_Inc. +effective_date=2014-04-06 jurisdiction=Delaware party=Confidential_Vocus_Inc. +effective_date=2005-03-01 jurisdiction=Massachusetts party=Private_Financial_Holdings_Inc. term=6_months +effective_date=2007-05-17 jurisdiction=California party=Inca_Acquisition_Inc. term=2_years +effective_date=2014-06-10 jurisdiction=Ohio party=Cardinal_Health_Inc. +jurisdiction=Illinois +effective_date=2013-01-22 jurisdiction=New_York party=Crew_Group_Inc. term=3_years +effective_date=1993-08-03 jurisdiction=New_Jersey party=Biodelivery_Sciences_International_Inc. term=2_years +effective_date=2011-04-15 jurisdiction=Delaware +jurisdiction=Massachusetts party=Vistaprint_Usa_Inc. +jurisdiction=Massachusetts +jurisdiction=Utah party=Usana_Health_Sciences_Inc. +effective_date=2020-11-06 jurisdiction=Washington term=5_years +jurisdiction=Texas +effective_date=2007-10-28 jurisdiction=California party=Blue_Coat_Systems_Inc. term=3_years +jurisdiction=California party=Alcis_Health_Inc. term=3_years +effective_date=2007-02-06 jurisdiction=Colorado party=Precision_Metal_Manufacturing_Inc. term=5_years +effective_date=2005-10-12 jurisdiction=Georgia party=2005_Koch_Industries_Inc. term=2_years +effective_date=2009-02-02 jurisdiction=Delaware party=Transport_Services_Group_Inc. +jurisdiction=Pennsylvania term=12_months +effective_date=2005-01-01 jurisdiction=New_Jersey term=12_months +jurisdiction=Virginia party=Comstock_Homebuilding_Companies_Inc. term=2_years +effective_date=2005-07-01 jurisdiction=New_York party=2005_Omnicare_Inc. +jurisdiction=Michigan term=12_months +jurisdiction=Nevada term=6_months +effective_date=2006-04-13 jurisdiction=New_York party=Coat_Factory_Holdings_Inc. term=3_years +effective_date=2008-10-01 jurisdiction=Virginia term=5_years +effective_date=2008-09-10 jurisdiction=California party=Hifn_Inc. +effective_date=2006-02-28 jurisdiction=New_York party=Gentiva_Health_Services_Inc. +effective_date=2009-05-04 jurisdiction=New_York party=Api_Electronics_Inc. +effective_date=2004-09-03 jurisdiction=California party=Nstor_Corporation_Inc. term=3_years +effective_date=2012-07-09 jurisdiction=New_Jersey party=Heartland_Payment_Systems_Inc. term=6_months +jurisdiction=Missouri party=Savvis_Inc. term=12_months +effective_date=2004-02-24 jurisdiction=Illinois term=2_years +effective_date=2011-07-22 jurisdiction=Texas +jurisdiction=New_Jersey term=2_years +effective_date=2009-05-21 jurisdiction=New_York +effective_date=2014-02-19 jurisdiction=New_York +effective_date=2018-10-01 jurisdiction=Missouri term=5_years +effective_date=2007-05-22 jurisdiction=Delaware +effective_date=2010-06-24 jurisdiction=Nevada term=6_months +effective_date=2011-10-10 jurisdiction=Florida +effective_date=2010-07-01 jurisdiction=Kansas party=Yrc_Worldwide_Inc. +effective_date=2009-03-04 jurisdiction=Oregon party=Nike_Inc. term=2_years +jurisdiction=Delaware term=18_months +effective_date=2018-03-26 jurisdiction=New_York party=Agreement_Avon_Products_Inc. +effective_date=2007-04-19 jurisdiction=New_York party=2007_Cardinal_Health_Inc. +effective_date=2016-08-08 jurisdiction=Texas party=Team_Industrial_Services_Inc. term=12_months +effective_date=2004-03-09 jurisdiction=New_Jersey party=Defendant_Allendale_Pharmaceuticals_Inc. +jurisdiction=Florida party=Faro_Technologies_Inc. term=5_years +effective_date=2016-11-08 jurisdiction=New_York party=Wizard_World_Inc. +effective_date=2007-07-23 jurisdiction=Virginia term=3_years +effective_date=2009-02-03 jurisdiction=Delaware party=Vnus_Medical_Technologies_Inc. +effective_date=2018-03-19 jurisdiction=Delaware party=Armo_Biosciences_Inc. +jurisdiction=Oregon party=Avi_Biopharma_Inc. +effective_date=2015-01-01 jurisdiction=Washington party=The_Travelers_Companies_Inc. term=12_months +jurisdiction=Texas party=American_Campus_Communities_Inc. +effective_date=2002-05-16 jurisdiction=California term=3_years +effective_date=2011-08-11 jurisdiction=Delaware party=Micromet_Inc. +effective_date=1991-08-01 jurisdiction=California term=5_years +effective_date=2006-08-08 jurisdiction=New_York party=Crew_Group_Inc. term=3_years +jurisdiction=Delaware +jurisdiction=Colorado party=Lightwave_Logic_Inc. +effective_date=2010-03-02 jurisdiction=Pennsylvania party=Healthtronics_Inc. term=5_years +effective_date=2015-05-18 jurisdiction=New_York party=Information_Regarding_Blyth_Inc. +jurisdiction=Indiana party=Zimmer_Inc. term=2_years +effective_date=2009-01-17 jurisdiction=Delaware +effective_date=2007-07-03 jurisdiction=Pennsylvania +effective_date=2011-12-14 jurisdiction=Massachusetts term=18_months +jurisdiction=Massachusetts +effective_date=2015-01-20 jurisdiction=New_York term=2_years +effective_date=2006-11-09 jurisdiction=Ohio party=Cardinal_Health_Inc. term=7_months +effective_date=2018-01-15 jurisdiction=Illinois term=12_months +jurisdiction=California party=Acologix_Inc. term=12_months +effective_date=2014-07-31 jurisdiction=California term=5_years +effective_date=2001-12-03 jurisdiction=California party=Palm_Inc. term=5_years +effective_date=2008-07-24 jurisdiction=Oregon party=Parker_Nike_Inc. term=2_years +effective_date=2014-04-07 jurisdiction=Texas +effective_date=2009-06-30 jurisdiction=New_York +effective_date=2016-08-09 jurisdiction=Michigan party=Harbert_Fund_Advisors_Inc. +jurisdiction=Delaware +effective_date=2001-09-17 jurisdiction=California term=3_years +effective_date=2013-04-19 jurisdiction=Florida party=Neogenomics_Inc. term=2_years +effective_date=2002-07-08 jurisdiction=California term=3_years +jurisdiction=Ohio +effective_date=2008-02-04 jurisdiction=New_York party=2008_Kinetic_Concepts_Inc. term=3_years +effective_date=2020-10-26 jurisdiction=Massachusetts party=Comverse_Inc. term=6_months +jurisdiction=New_York party=Paetec_Communications_Inc. term=12_months +effective_date=2005-08-01 jurisdiction=Georgia party=Acuity_Brands_Inc. term=24_months +effective_date=2015-01-12 jurisdiction=New_York party=Borderfree_Inc. term=3_years +effective_date=2008-12-01 jurisdiction=Massachusetts term=12_months +jurisdiction=Texas term=3_years +effective_date=2013-01-25 jurisdiction=New_York party=Tech_Environmental_Services_Inc. +jurisdiction=Pennsylvania party=Globus_Medical_Inc. +effective_date=2003-04-09 jurisdiction=Michigan party=Genesis_Bioventures_Inc. term=5_years +effective_date=2009-06-23 jurisdiction=Washington party=By_Infrastrux_Group_Inc. term=2_years +effective_date=2010-10-01 jurisdiction=New_York term=20_years +effective_date=2007-12-10 jurisdiction=Missouri +effective_date=2011-07-02 jurisdiction=California +jurisdiction=California party=Social_Reality_Inc. +effective_date=2006-05-11 jurisdiction=California party=Of_Beckman_Coulter_Inc. +effective_date=2012-07-13 jurisdiction=Texas party=Union_Drilling_Inc. +effective_date=2006-07-10 jurisdiction=Florida party=Sun_Energy_Solar_Inc. term=5_years +effective_date=2003-04-01 jurisdiction=Ohio party=Arc_Communications_Inc. term=5_years +effective_date=2011-03-21 jurisdiction=Delaware party=Lgp_Management_Inc. +effective_date=2017-02-01 jurisdiction=New_York +effective_date=2010-04-02 jurisdiction=New_York party=Sybase_Inc. +effective_date=2007-06-15 jurisdiction=Pennsylvania party=Scanlan_Orthovita_Inc. +effective_date=2003-09-10 jurisdiction=New_York term=5_years +jurisdiction=Minnesota party=27_Cvs_Pharmacy_Inc. term=3_years +effective_date=2006-05-19 jurisdiction=California party=Gilead_Sciences_Inc. term=5_years +effective_date=2005-03-28 jurisdiction=Florida +effective_date=2006-04-13 jurisdiction=New_York party=Coat_Factory_Holdings_Inc. term=3_years +effective_date=2010-03-19 jurisdiction=Connecticut term=2_years +effective_date=2011-01-27 jurisdiction=California party=Lgp_Management_Inc. +effective_date=2003-07-02 jurisdiction=Delaware party=Lydall_Inc. term=2_years