kleister-nda/heSaidEdgar.ipynb
2022-05-03 20:10:12 +02:00

3.5 KiB

import lzma

# Collect every line of the xz-compressed training input as a UTF-8 string.
# The file is iterated in binary mode, so each entry keeps its trailing newline.
NDAs = []
with lzma.open('train/in.tsv.xz') as archive:
    NDAs.extend(raw_line.decode('utf-8') for raw_line in archive)
import spacy
from spacy import displacy

# Load the spaCy pipeline identified by 'NER' and run it over one sample
# document from the training set.
nlp = spacy.load('NER')

text = NDAs[9]
doc = nlp(text)

# One list of entity texts per label of interest.
effective_date = []
jurisdiction = []
party = []
term = []

# Label -> destination list. Any label that is not listed here falls back to
# `term`, mirroring a catch-all "else" branch.
_targets_by_label = {
    'effective_date': effective_date,
    'jurisdiction': jurisdiction,
    'party': party,
}

for ent in doc.ents:
    _targets_by_label.get(ent.label_, term).append(ent.text)
# Quick check: number of collected jurisdiction entity texts equal to 'New York'.
jurisdiction.count('New York')
12
from collections import Counter

# Frequency of each distinct jurisdiction string found in the document.
# Counter tallies in a single pass (calling list.count() per element is
# quadratic); converting back to dict keeps `juris` a plain dict with keys in
# first-seen order.
juris = dict(Counter(jurisdiction))
juris
{'New York': 12}
# Re-run the pipeline on the same sample so this cell works on its own, then
# list every recognised entity next to its label.
text = NDAs[9]
doc = nlp(text)
for ent in doc.ents:
    entity_text, entity_label = ent.text, ent.label_
    print(entity_text, '-->', entity_label)
CompuDyne Corporation --> party
two years --> term
New York --> jurisdiction
New York --> jurisdiction
New York --> jurisdiction
CompuDyne Corporation --> party
two years --> term
New York --> jurisdiction
New York --> jurisdiction
New York --> jurisdiction
CompuDyne Corporation --> party
two years --> term
New York --> jurisdiction
New York --> jurisdiction
New York --> jurisdiction
CompuDyne Corporation --> party
two years --> term
New York --> jurisdiction
New York --> jurisdiction
New York --> jurisdiction