kleister-nda/heSaidEdgar.ipynb at c2748dc6572bb4d31fef8f098f96e2cebf8f1b93

2022-05-03 20:10:12 +02:00

3.5 KiB

Raw Blame History

import lzma

# Read the xz-compressed training file and keep each line as a decoded
# UTF-8 string. lzma.open defaults to binary mode, so every raw line is
# decoded explicitly; line endings are kept as they appear in the file.
NDAs = []
with lzma.open('train/in.tsv.xz') as archive:
    NDAs.extend(raw_line.decode('utf-8') for raw_line in archive)

import spacy
from spacy import displacy

# Load the spaCy pipeline identified by 'NER'.
# NOTE(review): presumably a locally trained model directory relative to the
# working directory — confirm where 'NER' is expected to live.
nlp = spacy.load('NER')

# Run the pipeline over a single document: the entry at index 9 of NDAs.
text = NDAs[9]
doc = nlp(text)

# Collect the text of every entity found in `doc`, grouped by its label.
effective_date, jurisdiction, party, term = [], [], [], []

# Destination list for each label that is matched explicitly.
_bucket_by_label = {
    'effective_date': effective_date,
    'jurisdiction': jurisdiction,
    'party': party,
}

for ent in doc.ents:
    # Any label not listed above falls through to `term`.
    # NOTE(review): this also catches labels other than 'term' — confirm the
    # model only emits the four expected labels.
    _bucket_by_label.get(ent.label_, term).append(ent.text)

from collections import Counter

# Number of entities whose text is exactly 'New York'.
jurisdiction.count('New York')

# Frequency of every distinct jurisdiction string, tallied in a single pass
# (the previous comprehension re-scanned the whole list once per element).
# Converted back to a plain dict so the value and its displayed form stay the
# same; keys remain in first-occurrence order.
juris = dict(Counter(jurisdiction))

juris

{'New York': 12}

# Re-run the pipeline on the document at index 9 and list each recognised
# entity next to its label, one per line.
text = NDAs[9]
doc = nlp(text)
for ent in doc.ents:
    print(f'{ent.text} --> {ent.label_}')

CompuDyne Corporation --> party
two years --> term
New York --> jurisdiction
New York --> jurisdiction
New York --> jurisdiction
CompuDyne Corporation --> party
two years --> term
New York --> jurisdiction
New York --> jurisdiction
New York --> jurisdiction
CompuDyne Corporation --> party
two years --> term
New York --> jurisdiction
New York --> jurisdiction
New York --> jurisdiction
CompuDyne Corporation --> party
two years --> term
New York --> jurisdiction
New York --> jurisdiction
New York --> jurisdiction

3.5 KiB Raw Blame History

3.5 KiB

Raw Blame History