# 3.5 KiB
# 3.5 KiB
import lzma
# Load the training input: one decoded text line per list element
# (presumably one NDA document per line of the TSV -- confirm against the data).
with lzma.open('train/in.tsv.xz') as f:
    # lzma.open defaults to binary mode, so iteration yields bytes lines
    # (line endings included); decode each one explicitly as UTF-8.
    NDAs = [raw_line.decode('utf-8') for raw_line in f]
import spacy
from spacy import displacy
# Load the spaCy pipeline identified by 'NER'
# (presumably a custom-trained model directory or package -- TODO confirm).
nlp = spacy.load('NER')
# Run the NER pipeline over a single document (index 9) and collect the text
# of every recognised entity into one list per label.
text = NDAs[9]
doc = nlp(text)

effective_date = []
jurisdiction = []
party = []
term = []

# Labels that have a dedicated list. Any other label is stored in `term`,
# mirroring the catch-all `else` branch of the original if/elif chain.
labelled_buckets = {
    'effective_date': effective_date,
    'jurisdiction': jurisdiction,
    'party': party,
}
for word in doc.ents:
    labelled_buckets.get(word.label_, term).append(word.text)
# Number of extracted jurisdiction mentions equal to 'New York'.
# Bare expression: the value is displayed only in an interactive session;
# when run as a script statement the result is discarded.
jurisdiction.count('New York')
# Out: 12
# Tally how many times each jurisdiction string was extracted.
# Counter makes a single pass over the list (the previous comprehension called
# list.count once per element) and keeps keys in first-seen order; dict()
# converts it back to a plain dict so the displayed value is unchanged.
from collections import Counter

juris = dict(Counter(jurisdiction))
# Bare expression: shows the mapping when run interactively.
juris
# Out: {'New York': 12}
# Re-run the pipeline on document 9 and print every recognised entity
# together with its label, one entity per line.
text = NDAs[9]
doc = nlp(text)
for word in doc.ents:
    print(f"{word.text} --> {word.label_}")
# Output:
# CompuDyne Corporation --> party
# two years --> term
# New York --> jurisdiction
# New York --> jurisdiction
# New York --> jurisdiction
# CompuDyne Corporation --> party
# two years --> term
# New York --> jurisdiction
# New York --> jurisdiction
# New York --> jurisdiction
# CompuDyne Corporation --> party
# two years --> term
# New York --> jurisdiction
# New York --> jurisdiction
# New York --> jurisdiction
# CompuDyne Corporation --> party
# two years --> term
# New York --> jurisdiction
# New York --> jurisdiction
# New York --> jurisdiction