diff --git a/main.ipynb b/main.ipynb index ad6b641..3f10940 100644 --- a/main.ipynb +++ b/main.ipynb @@ -178,7 +178,7 @@ ], "source": [ "import spacy\n", - "from spacy.tokens import DocBin\n", + "# from spacy.tokens import DocBin\n", "\n", "model = None\n", "nIter = 100\n", @@ -219,30 +219,972 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 9, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - " 1%| | 3/254 [00:00<01:11, 3.49it/s]/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/spacy/training/iob_utils.py:141: UserWarning: [W030] Some entities could not be aligned in the text \"03efbda01358533c167ca9b1e6d72051.pdf\teffective_dat...\" with entities \"[(7513, 7521, 'effective_date'), (15032, 15040, 'e...\". Use `spacy.training.offsets_to_biluo_tags(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.\n", + " 2%|▏ | 6/254 [00:03<03:05, 1.34it/s]j:\\.AppData\\Python\\Python38\\site-packages\\spacy\\training\\iob_utils.py:141: UserWarning: [W030] Some entities could not be aligned in the text \"04bf0791804e8487c91ab84eaa47a335.pdf\teffective_dat...\" with entities \"[(198, 216, 'effective_date'), (22663, 22681, 'eff...\". Use `spacy.training.offsets_to_biluo_tags(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.\n", " warnings.warn(\n", - " 2%|▏ | 4/254 [00:01<02:28, 1.68it/s]/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/spacy/training/iob_utils.py:141: UserWarning: [W030] Some entities could not be aligned in the text \"03fd0e629b617da00c54794a8a78b24d.pdf\teffective_dat...\" with entities \"[(287, 300, 'effective_date'), (25276, 25289, 'eff...\". Use `spacy.training.offsets_to_biluo_tags(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.\n", + " 3%|▎ | 8/254 [00:06<03:54, 1.05it/s]j:\\.AppData\\Python\\Python38\\site-packages\\spacy\\training\\iob_utils.py:141: UserWarning: [W030] Some entities could not be aligned in the text \"0587275477c6ad6d0d72419383e04b88.pdf\teffective_dat...\" with entities \"[(4528, 4536, 'jurisdiction'), (4604, 4612, 'juris...\". Use `spacy.training.offsets_to_biluo_tags(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.\n", " warnings.warn(\n", - " 2%|▏ | 6/254 [00:04<04:11, 1.01s/it]/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/spacy/training/iob_utils.py:141: UserWarning: [W030] Some entities could not be aligned in the text \"04bf0791804e8487c91ab84eaa47a335.pdf\teffective_dat...\" with entities \"[(198, 216, 'effective_date'), (22663, 22681, 'eff...\". Use `spacy.training.offsets_to_biluo_tags(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.\n", + " 4%|▎ | 9/254 [00:13<10:57, 2.68s/it]j:\\.AppData\\Python\\Python38\\site-packages\\spacy\\training\\iob_utils.py:141: UserWarning: [W030] Some entities could not be aligned in the text \"05947711a24a5b7ce401911d31e19c91.pdf\teffective_dat...\" with entities \"[(18271, 18279, 'jurisdiction'), (18507, 18515, 'j...\". Use `spacy.training.offsets_to_biluo_tags(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.\n", " warnings.warn(\n", - " 3%|▎ | 8/254 [00:07<04:37, 1.13s/it]/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/spacy/training/iob_utils.py:141: UserWarning: [W030] Some entities could not be aligned in the text \"0587275477c6ad6d0d72419383e04b88.pdf\teffective_dat...\" with entities \"[(4528, 4536, 'jurisdiction'), (4604, 4612, 'juris...\". Use `spacy.training.offsets_to_biluo_tags(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.\n", + " 6%|▌ | 14/254 [00:18<04:23, 1.10s/it]j:\\.AppData\\Python\\Python38\\site-packages\\spacy\\training\\iob_utils.py:141: UserWarning: [W030] Some entities could not be aligned in the text \"0859334b76224ff82c1312ae7b2b5da1.pdf\teffective_dat...\" with entities \"[(279, 296, 'effective_date'), (22981, 22998, 'eff...\". Use `spacy.training.offsets_to_biluo_tags(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.\n", " warnings.warn(\n", - " 4%|▎ | 9/254 [00:12<09:04, 2.22s/it]/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/spacy/training/iob_utils.py:141: UserWarning: [W030] Some entities could not be aligned in the text \"05947711a24a5b7ce401911d31e19c91.pdf\teffective_dat...\" with entities \"[(18271, 18279, 'jurisdiction'), (18507, 18515, 'j...\". Use `spacy.training.offsets_to_biluo_tags(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.\n", + " 7%|▋ | 17/254 [00:21<03:24, 1.16it/s]j:\\.AppData\\Python\\Python38\\site-packages\\spacy\\training\\iob_utils.py:141: UserWarning: [W030] Some entities could not be aligned in the text \"0c3ab1d0c8bb3b1c2f7a64f3ab584368.pdf\teffective_dat...\" with entities \"[(243, 259, 'effective_date'), (35225, 35241, 'eff...\". Use `spacy.training.offsets_to_biluo_tags(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.\n", " warnings.warn(\n", - " 6%|▌ | 14/254 [00:18<04:18, 1.08s/it]/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/spacy/training/iob_utils.py:141: UserWarning: [W030] Some entities could not be aligned in the text \"0859334b76224ff82c1312ae7b2b5da1.pdf\teffective_dat...\" with entities \"[(279, 296, 'effective_date'), (22981, 22998, 'eff...\". Use `spacy.training.offsets_to_biluo_tags(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.\n", + " 7%|▋ | 18/254 [00:23<04:39, 1.18s/it]j:\\.AppData\\Python\\Python38\\site-packages\\spacy\\training\\iob_utils.py:141: UserWarning: [W030] Some entities could not be aligned in the text \"0c7b90701575b147c4ac245ca478ee7c.pdf\teffective_dat...\" with entities \"[(10058, 10065, 'jurisdiction'), (10252, 10259, 'j...\". Use `spacy.training.offsets_to_biluo_tags(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.\n", " warnings.warn(\n", - " 7%|▋ | 17/254 [00:20<03:29, 1.13it/s]/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/spacy/training/iob_utils.py:141: UserWarning: [W030] Some entities could not be aligned in the text \"0c3ab1d0c8bb3b1c2f7a64f3ab584368.pdf\teffective_dat...\" with entities \"[(243, 259, 'effective_date'), (35225, 35241, 'eff...\". Use `spacy.training.offsets_to_biluo_tags(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.\n", + " 8%|▊ | 20/254 [00:25<04:09, 1.07s/it]j:\\.AppData\\Python\\Python38\\site-packages\\spacy\\training\\iob_utils.py:141: UserWarning: [W030] Some entities could not be aligned in the text \"0f446b4ed10d8d40824270d746511cca.pdf\tjurisdiction ...\" with entities \"[(261, 268, 'jurisdiction'), (901, 908, 'jurisdict...\". Use `spacy.training.offsets_to_biluo_tags(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.\n", " warnings.warn(\n", - " 7%|▋ | 18/254 [00:23<04:38, 1.18s/it]/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/spacy/training/iob_utils.py:141: UserWarning: [W030] Some entities could not be aligned in the text \"0c7b90701575b147c4ac245ca478ee7c.pdf\teffective_dat...\" with entities \"[(10058, 10065, 'jurisdiction'), (10252, 10259, 'j...\". Use `spacy.training.offsets_to_biluo_tags(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.\n", + " 8%|▊ | 21/254 [00:27<04:35, 1.18s/it]j:\\.AppData\\Python\\Python38\\site-packages\\spacy\\training\\iob_utils.py:141: UserWarning: [W030] Some entities could not be aligned in the text \"1058cd8d541c0622ad959facd34235ea.pdf\teffective_dat...\" with entities \"[(21973, 21981, 'jurisdiction'), (46056, 46064, 'j...\". Use `spacy.training.offsets_to_biluo_tags(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.\n", " warnings.warn(\n", - " 7%|▋ | 19/254 [00:25<05:25, 1.39s/it]" + " 9%|▉ | 23/254 [00:30<04:49, 1.25s/it]j:\\.AppData\\Python\\Python38\\site-packages\\spacy\\training\\iob_utils.py:141: UserWarning: [W030] Some entities could not be aligned in the text \"15398fb3b5f357981a8be88dc4bb376e.pdf\teffective_dat...\" with entities \"[(579, 591, 'jurisdiction'), (17167, 17179, 'juris...\". Use `spacy.training.offsets_to_biluo_tags(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.\n", + " warnings.warn(\n", + " 9%|▉ | 24/254 [00:34<07:47, 2.03s/it]j:\\.AppData\\Python\\Python38\\site-packages\\spacy\\training\\iob_utils.py:141: UserWarning: [W030] Some entities could not be aligned in the text \"154d30f607c74aa8a5f582bf84f7a5e2.pdf\teffective_dat...\" with entities \"[(379, 387, 'jurisdiction'), (22505, 22513, 'juris...\". Use `spacy.training.offsets_to_biluo_tags(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.\n", + " warnings.warn(\n", + " 13%|█▎ | 32/254 [00:37<01:42, 2.17it/s]j:\\.AppData\\Python\\Python38\\site-packages\\spacy\\training\\iob_utils.py:141: UserWarning: [W030] Some entities could not be aligned in the text \"1a5847e0b968e25ddcf41ac9c6fc63b4.pdf\teffective_dat...\" with entities \"[(210, 227, 'effective_date'), (708, 725, 'effecti...\". Use `spacy.training.offsets_to_biluo_tags(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.\n", + " warnings.warn(\n", + " 14%|█▍ | 36/254 [00:41<02:11, 1.65it/s]j:\\.AppData\\Python\\Python38\\site-packages\\spacy\\training\\iob_utils.py:141: UserWarning: [W030] Some entities could not be aligned in the text \"1c1705ebb86fb8c9ddd2c765d1d59486.pdf\teffective_dat...\" with entities \"[(356, 373, 'effective_date'), (14632, 14649, 'eff...\". Use `spacy.training.offsets_to_biluo_tags(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.\n", + " warnings.warn(\n", + " 15%|█▍ | 37/254 [00:43<03:18, 1.09it/s]j:\\.AppData\\Python\\Python38\\site-packages\\spacy\\training\\iob_utils.py:141: UserWarning: [W030] Some entities could not be aligned in the text \"1c36bbc314ee3f0cbe059d15d4fdd36a.pdf\teffective_dat...\" with entities \"[(250, 267, 'effective_date'), (31244, 31261, 'eff...\". Use `spacy.training.offsets_to_biluo_tags(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.\n", + " warnings.warn(\n", + " 17%|█▋ | 43/254 [00:49<02:03, 1.70it/s]j:\\.AppData\\Python\\Python38\\site-packages\\spacy\\training\\iob_utils.py:141: UserWarning: [W030] Some entities could not be aligned in the text \"22526e24107177141dc9b66afed7106d.pdf\teffective_dat...\" with entities \"[(265, 273, 'jurisdiction'), (12609, 12617, 'juris...\". Use `spacy.training.offsets_to_biluo_tags(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.\n", + " warnings.warn(\n", + " 18%|█▊ | 45/254 [00:51<02:11, 1.59it/s]j:\\.AppData\\Python\\Python38\\site-packages\\spacy\\training\\iob_utils.py:141: UserWarning: [W030] Some entities could not be aligned in the text \"232b3bee703427df8e9893e4a52d5d60.pdf\teffective_dat...\" with entities \"[(16031, 16039, 'jurisdiction'), (16220, 16228, 'j...\". Use `spacy.training.offsets_to_biluo_tags(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.\n", + " warnings.warn(\n", + " 19%|█▉ | 48/254 [00:53<02:00, 1.71it/s]j:\\.AppData\\Python\\Python38\\site-packages\\spacy\\training\\iob_utils.py:141: UserWarning: [W030] Some entities could not be aligned in the text \"247166e0245431dcf97ee884f1f07e35.pdf\teffective_dat...\" with entities \"[(156, 170, 'effective_date'), (508, 522, 'effecti...\". Use `spacy.training.offsets_to_biluo_tags(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.\n", + " warnings.warn(\n", + " 20%|█▉ | 50/254 [00:54<01:47, 1.90it/s]j:\\.AppData\\Python\\Python38\\site-packages\\spacy\\training\\iob_utils.py:141: UserWarning: [W030] Some entities could not be aligned in the text \"2632c4c1238356489cab88d58e1a5fb0.pdf\teffective_dat...\" with entities \"[(15689, 15697, 'effective_date'), (15719, 15727, ...\". Use `spacy.training.offsets_to_biluo_tags(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.\n", + " warnings.warn(\n", + " 22%|██▏ | 56/254 [00:57<01:12, 2.74it/s]j:\\.AppData\\Python\\Python38\\site-packages\\spacy\\training\\iob_utils.py:141: UserWarning: [W030] Some entities could not be aligned in the text \"2ab67f26bc51d57492e3f27b244fae3e.pdf\teffective_dat...\" with entities \"[(22097, 22111, 'effective_date'), (44207, 44221, ...\". Use `spacy.training.offsets_to_biluo_tags(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.\n", + " warnings.warn(\n", + " 24%|██▎ | 60/254 [01:02<02:53, 1.12it/s]j:\\.AppData\\Python\\Python38\\site-packages\\spacy\\training\\iob_utils.py:141: UserWarning: [W030] Some entities could not be aligned in the text \"2ce3bbe2d6836d8b023c55883294fa63.pdf\teffective_dat...\" with entities \"[(9413, 9422, 'jurisdiction'), (9806, 9815, 'juris...\". Use `spacy.training.offsets_to_biluo_tags(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.\n", + " warnings.warn(\n", + " 29%|██▉ | 74/254 [01:09<00:50, 3.57it/s]j:\\.AppData\\Python\\Python38\\site-packages\\spacy\\training\\iob_utils.py:141: UserWarning: [W030] Some entities could not be aligned in the text \"376f9746de69416a9561e92517c356ee.pdf\teffective_dat...\" with entities \"[(306, 330, 'effective_date'), (9197, 9221, 'effec...\". Use `spacy.training.offsets_to_biluo_tags(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.\n", + " warnings.warn(\n", + " 30%|███ | 77/254 [01:10<00:58, 3.05it/s]j:\\.AppData\\Python\\Python38\\site-packages\\spacy\\training\\iob_utils.py:141: UserWarning: [W030] Some entities could not be aligned in the text \"39610c6bf605fdd8d0d9bcb2aacb5e74.pdf\teffective_dat...\" with entities \"[(51002, 51012, 'jurisdiction'), (51132, 51142, 'j...\". Use `spacy.training.offsets_to_biluo_tags(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.\n", + " warnings.warn(\n", + " 31%|███ | 79/254 [01:21<07:25, 2.54s/it]j:\\.AppData\\Python\\Python38\\site-packages\\spacy\\training\\iob_utils.py:141: UserWarning: [W030] Some entities could not be aligned in the text \"3acc6f6bdad6eaaf7ab21faea5ea95fa.pdf\teffective_dat...\" with entities \"[(251, 265, 'effective_date'), (23669, 23683, 'eff...\". Use `spacy.training.offsets_to_biluo_tags(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.\n", + " warnings.warn(\n", + " 31%|███▏ | 80/254 [01:24<07:27, 2.57s/it]j:\\.AppData\\Python\\Python38\\site-packages\\spacy\\training\\iob_utils.py:141: UserWarning: [W030] Some entities could not be aligned in the text \"3c19cab83f40f722fc8c1432299d7655.pdf\teffective_dat...\" with entities \"[(290, 312, 'effective_date'), (30391, 30413, 'eff...\". Use `spacy.training.offsets_to_biluo_tags(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.\n", + " warnings.warn(\n", + " 37%|███▋ | 94/254 [01:31<00:46, 3.46it/s]j:\\.AppData\\Python\\Python38\\site-packages\\spacy\\training\\iob_utils.py:141: UserWarning: [W030] Some entities could not be aligned in the text \"480fcdb1b3d02989c11ace2c69bc9ba6.pdf\teffective_dat...\" with entities \"[(22255, 22269, 'effective_date'), (44472, 44486, ...\". Use `spacy.training.offsets_to_biluo_tags(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.\n", + " warnings.warn(\n", + " 38%|███▊ | 96/254 [01:34<02:09, 1.22it/s]j:\\.AppData\\Python\\Python38\\site-packages\\spacy\\training\\iob_utils.py:141: UserWarning: [W030] Some entities could not be aligned in the text \"495f7d16921a1c8531be0844db0828a4.pdf\teffective_dat...\" with entities \"[(130, 155, 'effective_date'), (19409, 19434, 'eff...\". Use `spacy.training.offsets_to_biluo_tags(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.\n", + " warnings.warn(\n", + " 40%|███▉ | 101/254 [01:35<00:57, 2.65it/s]j:\\.AppData\\Python\\Python38\\site-packages\\spacy\\training\\iob_utils.py:141: UserWarning: [W030] Some entities could not be aligned in the text \"4dc5c39e601cd476f4c2def0e6b96915.pdf\teffective_dat...\" with entities \"[(329, 343, 'jurisdiction'), (29204, 29218, 'juris...\". Use `spacy.training.offsets_to_biluo_tags(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.\n", + " warnings.warn(\n", + " 41%|████ | 103/254 [01:40<02:41, 1.07s/it]j:\\.AppData\\Python\\Python38\\site-packages\\spacy\\training\\iob_utils.py:141: UserWarning: [W030] Some entities could not be aligned in the text \"4dd5eca6802e0f03214db3dfd4881638.pdf\teffective_dat...\" with entities \"[(19819, 19832, 'effective_date'), (39990, 40003, ...\". Use `spacy.training.offsets_to_biluo_tags(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.\n", + " warnings.warn(\n", + " 41%|████▏ | 105/254 [01:42<02:38, 1.07s/it]j:\\.AppData\\Python\\Python38\\site-packages\\spacy\\training\\iob_utils.py:141: UserWarning: [W030] Some entities could not be aligned in the text \"4f0e455a90c53f8e40e09d324aab4ea3.pdf\teffective_dat...\" with entities \"[(533, 550, 'effective_date'), (2868, 2885, 'effec...\". Use `spacy.training.offsets_to_biluo_tags(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.\n", + " warnings.warn(\n", + " 42%|████▏ | 106/254 [01:46<04:23, 1.78s/it]j:\\.AppData\\Python\\Python38\\site-packages\\spacy\\training\\iob_utils.py:141: UserWarning: [W030] Some entities could not be aligned in the text \"4fd432d8ce6796dabc17d3838d8539a2.pdf\teffective_dat...\" with entities \"[(162, 176, 'effective_date'), (15065, 15079, 'eff...\". Use `spacy.training.offsets_to_biluo_tags(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.\n", + " warnings.warn(\n", + " 44%|████▎ | 111/254 [01:48<01:23, 1.71it/s]j:\\.AppData\\Python\\Python38\\site-packages\\spacy\\training\\iob_utils.py:141: UserWarning: [W030] Some entities could not be aligned in the text \"5b070e9583099dfdcddc9c9c811b7d44.pdf\teffective_dat...\" with entities \"[(10864, 10876, 'jurisdiction'), (23351, 23363, 'j...\". Use `spacy.training.offsets_to_biluo_tags(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.\n", + " warnings.warn(\n", + " 44%|████▍ | 112/254 [01:50<01:56, 1.22it/s]j:\\.AppData\\Python\\Python38\\site-packages\\spacy\\training\\iob_utils.py:141: UserWarning: [W030] Some entities could not be aligned in the text \"5d18471dc0cb8c824fe86d5899aeb24b.pdf\teffective_dat...\" with entities \"[(25946, 25952, 'effective_date'), (52332, 52338, ...\". Use `spacy.training.offsets_to_biluo_tags(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.\n", + " warnings.warn(\n", + " 44%|████▍ | 113/254 [01:53<03:38, 1.55s/it]j:\\.AppData\\Python\\Python38\\site-packages\\spacy\\training\\iob_utils.py:141: UserWarning: [W030] Some entities could not be aligned in the text \"5f542bf5a9d00298d7743fd2acbcbfd5.pdf\teffective_dat...\" with entities \"[(352, 360, 'jurisdiction'), (18053, 18061, 'juris...\". Use `spacy.training.offsets_to_biluo_tags(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.\n", + " warnings.warn(\n", + " 48%|████▊ | 122/254 [01:57<00:37, 3.49it/s]j:\\.AppData\\Python\\Python38\\site-packages\\spacy\\training\\iob_utils.py:141: UserWarning: [W030] Some entities could not be aligned in the text \"6c0e2103cb185f28b0c1e9109c674836.pdf\teffective_dat...\" with entities \"[(20591, 20598, 'effective_date'), (20707, 20714, ...\". Use `spacy.training.offsets_to_biluo_tags(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.\n", + " warnings.warn(\n", + " 50%|████▉ | 126/254 [02:00<00:48, 2.66it/s]j:\\.AppData\\Python\\Python38\\site-packages\\spacy\\training\\iob_utils.py:141: UserWarning: [W030] Some entities could not be aligned in the text \"6ecf1846ef305f44deb8f5c64da3b999.pdf\teffective_dat...\" with entities \"[(1140, 1156, 'effective_date'), (23227, 23243, 'e...\". Use `spacy.training.offsets_to_biluo_tags(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.\n", + " warnings.warn(\n", + " 53%|█████▎ | 134/254 [02:04<00:32, 3.65it/s]j:\\.AppData\\Python\\Python38\\site-packages\\spacy\\training\\iob_utils.py:141: UserWarning: [W030] Some entities could not be aligned in the text \"72f91408258be59298f412dad65e3baa.pdf\teffective_dat...\" with entities \"[(1190, 1199, 'jurisdiction'), (7274, 7283, 'juris...\". Use `spacy.training.offsets_to_biluo_tags(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.\n", + " warnings.warn(\n", + " 53%|█████▎ | 135/254 [02:07<01:55, 1.03it/s]j:\\.AppData\\Python\\Python38\\site-packages\\spacy\\training\\iob_utils.py:141: UserWarning: [W030] Some entities could not be aligned in the text \"73bfeebfeca04b3a804d844cbf16d7f3.pdf\teffective_dat...\" with entities \"[(3546, 3556, 'jurisdiction'), (9893, 9903, 'juris...\". Use `spacy.training.offsets_to_biluo_tags(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.\n", + " warnings.warn(\n", + " 54%|█████▍ | 137/254 [02:08<01:23, 1.40it/s]j:\\.AppData\\Python\\Python38\\site-packages\\spacy\\training\\iob_utils.py:141: UserWarning: [W030] Some entities could not be aligned in the text \"7496116e8680dac321f36147b6312411.pdf\teffective_dat...\" with entities \"[(13407, 13420, 'effective_date'), (26980, 26993, ...\". Use `spacy.training.offsets_to_biluo_tags(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.\n", + " warnings.warn(\n", + " 54%|█████▍ | 138/254 [02:09<01:48, 1.07it/s]j:\\.AppData\\Python\\Python38\\site-packages\\spacy\\training\\iob_utils.py:141: UserWarning: [W030] Some entities could not be aligned in the text \"7684f321eb08514fa1794427e73479b9.pdf\teffective_dat...\" with entities \"[(16189, 16197, 'jurisdiction'), (16325, 16333, 'j...\". Use `spacy.training.offsets_to_biluo_tags(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.\n", + " warnings.warn(\n", + " 55%|█████▍ | 139/254 [02:12<02:54, 1.52s/it]j:\\.AppData\\Python\\Python38\\site-packages\\spacy\\training\\iob_utils.py:141: UserWarning: [W030] Some entities could not be aligned in the text \"782c651fc7cf288ec2f8857de0d6bb58.pdf\teffective_dat...\" with entities \"[(5539, 5552, 'jurisdiction'), (11301, 11314, 'jur...\". Use `spacy.training.offsets_to_biluo_tags(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.\n", + " warnings.warn(\n", + " 59%|█████▊ | 149/254 [02:16<00:36, 2.88it/s]j:\\.AppData\\Python\\Python38\\site-packages\\spacy\\training\\iob_utils.py:141: UserWarning: [W030] Some entities could not be aligned in the text \"7cf3dfaf7afd9989de90cb3cbd8d6a83.pdf\teffective_dat...\" with entities \"[(13571, 13578, 'effective_date'), (27080, 27087, ...\". Use `spacy.training.offsets_to_biluo_tags(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.\n", + " warnings.warn(\n", + " 59%|█████▉ | 151/254 [02:18<00:53, 1.94it/s]j:\\.AppData\\Python\\Python38\\site-packages\\spacy\\training\\iob_utils.py:141: UserWarning: [W030] Some entities could not be aligned in the text \"7cfa17a4165369964337c2f46c40e3a2.pdf\teffective_dat...\" with entities \"[(13099, 13104, 'jurisdiction'), (13304, 13309, 'j...\". Use `spacy.training.offsets_to_biluo_tags(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.\n", + " warnings.warn(\n", + " 62%|██████▏ | 157/254 [02:20<00:31, 3.09it/s]j:\\.AppData\\Python\\Python38\\site-packages\\spacy\\training\\iob_utils.py:141: UserWarning: [W030] Some entities could not be aligned in the text \"82b263d025fddef5a8048b34eed91942.pdf\teffective_dat...\" with entities \"[(15481, 15493, 'jurisdiction'), (15986, 15998, 'j...\". Use `spacy.training.offsets_to_biluo_tags(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.\n", + " warnings.warn(\n", + " 62%|██████▏ | 158/254 [02:22<01:05, 1.46it/s]j:\\.AppData\\Python\\Python38\\site-packages\\spacy\\training\\iob_utils.py:141: UserWarning: [W030] Some entities could not be aligned in the text \"83a79ed689ef320a8f65e0268de91e10.pdf\teffective_dat...\" with entities \"[(57077, 57087, 'jurisdiction'), (57207, 57217, 'j...\". Use `spacy.training.offsets_to_biluo_tags(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.\n", + " warnings.warn(\n", + " 63%|██████▎ | 161/254 [02:32<02:40, 1.73s/it]j:\\.AppData\\Python\\Python38\\site-packages\\spacy\\training\\iob_utils.py:141: UserWarning: [W030] Some entities could not be aligned in the text \"86e9c90fa3986691fcb140266f514c7d.pdf\teffective_dat...\" with entities \"[(14567, 14575, 'jurisdiction'), (30390, 30398, 'j...\". Use `spacy.training.offsets_to_biluo_tags(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.\n", + " warnings.warn(\n", + " 64%|██████▍ | 162/254 [02:34<02:41, 1.75s/it]j:\\.AppData\\Python\\Python38\\site-packages\\spacy\\training\\iob_utils.py:141: UserWarning: [W030] Some entities could not be aligned in the text \"8a7fedc5ffa5c2ffa424753229b52943.pdf\teffective_dat...\" with entities \"[(10588, 10594, 'jurisdiction'), (11064, 11070, 'j...\". Use `spacy.training.offsets_to_biluo_tags(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.\n", + " warnings.warn(\n", + " 66%|██████▌ | 167/254 [02:36<00:49, 1.77it/s]j:\\.AppData\\Python\\Python38\\site-packages\\spacy\\training\\iob_utils.py:141: UserWarning: [W030] Some entities could not be aligned in the text \"988f7c53c00bb333a4b7188738a25378.pdf\teffective_dat...\" with entities \"[(7568, 7576, 'jurisdiction'), (10008, 10016, 'jur...\". Use `spacy.training.offsets_to_biluo_tags(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.\n", + " warnings.warn(\n", + " 68%|██████▊ | 172/254 [02:41<00:44, 1.84it/s]j:\\.AppData\\Python\\Python38\\site-packages\\spacy\\training\\iob_utils.py:141: UserWarning: [W030] Some entities could not be aligned in the text \"9d70181e77cf74279fb6712c569da104.pdf\teffective_dat...\" with entities \"[(226, 239, 'effective_date'), (26381, 26394, 'eff...\". Use `spacy.training.offsets_to_biluo_tags(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.\n", + " warnings.warn(\n", + " 69%|██████▉ | 176/254 [02:45<00:46, 1.68it/s]j:\\.AppData\\Python\\Python38\\site-packages\\spacy\\training\\iob_utils.py:141: UserWarning: [W030] Some entities could not be aligned in the text \"a373847e741d0b4db97466b8964a66ae.pdf\teffective_dat...\" with entities \"[(18914, 18922, 'jurisdiction'), (19057, 19065, 'j...\". Use `spacy.training.offsets_to_biluo_tags(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.\n", + " warnings.warn(\n", + " 70%|██████▉ | 177/254 [02:48<01:39, 1.29s/it]j:\\.AppData\\Python\\Python38\\site-packages\\spacy\\training\\iob_utils.py:141: UserWarning: [W030] Some entities could not be aligned in the text \"a3ba9b969b390ce8ec0f62dde48f5a1f.pdf\teffective_dat...\" with entities \"[(291, 303, 'effective_date'), (17055, 17067, 'eff...\". Use `spacy.training.offsets_to_biluo_tags(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.\n", + " warnings.warn(\n", + " 70%|███████ | 178/254 [02:50<01:51, 1.46s/it]j:\\.AppData\\Python\\Python38\\site-packages\\spacy\\training\\iob_utils.py:141: UserWarning: [W030] Some entities could not be aligned in the text \"a527509f8b744d57fc406679ab2287e0.pdf\teffective_dat...\" with entities \"[(315, 337, 'effective_date'), (11945, 11967, 'eff...\". Use `spacy.training.offsets_to_biluo_tags(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.\n", + " warnings.warn(\n", + " 72%|███████▏ | 182/254 [02:52<00:43, 1.64it/s]j:\\.AppData\\Python\\Python38\\site-packages\\spacy\\training\\iob_utils.py:141: UserWarning: [W030] Some entities could not be aligned in the text \"a87ebed40675b7ed9c2d4a0721abbefb.pdf\teffective_dat...\" with entities \"[(25895, 25903, 'jurisdiction'), (55588, 55596, 'j...\". Use `spacy.training.offsets_to_biluo_tags(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.\n", + " warnings.warn(\n", + " 74%|███████▍ | 188/254 [02:56<00:29, 2.27it/s]j:\\.AppData\\Python\\Python38\\site-packages\\spacy\\training\\iob_utils.py:141: UserWarning: [W030] Some entities could not be aligned in the text \"b960e85adabccfba6d758948a1ecc804.pdf\teffective_dat...\" with entities \"[(12062, 12075, 'effective_date'), (24158, 24171, ...\". Use `spacy.training.offsets_to_biluo_tags(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.\n", + " warnings.warn(\n", + " 77%|███████▋ | 195/254 [03:00<00:40, 1.45it/s]j:\\.AppData\\Python\\Python38\\site-packages\\spacy\\training\\iob_utils.py:141: UserWarning: [W030] Some entities could not be aligned in the text \"c2149cc784d2d783c2de0c7b2f02a12f.pdf\teffective_dat...\" with entities \"[(11364, 11371, 'effective_date'), (11398, 11405, ...\". Use `spacy.training.offsets_to_biluo_tags(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.\n", + " warnings.warn(\n", + " 78%|███████▊ | 197/254 [03:02<00:40, 1.40it/s]j:\\.AppData\\Python\\Python38\\site-packages\\spacy\\training\\iob_utils.py:141: UserWarning: [W030] Some entities could not be aligned in the text \"c4ccca5a5502597fc4a75b4ca50337df.pdf\teffective_dat...\" with entities \"[(4849, 4859, 'jurisdiction'), (11069, 11079, 'jur...\". Use `spacy.training.offsets_to_biluo_tags(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.\n", + " warnings.warn(\n", + " 80%|████████ | 204/254 [03:04<00:15, 3.24it/s]j:\\.AppData\\Python\\Python38\\site-packages\\spacy\\training\\iob_utils.py:141: UserWarning: [W030] Some entities could not be aligned in the text \"c94fdb196d2502f60e21793b387023de.pdf\teffective_dat...\" with entities \"[(240, 256, 'effective_date'), (23602, 23618, 'eff...\". Use `spacy.training.offsets_to_biluo_tags(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.\n", + " warnings.warn(\n", + " 81%|████████▏ | 207/254 [03:07<00:28, 1.63it/s]j:\\.AppData\\Python\\Python38\\site-packages\\spacy\\training\\iob_utils.py:141: UserWarning: [W030] Some entities could not be aligned in the text \"cbbcc01ea9cfa4ec8bfa27f0f9f71088.pdf\teffective_dat...\" with entities \"[(24491, 24504, 'jurisdiction'), (24775, 24788, 'j...\". Use `spacy.training.offsets_to_biluo_tags(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.\n", + " warnings.warn(\n", + " 83%|████████▎ | 210/254 [03:11<00:35, 1.22it/s]j:\\.AppData\\Python\\Python38\\site-packages\\spacy\\training\\iob_utils.py:141: UserWarning: [W030] Some entities could not be aligned in the text \"cf34c9403e0092eca75ed9fc61284268.pdf\teffective_dat...\" with entities \"[(256, 272, 'effective_date'), (16765, 16781, 'eff...\". Use `spacy.training.offsets_to_biluo_tags(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.\n", + " warnings.warn(\n", + " 85%|████████▍ | 215/254 [03:14<00:16, 2.39it/s]j:\\.AppData\\Python\\Python38\\site-packages\\spacy\\training\\iob_utils.py:141: UserWarning: [W030] Some entities could not be aligned in the text \"d2cedafb5d6fc0a7a2f4693f652606ef.pdf\teffective_dat...\" with entities \"[(34099, 34104, 'jurisdiction'), (34219, 34224, 'j...\". Use `spacy.training.offsets_to_biluo_tags(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.\n", + " warnings.warn(\n", + " 85%|████████▌ | 216/254 [03:19<01:14, 1.95s/it]j:\\.AppData\\Python\\Python38\\site-packages\\spacy\\training\\iob_utils.py:141: UserWarning: [W030] Some entities could not be aligned in the text \"d50b5f4cf1b059aed9adb4d3d8953d84.pdf\teffective_dat...\" with entities \"[(218, 223, 'jurisdiction'), (15327, 15332, 'juris...\". Use `spacy.training.offsets_to_biluo_tags(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.\n", + " warnings.warn(\n", + " 86%|████████▌ | 219/254 [03:22<00:38, 1.10s/it]j:\\.AppData\\Python\\Python38\\site-packages\\spacy\\training\\iob_utils.py:141: UserWarning: [W030] Some entities could not be aligned in the text \"d789f0680308f0638a05078c5d896b7a.pdf\teffective_dat...\" with entities \"[(292, 310, 'effective_date'), (24569, 24587, 'eff...\". Use `spacy.training.offsets_to_biluo_tags(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.\n", + " warnings.warn(\n", + " 90%|████████▉ | 228/254 [03:26<00:09, 2.85it/s]j:\\.AppData\\Python\\Python38\\site-packages\\spacy\\training\\iob_utils.py:141: UserWarning: [W030] Some entities could not be aligned in the text \"e29c3877a103aaefcf77ebb110f981a5.pdf\teffective_dat...\" with entities \"[(24146, 24159, 'jurisdiction'), (24324, 24337, 'j...\". Use `spacy.training.offsets_to_biluo_tags(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.\n", + " warnings.warn(\n", + " 90%|█████████ | 229/254 [03:29<00:27, 1.11s/it]j:\\.AppData\\Python\\Python38\\site-packages\\spacy\\training\\iob_utils.py:141: UserWarning: [W030] Some entities could not be aligned in the text \"e33d3ca6885f31faa68b2ab766afc86b.pdf\teffective_dat...\" with entities \"[(360, 365, 'jurisdiction'), (551, 556, 'jurisdict...\". Use `spacy.training.offsets_to_biluo_tags(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.\n", + " warnings.warn(\n", + " 96%|█████████▌| 243/254 [03:38<00:11, 1.02s/it]j:\\.AppData\\Python\\Python38\\site-packages\\spacy\\training\\iob_utils.py:141: UserWarning: [W030] Some entities could not be aligned in the text \"f4d4ef76c5ce9b0d5bca8c55369b753c.pdf\teffective_dat...\" with entities \"[(19234, 19242, 'jurisdiction'), (19468, 19476, 'j...\". Use `spacy.training.offsets_to_biluo_tags(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.\n", + " warnings.warn(\n", + " 97%|█████████▋| 246/254 [03:40<00:06, 1.21it/s]j:\\.AppData\\Python\\Python38\\site-packages\\spacy\\training\\iob_utils.py:141: UserWarning: [W030] Some entities could not be aligned in the text \"f6cf95250272fd7f3fd767819ee11255.pdf\teffective_dat...\" with entities \"[(6008, 6016, 'jurisdiction'), (14222, 14230, 'jur...\". Use `spacy.training.offsets_to_biluo_tags(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.\n", + " warnings.warn(\n", + " 98%|█████████▊| 248/254 [03:42<00:04, 1.47it/s]j:\\.AppData\\Python\\Python38\\site-packages\\spacy\\training\\iob_utils.py:141: UserWarning: [W030] Some entities could not be aligned in the text \"fbf608b62ef498171b70fb7b36be61a0.pdf\teffective_dat...\" with entities \"[(30197, 30220, 'effective_date'), (3688, 3695, 'j...\". Use `spacy.training.offsets_to_biluo_tags(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.\n", + " warnings.warn(\n", + " 99%|█████████▉| 252/254 [03:43<00:00, 2.34it/s]j:\\.AppData\\Python\\Python38\\site-packages\\spacy\\training\\iob_utils.py:141: UserWarning: [W030] Some entities could not be aligned in the text \"fdf657ad612664d6f363040992f9a93c.pdf\teffective_dat...\" with entities \"[(205, 221, 'effective_date'), (18571, 18587, 'eff...\". Use `spacy.training.offsets_to_biluo_tags(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.\n", + " warnings.warn(\n", + "100%|██████████| 254/254 [03:46<00:00, 1.12it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'ner': 126761.89712290274}\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 254/254 [03:37<00:00, 1.17it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'ner': 4820.764725268617}\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 254/254 [03:38<00:00, 1.16it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'ner': 2961.5335294864003}\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 254/254 [03:36<00:00, 1.17it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'ner': 2213.4668815539726}\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 254/254 [03:45<00:00, 1.13it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'ner': 2510.1365508201484}\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 254/254 [03:42<00:00, 1.14it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'ner': 1978.3569879540626}\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 254/254 [03:45<00:00, 1.13it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'ner': 2100.888444804289}\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 254/254 [03:42<00:00, 1.14it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'ner': 1876.1361867387686}\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 254/254 [03:31<00:00, 1.20it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'ner': 1626.6343563280275}\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 254/254 [03:31<00:00, 1.20it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'ner': 1585.2094743771386}\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 254/254 [03:33<00:00, 1.19it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'ner': 1448.3668823448797}\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 254/254 [03:33<00:00, 1.19it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'ner': 1332.4421487199395}\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 254/254 [03:30<00:00, 1.21it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'ner': 1317.8781256441407}\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 254/254 [03:31<00:00, 1.20it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'ner': 1226.6725347593729}\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 254/254 [03:33<00:00, 1.19it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'ner': 1192.035597401795}\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 254/254 [03:29<00:00, 1.21it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'ner': 1188.1162035368466}\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 254/254 [03:29<00:00, 1.21it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'ner': 1143.4054054386766}\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 254/254 [03:31<00:00, 1.20it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'ner': 3791.378015410542}\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 254/254 [03:32<00:00, 1.20it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'ner': 994.1768026528883}\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 254/254 [03:32<00:00, 1.19it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'ner': 1125.7846743055836}\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 254/254 [03:33<00:00, 1.19it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'ner': 9328.369087289433}\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 254/254 [03:31<00:00, 1.20it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'ner': 1107.7311855398668}\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 254/254 [03:30<00:00, 1.20it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'ner': 978.9377890510351}\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 254/254 [03:33<00:00, 1.19it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'ner': 1074.4284942306017}\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 254/254 [03:30<00:00, 1.21it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'ner': 943.1104732503225}\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 254/254 [03:33<00:00, 1.19it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'ner': 8978.175974158492}\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 254/254 [03:32<00:00, 1.20it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'ner': 778.0780063474549}\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 254/254 [03:30<00:00, 1.21it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'ner': 761.4204457107562}\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 254/254 [03:29<00:00, 1.21it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'ner': 6052.844040486783}\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 254/254 [03:30<00:00, 1.21it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'ner': 784.3454367913984}\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 254/254 [03:30<00:00, 1.21it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'ner': 14544.763418495377}\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 254/254 [03:32<00:00, 1.20it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'ner': 1697.5655376913946}\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 254/254 [03:31<00:00, 1.20it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'ner': 769.7356116850165}\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 254/254 [03:31<00:00, 1.20it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'ner': 8206.895966353492}\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 254/254 [03:30<00:00, 1.21it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'ner': 557.1029228797111}\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 254/254 [03:33<00:00, 1.19it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'ner': 470.8135981211912}\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 254/254 [03:35<00:00, 1.18it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'ner': 667.7976760653506}\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 254/254 [03:32<00:00, 1.20it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'ner': 439.1487481479821}\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 254/254 [03:31<00:00, 1.20it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'ner': 443.15619316483696}\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 254/254 [03:32<00:00, 1.19it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'ner': 417.5046685101658}\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 254/254 [03:27<00:00, 1.22it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'ner': 11523.485496193114}\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 254/254 [03:32<00:00, 1.19it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'ner': 397.71168758824444}\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 254/254 [03:32<00:00, 1.20it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'ner': 423.49758660694147}\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 254/254 [03:27<00:00, 1.22it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'ner': 296.0573941380508}\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 254/254 [03:31<00:00, 1.20it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'ner': 305.5425810442424}\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 254/254 [03:30<00:00, 1.20it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'ner': 489.09937505081837}\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 254/254 [03:31<00:00, 1.20it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'ner': 430.0638906187689}\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 254/254 [03:34<00:00, 1.19it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'ner': 352.8424022630411}\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 254/254 [03:31<00:00, 1.20it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'ner': 1456.7505189361307}\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 254/254 [03:31<00:00, 1.20it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'ner': 320.5520000411898}\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 254/254 [03:28<00:00, 1.22it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'ner': 498.3043399969644}\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 254/254 [03:30<00:00, 1.21it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'ner': 487.8768236587332}\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 254/254 [03:31<00:00, 1.20it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'ner': 420.81408914726353}\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 254/254 [03:31<00:00, 1.20it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'ner': 228.4159877122968}\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 254/254 [03:32<00:00, 1.20it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'ner': 334.51619157337626}\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 254/254 [03:49<00:00, 1.11it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'ner': 256.706434378429}\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 254/254 [03:38<00:00, 1.16it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'ner': 364.7311265643405}\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 254/254 [03:28<00:00, 1.22it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'ner': 185.43898607503672}\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 254/254 [03:25<00:00, 1.24it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'ner': 222.78402290288602}\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 62%|██████▏ | 158/254 [02:04<00:57, 1.68it/s]" ] } ], @@ -272,7 +1214,22 @@ "execution_count": null, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "# Test trained model\n", + "for text in trainData:\n", + " doc = nlp(text)\n", + " print('Entities', [(ent.text, ent.label_) for ent in doc.ents])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Save the model to path\n", + "nlp.to_disk('/NER')" + ] } ], "metadata": { @@ -294,7 +1251,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.2" + "version": "3.8.10" }, "orig_nbformat": 4 },