NER trained in 3 iterations

2022-05-02 18:54:19 +02:00 · 2022-05-02 18:54:19 +02:00 · 6e2d95c303
commit 6e2d95c303
parent 3a56afe615
12 changed files with 76079 additions and 868 deletions
--- a/NER/config.cfg
+++ b/NER/config.cfg
@ -0,0 +1,128 @@
+[paths]
+train = null
+dev = null
+vectors = null
+init_tok2vec = null
+
+[system]
+seed = 0
+gpu_allocator = null
+
+[nlp]
+lang = "en"
+pipeline = ["ner"]
+disabled = []
+before_creation = null
+after_creation = null
+after_pipeline_creation = null
+batch_size = 1000
+tokenizer = {"@tokenizers":"spacy.Tokenizer.v1"}
+
+[components]
+
+[components.ner]
+factory = "ner"
+incorrect_spans_key = null
+moves = null
+scorer = {"@scorers":"spacy.ner_scorer.v1"}
+update_with_oracle_cut_size = 100
+
+[components.ner.model]
+@architectures = "spacy.TransitionBasedParser.v2"
+state_type = "ner"
+extra_state_tokens = false
+hidden_width = 64
+maxout_pieces = 2
+use_upper = true
+nO = null
+
+[components.ner.model.tok2vec]
+@architectures = "spacy.HashEmbedCNN.v2"
+pretrained_vectors = null
+width = 96
+depth = 4
+embed_size = 2000
+window_size = 1
+maxout_pieces = 3
+subword_features = true
+
+[corpora]
+
+[corpora.dev]
+@readers = "spacy.Corpus.v1"
+path = ${paths.dev}
+gold_preproc = false
+max_length = 0
+limit = 0
+augmenter = null
+
+[corpora.train]
+@readers = "spacy.Corpus.v1"
+path = ${paths.train}
+gold_preproc = false
+max_length = 0
+limit = 0
+augmenter = null
+
+[training]
+seed = ${system.seed}
+gpu_allocator = ${system.gpu_allocator}
+dropout = 0.1
+accumulate_gradient = 1
+patience = 1600
+max_epochs = 0
+max_steps = 20000
+eval_frequency = 200
+frozen_components = []
+annotating_components = []
+dev_corpus = "corpora.dev"
+train_corpus = "corpora.train"
+before_to_disk = null
+
+[training.batcher]
+@batchers = "spacy.batch_by_words.v1"
+discard_oversize = false
+tolerance = 0.2
+get_length = null
+
+[training.batcher.size]
+@schedules = "compounding.v1"
+start = 100
+stop = 1000
+compound = 1.001
+t = 0.0
+
+[training.logger]
+@loggers = "spacy.ConsoleLogger.v1"
+progress_bar = false
+
+[training.optimizer]
+@optimizers = "Adam.v1"
+beta1 = 0.9
+beta2 = 0.999
+L2_is_weight_decay = true
+L2 = 0.01
+grad_clip = 1.0
+use_averages = false
+eps = 0.00000001
+learn_rate = 0.001
+
+[training.score_weights]
+ents_f = 1.0
+ents_p = 0.0
+ents_r = 0.0
+ents_per_type = null
+
+[pretraining]
+
+[initialize]
+vectors = ${paths.vectors}
+init_tok2vec = ${paths.init_tok2vec}
+vocab_data = null
+lookups = null
+before_init = null
+after_init = null
+
+[initialize.components]
+
+[initialize.tokenizer]
--- a/NER/meta.json
+++ b/NER/meta.json
@ -0,0 +1,36 @@
+{
+  "lang":"en",
+  "name":"pipeline",
+  "version":"0.0.0",
+  "spacy_version":">=3.2.4,<3.3.0",
+  "description":"",
+  "author":"",
+  "email":"",
+  "url":"",
+  "license":"",
+  "spacy_git_version":"b50fe5ec6",
+  "vectors":{
+    "width":0,
+    "vectors":0,
+    "keys":0,
+    "name":null,
+    "mode":"default"
+  },
+  "labels":{
+    "ner":[
+      "effective_date",
+      "jurisdiction",
+      "party",
+      "term"
+    ]
+  },
+  "pipeline":[
+    "ner"
+  ],
+  "components":[
+    "ner"
+  ],
+  "disabled":[
+
+  ]
+}
--- a/NER/ner/cfg
+++ b/NER/ner/cfg
@ -0,0 +1,13 @@
+{
+  "moves":null,
+  "update_with_oracle_cut_size":100,
+  "multitasks":[
+
+  ],
+  "min_action_freq":1,
+  "learn_tokens":false,
+  "beam_width":1,
+  "beam_density":0.0,
+  "beam_update_prob":0.0,
+  "incorrect_spans_key":null
+}
--- a/NER/ner/model
+++ b/NER/ner/model
--- a/NER/ner/moves
+++ b/NER/ner/moves
@ -0,0 +1 @@
+‚¥movesÚ{"0":{},"1":{"effective_date":-1,"jurisdiction":-2,"party":-3,"term":-4},"2":{"effective_date":-1,"jurisdiction":-2,"party":-3,"term":-4},"3":{"effective_date":-1,"jurisdiction":-2,"party":-3,"term":-4},"4":{"":1,"effective_date":-1,"jurisdiction":-2,"party":-3,"term":-4},"5":{"":1}}£cfg<66>§neg_keyÀ
--- a/NER/tokenizer
+++ b/NER/tokenizer
--- a/NER/vocab/key2row
+++ b/NER/vocab/key2row
@ -0,0 +1 @@
+<EFBFBD>
--- a/NER/vocab/lookups.bin
+++ b/NER/vocab/lookups.bin
@ -0,0 +1 @@
+<EFBFBD>
--- a/NER/vocab/strings.json
+++ b/NER/vocab/strings.json
--- a/NER/vocab/vectors
+++ b/NER/vocab/vectors
--- a/NER/vocab/vectors.cfg
+++ b/NER/vocab/vectors.cfg
@ -0,0 +1,3 @@
+{
+  "mode":"default"
+}
--- a/main.ipynb
+++ b/main.ipynb
				`@ -0,0 +1 @@`
				`‚¥movesÚ{"0":{},"1":{"effective_date":-1,"jurisdiction":-2,"party":-3,"term":-4},"2":{"effective_date":-1,"jurisdiction":-2,"party":-3,"term":-4},"3":{"effective_date":-1,"jurisdiction":-2,"party":-3,"term":-4},"4":{"":1,"effective_date":-1,"jurisdiction":-2,"party":-3,"term":-4},"5":{"":1}}£cfg<66>§neg_keyÀ`