NER trained in 100 iterations

This commit is contained in:
s443930 2022-05-03 18:41:31 +02:00
parent 6e2d95c303
commit f28d4d4789
8 changed files with 77412 additions and 76061 deletions

View File

@ -1,128 +1,128 @@
[paths] [paths]
train = null train = null
dev = null dev = null
vectors = null vectors = null
init_tok2vec = null init_tok2vec = null
[system] [system]
seed = 0 seed = 0
gpu_allocator = null gpu_allocator = null
[nlp] [nlp]
lang = "en" lang = "en"
pipeline = ["ner"] pipeline = ["ner"]
disabled = [] disabled = []
before_creation = null before_creation = null
after_creation = null after_creation = null
after_pipeline_creation = null after_pipeline_creation = null
batch_size = 1000 batch_size = 1000
tokenizer = {"@tokenizers":"spacy.Tokenizer.v1"} tokenizer = {"@tokenizers":"spacy.Tokenizer.v1"}
[components] [components]
[components.ner] [components.ner]
factory = "ner" factory = "ner"
incorrect_spans_key = null incorrect_spans_key = null
moves = null moves = null
scorer = {"@scorers":"spacy.ner_scorer.v1"} scorer = {"@scorers":"spacy.ner_scorer.v1"}
update_with_oracle_cut_size = 100 update_with_oracle_cut_size = 100
[components.ner.model] [components.ner.model]
@architectures = "spacy.TransitionBasedParser.v2" @architectures = "spacy.TransitionBasedParser.v2"
state_type = "ner" state_type = "ner"
extra_state_tokens = false extra_state_tokens = false
hidden_width = 64 hidden_width = 64
maxout_pieces = 2 maxout_pieces = 2
use_upper = true use_upper = true
nO = null nO = null
[components.ner.model.tok2vec] [components.ner.model.tok2vec]
@architectures = "spacy.HashEmbedCNN.v2" @architectures = "spacy.HashEmbedCNN.v2"
pretrained_vectors = null pretrained_vectors = null
width = 96 width = 96
depth = 4 depth = 4
embed_size = 2000 embed_size = 2000
window_size = 1 window_size = 1
maxout_pieces = 3 maxout_pieces = 3
subword_features = true subword_features = true
[corpora] [corpora]
[corpora.dev] [corpora.dev]
@readers = "spacy.Corpus.v1" @readers = "spacy.Corpus.v1"
path = ${paths.dev} path = ${paths.dev}
gold_preproc = false gold_preproc = false
max_length = 0 max_length = 0
limit = 0 limit = 0
augmenter = null augmenter = null
[corpora.train] [corpora.train]
@readers = "spacy.Corpus.v1" @readers = "spacy.Corpus.v1"
path = ${paths.train} path = ${paths.train}
gold_preproc = false gold_preproc = false
max_length = 0 max_length = 0
limit = 0 limit = 0
augmenter = null augmenter = null
[training] [training]
seed = ${system.seed} seed = ${system.seed}
gpu_allocator = ${system.gpu_allocator} gpu_allocator = ${system.gpu_allocator}
dropout = 0.1 dropout = 0.1
accumulate_gradient = 1 accumulate_gradient = 1
patience = 1600 patience = 1600
max_epochs = 0 max_epochs = 0
max_steps = 20000 max_steps = 20000
eval_frequency = 200 eval_frequency = 200
frozen_components = [] frozen_components = []
annotating_components = [] annotating_components = []
dev_corpus = "corpora.dev" dev_corpus = "corpora.dev"
train_corpus = "corpora.train" train_corpus = "corpora.train"
before_to_disk = null before_to_disk = null
[training.batcher] [training.batcher]
@batchers = "spacy.batch_by_words.v1" @batchers = "spacy.batch_by_words.v1"
discard_oversize = false discard_oversize = false
tolerance = 0.2 tolerance = 0.2
get_length = null get_length = null
[training.batcher.size] [training.batcher.size]
@schedules = "compounding.v1" @schedules = "compounding.v1"
start = 100 start = 100
stop = 1000 stop = 1000
compound = 1.001 compound = 1.001
t = 0.0 t = 0.0
[training.logger] [training.logger]
@loggers = "spacy.ConsoleLogger.v1" @loggers = "spacy.ConsoleLogger.v1"
progress_bar = false progress_bar = false
[training.optimizer] [training.optimizer]
@optimizers = "Adam.v1" @optimizers = "Adam.v1"
beta1 = 0.9 beta1 = 0.9
beta2 = 0.999 beta2 = 0.999
L2_is_weight_decay = true L2_is_weight_decay = true
L2 = 0.01 L2 = 0.01
grad_clip = 1.0 grad_clip = 1.0
use_averages = false use_averages = false
eps = 0.00000001 eps = 0.00000001
learn_rate = 0.001 learn_rate = 0.001
[training.score_weights] [training.score_weights]
ents_f = 1.0 ents_f = 1.0
ents_p = 0.0 ents_p = 0.0
ents_r = 0.0 ents_r = 0.0
ents_per_type = null ents_per_type = null
[pretraining] [pretraining]
[initialize] [initialize]
vectors = ${paths.vectors} vectors = ${paths.vectors}
init_tok2vec = ${paths.init_tok2vec} init_tok2vec = ${paths.init_tok2vec}
vocab_data = null vocab_data = null
lookups = null lookups = null
before_init = null before_init = null
after_init = null after_init = null
[initialize.components] [initialize.components]
[initialize.tokenizer] [initialize.tokenizer]

View File

@ -1,36 +1,36 @@
{ {
"lang":"en", "lang":"en",
"name":"pipeline", "name":"pipeline",
"version":"0.0.0", "version":"0.0.0",
"spacy_version":">=3.2.4,<3.3.0", "spacy_version":">=3.3.0,<3.4.0",
"description":"", "description":"",
"author":"", "author":"",
"email":"", "email":"",
"url":"", "url":"",
"license":"", "license":"",
"spacy_git_version":"b50fe5ec6", "spacy_git_version":"497a708c7",
"vectors":{ "vectors":{
"width":0, "width":0,
"vectors":0, "vectors":0,
"keys":0, "keys":0,
"name":null, "name":null,
"mode":"default" "mode":"default"
}, },
"labels":{ "labels":{
"ner":[ "ner":[
"effective_date", "effective_date",
"jurisdiction", "jurisdiction",
"party", "party",
"term" "term"
] ]
}, },
"pipeline":[ "pipeline":[
"ner" "ner"
], ],
"components":[ "components":[
"ner" "ner"
], ],
"disabled":[ "disabled":[
] ]
} }

View File

@ -1,13 +1,13 @@
{ {
"moves":null, "moves":null,
"update_with_oracle_cut_size":100, "update_with_oracle_cut_size":100,
"multitasks":[ "multitasks":[
], ],
"min_action_freq":1, "min_action_freq":1,
"learn_tokens":false, "learn_tokens":false,
"beam_width":1, "beam_width":1,
"beam_density":0.0, "beam_density":0.0,
"beam_update_prob":0.0, "beam_update_prob":0.0,
"incorrect_spans_key":null "incorrect_spans_key":null
} }

Binary file not shown.

File diff suppressed because one or more lines are too long

File diff suppressed because it is too large Load Diff

View File

@ -1,3 +1,3 @@
{ {
"mode":"default" "mode":"default"
} }

1526
main.ipynb

File diff suppressed because it is too large Load Diff