update marian-train
This commit is contained in:
parent
07c440c60e
commit
95f3b9043c
@ -1,5 +1,6 @@
|
|||||||
# PLEWI
|
# NEEDS TO BE UPDATED
|
||||||
- clone PLEWI repo into ~/
|
## PLEWI
|
||||||
|
- clone PLEWI repo into ~
|
||||||
- run the following commands:
|
- run the following commands:
|
||||||
- git clone https://git.wmi.amu.edu.pl/s470607/transfix-train.git
|
- git clone https://git.wmi.amu.edu.pl/s470607/transfix-train.git
|
||||||
- cd transfix-train
|
- cd transfix-train
|
||||||
|
@ -2,26 +2,28 @@
|
|||||||
|
|
||||||
# arguments
|
# arguments
|
||||||
# 1. root of gonito.net challenge-like filestructure
|
# 1. root of gonito.net challenge-like filestructure
|
||||||
# 2. number of BPE merge operations, e.g. 32000
|
# 2. name of the model, which is also the name of the directory that will contain it
|
||||||
# 3. expected number of training epochs
|
# 3. expected number of training epochs
|
||||||
|
|
||||||
# path to corpus
|
# path to corpus
|
||||||
corpus_path="$1"
|
corpus_path="$1"
|
||||||
bpe_merges="$2"
|
model_name="$2"
|
||||||
epochs="$3"
|
epochs="$3"
|
||||||
|
|
||||||
source_file="$corpus_path"/train/in.tsv."$bpe_merges"
|
source_file="$corpus_path"/train/in.tsv
|
||||||
source_vocab="$source_file".vocab."$bpe_merges".yml
|
source_file_valid="$corpus_path"/test-A/in.tsv
|
||||||
|
|
||||||
target_file="$corpus_path"/train/expected.tsv."$bpe_merges"
|
target_file="$corpus_path"/train/expected.tsv
|
||||||
target_vocab="$target_file".vocab."$bpe_merges".yml
|
target_file_valid="$corpus_path"/test-A/expected.tsv
|
||||||
|
|
||||||
|
mkdir "$model_name"
|
||||||
|
|
||||||
~/marian/build/marian \
|
~/marian/build/marian \
|
||||||
--type transformer \
|
--type transformer \
|
||||||
|
--model "$model_name"/model.npz \
|
||||||
--overwrite \
|
--overwrite \
|
||||||
--train-sets "$source_file" "$target_file" \
|
--train-sets "$source_file" "$target_file" \
|
||||||
--max-length 100 \
|
--max-length 200 \
|
||||||
--mini-batch-fit -w 10000 --maxi-batch 1000 \
|
--mini-batch-fit -w 10000 --maxi-batch 1000 \
|
||||||
--valid-freq 5000 \
|
--valid-freq 5000 \
|
||||||
--save-freq 5000 \
|
--save-freq 5000 \
|
||||||
@ -36,15 +38,11 @@ target_vocab="$target_file".vocab."$bpe_merges".yml
|
|||||||
--optimizer-params 0.9 0.98 1e-09 --clip-norm 5 \
|
--optimizer-params 0.9 0.98 1e-09 --clip-norm 5 \
|
||||||
--tied-embeddings \
|
--tied-embeddings \
|
||||||
--exponential-smoothing \
|
--exponential-smoothing \
|
||||||
--log ~/train.log \
|
--log "$model_name"/train.log \
|
||||||
--after-epochs="$epochs"
|
--after-epochs="$epochs" \
|
||||||
#--vocabs "$source_vocab" "$target_vocab"
|
--vocabs "$model_name"/vocab.in.spm "$model_name"/vocab.expected.spm \
|
||||||
|
--valid-log "$model_name"/valid.log \
|
||||||
#--early-stopping 10 \
|
--valid-metrics cross-entropy perplexity bleu \
|
||||||
#--model model/model.npz
|
--valid-mini-batch 64 \
|
||||||
#--valid-log model/valid.log \
|
--valid-sets "$source_file_valid" "$target_file_valid" \
|
||||||
#--valid-metrics cross-entropy perplexity translation \
|
--valid-translation-output "$model_name"/valid.output --quiet-translation
|
||||||
#--valid-sets data/valid.bpe.en data/valid.bpe.de \
|
|
||||||
#--valid-script-path ./scripts/validate.sh \
|
|
||||||
#--valid-translation-output data/valid.bpe.en.output --quiet-translation \
|
|
||||||
#--valid-mini-batch 64 \
|
|
||||||
|
9
train.sh
9
train.sh
@ -2,13 +2,10 @@
|
|||||||
|
|
||||||
# arguments
|
# arguments
|
||||||
# 1. root of gonito.net challenge-like filestructure
|
# 1. root of gonito.net challenge-like filestructure
|
||||||
# 2. number of BPE merge operations, e.g. 32000
|
# 2. expected number of training epochs
|
||||||
# 3. expected number of training epochs
|
|
||||||
|
|
||||||
|
|
||||||
corpus_path="$1"
|
corpus_path="$1"
|
||||||
bpe_merges="$2"
|
epochs="$2"
|
||||||
epochs="$3"
|
|
||||||
|
|
||||||
./scripts/do-fastBPE.sh "$corpus_path" "$bpe_merges"
|
./scripts/marian-train.sh "$corpus_path" "$epochs"
|
||||||
./scripts/marian-train.sh "$corpus_path" "$bpe_merges" "$epochs"
|
|
Loading…
Reference in New Issue
Block a user