#!/bin/bash
#
# Train a Marian transformer model on the train split of a gonito.net
# challenge-like corpus.
#
# Usage: SCRIPT CORPUS_ROOT SOURCE_SUFFIX TARGET_SUFFIX BPE_MERGES EPOCHS
#
# Arguments:
#   1. root of gonito.net challenge-like file structure
#   2. suffix of source sentences, e.g. en for files like train.en
#   3. suffix of target sentences, e.g. pl for files like train.pl
#   4. number of BPE merge operations, e.g. 32000
#   5. expected number of train epochs
#
# The Marian binary is expected at ./marian/build/marian relative to the
# current working directory; the model and the training log are written
# under ./model/.

set -euo pipefail

# Refuse to start a training run with missing arguments: the paths below
# would otherwise silently degrade to things like "/train/train.".
if (( $# < 5 )); then
  printf 'Usage: %s CORPUS_ROOT SOURCE_SUFFIX TARGET_SUFFIX BPE_MERGES EPOCHS\n' \
    "${0##*/}" >&2
  exit 2
fi

corpus_path="$1"
source_suffix="$2"
target_suffix="$3"
bpe_merges="$4"
epochs="$5"

# Training files and their vocabularies (one vocabulary per side, named
# after the training file and the number of BPE merges).
source_file="$corpus_path"/train/train."$source_suffix"
source_vocab="$source_file".vocab."$bpe_merges"

target_file="$corpus_path"/train/train."$target_suffix"
target_vocab="$target_file".vocab."$bpe_merges"

# The options are collected in an array rather than a backslash-continued
# command line: a '#' comment inside a continued command ends the command
# at that point, so disabled options can only be kept next to the live
# ones safely when they are real comments inside an array literal.
marian_args=(
  --model model/model.npz --type transformer
  --overwrite
  --train-sets "$source_file" "$target_file"
  --max-length 100
  --vocabs "$source_vocab" "$target_vocab"
  # --vocabs model/vocab.ende.yml model/vocab.ende.yml
  --mini-batch-fit -w 10000 --maxi-batch 1000
  --after-epochs "$epochs"
  # --early-stopping 10
  --valid-freq 5000
  --save-freq 5000
  --disp-freq 500
  --beam-size 6 --normalize 0.6
  --enc-depth 6 --dec-depth 6
  --transformer-heads 8
  --transformer-postprocess-emb d
  --transformer-postprocess dan
  --transformer-dropout 0.1 --label-smoothing 0.1
  --learn-rate 0.0003 --lr-warmup 16000 --lr-decay-inv-sqrt 16000 --lr-report
  --optimizer-params 0.9 0.98 1e-09 --clip-norm 5
  --tied-embeddings-all
  --exponential-smoothing
  --log model/train.log
  # Validation is currently disabled; options kept for reference:
  # --valid-log model/valid.log
  # --valid-metrics cross-entropy perplexity translation
  # --valid-sets data/valid.bpe.en data/valid.bpe.de
  # --valid-script-path ./scripts/validate.sh
  # --valid-translation-output data/valid.bpe.en.output --quiet-translation
  # --valid-mini-batch 64
)

./marian/build/marian "${marian_args[@]}"