#!/bin/bash
#
# Launch Marian training of a transformer model on a gonito.net
# challenge-like directory.
#
# Usage:
#   <this script> CORPUS_ROOT SOURCE_SUFFIX TARGET_SUFFIX BPE_MERGES EPOCHS
#
# Arguments:
#   1. root of gonito.net challenge-like filestructure
#   2. suffix of source sentences, e.g. en for files like train.en
#   3. suffix of target sentences, e.g. pl for files like train.pl
#   4. number of BPE merge operations, e.g. 32000; used here only to
#      select the vocabulary files (train.<suffix>.vocab.<merges>)
#   5. expected number of train epochs (passed to --after-epochs)
#
# Paths that are relative to the current working directory:
#   ../marian/build/marian   the marian binary that gets executed
#   model/train.log          the log file handed to --log
#
# Exit status: 2 on wrong usage, otherwise the exit status of marian.

set -euo pipefail

# Fail early instead of handing marian half-built paths such as
# "/train/train." when arguments are missing. Extra arguments are ignored.
if (( $# < 5 )); then
  printf 'Usage: %s CORPUS_ROOT SOURCE_SUFFIX TARGET_SUFFIX BPE_MERGES EPOCHS\n' \
    "${0##*/}" >&2
  exit 2
fi

# path to corpus
corpus_path="$1"
source_suffix="$2"
target_suffix="$3"
bpe_merges="$4"
epochs="$5"

readonly marian_bin=../marian/build/marian

# Training files and the vocabularies that sit next to them.
source_file="$corpus_path"/train/train."$source_suffix"
source_vocab="$source_file".vocab."$bpe_merges"

target_file="$corpus_path"/train/train."$target_suffix"
target_vocab="$target_file".vocab."$bpe_merges"

# The options are collected in an array so that no backslash continuations
# are needed and disabled options can stay next to the active ones as
# plain comments.
marian_args=(
  --type transformer
  --overwrite
  --train-sets "$source_file" "$target_file"
  --max-length 100
  --mini-batch-fit -w 10000 --maxi-batch 1000
  --valid-freq 5000
  --save-freq 5000
  --disp-freq 500
  --beam-size 6 --normalize 0.6
  --enc-depth 6 --dec-depth 6
  --transformer-heads 8
  --transformer-postprocess-emb d
  --transformer-postprocess dan
  --transformer-dropout 0.1 --label-smoothing 0.1
  --learn-rate 0.0003 --lr-warmup 16000 --lr-decay-inv-sqrt 16000 --lr-report
  --optimizer-params 0.9 0.98 1e-09 --clip-norm 5
  --tied-embeddings-all
  --exponential-smoothing
  --log model/train.log
  --vocabs "$source_vocab" "$target_vocab"
  --after-epochs "$epochs"

  # Currently disabled options, kept for reference:
  #--vocabs model/vocab.ende.yml model/vocab.ende.yml
  #--early-stopping 10
  #--model model/model.npz
  #--valid-log model/valid.log
  #--valid-metrics cross-entropy perplexity translation
  #--valid-sets data/valid.bpe.en data/valid.bpe.de
  #--valid-script-path ./scripts/validate.sh
  #--valid-translation-output data/valid.bpe.en.output --quiet-translation
  #--valid-mini-batch 64
)

"$marian_bin" "${marian_args[@]}"