#!/bin/bash
#
# Launch Marian transformer training on a gonito.net challenge-like
# directory layout.
#
# Usage:
#   <script> CORPUS_ROOT <unused> <unused> BPE_MERGES EPOCHS
#
# Positional arguments (as actually read by this script):
#   $1  root of the gonito.net challenge-like file structure; training data is
#       read from "$1/train/in.tsv" and "$1/train/expected.tsv"
#   $4  number of BPE merge operations, e.g. 32000; only used here to build
#       the vocabulary file names
#   $5  expected number of train epochs, passed to --after-epochs
#
# NOTE(review): positions 2 and 3 are not read by this script. An earlier
# header numbered the arguments 1-3 while the code read $1/$4/$5; the
# positions are kept as the code had them so existing callers keep working.
# Confirm against the caller which numbering is intended.

set -euo pipefail

usage() {
  printf 'Usage: %s CORPUS_ROOT <unused> <unused> BPE_MERGES EPOCHS\n' \
    "${0##*/}" >&2
  exit 2
}

die() {
  printf 'error: %s\n' "$*" >&2
  exit 1
}

# All of $1, $4 and $5 are required; fail early with a readable message
# instead of handing empty values to marian.
(( $# >= 5 )) || usage

# path to corpus
corpus_path="$1"
bpe_merges="$4"
epochs="$5"

[[ -n "$corpus_path" ]] || die "corpus root (\$1) must not be empty"
[[ -n "$bpe_merges" ]] || die "number of BPE merges (\$4) must not be empty"
[[ "$epochs" =~ ^[0-9]+$ ]] \
  || die "number of epochs (\$5) must be a non-negative integer, got '$epochs'"

source_file="$corpus_path"/train/in.tsv
source_vocab="$source_file".vocab."$bpe_merges".yml

target_file="$corpus_path"/train/expected.tsv
target_vocab="$target_file".vocab."$bpe_merges".yml

[[ -r "$source_file" ]] || die "cannot read training source file: $source_file"
[[ -r "$target_file" ]] || die "cannot read training target file: $target_file"

marian_bin="$HOME/marian/build/marian"

# Options are collected in an array so that individual entries can be
# commented in or out without breaking a backslash-continued command line.
marian_args=(
  --type transformer
  --overwrite
  --train-sets "$source_file" "$target_file"
  --max-length 100
  --mini-batch-fit -w 10000 --maxi-batch 1000
  --valid-freq 5000
  --save-freq 5000
  --disp-freq 500
  --beam-size 6 --normalize 0.6
  --enc-depth 6 --dec-depth 6
  --transformer-heads 8
  --transformer-postprocess-emb d
  --transformer-postprocess dan
  --transformer-dropout 0.1 --label-smoothing 0.1
  --learn-rate 0.0003 --lr-warmup 16000 --lr-decay-inv-sqrt 16000 --lr-report
  --optimizer-params 0.9 0.98 1e-09 --clip-norm 5
  --tied-embeddings
  --exponential-smoothing
  --log "$HOME/train.log"
  --after-epochs="$epochs"
  --vocabs "$source_vocab" "$target_vocab"

  # Currently disabled options, kept for reference:
  #--early-stopping 10
  #--model model/model.npz
  #--valid-log model/valid.log
  #--valid-metrics cross-entropy perplexity translation
  #--valid-sets data/valid.bpe.en data/valid.bpe.de
  #--valid-script-path ./scripts/validate.sh
  #--valid-translation-output data/valid.bpe.en.output --quiet-translation
  #--valid-mini-batch 64
)

"$marian_bin" "${marian_args[@]}"