#!/bin/bash
#
# Launch Marian transformer training on a gonito.net challenge-like directory.
#
# Usage: <this script> CORPUS_ROOT MODEL_NAME EPOCHS
#
# Arguments:
#   1. root of gonito.net challenge-like file structure; this script reads
#      train/in.tsv, train/expected.tsv, test-A/in.tsv and test-A/expected.tsv
#      below it
#   2. name of the model and therefore a directory that will contain the model
#   3. expected number of train epochs (passed to --after-epochs)
#
# Environment:
#   MARIAN_BIN  optional path or command name of the marian executable
#               (default: ~/marian/build/marian)

set -euo pipefail

# Print an error message to stderr and abort.
die() {
  printf 'error: %s\n' "$*" >&2
  exit 1
}

# Print a short usage summary to stderr and abort with status 2.
usage() {
  printf 'Usage: %s CORPUS_ROOT MODEL_NAME EPOCHS\n' "${0##*/}" >&2
  exit 2
}

# Extra arguments beyond the third are ignored, as they always were.
if (( $# < 3 )); then
  usage
fi

# path to corpus
corpus_path="$1"
model_name="$2"
epochs="$3"

[[ -n "$corpus_path" ]] || die "corpus root must not be empty"
[[ -n "$model_name" ]] || die "model name must not be empty"
[[ "$epochs" =~ ^[0-9]+$ ]] \
  || die "number of epochs must be a non-negative integer, got '$epochs'"

source_file="$corpus_path"/train/in.tsv
source_file_valid="$corpus_path"/test-A/in.tsv
target_file="$corpus_path"/train/expected.tsv
target_file_valid="$corpus_path"/test-A/expected.tsv

# Fail early, before creating the model directory, if any input is missing.
for input_file in \
  "$source_file" "$target_file" "$source_file_valid" "$target_file_valid"; do
  [[ -r "$input_file" ]] || die "cannot read input file: $input_file"
done

# Default location of the trainer; MARIAN_BIN overrides it when set.
marian_bin=~/marian/build/marian
if [[ -n "${MARIAN_BIN:-}" ]]; then
  marian_bin="$MARIAN_BIN"
fi
command -v -- "$marian_bin" >/dev/null \
  || die "marian executable not found: $marian_bin"

# -p: re-running with an existing model directory is not an error.
mkdir -p -- "$model_name"

# The options live in an array so they can be grouped and commented;
# every flag and value is passed to marian exactly as listed.
marian_args=(
  # Model type and output location.
  --type transformer
  --model "$model_name"/model.npz
  --overwrite

  # Training data and batching.
  --train-sets "$source_file" "$target_file"
  --max-length 200
  --mini-batch-fit -w 10000 --maxi-batch 1000

  # Validation / checkpoint / progress-display frequencies.
  --valid-freq 5000
  --save-freq 5000
  --disp-freq 500

  # Decoding settings.
  --beam-size 6 --normalize 0.6

  # Transformer architecture and regularization.
  --enc-depth 6 --dec-depth 6
  --transformer-heads 8
  --transformer-postprocess-emb d
  --transformer-postprocess dan
  --transformer-dropout 0.1 --label-smoothing 0.1

  # Optimizer and learning-rate schedule.
  --learn-rate 0.0003 --lr-warmup 16000 --lr-decay-inv-sqrt 16000 --lr-report
  --optimizer-params 0.9 0.98 1e-09 --clip-norm 5
  --tied-embeddings
  --exponential-smoothing

  # Logging, stopping criterion and vocabularies (stored in the model dir).
  --log "$model_name"/train.log
  --after-epochs="$epochs"
  --vocabs "$model_name"/vocab.in.spm "$model_name"/vocab.expected.spm

  # Validation on the test-A split.
  --valid-log "$model_name"/valid.log
  --valid-metrics cross-entropy perplexity bleu
  --valid-mini-batch 64
  --valid-sets "$source_file_valid" "$target_file_valid"
  --valid-translation-output "$model_name"/valid.output --quiet-translation
)

# Last command: the script's exit status is marian's exit status.
"$marian_bin" "${marian_args[@]}"