update marian-train

This commit is contained in:
jakubknczny 2022-01-31 18:33:10 +01:00
parent 07c440c60e
commit 95f3b9043c
3 changed files with 24 additions and 28 deletions

View File

@ -1,5 +1,6 @@
# PLEWI # NEEDS TO BE UPDATED
- clone the PLEWI repo into ~/ ## PLEWI
- clone the PLEWI repo into ~
- run the following commands: - run the following commands:
- git clone https://git.wmi.amu.edu.pl/s470607/transfix-train.git - git clone https://git.wmi.amu.edu.pl/s470607/transfix-train.git
- cd transfix-train - cd transfix-train

View File

@ -2,26 +2,28 @@
# arguments # arguments
# 1. root of a gonito.net challenge-like file structure # 1. root of a gonito.net challenge-like file structure
# 2. number of BPE merge operations, e.g. 32000 # 2. name of the model; also the name of the directory that will contain the model
# 3. expected number of training epochs # 3. expected number of training epochs
# path to corpus # path to corpus
corpus_path="$1" corpus_path="$1"
bpe_merges="$2" model_name="$2"
epochs="$3" epochs="$3"
source_file="$corpus_path"/train/in.tsv."$bpe_merges" source_file="$corpus_path"/train/in.tsv
source_vocab="$source_file".vocab."$bpe_merges".yml source_file_valid="$corpus_path"/test-A/in.tsv
target_file="$corpus_path"/train/expected.tsv."$bpe_merges" target_file="$corpus_path"/train/expected.tsv
target_vocab="$target_file".vocab."$bpe_merges".yml target_file_valid="$corpus_path"/test-A/expected.tsv
mkdir "$model_name"
~/marian/build/marian \ ~/marian/build/marian \
--type transformer \ --type transformer \
--model "$model_name"/model.npz \
--overwrite \ --overwrite \
--train-sets "$source_file" "$target_file" \ --train-sets "$source_file" "$target_file" \
--max-length 100 \ --max-length 200 \
--mini-batch-fit -w 10000 --maxi-batch 1000 \ --mini-batch-fit -w 10000 --maxi-batch 1000 \
--valid-freq 5000 \ --valid-freq 5000 \
--save-freq 5000 \ --save-freq 5000 \
@ -36,15 +38,11 @@ target_vocab="$target_file".vocab."$bpe_merges".yml
--optimizer-params 0.9 0.98 1e-09 --clip-norm 5 \ --optimizer-params 0.9 0.98 1e-09 --clip-norm 5 \
--tied-embeddings \ --tied-embeddings \
--exponential-smoothing \ --exponential-smoothing \
--log ~/train.log \ --log "$model_name"/train.log \
--after-epochs="$epochs" --after-epochs="$epochs" \
#--vocabs "$source_vocab" "$target_vocab" --vocabs "$model_name"/vocab.in.spm "$model_name"/vocab.expected.spm \
--valid-log "$model_name"/valid.log \
#--early-stopping 10 \ --valid-metrics cross-entropy perplexity bleu \
#--model model/model.npz --valid-mini-batch 64 \
#--valid-log model/valid.log \ --valid-sets "$source_file_valid" "$target_file_valid" \
#--valid-metrics cross-entropy perplexity translation \ --valid-translation-output "$model_name"/valid.output --quiet-translation
#--valid-sets data/valid.bpe.en data/valid.bpe.de \
#--valid-script-path ./scripts/validate.sh \
#--valid-translation-output data/valid.bpe.en.output --quiet-translation \
#--valid-mini-batch 64 \

View File

@ -2,13 +2,10 @@
# arguments # arguments
# 1. root of a gonito.net challenge-like file structure # 1. root of a gonito.net challenge-like file structure
# 2. number of BPE merge operations, e.g. 32000 # 2. expected number of training epochs
# 3. expected number of training epochs
corpus_path="$1" corpus_path="$1"
bpe_merges="$2" epochs="$2"
epochs="$3"
./scripts/do-fastBPE.sh "$corpus_path" "$bpe_merges" ./scripts/marian-train.sh "$corpus_path" "$epochs"
./scripts/marian-train.sh "$corpus_path" "$bpe_merges" "$epochs"