From ae4ed909d8507ad8017140a7b9a20b0656e6c85e Mon Sep 17 00:00:00 2001 From: jakubknczny Date: Tue, 1 Feb 2022 12:43:26 +0100 Subject: [PATCH] update readme --- README.md | 24 ++++++++++++++---------- scripts/marian-train.sh | 4 ++-- train.sh | 8 +++++--- 3 files changed, 21 insertions(+), 15 deletions(-) diff --git a/README.md b/README.md index b04ad6a..2191f98 100644 --- a/README.md +++ b/README.md @@ -1,12 +1,16 @@ -# NEEDS TO BE UPDATED -## PLEWI -# PLEWI -- clone PLEWI repo into ~/ via cd ~ ; git clone https://git.wmi.amu.edu.pl/s434704/PLEWI-polish-errors-correction-challenge.git +# Transfix-train for PLEWI +- clone PLEWI repo into ~/ via the following command: +``` +cd ~ ; git clone https://git.wmi.amu.edu.pl/s434704/PLEWI-polish-errors-correction-challenge.git +``` - run following commands: - git clone https://git.wmi.amu.edu.pl/s470607/transfix-train.git - cd transfix-train - ./plewi.sh - sudo ./setup.sh (this will take some time) - ./train ~/PLEWI-polish-errors-correction-challenge 32000 4 + - ```git clone https://git.wmi.amu.edu.pl/s470607/transfix-train.git``` + - ```cd transfix-train``` + - ```./plewi.sh``` + - ```sudo ./setup.sh``` (this will take some time) + - ```./train.sh ~/PLEWI-polish-errors-correction-challenge model-plewi 4``` -where: 32000 is a number of BPE merge operations, 4 is a number of epochs \ No newline at end of file +where: +* ~/PLEWI-polish-errors-correction-challenge is the path to the gonito repository +* model-plewi is the name of the model and of the directory where the training files will be stored +* 4 is the number of training epochs \ No newline at end of file diff --git a/scripts/marian-train.sh b/scripts/marian-train.sh index a91b430..0095dc7 100755 --- a/scripts/marian-train.sh +++ b/scripts/marian-train.sh @@ -36,13 +36,13 @@ mkdir "$model_name" --transformer-dropout 0.1 --label-smoothing 0.1 \ --learn-rate 0.0003 --lr-warmup 16000 --lr-decay-inv-sqrt 16000 --lr-report \ --optimizer-params 0.9 0.98 1e-09 --clip-norm 5 \ 
---tied-embeddings \ +--tied-embeddings-all \ --exponential-smoothing \ --log "$model_name"/train.log \ --after-epochs="$epochs" \ --vocabs "$model_name"/vocab.in.spm "$model_name"/vocab.expected.spm \ --valid-log "$model_name"/valid.log \ ---valid-metrics cross-entropy perplexity bleu \ +--valid-metrics perplexity bleu \ --valid-mini-batch 64 \ --valid-sets "$source_file_valid" "$target_file_valid" \ --valid-translation-output "$model_name"/valid.output --quiet-translation diff --git a/train.sh b/train.sh index 8b0f796..cb0641a 100755 --- a/train.sh +++ b/train.sh @@ -2,10 +2,12 @@ # arguments # 1. root of gonito.net challenge-like filestructure -# 2. expected number of train epochs +# 2. name of the model, also used as the directory that will contain the model +# 3. expected number of train epochs corpus_path="$1" -epochs="$2" +model_name="$2" +epochs="$3" -./scripts/marian-train.sh "$corpus_path" "$epochs" \ No newline at end of file +./scripts/marian-train.sh "$corpus_path" "$model_name" "$epochs" \ No newline at end of file