From ae4ed909d8507ad8017140a7b9a20b0656e6c85e Mon Sep 17 00:00:00 2001
From: jakubknczny <jakubknczny@gmail.com>
Date: Tue, 1 Feb 2022 12:43:26 +0100
Subject: [PATCH] update readme

---
 README.md               | 24 ++++++++++++++----------
 scripts/marian-train.sh |  4 ++--
 train.sh                |  8 +++++---
 3 files changed, 21 insertions(+), 15 deletions(-)

diff --git a/README.md b/README.md
index b04ad6a..2191f98 100644
--- a/README.md
+++ b/README.md
@@ -1,12 +1,16 @@
-# NEEDS TO BE UPDATED
-## PLEWI 
-# PLEWI
-- clone PLEWI repo into ~/ via cd ~ ; git clone https://git.wmi.amu.edu.pl/s434704/PLEWI-polish-errors-correction-challenge.git
+# Transfix-train for PLEWI
+- clone the PLEWI repo into ~/ with the following command:
+```
+cd ~ ; git clone https://git.wmi.amu.edu.pl/s434704/PLEWI-polish-errors-correction-challenge.git
+```
 - run following commands:
-  - git clone https://git.wmi.amu.edu.pl/s470607/transfix-train.git
-  - cd transfix-train
-  - ./plewi.sh
-  - sudo ./setup.sh (this will take some time)
-  - ./train ~/PLEWI-polish-errors-correction-challenge 32000 4
+  - ```git clone https://git.wmi.amu.edu.pl/s470607/transfix-train.git```
+  - ```cd transfix-train```
+  - ```./plewi.sh```
+  - ```sudo ./setup.sh``` (this will take some time)
+  - ```./train.sh ~/PLEWI-polish-errors-correction-challenge model-plewi 4```
 
-where: 32000 is a number of BPE merge operations, 4 is a number of epochs
\ No newline at end of file
+where: 
+* ~/PLEWI-polish-errors-correction-challenge is the path to the gonito repository
+* model-plewi is the name of the model and of the directory where the training files will be stored
+* 4 is the number of training epochs
\ No newline at end of file
diff --git a/scripts/marian-train.sh b/scripts/marian-train.sh
index a91b430..0095dc7 100755
--- a/scripts/marian-train.sh
+++ b/scripts/marian-train.sh
@@ -36,13 +36,13 @@ mkdir "$model_name"
 --transformer-dropout 0.1 --label-smoothing 0.1 \
 --learn-rate 0.0003 --lr-warmup 16000 --lr-decay-inv-sqrt 16000 --lr-report \
 --optimizer-params 0.9 0.98 1e-09 --clip-norm 5 \
---tied-embeddings \
+--tied-embeddings-all \
 --exponential-smoothing \
 --log "$model_name"/train.log \
 --after-epochs="$epochs" \
 --vocabs "$model_name"/vocab.in.spm "$model_name"/vocab.expected.spm \
 --valid-log "$model_name"/valid.log \
---valid-metrics cross-entropy perplexity bleu \
+--valid-metrics perplexity bleu \
 --valid-mini-batch 64 \
 --valid-sets "$source_file_valid" "$target_file_valid" \
 --valid-translation-output "$model_name"/valid.output --quiet-translation
diff --git a/train.sh b/train.sh
index 8b0f796..cb0641a 100755
--- a/train.sh
+++ b/train.sh
@@ -2,10 +2,12 @@
 
 # arguments
 # 1. root of gonito.net challenge-like filestructure
-# 2. expected number of train epochs
+# 2. name of the model and therefore a directory that will contain the model
+# 3. expected number of train epochs
 
 
 corpus_path="$1"
-epochs="$2"
+model_name="$2"
+epochs="$3"
 
-./scripts/marian-train.sh "$corpus_path" "$epochs"
\ No newline at end of file
+./scripts/marian-train.sh "$corpus_path" "$model_name" "$epochs"
\ No newline at end of file