update readme
This commit is contained in:
parent
a9ccbe0fda
commit
ae4ed909d8
24
README.md
24
README.md
@ -1,12 +1,16 @@
|
|||||||
# NEEDS TO BE UPDATED
|
# Transfix-train for PLEWI
|
||||||
## PLEWI
|
- clone the PLEWI repo into ~/ via the following command:
|
||||||
# PLEWI
|
```
|
||||||
- clone PLEWI repo into ~/ via cd ~ ; git clone https://git.wmi.amu.edu.pl/s434704/PLEWI-polish-errors-correction-challenge.git
|
cd ~ ; git clone https://git.wmi.amu.edu.pl/s434704/PLEWI-polish-errors-correction-challenge.git
|
||||||
|
```
|
||||||
- run following commands:
|
- run the following commands:
|
||||||
- git clone https://git.wmi.amu.edu.pl/s470607/transfix-train.git
|
- ```git clone https://git.wmi.amu.edu.pl/s470607/transfix-train.git```
|
||||||
- cd transfix-train
|
- ```cd transfix-train```
|
||||||
- ./plewi.sh
|
- ```./plewi.sh```
|
||||||
- sudo ./setup.sh (this will take some time)
|
- ```sudo ./setup.sh``` (this will take some time)
|
||||||
- ./train ~/PLEWI-polish-errors-correction-challenge 32000 4
|
- ```./train.sh ~/PLEWI-polish-errors-correction-challenge model-plewi 4```
|
||||||
|
|
||||||
where: 32000 is a number of BPE merge operations, 4 is a number of epochs
|
where:
|
||||||
|
* ~/PLEWI-polish-errors-correction-challenge is the path to the gonito repository
|
||||||
|
* model-plewi is the name of the model and of the directory in which the training files will be stored
|
||||||
|
* 4 is the number of training epochs
|
@ -36,13 +36,13 @@ mkdir "$model_name"
|
|||||||
--transformer-dropout 0.1 --label-smoothing 0.1 \
|
--transformer-dropout 0.1 --label-smoothing 0.1 \
|
||||||
--learn-rate 0.0003 --lr-warmup 16000 --lr-decay-inv-sqrt 16000 --lr-report \
|
--learn-rate 0.0003 --lr-warmup 16000 --lr-decay-inv-sqrt 16000 --lr-report \
|
||||||
--optimizer-params 0.9 0.98 1e-09 --clip-norm 5 \
|
--optimizer-params 0.9 0.98 1e-09 --clip-norm 5 \
|
||||||
--tied-embeddings \
|
--tied-embeddings-all \
|
||||||
--exponential-smoothing \
|
--exponential-smoothing \
|
||||||
--log "$model_name"/train.log \
|
--log "$model_name"/train.log \
|
||||||
--after-epochs="$epochs" \
|
--after-epochs="$epochs" \
|
||||||
--vocabs "$model_name"/vocab.in.spm "$model_name"/vocab.expected.spm \
|
--vocabs "$model_name"/vocab.in.spm "$model_name"/vocab.expected.spm \
|
||||||
--valid-log "$model_name"/valid.log \
|
--valid-log "$model_name"/valid.log \
|
||||||
--valid-metrics cross-entropy perplexity bleu \
|
--valid-metrics perplexity bleu \
|
||||||
--valid-mini-batch 64 \
|
--valid-mini-batch 64 \
|
||||||
--valid-sets "$source_file_valid" "$target_file_valid" \
|
--valid-sets "$source_file_valid" "$target_file_valid" \
|
||||||
--valid-translation-output "$model_name"/valid.output --quiet-translation
|
--valid-translation-output "$model_name"/valid.output --quiet-translation
|
||||||
|
8
train.sh
8
train.sh
@ -2,10 +2,12 @@
|
|||||||
|
|
||||||
# arguments
|
# arguments
|
||||||
# 1. root of gonito.net challenge-like filestructure
|
# 1. root of gonito.net challenge-like filestructure
|
||||||
# 2. expected number of train epochs
|
# 2. name of the model; also used as the name of the directory that will contain the model
|
||||||
|
# 3. expected number of training epochs
|
||||||
|
|
||||||
|
|
||||||
corpus_path="$1"
|
corpus_path="$1"
|
||||||
epochs="$2"
|
model_name="$2"
|
||||||
|
epochs="$3"
|
||||||
|
|
||||||
./scripts/marian-train.sh "$corpus_path" "$epochs"
|
# Forward the three positional arguments (corpus root, model name, epoch count)
# to the helper script. The model name is held in "$model_name"; the previous
# "$model_model_name" was never assigned, so it expanded to an empty string.
./scripts/marian-train.sh "$corpus_path" "$model_name" "$epochs"
|
Loading…
Reference in New Issue
Block a user