add plewi support
This commit is contained in:
parent
942e426366
commit
1339121797
8
plewi.sh
Normal file
8
plewi.sh
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
#!/bin/bash
# Prepare the PLEWI training data.
#
# Decompresses train.tsv.xz (if train.tsv is not already present) and splits
# the tab-separated train.tsv into two single-column files next to it:
#   in.tsv        - field 1 of every line
#   expected.tsv  - field 2 of every line
#
# Exits non-zero as soon as any step fails, so a failed decompression can
# never leave behind empty or truncated in.tsv / expected.tsv files.

set -euo pipefail

plewi_path=~/PLEWI-polish-errors-correction-challenge/train
train_file="$plewi_path"/train.tsv

# Skip decompression when train.tsv already exists (e.g. on a re-run: xz -d
# deletes the archive once it has been decompressed, so a second call would
# only produce an error). If neither file exists, xz reports it and we abort.
if [[ ! -f "$train_file" ]]; then
  xz -d -v -- "$train_file".xz
fi

# cut reads the file directly; no need to pipe it through cat.
cut -f 1 -- "$train_file" > "$plewi_path"/in.tsv
cut -f 2 -- "$train_file" > "$plewi_path"/expected.tsv
|
@ -30,17 +30,13 @@ cd ~/fastBPE
|
|||||||
./fast applybpe "$source_path_train"."$bpe_merges" "$source_path_train" "$codes"
|
./fast applybpe "$source_path_train"."$bpe_merges" "$source_path_train" "$codes"
|
||||||
./fast applybpe "$target_path_train"."$bpe_merges" "$target_path_train" "$codes"
|
./fast applybpe "$target_path_train"."$bpe_merges" "$target_path_train" "$codes"
|
||||||
# get train vocabulary and .yml'ify it
|
# get train vocabulary and .yml'ify it
|
||||||
./fast getvocab "$source_path_train"."$bpe_merges" > "$source_path_train".vocab."$bpe_merges" \
|
for file in "$source_path_train" "$target_path_train"
|
||||||
tee >(cut -f 1 > temp1) | cut -f 2 > temp2
|
do
|
||||||
ex -sc '%s/$/:/|x' temp1
|
./fast getvocab "$file"."$bpe_merges" | tee >( cut -f 1 -d " " > temp1 ) | cut -f 2 -d " " > temp2
|
||||||
paste temp1 temp2 > "$source_path_train".vocab."$bpe_merges".yml
|
ex -sc '%s/$/:/|x' temp1
|
||||||
rm temp1 temp2 "$source_path_train".vocab."$bpe_merges"
|
paste temp1 temp2 > "$file".vocab."$bpe_merges".yml
|
||||||
|
rm temp1 temp2
|
||||||
./fast getvocab "$target_path_train"."$bpe_merges" > "$target_path_train".vocab."$bpe_merges" \
|
done
|
||||||
tee >(cut -f 1 > temp1) | cut -f 2 > temp2
|
|
||||||
ex -sc '%s/$/:/|x' temp1
|
|
||||||
paste temp1 temp2 > "$target_path_train".vocab."$bpe_merges".yml
|
|
||||||
rm temp1 temp2 "$target_path_train".vocab."$bpe_merges"
|
|
||||||
## apply codes to test
|
## apply codes to test
|
||||||
#./fast applybpe "$source_path_test"."$bpe_merges" "$source_path_test" "$codes" "$source_path_train".vocab."$bpe_merges"
|
#./fast applybpe "$source_path_test"."$bpe_merges" "$source_path_test" "$codes" "$source_path_train".vocab."$bpe_merges"
|
||||||
#./fast applybpe "$target_path_test"."$bpe_merges" "$target_path_test" "$codes" "$target_path_train".vocab."$bpe_merges"
|
#./fast applybpe "$target_path_test"."$bpe_merges" "$target_path_test" "$codes" "$target_path_train".vocab."$bpe_merges"
|
||||||
|
@ -7,13 +7,13 @@
|
|||||||
|
|
||||||
# path to corpus
|
# path to corpus
|
||||||
corpus_path="$1"
|
corpus_path="$1"
|
||||||
bpe_merges="$4"
|
bpe_merges="$2"
|
||||||
epochs="$5"
|
epochs="$3"
|
||||||
|
|
||||||
source_file="$corpus_path"/train/in.tsv
|
source_file="$corpus_path"/train/in.tsv."$bpe_merges"
|
||||||
source_vocab="$source_file".vocab."$bpe_merges".yml
|
source_vocab="$source_file".vocab."$bpe_merges".yml
|
||||||
|
|
||||||
target_file="$corpus_path"/train/expected.tsv
|
target_file="$corpus_path"/train/expected.tsv."$bpe_merges"
|
||||||
target_vocab="$target_file".vocab."$bpe_merges".yml
|
target_vocab="$target_file".vocab."$bpe_merges".yml
|
||||||
|
|
||||||
|
|
||||||
@ -37,8 +37,8 @@ target_vocab="$target_file".vocab."$bpe_merges".yml
|
|||||||
--tied-embeddings \
|
--tied-embeddings \
|
||||||
--exponential-smoothing \
|
--exponential-smoothing \
|
||||||
--log ~/train.log \
|
--log ~/train.log \
|
||||||
--after-epochs="$epochs" \
|
--after-epochs="$epochs"
|
||||||
--vocabs "$source_vocab" "$target_vocab"
|
#--vocabs "$source_vocab" "$target_vocab"
|
||||||
|
|
||||||
#--early-stopping 10 \
|
#--early-stopping 10 \
|
||||||
#--model model/model.npz
|
#--model model/model.npz
|
||||||
|
Loading…
Reference in New Issue
Block a user