add plewi support
This commit is contained in:
parent
942e426366
commit
1339121797
8
plewi.sh
Normal file
8
plewi.sh
Normal file
@ -0,0 +1,8 @@
|
||||
#!/bin/bash
#
# Prepare the PLEWI training data: decompress train.tsv.xz and split the
# first two tab-separated columns of train.tsv into separate files.
#
#   column 1 -> in.tsv
#   column 2 -> expected.tsv
#
# Usage: ./plewi.sh   (takes no arguments; the data directory is fixed below)

# Abort on the first failing command so that a failed decompression never
# leaves truncated in.tsv / expected.tsv files behind.
set -euo pipefail

plewi_path=~/PLEWI-polish-errors-correction-challenge/train
train_file="$plewi_path"/train.tsv

# `xz -d` deletes the archive after unpacking it, so decompress only when the
# plain file is missing; this keeps the script re-runnable.
if [[ ! -f "$train_file" ]]; then
  xz -d -v -- "$train_file".xz
fi

# cut reads the file directly; no intermediate `cat` process is needed.
cut -f 1 -- "$train_file" > "$plewi_path"/in.tsv
cut -f 2 -- "$train_file" > "$plewi_path"/expected.tsv
|
@ -30,17 +30,13 @@ cd ~/fastBPE
|
||||
./fast applybpe "$source_path_train"."$bpe_merges" "$source_path_train" "$codes"
|
||||
./fast applybpe "$target_path_train"."$bpe_merges" "$target_path_train" "$codes"
|
||||
# get train vocabulary and .yml'ify it
|
||||
./fast getvocab "$source_path_train"."$bpe_merges" > "$source_path_train".vocab."$bpe_merges" \
|
||||
tee >(cut -f 1 > temp1) | cut -f 2 > temp2
|
||||
ex -sc '%s/$/:/|x' temp1
|
||||
paste temp1 temp2 > "$source_path_train".vocab."$bpe_merges".yml
|
||||
rm temp1 temp2 "$source_path_train".vocab."$bpe_merges"
|
||||
|
||||
./fast getvocab "$target_path_train"."$bpe_merges" > "$target_path_train".vocab."$bpe_merges" \
|
||||
tee >(cut -f 1 > temp1) | cut -f 2 > temp2
|
||||
ex -sc '%s/$/:/|x' temp1
|
||||
paste temp1 temp2 > "$target_path_train".vocab."$bpe_merges".yml
|
||||
rm temp1 temp2 "$target_path_train".vocab."$bpe_merges"
|
||||
# Write a "<token>:<TAB><count>" .yml vocabulary next to each BPE-encoded
# training file (source first, then target).
for file in "$source_path_train" "$target_path_train"
do
  # The getvocab output is split on a single space into token (field 1) and
  # count (field 2). Doing the rewrite in one awk pass avoids the previous
  # temp1/temp2 scratch files and the race where the asynchronous
  # `tee >(cut ... > temp1)` could still be writing temp1 when it was read.
  # -F '[ ]' splits on exactly one space, matching the former `cut -d " "`.
  ./fast getvocab "$file"."$bpe_merges" \
    | awk -F '[ ]' '{ printf "%s:\t%s\n", $1, $2 }' \
    > "$file".vocab."$bpe_merges".yml
done
|
||||
## apply codes to test
|
||||
#./fast applybpe "$source_path_test"."$bpe_merges" "$source_path_test" "$codes" "$source_path_train".vocab."$bpe_merges"
|
||||
#./fast applybpe "$target_path_test"."$bpe_merges" "$target_path_test" "$codes" "$target_path_train".vocab."$bpe_merges"
|
||||
|
@ -7,13 +7,13 @@
|
||||
|
||||
# path to corpus
|
||||
corpus_path="$1"
|
||||
bpe_merges="$4"
|
||||
epochs="$5"
|
||||
bpe_merges="$2"
|
||||
epochs="$3"
|
||||
|
||||
source_file="$corpus_path"/train/in.tsv
|
||||
source_file="$corpus_path"/train/in.tsv."$bpe_merges"
|
||||
source_vocab="$source_file".vocab."$bpe_merges".yml
|
||||
|
||||
target_file="$corpus_path"/train/expected.tsv
|
||||
target_file="$corpus_path"/train/expected.tsv."$bpe_merges"
|
||||
target_vocab="$target_file".vocab."$bpe_merges".yml
|
||||
|
||||
|
||||
@ -37,8 +37,8 @@ target_vocab="$target_file".vocab."$bpe_merges".yml
|
||||
--tied-embeddings \
|
||||
--exponential-smoothing \
|
||||
--log ~/train.log \
|
||||
--after-epochs="$epochs" \
|
||||
--vocabs "$source_vocab" "$target_vocab"
|
||||
--after-epochs="$epochs"
|
||||
#--vocabs "$source_vocab" "$target_vocab"
|
||||
|
||||
#--early-stopping 10 \
|
||||
#--model model/model.npz
|
||||
|
Loading…
Reference in New Issue
Block a user