12 lines
415 B
Bash
12 lines
415 B
Bash
#!/usr/bin/env bash
#
# Encode the train / dev / test inputs with an existing SentencePiece model.
#
# Reads   vocab_spm_bpe.model
#         data/train/in.tsv, data/dev-0/in.tsv, data/test-A/in.tsv
# Writes  data/train.txt, data/valid.txt, data/test.txt
#
# All paths are relative to the current working directory.

set -euo pipefail
set -x

readonly SPM_MODEL='vocab_spm_bpe.model'

# Create spm vocab
# (left commented out; the encode steps below read ${SPM_MODEL} directly)
# NOTE(review): this command reads train/in.tsv while the encode steps read
# data/train/in.tsv -- confirm which working directory it was meant to run in.
# spm_train --input=train/in.tsv --model_prefix=vocab_spm_bpe --model_type=bpe --vocab_size=50000 --pad_id 1 --bos_id 2 --eos_id 3

#######################################
# Encode one input file with the SentencePiece model.
# The result is written to a temporary sibling of the destination and renamed
# into place only when spm_encode succeeds, so a failed run never leaves a
# truncated or partial output file behind.
# Globals:   SPM_MODEL (read)
# Arguments: $1 - input file to encode
#            $2 - destination file for the encoded text
# Returns:   0 on success, non-zero if encoding or the final rename fails
#######################################
encode_split() {
  local src=$1
  local dst=$2
  local tmp="${dst}.tmp.$$"

  if ! spm_encode --model "${SPM_MODEL}" < "${src}" > "${tmp}"; then
    rm -f -- "${tmp}"
    return 1
  fi
  mv -- "${tmp}" "${dst}"
}

# Fail early with a clear message instead of three cryptic tool errors.
if [[ ! -f "${SPM_MODEL}" ]]; then
  printf 'error: SentencePiece model not found: %s\n' "${SPM_MODEL}" >&2
  exit 1
fi

encode_split data/train/in.tsv data/train.txt
encode_split data/dev-0/in.tsv data/valid.txt
encode_split data/test-A/in.tsv data/test.txt