petite-difference-challenge.../1-create-data.sh

12 lines
415 B
Bash
Executable File

#!/usr/bin/env bash
#
# Encode the raw train/dev/test inputs with a SentencePiece model.
#
# Reads:    data/train/in.tsv, data/dev-0/in.tsv, data/test-A/in.tsv
# Writes:   data/train.txt,    data/valid.txt,    data/test.txt
# Requires: spm_encode on PATH and vocab_spm_bpe.model in the current
#           directory (all paths are relative to the working directory).
set -euo pipefail
set -x

readonly MODEL='vocab_spm_bpe.model'

# Create spm vocab
# The training command is kept commented out for reference.
# NOTE(review): its --input path (train/in.tsv) lacks the data/ prefix used by
# the encode steps below -- confirm the intended path before re-running it.
# spm_train --input=train/in.tsv --model_prefix=vocab_spm_bpe --model_type=bpe --vocab_size=50000 --pad_id 1 --bos_id 2 --eos_id 3

# Print a message to stderr and abort the script.
die() {
  printf 'error: %s\n' "$*" >&2
  exit 1
}

#######################################
# Encode one input file with the SentencePiece model.
# Globals:   MODEL (read)
# Arguments: $1 - path of the raw input file
#            $2 - path of the encoded output file
# Outputs:   writes the encoded text to $2
# Returns:   exits the script with status 1 on any failure
#######################################
encode_split() {
  local src=$1
  local dst=$2

  # Check the input first: the cleanup below must only fire when spm_encode
  # itself (or the output redirect) failed, never because the input is missing,
  # otherwise a previously generated good output would be deleted.
  [[ -r "$src" ]] || die "cannot read input file: $src"

  if ! spm_encode --model "$MODEL" < "$src" > "$dst"; then
    # The redirect has already truncated $dst; drop the partial file so later
    # pipeline steps cannot mistake it for a complete encoding.
    rm -f -- "$dst"
    die "spm_encode failed for $src"
  fi
}

# Fail early with a clear message instead of part-way through the encoding.
command -v spm_encode > /dev/null || die 'spm_encode not found on PATH'
[[ -f "$MODEL" ]] || die "model file not found: $MODEL"

encode_split data/train/in.tsv data/train.txt
encode_split data/dev-0/in.tsv data/valid.txt
encode_split data/test-A/in.tsv data/test.txt