#!/usr/bin/env bash
#
# Runs fairseq-preprocess over the train/valid/test text files found under
# the "data" directory and writes the result to data-bin/lm-spm-bpe.
#
# Usage: run from the directory that contains data/ (paths are relative to
#        the current working directory).
# Requires: fairseq-preprocess on PATH.

# Abort on the first failing command, on use of an unset variable, and on a
# failure in any pipeline stage.
set -euo pipefail
# Trace every command as it runs.
set -x

# Directory holding train.txt, valid.txt and test.txt.
TEXT=data

# Arguments are collected in an array so each one stays a single word.
preprocess_args=(
  --only-source
  --nwordssrc 50000
  --trainpref "${TEXT}/train.txt"
  --validpref "${TEXT}/valid.txt"
  --testpref "${TEXT}/test.txt"
  --destdir data-bin/lm-spm-bpe
  --workers 8
)

fairseq-preprocess "${preprocess_args[@]}"