temporal-t5/1-process.sh

12 lines
503 B
Bash

# Build shuffled dev/train files from the raw TSV dump.
#
# Steps:
#   1. Take the first ${dev_lines} lines of the dump as dev.txt and the last
#      ${train_lines} lines as train.txt.
#      NOTE(review): the two splits are disjoint only if the dump has at
#      least dev_lines + train_lines lines -- confirm against the data.
#   2. Stream each split through append-date.py using GNU parallel.
#   3. Shuffle the processed lines into <split>-splitted-shuf.txt.
#   4. Delete the unshuffled intermediate files.
#
# Requires: GNU parallel, python, and append-date.py in the working directory.

# Stop at the first failing step so a broken stage never feeds partial or
# empty data into the following ones.
set -euo pipefail

readonly dump_file='challenging-america-full-train-dump-2021-10-26.tsv'
readonly dev_lines=20000
readonly train_lines=317833
readonly n_jobs=50

head -n "${dev_lines}" "${dump_file}" > dev.txt
tail -n "${train_lines}" "${dump_file}" > train.txt

for split in dev train; do
  # --pipe splits stdin into chunks and feeds each chunk to one job;
  # '{#}' is replaced by GNU parallel with the job's sequence number.
  # Output order across jobs is not fixed, which is fine because the
  # result is shuffled right after.
  parallel --pipe -j "${n_jobs}" python append-date.py '{#}' \
    < "${split}.txt" > "${split}-splitted.txt"
  shuf "${split}-splitted.txt" > "${split}-splitted-shuf.txt"
done

# Remove the unshuffled intermediates. Each file is named exactly once:
# repeating a name makes rm fail on the already-deleted file and return a
# non-zero status. dev.txt and train.txt are intentionally kept.
rm -- dev-splitted.txt train-splitted.txt