concordia-docker/tools/addOpenSubtitles.sh

20 lines
708 B
Bash
Raw Normal View History

2019-05-19 13:49:02 +02:00
#!/bin/sh
#
# Loads the two OpenSubtitles corpora found under /root/opensubtitles_pack
# ("plen" and "enpl") by calling, for each corpus:
#   1. ./addSources.py        with the corpus' ids_sources.txt
#   2. ./addFastAlignedTM.py  with the cleaned source/target text, the source
#                             lemma file, the alignments and the ids file
#
# Both helper scripts are invoked via "./", so this script must be started
# from the directory that contains them.
#
# NOTE(review): the language IDs 1 and 2 are passed through verbatim; judging
# by the corpus names they presumably stand for Polish and English - confirm
# against the language table used by the helper scripts.

# Stop at the first failing step (-e) and on any unset variable (-u), so a
# failed import is reported through the exit status instead of being ignored.
set -eu

#######################################
# Registers the sources of one corpus and then adds its aligned TM.
# Arguments:
#   $1 - corpus name handed to addFastAlignedTM.py
#   $2 - directory holding the corpus files
#   $3 - source language id
#   $4 - target language id
# Returns:
#   non-zero (aborting the script under `set -e`) if either helper fails
#######################################
add_corpus() {
  # Plain assignments: `local` is not POSIX and the shebang is /bin/sh.
  corpus_name=$1
  corpus_path=$2
  src_lang_id=$3
  trg_lang_id=$4

  ./addSources.py "$corpus_path/ids_sources.txt"
  ./addFastAlignedTM.py \
    "$corpus_name" \
    "$corpus_path/src_clean.txt" \
    "$corpus_path/src_clean.lem" \
    "$src_lang_id" \
    "$corpus_path/trg_clean.txt" \
    "$trg_lang_id" \
    "$corpus_path/alignments.txt" \
    "$corpus_path/ids_clean.txt"
}

# The two corpora use swapped source/target language ids.
add_corpus opensubtitles_plen /root/opensubtitles_pack/plen 1 2
add_corpus opensubtitles_enpl /root/opensubtitles_pack/enpl 2 1