#!/bin/sh
#
# tests/addOpenSubtitles.sh
#
# Imports the two OpenSubtitles corpus packs (plen and enpl) by running, for
# each pack, ./addSources.py on its ids_sources.txt and then
# ./addFastAlignedTM.py on its cleaned source/target text, lemmatized source,
# alignments and ids files.
#
# Usage:
#   ./addOpenSubtitles.sh
#
# Must be started from the directory that contains addSources.py and
# addFastAlignedTM.py, because both are invoked via a relative "./" path.
#
# Environment:
#   OPENSUBTITLES_PACK_DIR  Directory holding the "plen" and "enpl" pack
#                           sub-directories (default: /root/opensubtitles_pack).

# Abort on the first failing command or on use of an unset variable, so that
# a translation memory is never loaded after its sources failed to import.
set -eu

PACK_DIR=${OPENSUBTITLES_PACK_DIR:-/root/opensubtitles_pack}

# import_corpus NAME PATH SRC_LANG_ID TRG_LANG_ID
#
# Registers the sources listed in PATH/ids_sources.txt and then loads the
# aligned corpus files found under PATH under the name NAME.
#
# Arguments:
#   $1 - corpus name passed to addFastAlignedTM.py
#   $2 - directory containing the corpus pack files
#   $3 - source language id
#   $4 - target language id
# NOTE(review): ids 1 and 2 presumably denote Polish and English
# respectively (plen uses 1->2, enpl uses 2->1) - confirm against the
# language table used by addFastAlignedTM.py.
import_corpus() {
  corpus_name=$1
  corpus_path=$2
  src_lang_id=$3
  trg_lang_id=$4

  ./addSources.py "$corpus_path/ids_sources.txt"

  ./addFastAlignedTM.py \
    "$corpus_name" \
    "$corpus_path/src_clean.txt" \
    "$corpus_path/src_clean.lem" \
    "$src_lang_id" \
    "$corpus_path/trg_clean.txt" \
    "$trg_lang_id" \
    "$corpus_path/alignments.txt" \
    "$corpus_path/ids_clean.txt"
}

import_corpus opensubtitles_plen "$PACK_DIR/plen" 1 2
import_corpus opensubtitles_enpl "$PACK_DIR/enpl" 2 1