#!/bin/sh
#
# Imports the opensubtitles_plen and opensubtitles_enpl corpora.
#
# For each corpus this runs ./addSources.py on the corpus' ids_sources.txt
# and then ./addFastAlignedTM.py on the cleaned source/target text, the
# source lemmas, the alignments and the ids file.
#
# Both helper scripts are invoked through relative paths, so this script
# must be started from the directory that contains them.
#
# NOTE(review): language ids 1 and 2 are swapped between the two corpora;
# presumably 1 = Polish and 2 = English -- confirm against the database.

# Stop at the first failing command or unset variable so that a translation
# memory is never added after its sources failed to load.
set -eu

# import_corpus NAME PATH SRC_LANG_ID TRG_LANG_ID
#
# Registers the sources listed in PATH/ids_sources.txt, then adds the
# fast-aligned translation memory NAME built from the files under PATH.
# Sets the globals CORPUS_NAME, CORPUS_PATH, SRC_LANG_ID and TRG_LANG_ID,
# exactly as the original straight-line script did.
import_corpus() {
  CORPUS_NAME=$1
  CORPUS_PATH=$2
  SRC_LANG_ID=$3
  TRG_LANG_ID=$4

  ./addSources.py "$CORPUS_PATH/ids_sources.txt"
  ./addFastAlignedTM.py \
    "$CORPUS_NAME" \
    "$CORPUS_PATH/src_clean.txt" \
    "$CORPUS_PATH/src_clean.lem" \
    "$SRC_LANG_ID" \
    "$CORPUS_PATH/trg_clean.txt" \
    "$TRG_LANG_ID" \
    "$CORPUS_PATH/alignments.txt" \
    "$CORPUS_PATH/ids_clean.txt"
}

import_corpus opensubtitles_plen /root/opensubtitles_pack/plen 1 2
import_corpus opensubtitles_enpl /root/opensubtitles_pack/enpl 2 1