concordia-server/mgiza-aligner/corpus-compilator/filter.sh
2017-07-26 13:29:22 +02:00

13 lines
458 B
Bash
Executable File

#!/bin/sh
# Driver for the corpus filtering step.
#
# Sequence:
#   1. `make clean-filtering`  (presumably removes earlier filtering output;
#      see the Makefile to confirm).
#   2. `make dictionaries/<dictionary>.lem` to (re)build the dictionary file.
#   3. get_corpus_lines.py is given the .lem dictionary and the corpus
#      report.txt; its stdout is captured into corpus_lines.txt.
#   4. compile.py is given the clean source/target files, corpus_lines.txt,
#      and the paths of the filtered source/target files (presumably its
#      outputs; verify against compile.py).
#
# All paths are relative, so the script must be started from the directory
# that holds the Makefile, both Python scripts, dictionaries/ and corpora/.

# Stop at the first failing step (and on any unset variable). Without this,
# a failed `make` or a crashed get_corpus_lines.py would still let compile.py
# run, using a missing dictionary or a truncated corpus_lines.txt.
set -eu

DICTIONARY_NAME=classyf_popular_medicine
CORPUS_NAME=opus

# Paths derived once so each command below names them consistently.
dictionary_lem="dictionaries/${DICTIONARY_NAME}.lem"
corpus_dir="corpora/${CORPUS_NAME}"

make clean-filtering
make "$dictionary_lem"

./get_corpus_lines.py "$dictionary_lem" "$corpus_dir/report.txt" \
  > "$corpus_dir/corpus_lines.txt"

./compile.py \
  "$corpus_dir/src_clean.txt" \
  "$corpus_dir/trg_clean.txt" \
  "$corpus_dir/corpus_lines.txt" \
  "$corpus_dir/src_filtered.txt" \
  "$corpus_dir/trg_filtered.txt"