# Word-alignment pipeline for a single parallel corpus kept in
# corpora/$(CORPUS_NAME)/.
#
# Build graph (every file lives in $(CORPUS_DIR)):
#   src.txt / trg.txt  --tokenizer-->           src.tok / trg.tok
#   src.tok / trg.tok  --sentence_lemmatizer--> src.lem / trg.lem
#   collect_dict.py                         --> src.dict / trg.dict
#   prepare_corpus.py                       --> *_clean.*, ids_clean.txt,
#                                               falign_corpus.txt
#   falign_corpus.txt  --fast_align-->          falign_result.txt
#   falign_result.txt + src_clean.lem --get_alignments.py--> alignments.txt
#
# All knobs below can be overridden on the command line, e.g.
#   make CORPUS_NAME=my_corpus SRC_LANG=de

# Remove a target whose recipe failed; several recipes write through `> $@`
# and would otherwise leave a truncated file that looks up to date.
.DELETE_ON_ERROR:

.DEFAULT_GOAL := all

SRC_LANG=pl
TRG_LANG=en
CORPUS_NAME=opensubtitles_sample
# Not referenced by any rule visible in this file; kept for compatibility.
SEPARATOR=@\#@
DICTIONARY_WEIGHT=3

# Directory holding every input and output of the pipeline.
CORPUS_DIR := corpora/$(CORPUS_NAME)

# Sentence tokenizer binary and its configuration file.
TOKENIZER := /usr/local/bin/concordia-sentence-tokenizer
TOKENIZER_CONFIG := /code/prod/resources/concordia-config/concordia.cfg

# Files written by a single run of prepare_corpus.py.
PREPARED_OUTPUTS := \
    $(CORPUS_DIR)/src_clean.txt \
    $(CORPUS_DIR)/src_clean.lem \
    $(CORPUS_DIR)/trg_clean.txt \
    $(CORPUS_DIR)/ids_clean.txt \
    $(CORPUS_DIR)/falign_corpus.txt

# Sentinel recording that prepare_corpus.py completed successfully.
PREPARE_STAMP := $(CORPUS_DIR)/.prepare_corpus.stamp

.PHONY: all clean

all: $(CORPUS_DIR)/alignments.txt \
     $(CORPUS_DIR)/src_clean.txt \
     $(CORPUS_DIR)/src_clean.lem \
     $(CORPUS_DIR)/trg_clean.txt

# The *.lem glob already covers src_clean.lem, src.lem and trg.lem.
# NOTE(review): *.tok files are generated by this Makefile but were never
# removed by `clean`; left as-is in case that is deliberate -- confirm.
clean:
	$(RM) $(CORPUS_DIR)/*.lem \
	      $(CORPUS_DIR)/*.dict \
	      $(CORPUS_DIR)/src_clean.txt \
	      $(CORPUS_DIR)/trg_clean.txt \
	      $(CORPUS_DIR)/ids_clean.txt \
	      $(CORPUS_DIR)/falign_corpus.txt \
	      $(CORPUS_DIR)/falign_result.txt \
	      $(CORPUS_DIR)/alignments.txt \
	      $(PREPARE_STAMP)

# Final product: get_alignments.py receives the fast_align result and the
# cleaned source lemmas (in that order) and prints the alignments to stdout.
$(CORPUS_DIR)/alignments.txt: $(CORPUS_DIR)/falign_result.txt $(CORPUS_DIR)/src_clean.lem
	./get_alignments.py $^ > $@

# Dictionaries for each translation direction. They have no prerequisites,
# so they are only built when missing (changing DICTIONARY_WEIGHT requires
# `make clean` or deleting the .dict files).
$(CORPUS_DIR)/src.dict:
	./collect_dict.py $(SRC_LANG) $(TRG_LANG) $(DICTIONARY_WEIGHT) > $@

$(CORPUS_DIR)/trg.dict:
	./collect_dict.py $(TRG_LANG) $(SRC_LANG) $(DICTIONARY_WEIGHT) > $@

# Run fast_align on the prepared corpus; its stdout is the result file.
$(CORPUS_DIR)/falign_result.txt: $(CORPUS_DIR)/falign_corpus.txt
	./fast_align -i $< -d -o -v > $@

# prepare_corpus.py writes all of $(PREPARED_OUTPUTS) in one invocation.
# Listing them as targets of one ordinary rule would create five independent
# rules and let `make -j` start the script several times at once, so the
# script is attached to a stamp file and the real outputs hang off the stamp.
#
# NOTE(review): the script is given src.norm / trg.norm, for which no rule is
# visible here; they are declared as prerequisites so that make knows about
# them -- confirm how they are produced.
$(PREPARE_STAMP): $(CORPUS_DIR)/src.txt $(CORPUS_DIR)/trg.txt \
                  $(CORPUS_DIR)/src.norm $(CORPUS_DIR)/trg.norm \
                  $(CORPUS_DIR)/ids.txt \
                  $(CORPUS_DIR)/src.lem $(CORPUS_DIR)/trg.lem \
                  $(CORPUS_DIR)/src.dict $(CORPUS_DIR)/trg.dict
	./prepare_corpus.py \
	    $(CORPUS_DIR)/src.norm $(CORPUS_DIR)/trg.norm \
	    $(CORPUS_DIR)/ids.txt \
	    $(CORPUS_DIR)/src.lem $(CORPUS_DIR)/trg.lem \
	    $(CORPUS_DIR)/src.dict $(CORPUS_DIR)/trg.dict \
	    $(CORPUS_DIR)/src_clean.txt $(CORPUS_DIR)/src_clean.lem \
	    $(CORPUS_DIR)/trg_clean.txt $(CORPUS_DIR)/ids_clean.txt \
	    $(CORPUS_DIR)/falign_corpus.txt \
	    $(SRC_LANG) $(TRG_LANG)
	@touch $@

# Empty recipe: the files are produced as a side effect of the stamp rule.
$(PREPARED_OUTPUTS): $(PREPARE_STAMP) ;

# Lemmatize the tokenized text; the language differs per side, hence two rules.
$(CORPUS_DIR)/src.lem: $(CORPUS_DIR)/src.tok
	./sentence_lemmatizer.py $< $(SRC_LANG) $@

$(CORPUS_DIR)/trg.lem: $(CORPUS_DIR)/trg.tok
	./sentence_lemmatizer.py $< $(TRG_LANG) $@

# Tokenize the raw text of both sides (stdin -> stdout). A static pattern rule
# keeps the rule limited to exactly these two files.
$(CORPUS_DIR)/src.tok $(CORPUS_DIR)/trg.tok: $(CORPUS_DIR)/%.tok: $(CORPUS_DIR)/%.txt
	$(TOKENIZER) -c $(TOKENIZER_CONFIG) < $< > $@