added lemmatization
This commit is contained in:
parent
015a916d20
commit
4883cce8a5
@ -8,6 +8,7 @@ all: corpora/$(CORPUS_NAME)/giza.cfg corpora/$(CORPUS_NAME)/src.low_trg.low.cooc
|
|||||||
|
|
||||||
clean:
|
clean:
|
||||||
rm -f corpora/$(CORPUS_NAME)/*.tok
|
rm -f corpora/$(CORPUS_NAME)/*.tok
|
||||||
|
rm -f corpora/$(CORPUS_NAME)/*.lem
|
||||||
rm -f corpora/$(CORPUS_NAME)/*.low
|
rm -f corpora/$(CORPUS_NAME)/*.low
|
||||||
rm -f corpora/$(CORPUS_NAME)/*.classes
|
rm -f corpora/$(CORPUS_NAME)/*.classes
|
||||||
rm -f corpora/$(CORPUS_NAME)/*.classes.cats
|
rm -f corpora/$(CORPUS_NAME)/*.classes.cats
|
||||||
@ -29,9 +30,16 @@ corpora/$(CORPUS_NAME)/src.low_trg.low.snt corpora/$(CORPUS_NAME)/trg.low_src.lo
|
|||||||
corpora/$(CORPUS_NAME)/%.classes: corpora/$(CORPUS_NAME)/%.low
|
corpora/$(CORPUS_NAME)/%.classes: corpora/$(CORPUS_NAME)/%.low
|
||||||
mgiza/mgizapp/bin/mkcls -n10 -p$< -V$@
|
mgiza/mgizapp/bin/mkcls -n10 -p$< -V$@
|
||||||
|
|
||||||
corpora/$(CORPUS_NAME)/%.low: corpora/$(CORPUS_NAME)/%.tok
|
corpora/$(CORPUS_NAME)/%.low: corpora/$(CORPUS_NAME)/%.lem
|
||||||
tr '[:upper:]' '[:lower:]' < $< > $@
|
tr '[:upper:]' '[:lower:]' < $< > $@
|
||||||
|
|
||||||
|
corpora/$(CORPUS_NAME)/trg.lem: corpora/$(CORPUS_NAME)/trg.tok
|
||||||
|
mono LemmaGenSentenceLemmatizer/LemmaGenSentenceLemmatizer/bin/Debug/LemmaGenSentenceLemmatizer.exe $(TRG_LANG) < $< > $@
|
||||||
|
|
||||||
|
|
||||||
|
corpora/$(CORPUS_NAME)/src.lem: corpora/$(CORPUS_NAME)/src.tok
|
||||||
|
mono LemmaGenSentenceLemmatizer/LemmaGenSentenceLemmatizer/bin/Debug/LemmaGenSentenceLemmatizer.exe $(SRC_LANG) < $< > $@
|
||||||
|
|
||||||
corpora/$(CORPUS_NAME)/src.tok: corpora/$(CORPUS_NAME)/src.txt
|
corpora/$(CORPUS_NAME)/src.tok: corpora/$(CORPUS_NAME)/src.txt
|
||||||
europarl/tools/tokenizer.perl -l $(SRC_LANG) < $< > $@
|
europarl/tools/tokenizer.perl -l $(SRC_LANG) < $< > $@
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user