From b81c455a1b7bcf600c73d1296dc54c3aa8db813d Mon Sep 17 00:00:00 2001
From: Filip Gralinski
Date: Mon, 6 Jun 2022 19:59:23 +0200
Subject: [PATCH] Use different sentence extraction method for amu/pl_PL

As Polish letters were broken in PDF
---
 {{cookiecutter.paper_repo_name}}/Makefile | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/{{cookiecutter.paper_repo_name}}/Makefile b/{{cookiecutter.paper_repo_name}}/Makefile
index 8c68b88..c12faf5 100644
--- a/{{cookiecutter.paper_repo_name}}/Makefile
+++ b/{{cookiecutter.paper_repo_name}}/Makefile
@@ -72,8 +72,22 @@ $(PAPER_ID).pdf: $(PAPER_ID).tex preamble.tex metadata.tex $(CONTENT_TEX_SOURCES
 abstract.txt: abstract.tex
 	cat $< | perl -pne 's/noqa\{[^}]+\}//g' | detex > $@
 
+# Sentences come from the PDF by default; amu/pl_PL reads the TeX source instead, as Polish letters were broken in PDF.
+SENTENCE_EXTRACTION_METHOD=from-pdf
+
+ifeq ($(LATEX_TEMPLATE),amu)
+ifeq ($(AUTOZOIL_LOCALE),pl_PL)
+SENTENCE_EXTRACTION_METHOD=from-tex
+endif
+endif
+
+ifeq ($(SENTENCE_EXTRACTION_METHOD),from-tex)
+sentences.txt: $(PAPER_ID).tex helpers/get-sentences.sh helpers/pdf-to-plain-text.sh helpers/strip-references.pl
+	bash helpers/get-sentences.sh $< from-tex > $@
+else
 sentences.txt: $(PAPER_ID).pdf helpers/get-sentences.sh helpers/pdf-to-plain-text.sh helpers/strip-references.pl
 	bash helpers/get-sentences.sh $< > $@
+endif
 
 scores/%.txt:
 	mkdir -p scores