From cce71acc1b121bf310181278379721af5d5e1e30 Mon Sep 17 00:00:00 2001 From: Filip Gralinski Date: Wed, 9 Nov 2022 16:55:56 +0100 Subject: [PATCH] Fix grep --- {{cookiecutter.paper_repo_name}}/helpers/get-sentences.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/{{cookiecutter.paper_repo_name}}/helpers/get-sentences.sh b/{{cookiecutter.paper_repo_name}}/helpers/get-sentences.sh index a462187..c05dec6 100755 --- a/{{cookiecutter.paper_repo_name}}/helpers/get-sentences.sh +++ b/{{cookiecutter.paper_repo_name}}/helpers/get-sentences.sh @@ -6,10 +6,10 @@ method="$2" extract_text() { if [[ "$method" == "from-tex" ]] then - detex "$input_file" | egrep '\S' | grep -v 'unsrt' | perl -pne 's/^\s+| +$//g' + detex "$input_file" | grep -E '\S' | grep -v 'unsrt' | perl -pne 's/^\s+| +$//g' else bash helpers/pdf-to-plain-text.sh "$input_file" | perl helpers/strip-references.pl | perl -pne 'chomp $_; $_.=" "' fi } -extract_text | python3 -m syntok.segmenter | egrep '\S' +extract_text | python3 -m syntok.segmenter | grep -E '\S'