Fix grep
This commit is contained in:
parent
cdd82857d5
commit
cce71acc1b
@@ -6,10 +6,10 @@ method="$2"
|
|||||||
extract_text() {
|
extract_text() {
|
||||||
if [[ "$method" == "from-tex" ]]
|
if [[ "$method" == "from-tex" ]]
|
||||||
then
|
then
|
||||||
detex "$input_file" | egrep '\S' | grep -v 'unsrt' | perl -pne 's/^\s+| +$//g'
|
detex "$input_file" | grep -E '\S' | grep -v 'unsrt' | perl -pne 's/^\s+| +$//g'
|
||||||
else
|
else
|
||||||
bash helpers/pdf-to-plain-text.sh "$input_file" | perl helpers/strip-references.pl | perl -pne 'chomp $_; $_.=" "'
|
bash helpers/pdf-to-plain-text.sh "$input_file" | perl helpers/strip-references.pl | perl -pne 'chomp $_; $_.=" "'
|
||||||
fi
|
fi
|
||||||
}
|
}
|
||||||
|
|
||||||
extract_text | python3 -m syntok.segmenter | egrep '\S'
|
extract_text | python3 -m syntok.segmenter | grep -E '\S'
|
||||||
|
Loading…
Reference in New Issue
Block a user