From 2cf1db1275fb528de113b1e7e0f454b3f5cff10c Mon Sep 17 00:00:00 2001 From: Mariusz B Date: Sun, 18 Apr 2021 19:42:41 +0000 Subject: [PATCH] Remove "#page=1" fragment from extracted URLs --- app/archiwumharcerskie.hs | 1 + 1 file changed, 1 insertion(+) diff --git a/app/archiwumharcerskie.hs b/app/archiwumharcerskie.hs index 6759e46..34bfd5b 100644 --- a/app/archiwumharcerskie.hs +++ b/app/archiwumharcerskie.hs @@ -15,6 +15,7 @@ import Text.Printf extractRecords = extractLinksWithText "//nav//a[contains(@href,'title=Kategoria:')]" >>> first (extractLinksWithText "//a[contains(@href,'title=Plik') and contains(@href,'pdf')]") >>> first (first (extractLinksGeneralized "//div[@id='file']//iframe" "src")) + >>> first (first (arr $ replace "#page=1" "")) -- proby pobrania pdf i jpg --extractRecords = extractLinksWithText "//nav//a[contains(@href,'title=Kategoria:14_WDH')]"