This commit is contained in:
Kacper 2022-05-19 23:28:20 +02:00
parent 53fff21f4d
commit 6153264fcb
2 changed files with 8 additions and 5 deletions

View File

@ -28,7 +28,7 @@ toShadowItem ((url, articleTitle), yearlyTitle) =
date = getDate url
getDate url =
case url =~~ "/(19[0-9][0-9]|20[0-9][0-9])/" :: Maybe [[String]] of
case url =~~ "/(20[0-9][0-9])/" :: Maybe [[String]] of
Just [[_, year]] -> year
otherwise -> error $ "unexpected url: " ++ url

View File

@ -1,4 +1,3 @@
{-# LANGUAGE Arrows, NoMonomorphismRestriction #-}
import ShadowLibrary.Core
@ -14,15 +13,19 @@ import Text.Printf
-- Selects the records to harvest: the XPath picks anchors nested under table
-- cells whose href contains ".pdf".
-- NOTE(review): extractLinksWithText is defined outside this view (presumably
-- in ShadowLibrary.Core); confirm the exact shape of what it yields.
extractRecords = extractLinksWithText "//td/a[contains(@href,'.pdf')]"
-- Builds a ShadowItem from one extracted (url, link text) pair: it starts from
-- defaultShadowItem and overrides originalDate, itype ("periodical"),
-- format (Just "pdf") and finalUrl (the url itself).
-- NOTE(review): this span is a diff rendering without +/- markers, so two
-- variants of the function head, of the defaultShadowItem call and of the
-- `date` binding appear back to back; confirm which lines belong to the
-- final file before relying on it.
toShadowItem :: (String, String) -> ShadowItem
toShadowItem (url, text) =
(defaultShadowItem url text) {
toShadowItem (url, title) =
(defaultShadowItem url title) {
originalDate = Just date,
itype = "periodical",
format = Just "pdf",
finalUrl = url
}
where title = ""
date = ""
-- "%20" sequences in the url are replaced by spaces before getDate is applied.
date = getDate $ replace "%20" " " url
-- | Pull a publication year out of a URL.
--
-- The result is the first substring matching a year from 2000 through 2022
-- (pattern @202[0-2]|20[0-1][0-9]@). With a 'String' result the regex match
-- operator yields the empty string when nothing matches, so callers receive
-- @""@ rather than a failure in that case.
getDate :: String -> String
getDate url = url Text.Regex.Posix.=~ "(202[0-2]|20[0-1][0-9])"
main = do
let start = "https://www.pip.gov.pl/pl/inspektor-pracy/66546,archiwum-inspektora-pracy-.html"