diff --git a/app/almanachmuszyny.hs b/app/almanachmuszyny.hs index 2569177..8d1f9e5 100644 --- a/app/almanachmuszyny.hs +++ b/app/almanachmuszyny.hs @@ -28,7 +28,7 @@ toShadowItem ((url, articleTitle), yearlyTitle) = date = getDate url getDate url = - case url =~~ "/(19[0-9][0-9]|20[0-9][0-9])/" :: Maybe [[String]] of + case url =~~ "/(20[0-9][0-9])/" :: Maybe [[String]] of Just [[_, year]] -> year otherwise -> error $ "unexpected url: " ++ url diff --git a/app/inspektoratpracy.hs b/app/inspektoratpracy.hs index 5b44012..d4a95f1 100644 --- a/app/inspektoratpracy.hs +++ b/app/inspektoratpracy.hs @@ -1,4 +1,3 @@ - {-# LANGUAGE Arrows, NoMonomorphismRestriction #-} import ShadowLibrary.Core @@ -14,15 +13,19 @@ import Text.Printf extractRecords = extractLinksWithText "//td/a[contains(@href,'.pdf')]" toShadowItem :: (String, String) -> ShadowItem -toShadowItem (url, text) = - (defaultShadowItem url text) { +toShadowItem (url, title) = + (defaultShadowItem url title) { originalDate = Just date, itype = "periodical", format = Just "pdf", finalUrl = url } where title = "" - date = "" + date = getDate $ replace "%20" " " url + +getDate :: String -> String +getDate url = date where + date = url Text.Regex.Posix.=~ "(202[0-2]|20[0-1][0-9])" :: String main = do let start = "https://www.pip.gov.pl/pl/inspektor-pracy/66546,archiwum-inspektora-pracy-.html"