forked from filipg/twilight-library
date fix
This commit is contained in:
parent
53fff21f4d
commit
6153264fcb
@ -28,7 +28,7 @@ toShadowItem ((url, articleTitle), yearlyTitle) =
|
||||
date = getDate url
|
||||
|
||||
-- Pulls a four-digit year out of `url`, where the year must appear as a
-- slash-delimited path segment ("/YYYY/"), and returns it as a String.
-- The `Just [[_, year]]` pattern accepts only a result holding exactly one
-- match with one capture group; every other result falls through to the
-- last alternative, which calls `error` with "unexpected url: " plus the url.
-- NOTE(review): this span is a diff rendering. Two alternative `case` heads
-- are shown below (one accepting 19xx and 20xx, one accepting only 20xx);
-- presumably only one of them exists in the real file - confirm which.
-- NOTE(review): `otherwise` in a case alternative is a fresh variable pattern
-- that matches anything (it is not the Prelude guard here); `_` is the
-- conventional way to write this catch-all.
getDate url =
|
||||
case url =~~ "/(19[0-9][0-9]|20[0-9][0-9])/" :: Maybe [[String]] of
|
||||
case url =~~ "/(20[0-9][0-9])/" :: Maybe [[String]] of
|
||||
Just [[_, year]] -> year
|
||||
otherwise -> error $ "unexpected url: " ++ url
|
||||
|
||||
|
@ -1,4 +1,3 @@
|
||||
|
||||
{-# LANGUAGE Arrows, NoMonomorphismRestriction #-}
|
||||
import ShadowLibrary.Core
|
||||
|
||||
@ -14,15 +13,19 @@ import Text.Printf
|
||||
-- Record extractor for the archive page: the XPath selects <a> elements that
-- are children of <td> cells and whose href contains ".pdf".
-- NOTE(review): `extractLinksWithText` is defined outside this view
-- (presumably in ShadowLibrary.Core); it presumably yields (url, link text)
-- pairs, matching the argument of toShadowItem - confirm.
extractRecords = extractLinksWithText "//td/a[contains(@href,'.pdf')]"
|
||||
|
||||
-- Builds the ShadowItem for one extracted (url, text) pair: starts from
-- `defaultShadowItem` and overrides originalDate (Just date), itype
-- ("periodical"), format (Just "pdf") and finalUrl (the url itself).
-- NOTE(review): this span is a diff rendering, so removed and added lines are
-- interleaved (two function heads, two `date` bindings). Read it as two
-- versions of the function, not as one definition.
-- NOTE(review): in the version that derives `date` with getDate, "%20" is
-- replaced by a space before the year lookup. `replace` is defined outside
-- this view - presumably a plain substring replacement; confirm.
toShadowItem :: (String, String) -> ShadowItem
|
||||
toShadowItem (url, text) =
|
||||
(defaultShadowItem url text) {
|
||||
toShadowItem (url, title) =
|
||||
(defaultShadowItem url title) {
|
||||
originalDate = Just date,
|
||||
itype = "periodical",
|
||||
format = Just "pdf",
|
||||
finalUrl = url
|
||||
}
|
||||
where title = ""
|
||||
date = ""
|
||||
date = getDate $ replace "%20" " " url
|
||||
|
||||
-- Extracts the publication year from a link: returns the first substring of
-- `url` that looks like a year in the range 2000-2099.
--
-- The accepted range is deliberately the whole 20xx century. A pattern capped
-- at a fixed year stops matching once the archive publishes newer issues, and
-- those items then silently receive an empty date.
--
-- Returns "" when no such substring exists (the String result of `=~` is the
-- matched text, or the empty string on failure), so callers never crash here.
--
-- NOTE(review): the match is not anchored, so a "20dd" digit run that occurs
-- earlier in the url (for example inside a numeric id) wins over the real
-- year. Anchor the pattern if the url layout allows it.
getDate :: String -> String
getDate url = url Text.Regex.Posix.=~ "(20[0-9][0-9])" :: String
|
||||
|
||||
main = do
|
||||
let start = "https://www.pip.gov.pl/pl/inspektor-pracy/66546,archiwum-inspektora-pracy-.html"
|
||||
|
Loading…
Reference in New Issue
Block a user