{-# LANGUAGE Arrows #-}
{-# LANGUAGE NoMonomorphismRestriction #-}

-- Scraper for the "Teatr Lalek" periodical: walks the start page, picks up
-- every link to a PDF and turns each link into a 'ShadowItem'.

import ShadowLibrary.Core

import Text.XML.HXT.Core
import Text.XML.HXT.XPath
-- import Text.XML.HXT.Curl
import Data.List
import Data.List.Utils (replace)

import Text.Regex.Posix
import Text.Printf


-- | Arrow selecting every anchor whose @href@ contains @.pdf@.
--
-- Left without a type signature on purpose: the type is whatever
-- 'extractLinks' yields ('NoMonomorphismRestriction' keeps it general).
extractRecords = extractLinks "//a[contains(@href, '.pdf')]"


-- | Find the last four-digit year (1900-2099) occurring in a URL.
--
-- Returns 'Nothing' when the URL contains no such year, instead of crashing
-- on an empty match list the way a bare 'last' would.
extractYear :: String -> Maybe String
extractYear url =
  case reverse yearMatches of
    (latest : _) -> Just latest
    []           -> Nothing
  where
    yearMatches :: [String]
    yearMatches = getAllTextMatches (url =~ "(19[0-9][0-9]|20[0-9][0-9])")


-- | Build the 'ShadowItem' describing one PDF link.
--
-- The item is a @periodical@ in @pdf@ format whose final URL is the link
-- itself. 'originalDate' is the last year found in the URL, or 'Nothing'
-- when the URL carries no recognisable year.
toShadowItem :: String -> ShadowItem
toShadowItem url =
  (defaultShadowItem url [])
    { originalDate = extractYear url
    , itype        = "periodical"
    , format       = Just "pdf"
    , finalUrl     = url
    }


-- | Entry point: describe the library and extract items from its start page.
main = do
  let start = "http://polunima.pl/teatr-lalek/"
  let shadowLibrary =
        ShadowLibrary
          { logoUrl = Nothing
          , lname   = "Teatr Lalek"
          , abbrev  = "Teatr"
          , lLevel  = 0
          , webpage = start
          }
  extractItemsStartingFromUrl shadowLibrary start (extractRecords >>> arr toShadowItem)