diff --git a/app/parafiasrem.hs b/app/parafiasrem.hs
new file mode 100644
index 0000000..1510f48
--- /dev/null
+++ b/app/parafiasrem.hs
@@ -0,0 +1,47 @@
+{-# LANGUAGE Arrows, NoMonomorphismRestriction #-}
+import ShadowLibrary.Core
+
+import Text.XML.HXT.Core
+import Text.XML.HXT.XPath
+-- import Text.XML.HXT.Curl
+import Data.List
+import Data.List.Utils (replace)
+
+import Text.Regex.Posix
+import Text.Printf
+
+
+-- | Select every @a@ element whose href contains @index.php?download@ and tidy the text that comes with it.
+extractRecords = extractLinksWithText "//a[contains(@href, 'index.php?download')]" -- address-title pairs
+                 >>> second (arr $ replace "\r\n " " ") -- clean superfluous whitespace out of the second element of the pair, i.e. the title
+
+-- | Turn a (url, title) pair into a "periodical" item in "pdf" format whose final url is the link itself.
+toShadowItem :: (String, String) -> ShadowItem
+toShadowItem (url, articleTitle) =
+  (defaultShadowItem url title) {
+    originalDate = Just date,
+    itype = "periodical",
+    format = Just "pdf",
+    finalUrl = url
+    }
+  where title = "Parafia Srem " ++ articleTitle
+        date = getDate url
+
+-- | Extract the four-digit year (19xx or 20xx) that appears between two slashes in the url.
+-- The first such segment is used; a url without one aborts with an "unexpected url" error.
+getDate :: String -> String
+getDate url =
+  case url =~~ "/(19[0-9][0-9]|20[0-9][0-9])/" :: Maybe [[String]] of
+    Just ([_, year] : _) -> year  -- each match is [whole match, captured year]; later matches are ignored
+    _ -> error $ "unexpected url: " ++ url
+
+
+-- | Describe the library and run the extraction from the start page, converting each record to an item.
+main = do
+    let start = "https://sremfara.pl/miesiecznik_30.html"
+    let shadowLibrary = ShadowLibrary {logoUrl=Nothing,
+                                       lname="Parafia Srem",
+                                       abbrev="ParSre",
+                                       lLevel=0,
+                                       webpage=start}
+    extractItemsStartingFromUrl shadowLibrary start (extractRecords >>> arr toShadowItem)
diff --git a/shadow-library.cabal b/shadow-library.cabal
index 1791faa..ac46334 100644
--- a/shadow-library.cabal
+++ b/shadow-library.cabal
@@ -71,6 +71,17 @@ executable jujitsu
                      , shadow-library
   default-language:    Haskell2010
 
+executable parafiasrem
+  hs-source-dirs:      app
+  main-is:             parafiasrem.hs
+  ghc-options:         -threaded -rtsopts -with-rtsopts=-N
+  build-depends:       base
+                     , hxt
+                     , hxt-xpath
+                     , MissingH
+                     , regex-posix
+                     , shadow-library
+  default-language:    Haskell2010
 
 source-repository head
   type:     git