From 73c2bdd0346c101eb5f137c804fd77322eb134a9 Mon Sep 17 00:00:00 2001 From: AdamOsiowy123 Date: Sun, 10 Apr 2022 11:57:49 +0200 Subject: [PATCH] version without file size --- ShadowLibrary/Core.hs | 14 +++++++------- app/miastobierun.hs | 19 +++++++++++++------ shadow-library.cabal | 2 ++ 3 files changed, 22 insertions(+), 13 deletions(-) diff --git a/ShadowLibrary/Core.hs b/ShadowLibrary/Core.hs index 678df33..2cbc0c6 100644 --- a/ShadowLibrary/Core.hs +++ b/ShadowLibrary/Core.hs @@ -33,7 +33,7 @@ import Data.Tree.NTree.TypeDefs import Data.Maybe import Control.Monad.Trans import Text.XML.HXT.XPath --- import Text.XML.HXT.Curl +import Text.XML.HXT.Curl import Text.XML.HXT.HTTP import Text.Regex.TDFA @@ -64,8 +64,8 @@ downloadDocument = readFromDocument [withParseHTML yes, withEncodingErrors no, withPreserveComment yes, withStrictInput yes, - withHTTP [] --- withCurl [("curl--user-agent","AMU Digital Libraries Indexing Agent")] +-- withHTTP [] + withCurl [("curl--user-agent","AMU Digital Libraries Indexing Agent")] ] downloadDocumentWithEncoding enc = readFromDocument [withParseHTML yes, @@ -73,13 +73,13 @@ downloadDocumentWithEncoding enc = readFromDocument [withParseHTML yes, withEncodingErrors no, withPreserveComment yes, withInputEncoding enc, - withHTTP []] --- withCurl []] +-- withHTTP []] + withCurl []] downloadXmlDocument = readFromDocument [withWarnings no, withEncodingErrors no, - withHTTP []] --- withCurl [] ] +-- withHTTP []] + withCurl [] ] data ShadowLibrary = ShadowLibrary { logoUrl :: Maybe String, diff --git a/app/miastobierun.hs b/app/miastobierun.hs index f73c32f..5e9c064 100644 --- a/app/miastobierun.hs +++ b/app/miastobierun.hs @@ -20,15 +20,22 @@ toShadowItem (url, articleTitle) = originalDate = Just date, itype = "periodical", format = Just "pdf", - finalUrl = url + finalUrl = url, + description = Just desc } - where title = articleTitle - date = getDate url + where title = "Miasto Bierun: " ++ articleTitle + date = getDate articleTitle + desc = getArticleNr articleTitle -getDate url = - case url =~~ "/(19[0-9][0-9]|20[0-9][0-9])/" :: Maybe [[String]] of +getDate title = + case title =~~ "(19[0-9][0-9]|20[0-9][0-9])" :: Maybe [[String]] of Just [[_, year]] -> year - otherwise -> error $ "unexpected url: " ++ url + otherwise -> "No date for: " ++ title + +getArticleNr title = + case title =~~ "([0-9][0-9]/)" :: Maybe [[String]] of + Just [[_, nr]] -> "Article nr: " ++ (replace "/" "" nr) + otherwise -> "No article nr for: " ++ title main = do diff --git a/shadow-library.cabal b/shadow-library.cabal index 3660927..01b6ee2 100644 --- a/shadow-library.cabal +++ b/shadow-library.cabal @@ -20,6 +20,7 @@ library , HTTP , hxt , hxt-http + , hxt-curl , hxt-xpath , MissingH , monad-logger @@ -54,6 +55,7 @@ executable almanachmuszyny build-depends: base , hxt , hxt-xpath + , hxt-curl , MissingH , regex-posix , shadow-library