version without file size

This commit is contained in:
AdamOsiowy123 2022-04-10 11:57:49 +02:00
parent aab2421c7a
commit 73c2bdd034
3 changed files with 22 additions and 13 deletions

View File

@ -33,7 +33,7 @@ import Data.Tree.NTree.TypeDefs
import Data.Maybe import Data.Maybe
import Control.Monad.Trans import Control.Monad.Trans
import Text.XML.HXT.XPath import Text.XML.HXT.XPath
-- import Text.XML.HXT.Curl import Text.XML.HXT.Curl
import Text.XML.HXT.HTTP import Text.XML.HXT.HTTP
import Text.Regex.TDFA import Text.Regex.TDFA
@ -64,8 +64,8 @@ downloadDocument = readFromDocument [withParseHTML yes,
withEncodingErrors no, withEncodingErrors no,
withPreserveComment yes, withPreserveComment yes,
withStrictInput yes, withStrictInput yes,
withHTTP [] -- withHTTP []
-- withCurl [("curl--user-agent","AMU Digital Libraries Indexing Agent")] withCurl [("curl--user-agent","AMU Digital Libraries Indexing Agent")]
] ]
downloadDocumentWithEncoding enc = readFromDocument [withParseHTML yes, downloadDocumentWithEncoding enc = readFromDocument [withParseHTML yes,
@ -73,13 +73,13 @@ downloadDocumentWithEncoding enc = readFromDocument [withParseHTML yes,
withEncodingErrors no, withEncodingErrors no,
withPreserveComment yes, withPreserveComment yes,
withInputEncoding enc, withInputEncoding enc,
withHTTP []] -- withHTTP []]
-- withCurl []] withCurl []]
downloadXmlDocument = readFromDocument [withWarnings no, downloadXmlDocument = readFromDocument [withWarnings no,
withEncodingErrors no, withEncodingErrors no,
withHTTP []] -- withHTTP []]
-- withCurl [] ] withCurl [] ]
data ShadowLibrary = ShadowLibrary { logoUrl :: Maybe String, data ShadowLibrary = ShadowLibrary { logoUrl :: Maybe String,

View File

@ -20,15 +20,22 @@ toShadowItem (url, articleTitle) =
originalDate = Just date, originalDate = Just date,
itype = "periodical", itype = "periodical",
format = Just "pdf", format = Just "pdf",
finalUrl = url finalUrl = url,
description = Just desc
} }
where title = articleTitle where title = "Miasto Bierun: " ++ articleTitle
date = getDate url date = getDate articleTitle
desc = getArticleNr articleTitle
getDate url = getDate title =
case url =~~ "/(19[0-9][0-9]|20[0-9][0-9])/" :: Maybe [[String]] of case title =~~ "(19[0-9][0-9]|20[0-9][0-9])" :: Maybe [[String]] of
Just [[_, year]] -> year Just [[_, year]] -> year
otherwise -> error $ "unexpected url: " ++ url otherwise -> "No date for: " ++ title
-- | Build a human-readable article-number description from a title.
--
-- The title is searched for two digits immediately followed by a slash
-- (the pattern @([0-9][0-9]\/)@). When the search yields exactly one match,
-- the captured text with the slash removed is returned prefixed with
-- @"Article nr: "@. Any other search result falls through to a
-- @"No article nr for: "@ message that carries the original title.
getArticleNr :: String -> String
getArticleNr title =
  case title =~~ "([0-9][0-9]/)" :: Maybe [[String]] of
    -- Accept only a single match of the shape [whole match, capture group].
    Just [[_, nr]] -> "Article nr: " ++ replace "/" "" nr
    -- Use a real wildcard: `otherwise` in pattern position only binds a fresh
    -- variable that shadows the Prelude guard helper of the same name.
    _ -> "No article nr for: " ++ title
main = do main = do

View File

@ -20,6 +20,7 @@ library
, HTTP , HTTP
, hxt , hxt
, hxt-http , hxt-http
, hxt-curl
, hxt-xpath , hxt-xpath
, MissingH , MissingH
, monad-logger , monad-logger
@ -54,6 +55,7 @@ executable almanachmuszyny
build-depends: base build-depends: base
, hxt , hxt
, hxt-xpath , hxt-xpath
, hxt-curl
, MissingH , MissingH
, regex-posix , regex-posix
, shadow-library , shadow-library