version without file size

This commit is contained in:
AdamOsiowy123 2022-04-10 11:57:49 +02:00
parent aab2421c7a
commit 73c2bdd034
3 changed files with 22 additions and 13 deletions

View File

@ -33,7 +33,7 @@ import Data.Tree.NTree.TypeDefs
import Data.Maybe
import Control.Monad.Trans
import Text.XML.HXT.XPath
-- import Text.XML.HXT.Curl
import Text.XML.HXT.Curl
import Text.XML.HXT.HTTP
import Text.Regex.TDFA
@ -64,8 +64,8 @@ downloadDocument = readFromDocument [withParseHTML yes,
withEncodingErrors no,
withPreserveComment yes,
withStrictInput yes,
withHTTP []
-- withCurl [("curl--user-agent","AMU Digital Libraries Indexing Agent")]
-- withHTTP []
withCurl [("curl--user-agent","AMU Digital Libraries Indexing Agent")]
]
downloadDocumentWithEncoding enc = readFromDocument [withParseHTML yes,
@ -73,13 +73,13 @@ downloadDocumentWithEncoding enc = readFromDocument [withParseHTML yes,
withEncodingErrors no,
withPreserveComment yes,
withInputEncoding enc,
withHTTP []]
-- withCurl []]
-- withHTTP []]
withCurl []]
downloadXmlDocument = readFromDocument [withWarnings no,
withEncodingErrors no,
withHTTP []]
-- withCurl [] ]
-- withHTTP []]
withCurl [] ]
data ShadowLibrary = ShadowLibrary { logoUrl :: Maybe String,

View File

@ -20,15 +20,22 @@ toShadowItem (url, articleTitle) =
originalDate = Just date,
itype = "periodical",
format = Just "pdf",
finalUrl = url
finalUrl = url,
description = Just desc
}
where title = articleTitle
date = getDate url
where title = "Miasto Bierun: " ++ articleTitle
date = getDate articleTitle
desc = getArticleNr articleTitle
getDate url =
case url =~~ "/(19[0-9][0-9]|20[0-9][0-9])/" :: Maybe [[String]] of
getDate title =
case title =~~ "(19[0-9][0-9]|20[0-9][0-9])" :: Maybe [[String]] of
Just [[_, year]] -> year
otherwise -> error $ "unexpected url: " ++ url
otherwise -> "No date for: " ++ title
-- | Build the description text for an article from its title.
--
-- Searches @title@ for two digits immediately followed by a slash. When the
-- regex result is exactly one match, returns @"Article nr: "@ followed by
-- the captured digits with the slash removed. Every other result falls
-- through to a @"No article nr for: "@ message that includes the title.
getArticleNr :: String -> String
getArticleNr title =
  case title =~~ "([0-9][0-9]/)" :: Maybe [[String]] of
    -- The capture group includes the trailing slash, so strip it here.
    Just [[_, nr]] -> "Article nr: " ++ replace "/" "" nr
    -- Wildcard instead of a variable pattern named 'otherwise', which
    -- would shadow the Prelude binding of the same name.
    _ -> "No article nr for: " ++ title
main = do

View File

@ -20,6 +20,7 @@ library
, HTTP
, hxt
, hxt-http
, hxt-curl
, hxt-xpath
, MissingH
, monad-logger
@ -54,6 +55,7 @@ executable almanachmuszyny
build-depends: base
, hxt
, hxt-xpath
, hxt-curl
, MissingH
, regex-posix
, shadow-library