Fix item numbering in titles

This commit is contained in:
Wojciech Pakulski 2021-04-10 12:30:09 +02:00
parent 26f9c5c181
commit 438ba590f1

View File

@ -12,7 +12,6 @@ import Text.Printf
import Control.Monad
extractFirstPage = (downloadDocument &&& this)
>>> first (getXPathTrees "//td[@class='ms-gb'][1]" >>> getChildren >>> getText)
@ -20,7 +19,8 @@ wupExtractor = (downloadDocument &&& this)
>>> first ( getXPathTrees "//tr[contains(@class, 'ms-itmhover') or contains(@class, 'ms-alternatingstrong')]" )
>>> first (deep (
(getXPathTrees "//td[@class='ms-vb2'][6]" >>> getChildren >>> getText) &&&
(getXPathTrees "//td[@class='ms-vb2'][7]/a" >>> getAttrValue "href")
(getXPathTrees "//td[@class='ms-vb2'][7]/a" >>> getAttrValue "href") &&&
(getXPathTrees "//td[@class='ms-vb2'][5]" >>> getChildren >>> getText)
))
fetchLinks year xs failedTries = do
@ -38,11 +38,8 @@ fetchLinks year xs failedTries = do
else
fetchLinks (year + 1) (xs ++ items) 0
toShadowItem :: ((String, String), String) -> ShadowItem
toShadowItem ((date, relativeFileUrl), url) =
toShadowItem :: ((String, (String, String)), String) -> ShadowItem
toShadowItem ((date, (relativeFileUrl, itemNr)), url) =
(defaultShadowItem url title) {
originalDate = Just date,
itype = "periodical",
@ -50,9 +47,7 @@ toShadowItem ((date, relativeFileUrl), url) =
finalUrl = fileUrl
}
where fileUrl = "https://grab.uprp.pl" ++ relativeFileUrl
title = "Wiadomosci Urzedu Patentowego nr "
namedShadowItem shadowItem num = title shadowItem ++ show num
title = "Wiadomosci Urzedu Patentowego Nr. " ++ itemNr
main = do
let start = "https://grab.uprp.pl/sites/Wydawnictwa/WydawnictwaArchiwum/WydawnictwaArchiwum/Forms/AllItems.aspx"
@ -66,17 +61,4 @@ main = do
let firstYear = read $ (head . tail . words . fst . head) firstPageItems :: Int
shadowItems <- fetchLinks firstYear [] 0
let namedShadowItems = zipWith (\shadowItem num -> ShadowItem {
url = url shadowItem,
title = (title shadowItem) ++ show (num + 1),
itype = itype shadowItem,
originalDate = originalDate shadowItem,
creator = creator shadowItem,
format = format shadowItem,
lang = lang shadowItem,
finalUrl = finalUrl shadowItem,
description = description shadowItem
}) shadowItems [0 .. length shadowItems]
mapM_ (putStrLn . show) namedShadowItems
mapM_ (putStrLn . show) shadowItems