Fix numeration

This commit is contained in:
Wojciech Pakulski 2021-04-10 12:30:09 +02:00
parent 26f9c5c181
commit 438ba590f1

View File

@ -12,7 +12,6 @@ import Text.Printf
import Control.Monad import Control.Monad
extractFirstPage = (downloadDocument &&& this) extractFirstPage = (downloadDocument &&& this)
>>> first (getXPathTrees "//td[@class='ms-gb'][1]" >>> getChildren >>> getText) >>> first (getXPathTrees "//td[@class='ms-gb'][1]" >>> getChildren >>> getText)
@ -20,7 +19,8 @@ wupExtractor = (downloadDocument &&& this)
>>> first ( getXPathTrees "//tr[contains(@class, 'ms-itmhover') or contains(@class, 'ms-alternatingstrong')]" ) >>> first ( getXPathTrees "//tr[contains(@class, 'ms-itmhover') or contains(@class, 'ms-alternatingstrong')]" )
>>> first (deep ( >>> first (deep (
(getXPathTrees "//td[@class='ms-vb2'][6]" >>> getChildren >>> getText) &&& (getXPathTrees "//td[@class='ms-vb2'][6]" >>> getChildren >>> getText) &&&
(getXPathTrees "//td[@class='ms-vb2'][7]/a" >>> getAttrValue "href") (getXPathTrees "//td[@class='ms-vb2'][7]/a" >>> getAttrValue "href") &&&
(getXPathTrees "//td[@class='ms-vb2'][5]" >>> getChildren >>> getText)
)) ))
fetchLinks year xs failedTries = do fetchLinks year xs failedTries = do
@ -38,11 +38,8 @@ fetchLinks year xs failedTries = do
else else
fetchLinks (year + 1) (xs ++ items) 0 fetchLinks (year + 1) (xs ++ items) 0
toShadowItem :: ((String, (String, String)), String) -> ShadowItem
toShadowItem ((date, (relativeFileUrl, itemNr)), url) =
toShadowItem :: ((String, String), String) -> ShadowItem
toShadowItem ((date, relativeFileUrl), url) =
(defaultShadowItem url title) { (defaultShadowItem url title) {
originalDate = Just date, originalDate = Just date,
itype = "periodical", itype = "periodical",
@ -50,9 +47,7 @@ toShadowItem ((date, relativeFileUrl), url) =
finalUrl = fileUrl finalUrl = fileUrl
} }
where fileUrl = "https://grab.uprp.pl" ++ relativeFileUrl where fileUrl = "https://grab.uprp.pl" ++ relativeFileUrl
title = "Wiadomosci Urzedu Patentowego nr " title = "Wiadomosci Urzedu Patentowego Nr. " ++ itemNr
namedShadowItem shadowItem num = title shadowItem ++ show num
main = do main = do
let start = "https://grab.uprp.pl/sites/Wydawnictwa/WydawnictwaArchiwum/WydawnictwaArchiwum/Forms/AllItems.aspx" let start = "https://grab.uprp.pl/sites/Wydawnictwa/WydawnictwaArchiwum/WydawnictwaArchiwum/Forms/AllItems.aspx"
@ -66,17 +61,4 @@ main = do
let firstYear = read $ (head . tail . words . fst . head) firstPageItems :: Int let firstYear = read $ (head . tail . words . fst . head) firstPageItems :: Int
shadowItems <- fetchLinks firstYear [] 0 shadowItems <- fetchLinks firstYear [] 0
mapM_ (putStrLn . show) shadowItems
let namedShadowItems = zipWith (\shadowItem num -> ShadowItem {
url = url shadowItem,
title = (title shadowItem) ++ show (num + 1),
itype = itype shadowItem,
originalDate = originalDate shadowItem,
creator = creator shadowItem,
format = format shadowItem,
lang = lang shadowItem,
finalUrl = finalUrl shadowItem,
description = description shadowItem
}) shadowItems [0 .. length shadowItems]
mapM_ (putStrLn . show) namedShadowItems