Fix numeration
This commit is contained in:
parent
26f9c5c181
commit
438ba590f1
@ -12,7 +12,6 @@ import Text.Printf
|
|||||||
|
|
||||||
import Control.Monad
|
import Control.Monad
|
||||||
|
|
||||||
|
|
||||||
extractFirstPage = (downloadDocument &&& this)
|
extractFirstPage = (downloadDocument &&& this)
|
||||||
>>> first (getXPathTrees "//td[@class='ms-gb'][1]" >>> getChildren >>> getText)
|
>>> first (getXPathTrees "//td[@class='ms-gb'][1]" >>> getChildren >>> getText)
|
||||||
|
|
||||||
@ -20,7 +19,8 @@ wupExtractor = (downloadDocument &&& this)
|
|||||||
>>> first ( getXPathTrees "//tr[contains(@class, 'ms-itmhover') or contains(@class, 'ms-alternatingstrong')]" )
|
>>> first ( getXPathTrees "//tr[contains(@class, 'ms-itmhover') or contains(@class, 'ms-alternatingstrong')]" )
|
||||||
>>> first (deep (
|
>>> first (deep (
|
||||||
(getXPathTrees "//td[@class='ms-vb2'][6]" >>> getChildren >>> getText) &&&
|
(getXPathTrees "//td[@class='ms-vb2'][6]" >>> getChildren >>> getText) &&&
|
||||||
(getXPathTrees "//td[@class='ms-vb2'][7]/a" >>> getAttrValue "href")
|
(getXPathTrees "//td[@class='ms-vb2'][7]/a" >>> getAttrValue "href") &&&
|
||||||
|
(getXPathTrees "//td[@class='ms-vb2'][5]" >>> getChildren >>> getText)
|
||||||
))
|
))
|
||||||
|
|
||||||
fetchLinks year xs failedTries = do
|
fetchLinks year xs failedTries = do
|
||||||
@ -38,11 +38,8 @@ fetchLinks year xs failedTries = do
|
|||||||
else
|
else
|
||||||
fetchLinks (year + 1) (xs ++ items) 0
|
fetchLinks (year + 1) (xs ++ items) 0
|
||||||
|
|
||||||
|
toShadowItem :: ((String, (String, String)), String) -> ShadowItem
|
||||||
|
toShadowItem ((date, (relativeFileUrl, itemNr)), url) =
|
||||||
|
|
||||||
toShadowItem :: ((String, String), String) -> ShadowItem
|
|
||||||
toShadowItem ((date, relativeFileUrl), url) =
|
|
||||||
(defaultShadowItem url title) {
|
(defaultShadowItem url title) {
|
||||||
originalDate = Just date,
|
originalDate = Just date,
|
||||||
itype = "periodical",
|
itype = "periodical",
|
||||||
@ -50,9 +47,7 @@ toShadowItem ((date, relativeFileUrl), url) =
|
|||||||
finalUrl = fileUrl
|
finalUrl = fileUrl
|
||||||
}
|
}
|
||||||
where fileUrl = "https://grab.uprp.pl" ++ relativeFileUrl
|
where fileUrl = "https://grab.uprp.pl" ++ relativeFileUrl
|
||||||
title = "Wiadomosci Urzedu Patentowego nr "
|
title = "Wiadomosci Urzedu Patentowego Nr. " ++ itemNr
|
||||||
|
|
||||||
namedShadowItem shadowItem num = title shadowItem ++ show num
|
|
||||||
|
|
||||||
main = do
|
main = do
|
||||||
let start = "https://grab.uprp.pl/sites/Wydawnictwa/WydawnictwaArchiwum/WydawnictwaArchiwum/Forms/AllItems.aspx"
|
let start = "https://grab.uprp.pl/sites/Wydawnictwa/WydawnictwaArchiwum/WydawnictwaArchiwum/Forms/AllItems.aspx"
|
||||||
@ -66,17 +61,4 @@ main = do
|
|||||||
let firstYear = read $ (head . tail . words . fst . head) firstPageItems :: Int
|
let firstYear = read $ (head . tail . words . fst . head) firstPageItems :: Int
|
||||||
|
|
||||||
shadowItems <- fetchLinks firstYear [] 0
|
shadowItems <- fetchLinks firstYear [] 0
|
||||||
|
mapM_ (putStrLn . show) shadowItems
|
||||||
let namedShadowItems = zipWith (\shadowItem num -> ShadowItem {
|
|
||||||
url = url shadowItem,
|
|
||||||
title = (title shadowItem) ++ show (num + 1),
|
|
||||||
itype = itype shadowItem,
|
|
||||||
originalDate = originalDate shadowItem,
|
|
||||||
creator = creator shadowItem,
|
|
||||||
format = format shadowItem,
|
|
||||||
lang = lang shadowItem,
|
|
||||||
finalUrl = finalUrl shadowItem,
|
|
||||||
description = description shadowItem
|
|
||||||
}) shadowItems [0 .. length shadowItems]
|
|
||||||
|
|
||||||
mapM_ (putStrLn . show) namedShadowItems
|
|
||||||
|
Loading…
Reference in New Issue
Block a user