This commit is contained in:
nlitkowski 2021-04-07 02:53:08 +02:00
parent 3314561a8a
commit 6472697a79

View File

@ -10,7 +10,7 @@ import Data.List.Utils (replace)
import Text.Regex.Posix import Text.Regex.Posix
import Text.Printf import Text.Printf
import Debug.Trace
extractNestedLinksWithText xpathCondition = (downloadDocument &&& this) extractNestedLinksWithText xpathCondition = (downloadDocument &&& this)
>>> first (getXPathTrees xpathCondition >>> first (getXPathTrees xpathCondition
@ -27,7 +27,7 @@ getLinkAndText xpathCondition = proc doc -> do
xpathTrees <- getXPathTrees xpathCondition -< doc xpathTrees <- getXPathTrees xpathCondition -< doc
href <- (getXPathTrees "//a" >>> getAttrValue "href") -< xpathTrees href <- (getXPathTrees "//a" >>> getAttrValue "href") -< xpathTrees
txt <- (listA (deep isText >>> getText) >>> arr (intercalate " ")) -< xpathTrees txt <- (listA (deep isText >>> getText) >>> arr (intercalate " ")) -< xpathTrees
returnA -< (href, txt) returnA -< traceShowId (href, txt)
extractNestedLinksWithText2 xpathCondition = proc url -> do extractNestedLinksWithText2 xpathCondition = proc url -> do
@ -77,4 +77,4 @@ main = do
abbrev="ZboBiel", abbrev="ZboBiel",
lLevel=0, lLevel=0,
webpage=start} webpage=start}
extractItemsStartingFromUrl shadowLibrary start (extractRecords >>> arr toShadowItem) extractItemsStartingFromUrl shadowLibrary start (extractRecords2 >>> arr toShadowItem)