From 210b80656e4142ae9348bcb7284b89b8e7409ec9 Mon Sep 17 00:00:00 2001 From: AdamOsiowy123 Date: Mon, 11 Apr 2022 10:49:27 +0200 Subject: [PATCH] add custom method to extract links --- app/miastobierun.hs | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/app/miastobierun.hs b/app/miastobierun.hs index 5e9c064..cea4e26 100644 --- a/app/miastobierun.hs +++ b/app/miastobierun.hs @@ -9,11 +9,26 @@ import Data.List.Utils (replace) import Text.Regex.Posix import Text.Printf +--extractCustomLinks xpathCondition1 = (downloadDocument &&& this) +-- >>> first (getXPathTrees xpathCondition1 +-- >>> (getAttrValue "href" +-- &&& getAttrValue "title" +-- )) +-- >>> arr rotateSecTh +-- >>> first expandURIFixed -extractRecords = extractLinksWithText "//td[@cell-header='Nazwa pliku']/a" -- pary adres-tytuł - >>> second (arr $ replace "\r\n" "") -- czyścimy drugi element pary, czyli tytuł z niepotrzebnych białych znaków +extractCustomLinks xpathCondition1 xpathCondition2 xpathCondition3= (downloadDocument &&& this) + >>> first (getXPathTrees xpathCondition1 + >>> first (getXPathTrees xpathCondition2 >>> (getAttrValue "href" &&& getAttrValue "title")) + >>> second (getXPathTrees xpathCondition3 >>> getAttrValue "cell-header") + ) + >>> arr rotateSecThX + >>> first expandURIFixed + >>> second expandURIFixed + +extractRecords = extractCustomLinks "//tr[@class=''] | //tr[@class='_a']" "//td[@cell-header='Nazwa pliku']/a" "//td[@class='size']" +--extractRecords = extractCustomLinks "//td[@cell-header='Nazwa pliku']/a" --- ... a tutaj te dwójki przerabiamy do docelowej struktury ShadowItem toShadowItem :: (String, String) -> ShadowItem toShadowItem (url, articleTitle) = (defaultShadowItem url title) { @@ -23,9 +38,9 @@ toShadowItem (url, articleTitle) = finalUrl = url, description = Just desc } - where title = "Miasto Bierun: " ++ articleTitle + where title = "Miasto Bierun: " ++ " url: " ++ url ++ " title: " ++ articleTitle date = getDate articleTitle - desc = getArticleNr articleTitle + desc = getArticleNr articleTitle ++ " size: " getDate title = case title =~~ "(19[0-9][0-9]|20[0-9][0-9])" :: Maybe [[String]] of