From cc670f1f7cefed675aae42b5903cb826eee9134a Mon Sep 17 00:00:00 2001 From: s434695 Date: Tue, 13 Apr 2021 22:37:09 +0200 Subject: [PATCH] dziala, ale bez tytulu i daty --- app/pbsociety.hs | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/app/pbsociety.hs b/app/pbsociety.hs index 88ec9a4..7692762 100644 --- a/app/pbsociety.hs +++ b/app/pbsociety.hs @@ -13,21 +13,17 @@ import Text.Printf extractRecords = extractLinksWithText "//a[@class='image-link']" -- pary adres-tytuł - >>> second (arr $ replace "\n" " ") - >>> first (extractLinksWithText "//div/a[contains(@href,'.pdf') and not(@class)]") -- pobieramy stronę z adresu URL i wyciągamy linki z tej strony pasujące do wyrażenia XPathowego - -- ostatecznie wyjdą trójki ((adres URL, tytuł artykułu), tytuł rocznika) + >>> first (extractLinksWithText "//div/a[contains(@href,'.pdf') and not(@class)]") -- ... a tutaj te trójki przerabiamy do docelowej struktury ShadowItem -toShadowItem :: ((String, String), String) -> ShadowItem +toShadowItem :: ((String, String),String) -> ShadowItem toShadowItem ((url, articleTitle), yearlyTitle) = (defaultShadowItem url title) { - originalDate = Just date, itype = "periodical", format = Just "pdf", finalUrl = url } - where title = "Pbsociety " ++ yearlyTitle - date = getDate url + where title = "Pbsociety " getDate url = case url =~~ "/(19[0-9][0-9]|20[0-9][0-9])/" :: Maybe [[String]] of