Version 1.0

2021-03-30 22:32:28 +02:00 · 2021-03-30 22:32:28 +02:00 · 03ffdeb1f7
commit 03ffdeb1f7
parent d8e664eba4
2 changed files with 10 additions and 9 deletions
--- a/app/aneks.hs
+++ b/app/aneks.hs
@ -12,21 +12,22 @@ import Text.Regex.Posix
 import Text.Printf


-extractRecords = extractLinksWithText "//a[contains(@title,'Aneks')]"  -- pary tytuł-adres
-                 >>> first (arr $ replace "\r\n            " " ") -- czyścimy pierwszy element pary, czyli tytuł z niepotrzebnych białych znaków
-                 >>> second (extractLinksWithText "//div/a[contains(@href,'.pdf')]") -- pobieramy stronę z adresu URL i wyciągamy linki z tej strony pasujące do wyrażenia XPathowego
-                 -- ostatecznie wyjdą trójki? (Tytuł, (link do pdf, blank?))
+extractRecords = extractLinksWithText "//a[contains(@title,'Aneks') and contains(text(),'Nr')]"  -- anchors whose title contains 'Aneks' and whose text contains 'Nr'
+                 >>> second (arr $ replace "\r\n            " " ") -- in the second component, replace the line break plus indentation with a single space
+                 >>> first (extractLinksWithText "//div/a[contains(@href,'.pdf')]") -- fetch the page at the URL and extract the links on that page that match the XPath expression
+                 -- the end result should be triples? ((link, text: "Wyświetl cały numer"), magazine issue number)
+

 -- ... and here we convert those triples into the target ShadowItem structure
-toShadowItem :: (String, (String, String)) -> ShadowItem
-toShadowItem (yearlyTitle, (url, blank)) =
+toShadowItem :: ((String, String), String) -> ShadowItem
+toShadowItem ((url, blank), yearlyTitle) = -- input layout: ((url, link text), title); main feeds it the output of extractRecords
  (defaultShadowItem url title) {
    originalDate = Just date,
    itype = "periodical",
    format = Just "pdf",
    finalUrl = url
    }
-  where title = yearlyTitle ++ " " ++ (replace "\r\n" "" (replace "\r\n          " "" blank))
+  where title = yearlyTitle -- the link text is no longer appended to the title: ++ " "  ++ blank
        date = getDate url

 getDate url =
@ -42,4 +43,4 @@ main = do
                                       abbrev="ArchAnek",
                                       lLevel=0,
                                       webpage=start}
-    extractItemsStartingFromUrl shadowLibrary start (extractRecords >>> arr toShadowItem)
+    extractItemsStartingFromUrl shadowLibrary start (extractRecords >>> arr toShadowItem)
--- a/shadow-library.cabal
+++ b/shadow-library.cabal
@ -68,7 +68,7 @@ executable aneks
   build-depends:       base
                      , hxt
                      , hxt-xpath
-		      , hxt-curl
+                      , hxt-curl
                      , MissingH
                      , regex-posix
                      , shadow-library