diff --git a/app/ptd.hs b/app/ptd.hs index 6c11946..4d6a021 100644 --- a/app/ptd.hs +++ b/app/ptd.hs @@ -1,5 +1,4 @@ -{-# LANGUAGE UTF #-} {-# LANGUAGE Arrows, NoMonomorphismRestriction #-} import ShadowLibrary.Core @@ -12,7 +11,7 @@ import Data.List.Utils (replace) import Text.Regex.Posix import Text.Printf -extractNestedLinksWithText xpathCondition = (downloadDocumentWithEncoding "UTF-8" &&& this) +extractNestedLinksWithText xpathCondition = (downloadDocument &&& this) >>> first (getXPathTrees xpathCondition >>> ((getXPathTrees "//a" >>> getAttrValue "href") &&& (listA (deep isText >>> getText) @@ -21,6 +20,7 @@ extractNestedLinksWithText xpathCondition = (downloadDocumentWithEncoding "UTF-8 >>> first expandURIFixed extractRecords = extractLinksWithText "//div[@class='entry-content']/p/a[contains(@href, 'id')]" + >>> first (arr $ replace "http:" "https:") >>> first (extractNestedLinksWithText "//div[@class='entry-content']/p[strong[a]] | //div[@class='entry-content']/p[a]") toShadowItem :: ((String, String), String) -> ShadowItem @@ -36,10 +36,10 @@ toShadowItem ((url, articleTitle), magazineTitle) = getYear :: String -> String getYear url = - case url =~~ "/rocznik[0-9]{2}/" :: Maybe [[String]] of - Just [[_, year]] -> year + case url =~~ "/(rocznik[0-9][0-9])/" :: Maybe [[String]] of + Just [[_, raw_year]] -> "19" ++ (replace "rocznik" "" raw_year) otherwise -> case url =~~ "/(19[0-9][0-9]|20[0-9][0-9])/" :: Maybe [[String]] of - Just [[_, year]] -> "19" ++ (replace "rocznik" "" year) + Just [[_, year]] -> year otherwise -> "" diff --git a/readme.md b/readme.md index 8530c33..cf08a0c 100644 --- a/readme.md +++ b/readme.md @@ -1,5 +1,18 @@ +# PTD - Polskie Towarzystwo Dendrologiczne -# Dodatkowe paczki zainstalowane na potrzebę uruchomienia programu: +## Wywołanie programu +```bash +stack build +stack exec ptd ``` + +## Przykładowa krotka wynikowa: +Tytuł w postaci Wydawnictwo - Tytuł +``` +ShadowItem {url = Just 
"http://www.ptd.pl/ptd/wp-content/download/2014/7-Plyty.pdf", title = "Pe\322ne teksty Rocznik\243w PTD, pocz\261wszy od zeszytu 57; \nwersja online \8211 ISSN 2300-8326 - PIOTR DASZKIEWICZ \nP\322yty miedziane i miedzioryty Pierre Richer de Bellevala (1555-1632) w Rzeczpospolitej Obojga Narod\243w \8211 z punktu widzenia historii botaniki\8211 \nPierre Richer de Belleval\8217s (1555-1632) copperplates in the Polish-Lithuanian Commonwealth from the point of view of the history of botany ", itype = "periodical", originalDate = Just "2014", creator = Nothing, format = Just "pdf", lang = Just "pol", finalUrl = "http://www.ptd.pl/ptd/wp-content/download/2014/7-Plyty.pdf", description = Nothing} +``` +W przypadku braku roku w adresie URL zamiast roku zwracany jest pusty string. +## Dodatkowe paczki zainstalowane na potrzeby uruchomienia programu: +```bash apt-get install libcurl4-gnutls-dev ``` \ No newline at end of file