Poprawienie funkcji i readme

This commit is contained in:
Anna Nowak 2021-04-07 01:03:47 +02:00
parent 3e9ad1fa0b
commit 5272c99c3a
2 changed files with 19 additions and 6 deletions

View File

@ -1,5 +1,4 @@
{-# LANGUAGE UTF #-}
{-# LANGUAGE Arrows, NoMonomorphismRestriction #-}
import ShadowLibrary.Core
@ -12,7 +11,7 @@ import Data.List.Utils (replace)
import Text.Regex.Posix
import Text.Printf
extractNestedLinksWithText xpathCondition = (downloadDocumentWithEncoding "UTF-8" &&& this)
extractNestedLinksWithText xpathCondition = (downloadDocument &&& this)
>>> first (getXPathTrees xpathCondition
>>> ((getXPathTrees "//a" >>> getAttrValue "href")
&&& (listA (deep isText >>> getText)
@ -21,6 +20,7 @@ extractNestedLinksWithText xpathCondition = (downloadDocumentWithEncoding "UTF-8
>>> first expandURIFixed
extractRecords = extractLinksWithText "//div[@class='entry-content']/p/a[contains(@href, 'id')]"
>>> first (arr $ replace "http:" "https:")
>>> first (extractNestedLinksWithText "//div[@class='entry-content']/p[strong[a]] | //div[@class='entry-content']/p[a]")
toShadowItem :: ((String, String), String) -> ShadowItem
@ -36,10 +36,10 @@ toShadowItem ((url, articleTitle), magazineTitle) =
getYear :: String -> String
getYear url =
case url =~~ "/rocznik[0-9]{2}/" :: Maybe [[String]] of
Just [[_, year]] -> year
case url =~~ "/(rocznik[0-9][0-9])/" :: Maybe [[String]] of
Just [[_, raw_year]] -> "19" ++ (replace "rocznik" "" raw_year)
otherwise -> case url =~~ "/(19[0-9][0-9]|20[0-9][0-9])/" :: Maybe [[String]] of
Just [[_, year]] -> "19" ++ (replace "rocznik" "" year)
Just [[_, year]] -> year
otherwise -> ""

View File

@ -1,5 +1,18 @@
# PTD - Polskie Towarzystwo Dendrologiczne
# Dodatkowe paczki zainstalowane na potrzebę uruchomienia programu:
## Wywołanie programu
```bash
stack build
stack exec ptd
```
## Przykładowa krotka wynikowa:
Tytuł w postaci Wydawnictwo - Tytuł
```
ShadowItem {url = Just "http://www.ptd.pl/ptd/wp-content/download/2014/7-Plyty.pdf", title = "Pe\322ne teksty Rocznik\243w PTD, pocz\261wszy od zeszytu 57; \nwersja online \8211 ISSN 2300-8326 - PIOTR DASZKIEWICZ \nP\322yty miedziane i miedzioryty Pierre Richer de Bellevala (1555-1632) w Rzeczpospolitej Obojga Narod\243w \8211 z punktu widzenia historii botaniki\8211 \nPierre Richer de Belleval\8217s (1555-1632) copperplates in the Polish-Lithuanian Commonwealth from the point of view of the history of botany ", itype = "periodical", originalDate = Just "2014", creator = Nothing, format = Just "pdf", lang = Just "pol", finalUrl = "http://www.ptd.pl/ptd/wp-content/download/2014/7-Plyty.pdf", description = Nothing}
```
Jeśli w adresie URL nie uda się znaleźć roku, w jego miejscu znajduje się pusty string.
## Dodatkowe paczki zainstalowane na potrzeby uruchomienia programu:
```bash
apt-get install libcurl4-gnutls-dev
```