{-# LANGUAGE Arrows, NoMonomorphismRestriction #-}
import ShadowLibrary.Core

import Text.XML.HXT.Core
import Text.XML.HXT.XPath
-- import Text.XML.HXT.Curl
import Data.List
import Data.List.Utils (replace)

import Text.Regex.Posix
import Text.Printf

-- Extracting PDFs.
--
-- Three chained stages; each later stage is applied (via 'first') to the
-- URL-side component produced by the stage before it:
--
--   1. anchors inside a nav element whose href contains
--      'title=Kategoria:Dokumenty';
--   2. anchors whose href contains both 'title=Plik' and 'pdf';
--   3. the "src" attribute of an iframe located under the div with id 'file'.
--
-- The result is fed to toShadowItem in main, so each emitted value has the
-- shape ((String, String), String).
-- NOTE(review): the extract* helpers come from ShadowLibrary.Core; presumably
-- they fetch the page behind the incoming URL before applying the XPath --
-- confirm against that module.
extractRecords =
  documentCategoryLinks
    >>> first pdfFilePageLinks
    >>> first (first embeddedPdfSource)
  where
    documentCategoryLinks =
      extractLinksWithText "//nav//a[contains(@href,'title=Kategoria:Dokumenty')]"
    pdfFilePageLinks =
      extractLinksWithText "//a[contains(@href,'title=Plik') and contains(@href,'pdf')]"
    embeddedPdfSource =
      extractLinksGeneralized "//div[@id='file']//iframe" "src"

-- attempts at downloading both pdf and jpg files (currently disabled)
--extractRecords = extractLinksWithText "//nav//a[contains(@href,'title=Kategoria:14_WDH')]"
--               >>> first (extractLinksWithText "//a[contains(@href,'title=Plik') and (contains(@href,'pdf') or contains(@href,'jpg'))]") 
--               >>> first ( first (
--                               downloadDocument
--                               >>> (getXPathTrees "//div[@id='file']//iframe"
--                                   >>> getAttrValue "src") *** (getXPathTrees "//div[@id='file']//a"
--                                   >>> getAttrValue "href")
--                               >>> first (expandURIFixed)
--                   ))

-- Turn one extracted record into a ShadowItem.
--
-- The input is ((url, fileTitle), x); the trailing component x is not used.
-- The item starts from defaultShadowItem with the URL and a title made of the
-- fixed prefix "Archiwum Harcerskie - " followed by fileTitle, and then gets:
--
--   * originalDate: whatever extractDate finds in the URL (possibly Nothing);
--   * itype:        always "periodical";
--   * format:       the result of extractFormat applied to the URL;
--   * finalUrl:     the URL itself.
toShadowItem :: ((String, String), String) -> ShadowItem
toShadowItem ((url, fileTitle), _) =
  let baseItem = defaultShadowItem url ("Archiwum Harcerskie - " ++ fileTitle)
  in baseItem
       { originalDate = extractDate url
       , itype = "periodical"
       , format = extractFormat url
       , finalUrl = url
       }

-- Look for a date-like substring in the given string.
--
-- The pattern accepts a year (1900-1999 or 2000-2299) optionally followed by
-- one of:
--
--   * "-MM-DD", where the first month digit is 0 or 1 and the day is any two
--     digits;
--   * "-MM" followed by one non-digit character and then one of space,
--     underscore or hyphen.
--
-- No further range validation is performed on month or day.  Because the
-- suffix is optional, a bare year is enough for a match.
--
-- Returns Just the matched text, or Nothing when the string contains no match.
extractDate :: String -> Maybe String
-- (=~~) already reports "no match" as failure in the result monad, which is
-- Maybe here, so the result can be returned as-is without a case expression.
extractDate n = n =~~ datePattern
  where
    datePattern :: String
    datePattern = "(((19[0-9]{2})|(2[0-2]{1}[0-9]{2}))(((-[0-1]{1}[0-9]{1}-[0-9]{2})|)|((-[0-1]{1}[0-9]{1}[^0-9][ _-]{1}))))"


-- Program entry point.
--
-- Describes the "Archiwum Harcerskie" library (no logo, abbreviation
-- "ArchHarc", level 0, web page equal to the start URL) and hands it, together
-- with the start URL and the extractRecords pipeline mapped through
-- toShadowItem, to extractItemsStartingFromUrl.
main =
  extractItemsStartingFromUrl shadowLibrary start (extractRecords >>> arr toShadowItem)
  where
    start = "http://archiwumharcerskie.pl/index.php?title=Strona_główna"
    shadowLibrary =
      ShadowLibrary
        { logoUrl = Nothing
        , lname = "Archiwum Harcerskie"
        , abbrev = "ArchHarc"
        , lLevel = 0
        , webpage = start
        }
--    extractItemsStartingFromUrl shadowLibrary start (extractRecords)