my solution

This commit is contained in:
Natalia Gawron 2021-04-02 16:57:47 +02:00
parent fbcdb2f2bc
commit a5d9f23ca8
8 changed files with 548 additions and 0 deletions

1
LICENSE Normal file
View File

@ -0,0 +1 @@
Prioprietary.

304
ShadowLibrary/Core.hs Normal file
View File

@ -0,0 +1,304 @@
{-# LANGUAGE QuasiQuotes, TemplateHaskell, TypeFamilies, OverloadedStrings #-}
{-# LANGUAGE GADTs, FlexibleContexts #-}
{-# LANGUAGE Arrows, NoMonomorphismRestriction #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
module ShadowLibrary.Core where
-- import Database.Persist
-- import Database.Persist.TH
-- import Database.Persist.Postgresql
import Control.Monad.IO.Class (liftIO)
import Control.Monad.Logger (runNoLoggingT)
import Control.Monad.Trans.Resource (runResourceT)
import Control.Monad.Reader
import Data.Time
import Data.Time.Calendar
import Data.Time.LocalTime
import Data.Time.Zones
import Data.Time.Zones.All
import Data.String.Utils (strip)
import Data.List.Utils as DLU
import Text.XML.HXT.Core
import Network.HTTP
import Network.URI
import Control.Monad.Trans.Maybe
import Control.Monad
import Data.Tree.NTree.TypeDefs
import Data.Maybe
import Control.Monad.Trans
import Text.XML.HXT.XPath
import Text.XML.HXT.Curl
import Text.XML.HXT.HTTP
import Text.Regex.TDFA
import Data.List (isInfixOf, intercalate)
import Data.List.Utils (replace)
-- import Network.Curl.Opts
polishTimeZone = TimeZone {
timeZoneMinutes = 120,
timeZoneSummerOnly = True,
timeZoneName = ""}
openUrl :: String -> MaybeT IO String
openUrl url = case parseURI url of
Nothing -> fail ""
Just u -> liftIO (getResponseBody =<< simpleHTTP (mkRequest GET u))
getWebPage :: String -> IO (IOSArrow XmlTree (NTree XNode))
getWebPage url = do
contents <- runMaybeT $ openUrl url
return $ readString [withParseHTML yes, withWarnings no] (fromMaybe "" contents)
downloadDocument = readFromDocument [withParseHTML yes,
withWarnings no,
withEncodingErrors no,
withPreserveComment yes,
withStrictInput yes,
-- withHTTP []
withCurl [("curl--user-agent","AMU Digital Libraries Indexing Agent")]
]
downloadDocumentWithEncoding enc = readFromDocument [withParseHTML yes,
withWarnings no,
withEncodingErrors no,
withPreserveComment yes,
withInputEncoding enc,
-- withHTTP []]
withCurl []]
downloadXmlDocument = readFromDocument [withWarnings no,
withEncodingErrors no,
-- withHTTP []]
withCurl [] ]
data ShadowLibrary = ShadowLibrary { logoUrl :: Maybe String,
lname :: String,
abbrev :: String,
webpage :: String,
lLevel :: Int }
data ShadowItem = ShadowItem {
url :: Maybe String,
title :: String,
itype :: String,
originalDate :: Maybe String,
creator :: Maybe String,
format :: Maybe String,
lang :: Maybe String,
finalUrl :: String,
description :: Maybe String
} deriving (Show)
defaultShadowItem url title = ShadowItem {
url = Just url,
title = title,
itype = "periodical",
originalDate = Nothing,
creator = Nothing,
format = Just "pdf",
lang = Just "pol",
finalUrl = url,
description = Nothing }
getDuration :: Maybe String -> (Maybe UTCTime, Maybe UTCTime)
getDuration Nothing = (Nothing, Nothing)
getDuration (Just date) =
case date =~~ ("^(1[6789]|20)[0-9][0-9]$" :: String) of
Just year -> (Just (yearStart year), Just (yearEnd year))
otherwise ->
case date =~~ ("^((1[789]|20)[0-9][0-9])-(0[1-9]|1[0-2])$" :: String) :: Maybe [[String]] of
Just [[_, year, _, month]] -> (Just (monthStart year month), Just (monthEnd year month))
otherwise ->
case date =~~ ("^((1[789]|20)[0-9][0-9])-(0[1-9]|1[0-2])-(0[1-9]|[12][0-9]|3[01])$" :: String) :: Maybe [[String]] of
Just [[_, year, _, month, day]] -> (Just (dayStart year month day), Just (dayEnd year month day))
otherwise ->
case date =~~ ("^((1[789]|20)[0-9][0-9])-((1[789]|20)[0-9][0-9])$" :: String) :: Maybe [[String]] of
Just [[_, yearb, _, yeare, _]] -> (Just (yearStart yearb), Just (yearEnd yeare))
otherwise -> (Nothing, Nothing)
where
yearAsInteger year = (read year) :: Integer
monthAsInt month = (read month) :: Int
dayAsInt day = (read day) :: Int
yearStart year = localTimeToTimeStamp $ LocalTime {
localDay = fromGregorian (yearAsInteger year) 1 1,
localTimeOfDay = midnight }
yearEnd year = localTimeToTimeStamp $ LocalTime {
localDay = fromGregorian (yearAsInteger year) 12 31,
localTimeOfDay = lastSecond }
monthStart year month = localTimeToTimeStamp $ LocalTime {
localDay = fromGregorian (yearAsInteger year) (monthAsInt month) 1,
localTimeOfDay = midnight }
monthEnd year month = localTimeToTimeStamp $ LocalTime {
localDay = fromGregorian (yearAsInteger year) (monthAsInt month) 31,
localTimeOfDay = lastSecond }
dayStart year month day = localTimeToTimeStamp $ LocalTime {
localDay = fromGregorian (yearAsInteger year) (monthAsInt month) (dayAsInt day),
localTimeOfDay = midnight }
dayEnd year month day = localTimeToTimeStamp $ LocalTime {
localDay = fromGregorian (yearAsInteger year) (monthAsInt month) (dayAsInt day),
localTimeOfDay = lastSecond }
lastSecond = TimeOfDay {todHour = 23, todMin = 59, todSec = 59 }
localTimeToTimeStamp ltime = localTimeToUTCTZ (tzByLabel Europe__Warsaw) ltime
-- zonedTimeToUTC $ ZonedTime {
-- zonedTimeToLocalTime = ltime,
-- zonedTimeZone = timeZoneForUTCTime Europe__Warsaw }
extractLinks xpathCondition = (downloadDocument &&& this)
>>> first (getXPathTrees xpathCondition
>>> getAttrValue "href")
>>> expandURIFixed
extractLinksGeneralized xpathCondition attr = (downloadDocument &&& this)
>>> first (getXPathTrees xpathCondition
>>> getAttrValue attr)
>>> expandURIFixed
rotateSecTh ((a, b), c) = ((a, c), b)
extractLinksWithText xpathCondition = (downloadDocument &&& this)
>>> first (getXPathTrees xpathCondition
>>> (getAttrValue "href"
&&& (listA (deep isText >>> getText)
>>> arr (intercalate " "))))
>>> arr rotateSecTh
>>> first expandURIFixed
extractLinksWithTitle xpathCondition = (downloadDocument &&& this)
>>> first (getXPathTrees xpathCondition
>>> (getAttrValue "href"
&&& getAttrValue "title"))
>>> arr rotateSecTh
>>> first expandURIFixed
urlPreFixer = arr (Data.List.Utils.replace "[" stupidLeftBracketMarker . Data.List.Utils.replace "]" stupidRightBracketMarker)
urlPostFixer = arr (Data.List.Utils.replace stupidLeftBracketMarker "%5B" . Data.List.Utils.replace stupidRightBracketMarker "%5D")
stupidLeftBracketMarker = "ddsfdfdfdfgfgfrrtrtrrtr"
stupidRightBracketMarker = "wqweweerererrtrtrtrtrtr"
expandURIFixed = (urlPreFixer *** urlPreFixer) >>> expandURI >>> urlPostFixer
extractText = (listA (deep isText >>> getText)
>>> arr (intercalate " "))
loopNext extract xpathConditionForNext = initialStep
>>> loopNextCore extract xpathConditionForNext
>>> arr fst
>>> unlistA
initialList :: [(String, a)]
initialList = []
initialStep = arr (const initialList) &&& this
loopNextCore extract xpathConditionForNext = second
(listA extract &&& extractNext xpathConditionForNext)
>>> arr expandList
>>> ifP (nextFound)
(second (arr fromJust >>> (downloadDocument &&& this))
>>> loopNextCore extract xpathConditionForNext)
(this)
nextFound :: ([(String,a)], Maybe String) -> Bool
nextFound (_, Just _) = True
nextFound (_, Nothing) = False
expandList (l, (e, n)) = (l ++ e, n)
extractNext xpathConditionForNext = listA
(first (getXPathTrees ("(" ++ xpathConditionForNext ++ ")[1]")
>>> getAttrValue "href")
>>> expandURIFixed)
>>> arr listToMaybe
extractFormat :: String -> Maybe String
extractFormat finalUrl
| ".gif" `isInfixOf` finalUrl = Just "gif"
| ".jpg" `isInfixOf` finalUrl = Just "jpg"
| ".djvu" `isInfixOf` finalUrl = Just "djvu"
| ".pdf" `isInfixOf` finalUrl = Just "pdf"
| ".doc" `isInfixOf` finalUrl = Just "doc"
| otherwise = Nothing
baseMonthNameToNumber :: String -> Maybe String
baseMonthNameToNumber "styczeń" = Just "01"
baseMonthNameToNumber "styczen" = Just "01"
baseMonthNameToNumber "stycznia" = Just "01"
baseMonthNameToNumber "luty" = Just "02"
baseMonthNameToNumber "lutego" = Just "02"
baseMonthNameToNumber "marzec" = Just "03"
baseMonthNameToNumber "marca" = Just "03"
baseMonthNameToNumber "kwiecień" = Just "04"
baseMonthNameToNumber "kwiecien" = Just "04"
baseMonthNameToNumber "kwietnia" = Just "04"
baseMonthNameToNumber "maj" = Just "05"
baseMonthNameToNumber "maja" = Just "05"
baseMonthNameToNumber "czerwiec" = Just "06"
baseMonthNameToNumber "czeerwiec" = Just "06"
baseMonthNameToNumber "czerwca" = Just "06"
baseMonthNameToNumber "lipiec" = Just "07"
baseMonthNameToNumber "lipca" = Just "07"
baseMonthNameToNumber "sierpień" = Just "08"
baseMonthNameToNumber "sierpien" = Just "08"
baseMonthNameToNumber "sierpnia" = Just "08"
baseMonthNameToNumber "wrzesień" = Just "09"
baseMonthNameToNumber "wrzesien" = Just "09"
baseMonthNameToNumber "września" = Just "09"
baseMonthNameToNumber "wrzesnia" = Just "09"
baseMonthNameToNumber "październik" = Just "10"
baseMonthNameToNumber "pażdziernik" = Just "10"
baseMonthNameToNumber "pazdziernik" = Just "10"
baseMonthNameToNumber "października" = Just "10"
baseMonthNameToNumber "pazdziernika" = Just "10"
baseMonthNameToNumber "listopad" = Just "11"
baseMonthNameToNumber "listopada" = Just "11"
baseMonthNameToNumber "grudzień" = Just "12"
baseMonthNameToNumber "grudzien" = Just "12"
baseMonthNameToNumber "grudnia" = Just "12"
baseMonthNameToNumber "jesien" = Just "10"
baseMonthNameToNumber _ = Nothing
extractYear :: String -> Maybe String
extractYear n =
case n =~~ ("(1[6789]|20)[0-9][0-9]" :: String) of
Just year -> Just year
otherwise -> Nothing
joinAlts :: [String] -> String
joinAlts = intercalate " // "
clean = arr (DLU.replace "\n" "")
>>> arr strip
extractItems shadowLibrary start extractor = do
page <- getWebPage start
items <- runX $ extractor start page
-- insertIntoDatabase shadowLibrary items
putStrLn (show items)
extractItemsStartingFromUrl shadowLibrary start extractor = do
items <- runX $ (arr (const start) >>> setTraceLevel 1 >>> extractor)
-- insertIntoDatabase shadowLibrary items
mapM_ (putStrLn . show) items

43
app/almanachmuszyny.hs Normal file
View File

@ -0,0 +1,43 @@
{-# LANGUAGE Arrows, NoMonomorphismRestriction #-}
import ShadowLibrary.Core
import Text.XML.HXT.Core
import Text.XML.HXT.XPath
-- import Text.XML.HXT.Curl
import Data.List
import Data.List.Utils (replace)
import Text.Regex.Posix
import Text.Printf
extractRecords = extractLinksWithText "//a[@class='roczniki']"
>>> second (arr $ replace "\r\n " "")
>>> first (arr ((++"tr") . init))
>>> first (extractLinksWithText "//li/a[contains(@href,'.pdf')]")
toShadowItem :: ((String, String), String) -> ShadowItem
toShadowItem ((url, articleTitle), yearlyTitle) =
(defaultShadowItem url title) {
originalDate = Just date,
itype = "periodical",
format = Just "pdf",
finalUrl = url
}
where title = "Almanach Muszyny " ++ yearlyTitle ++ " " ++ (replace "\r\n" "" (replace "\r\n " "" articleTitle))
date = getDate url
getDate url =
case url =~~ "/(19[0-9][0-9]|20[0-9][0-9])/" :: Maybe [[String]] of
Just [[_, year]] -> year
otherwise -> error $ "unexpected url: " ++ url
main = do
let start = "http://www.almanachmuszyny.pl/"
let shadowLibrary = ShadowLibrary {logoUrl=Nothing,
lname="Almanach Muszyny",
abbrev="AlmMusz",
lLevel=0,
webpage=start}
extractItemsStartingFromUrl shadowLibrary start (extractRecords >>> arr toShadowItem)

52
app/gazety.hs Normal file
View File

@ -0,0 +1,52 @@
{-# LANGUAGE Arrows, NoMonomorphismRestriction #-}
import ShadowLibrary.Core
import Text.XML.HXT.Core
import Text.XML.HXT.XPath
import Text.XML.HXT.Curl
import Data.List
import Data.List.Utils (replace)
import Text.Regex.Posix
import Text.Printf
extractRecords = extractLinksWithText "//ul[@class='menu-topmenu menu-iconmenu']//li[@class='first' or @class='last' or not(@class)]//a"
>>> second (arr $ replace "\n\t \n\t\t" "")
>>> second (arr $ replace "\t \n " "")
>>> first (extractLinksWithText "//div[@class='jsn-article-content']//a[contains(.,'nr')]")
>>> first (second ( arr $ replace "ś" "s"))
>>> first (second ( arr $ replace "ń" "n"))
>>> first (second ( arr $ replace "ź" "z"))
toShadowItem :: ((String, String), String) -> ShadowItem
toShadowItem ((url, articleTitle), yearlyTitle) =
(defaultShadowItem url title) {
originalDate = extractDate articleTitle,
itype = "periodical",
format = Just "pdf",
finalUrl = url
}
where title = articleTitle
extractDate :: String -> Maybe String
extractDate url =
case url =~~ ("[A-Za-z]+ [0-9]{4}" :: String) of
Just date -> Just date
otherwise -> extractOtherDate url
extractOtherDate :: String -> Maybe String
extractOtherDate url =
case url =~~ ("[0-9]{4}_[0-9]{2}" :: String) of
Just date -> Just date
otherwise -> Just url
main = do
let start = "https://www.smpopowice.pl/"
let shadowLibrary = ShadowLibrary {logoUrl=Nothing,
lname="Nasze Popowice",
abbrev="NaszPop",
lLevel=0,
webpage=start}
extractItemsStartingFromUrl shadowLibrary start (extractRecords >>> arr toShadowItem)

57
gazety.out Normal file
View File

@ -0,0 +1,57 @@
ShadowItem {url = Just "https://www.smpopowice.pl/gazeta/2020_03_nasze-popowice-nr_53.pdf", title = "Marzec 2020 \"Nasze popowice\" nr 53", itype = "periodical", originalDate = Just "Marzec 2020", creator = Nothing, format = Just "pdf", lang = Just "pol", finalUrl = "https://www.smpopowice.pl/gazeta/2020_03_nasze-popowice-nr_53.pdf", description = Nothing}
ShadowItem {url = Just "https://www.smpopowice.pl/gazeta/2020_12_nasze-popowice-nr_54.pdf", title = "Grudzien 2020 \"Nasze popowice\" nr 54", itype = "periodical", originalDate = Just "Grudzien 2020", creator = Nothing, format = Just "pdf", lang = Just "pol", finalUrl = "https://www.smpopowice.pl/gazeta/2020_12_nasze-popowice-nr_54.pdf", description = Nothing}
ShadowItem {url = Just "https://www.smpopowice.pl/gazeta/2019_03_nasze-popowice-nr_50.pdf", title = "Marzec 2019 \"Nasze popowice\" nr 50", itype = "periodical", originalDate = Just "Marzec 2019", creator = Nothing, format = Just "pdf", lang = Just "pol", finalUrl = "https://www.smpopowice.pl/gazeta/2019_03_nasze-popowice-nr_50.pdf", description = Nothing}
ShadowItem {url = Just "https://www.smpopowice.pl/gazeta/2019_05_nasze-popowice-nr_51.pdf", title = "Maj 2019 \"Nasze popowice\" nr 51", itype = "periodical", originalDate = Just "Maj 2019", creator = Nothing, format = Just "pdf", lang = Just "pol", finalUrl = "https://www.smpopowice.pl/gazeta/2019_05_nasze-popowice-nr_51.pdf", description = Nothing}
ShadowItem {url = Just "https://www.smpopowice.pl/gazeta/2019_12_nasze-popowice-nr_52.pdf", title = "Grudzien 2019 \"Nasze popowice\" nr 52", itype = "periodical", originalDate = Just "Grudzien 2019", creator = Nothing, format = Just "pdf", lang = Just "pol", finalUrl = "https://www.smpopowice.pl/gazeta/2019_12_nasze-popowice-nr_52.pdf", description = Nothing}
ShadowItem {url = Just "https://www.smpopowice.pl/gazeta/2018_04_nasze-popowice-nr_47.pdf", title = "Kwiecien 2018 \"Nasze popowice\" nr 47", itype = "periodical", originalDate = Just "Kwiecien 2018", creator = Nothing, format = Just "pdf", lang = Just "pol", finalUrl = "https://www.smpopowice.pl/gazeta/2018_04_nasze-popowice-nr_47.pdf", description = Nothing}
ShadowItem {url = Just "https://www.smpopowice.pl/gazeta/2018_06_nasze-popowice-nr_48.pdf", title = "Czerwiec 2018 \"Nasze popowice\" nr 48", itype = "periodical", originalDate = Just "Czerwiec 2018", creator = Nothing, format = Just "pdf", lang = Just "pol", finalUrl = "https://www.smpopowice.pl/gazeta/2018_06_nasze-popowice-nr_48.pdf", description = Nothing}
ShadowItem {url = Just "https://www.smpopowice.pl/gazeta/2018_11_nasze-popowice-nr_49.pdf", title = "Listopad 2018 \"Nasze popowice\" nr 49", itype = "periodical", originalDate = Just "Listopad 2018", creator = Nothing, format = Just "pdf", lang = Just "pol", finalUrl = "https://www.smpopowice.pl/gazeta/2018_11_nasze-popowice-nr_49.pdf", description = Nothing}
ShadowItem {url = Just "https://www.smpopowice.pl/gazeta/2017_06_nasze-popowice-nr_45_cz-1.pdf", title = "Czerwiec 2017 \"Nasze popowice\" nr 45 cz.1", itype = "periodical", originalDate = Just "Czerwiec 2017", creator = Nothing, format = Just "pdf", lang = Just "pol", finalUrl = "https://www.smpopowice.pl/gazeta/2017_06_nasze-popowice-nr_45_cz-1.pdf", description = Nothing}
ShadowItem {url = Just "https://www.smpopowice.pl/gazeta/2017_06_nasze-popowice-nr_45_cz-2.pdf", title = "Czerwiec 2017 \"Nasze popowice\" nr 45 cz.2", itype = "periodical", originalDate = Just "Czerwiec 2017", creator = Nothing, format = Just "pdf", lang = Just "pol", finalUrl = "https://www.smpopowice.pl/gazeta/2017_06_nasze-popowice-nr_45_cz-2.pdf", description = Nothing}
ShadowItem {url = Just "https://www.smpopowice.pl/gazeta/2017_06_nasze-popowice-nr_45_cz-3.pdf", title = "Czerwiec 2017 \"Nasze popowice\" nr 45 cz.3", itype = "periodical", originalDate = Just "Czerwiec 2017", creator = Nothing, format = Just "pdf", lang = Just "pol", finalUrl = "https://www.smpopowice.pl/gazeta/2017_06_nasze-popowice-nr_45_cz-3.pdf", description = Nothing}
ShadowItem {url = Just "https://www.smpopowice.pl/gazeta/2017_12_nasze-popowice-nr_46.pdf", title = "Grudzien 2017 \"Nasze popowice\" nr 46", itype = "periodical", originalDate = Just "Grudzien 2017", creator = Nothing, format = Just "pdf", lang = Just "pol", finalUrl = "https://www.smpopowice.pl/gazeta/2017_12_nasze-popowice-nr_46.pdf", description = Nothing}
ShadowItem {url = Just "https://www.smpopowice.pl/gazeta/2016_06_nasze-popowice-nr_43.pdf", title = "Czerwiec 2016 \"Nasze popowice\" nr 43", itype = "periodical", originalDate = Just "Czerwiec 2016", creator = Nothing, format = Just "pdf", lang = Just "pol", finalUrl = "https://www.smpopowice.pl/gazeta/2016_06_nasze-popowice-nr_43.pdf", description = Nothing}
ShadowItem {url = Just "https://www.smpopowice.pl/gazeta/2016_12_nasze-popowice-nr_44.pdf", title = "Grudzien 2016 \"Nasze popowice\" nr 44", itype = "periodical", originalDate = Just "Grudzien 2016", creator = Nothing, format = Just "pdf", lang = Just "pol", finalUrl = "https://www.smpopowice.pl/gazeta/2016_12_nasze-popowice-nr_44.pdf", description = Nothing}
ShadowItem {url = Just "https://www.smpopowice.pl/gazeta/2015_04_nasze-popowice-nr_40.pdf", title = "Kwiecien 2015 \"Nasze popowice\" nr 40", itype = "periodical", originalDate = Just "Kwiecien 2015", creator = Nothing, format = Just "pdf", lang = Just "pol", finalUrl = "https://www.smpopowice.pl/gazeta/2015_04_nasze-popowice-nr_40.pdf", description = Nothing}
ShadowItem {url = Just "https://www.smpopowice.pl/gazeta/2015_05_nasze-popowice-nr_41.pdf", title = "Maj 2015 \"Nasze popowice\" nr 41", itype = "periodical", originalDate = Just "Maj 2015", creator = Nothing, format = Just "pdf", lang = Just "pol", finalUrl = "https://www.smpopowice.pl/gazeta/2015_05_nasze-popowice-nr_41.pdf", description = Nothing}
ShadowItem {url = Just "https://www.smpopowice.pl/gazeta/2015_12_nasze-popowice-nr_42.pdf", title = "Grudzien 2015 \"Nasze popowice\" nr 42", itype = "periodical", originalDate = Just "Grudzien 2015", creator = Nothing, format = Just "pdf", lang = Just "pol", finalUrl = "https://www.smpopowice.pl/gazeta/2015_12_nasze-popowice-nr_42.pdf", description = Nothing}
ShadowItem {url = Just "https://www.smpopowice.pl/gazeta/2014_04_nasze-popowice-nr_37.pdf", title = "Kwiecien 2014 \"Nasze popowice\" nr 37", itype = "periodical", originalDate = Just "Kwiecien 2014", creator = Nothing, format = Just "pdf", lang = Just "pol", finalUrl = "https://www.smpopowice.pl/gazeta/2014_04_nasze-popowice-nr_37.pdf", description = Nothing}
ShadowItem {url = Just "https://www.smpopowice.pl/gazeta/2014_06_nasze-popowice-nr_38.pdf", title = "Czerwiec 2014 \"Nasze popowice\" nr 38", itype = "periodical", originalDate = Just "Czerwiec 2014", creator = Nothing, format = Just "pdf", lang = Just "pol", finalUrl = "https://www.smpopowice.pl/gazeta/2014_06_nasze-popowice-nr_38.pdf", description = Nothing}
ShadowItem {url = Just "https://www.smpopowice.pl/gazeta/2014_12_nasze-popowice-nr_39.pdf", title = "Grudzien 2014 \"Nasze popowice\" nr 39", itype = "periodical", originalDate = Just "Grudzien 2014", creator = Nothing, format = Just "pdf", lang = Just "pol", finalUrl = "https://www.smpopowice.pl/gazeta/2014_12_nasze-popowice-nr_39.pdf", description = Nothing}
ShadowItem {url = Just "https://www.smpopowice.pl/gazeta/2013_03_nasze-popowice-nr_33.pdf", title = "Marzec 2013 \"Nasze popowice\" nr 33", itype = "periodical", originalDate = Just "Marzec 2013", creator = Nothing, format = Just "pdf", lang = Just "pol", finalUrl = "https://www.smpopowice.pl/gazeta/2013_03_nasze-popowice-nr_33.pdf", description = Nothing}
ShadowItem {url = Just "https://www.smpopowice.pl/gazeta/2013_06_nasze-popowice-nr_34.pdf", title = "Czerwiec 2013 \"Nasze popowice\" nr 34", itype = "periodical", originalDate = Just "Czerwiec 2013", creator = Nothing, format = Just "pdf", lang = Just "pol", finalUrl = "https://www.smpopowice.pl/gazeta/2013_06_nasze-popowice-nr_34.pdf", description = Nothing}
ShadowItem {url = Just "https://www.smpopowice.pl/gazeta/2013_11_nasze-popowice-nr_35.pdf", title = "Listopad 2013 \"Nasze popowice\" nr 35", itype = "periodical", originalDate = Just "Listopad 2013", creator = Nothing, format = Just "pdf", lang = Just "pol", finalUrl = "https://www.smpopowice.pl/gazeta/2013_11_nasze-popowice-nr_35.pdf", description = Nothing}
ShadowItem {url = Just "https://www.smpopowice.pl/gazeta/2013_12_nasze-popowice-nr_36.pdf", title = "Grudzien 2013 \"Nasze popowice\" nr 36", itype = "periodical", originalDate = Just "Grudzien 2013", creator = Nothing, format = Just "pdf", lang = Just "pol", finalUrl = "https://www.smpopowice.pl/gazeta/2013_12_nasze-popowice-nr_36.pdf", description = Nothing}
ShadowItem {url = Just "https://www.smpopowice.pl/gazeta/2012_04_nasze-popowice-nr_29.pdf", title = "Kwiecien 2012 \"Nasze popowice\" nr 29", itype = "periodical", originalDate = Just "Kwiecien 2012", creator = Nothing, format = Just "pdf", lang = Just "pol", finalUrl = "https://www.smpopowice.pl/gazeta/2012_04_nasze-popowice-nr_29.pdf", description = Nothing}
ShadowItem {url = Just "https://www.smpopowice.pl/gazeta/2012_05_nasze-popowice-nr_30.pdf", title = "Maj 2012 \"Nasze popowice\" nr 30", itype = "periodical", originalDate = Just "Maj 2012", creator = Nothing, format = Just "pdf", lang = Just "pol", finalUrl = "https://www.smpopowice.pl/gazeta/2012_05_nasze-popowice-nr_30.pdf", description = Nothing}
ShadowItem {url = Just "https://www.smpopowice.pl/gazeta/2012_10_nasze-popowice-nr_31.pdf", title = "Pazdziernik 2012 \"Nasze popowice\" nr 31", itype = "periodical", originalDate = Just "Pazdziernik 2012", creator = Nothing, format = Just "pdf", lang = Just "pol", finalUrl = "https://www.smpopowice.pl/gazeta/2012_10_nasze-popowice-nr_31.pdf", description = Nothing}
ShadowItem {url = Just "https://www.smpopowice.pl/gazeta/2012_12_nasze-popowice-nr_32.pdf", title = "Grudzien 2012 \"Nasze popowice\" nr 32", itype = "periodical", originalDate = Just "Grudzien 2012", creator = Nothing, format = Just "pdf", lang = Just "pol", finalUrl = "https://www.smpopowice.pl/gazeta/2012_12_nasze-popowice-nr_32.pdf", description = Nothing}
ShadowItem {url = Just "https://www.smpopowice.pl/gazeta/2011_04_nasze-popowice-nr_25.pdf", title = "Kwiecien 2011 \"Nasze popowice\" nr 25", itype = "periodical", originalDate = Just "Kwiecien 2011", creator = Nothing, format = Just "pdf", lang = Just "pol", finalUrl = "https://www.smpopowice.pl/gazeta/2011_04_nasze-popowice-nr_25.pdf", description = Nothing}
ShadowItem {url = Just "https://www.smpopowice.pl/gazeta/2011_05_nasze-popowice-nr_26.pdf", title = "Maj 2011 \"Nasze popowice\" nr 26", itype = "periodical", originalDate = Just "Maj 2011", creator = Nothing, format = Just "pdf", lang = Just "pol", finalUrl = "https://www.smpopowice.pl/gazeta/2011_05_nasze-popowice-nr_26.pdf", description = Nothing}
ShadowItem {url = Just "https://www.smpopowice.pl/gazeta/2011_11_nasze-popowice-nr_27.pdf", title = "Listopad 2011 \"Nasze popowice\" nr 27", itype = "periodical", originalDate = Just "Listopad 2011", creator = Nothing, format = Just "pdf", lang = Just "pol", finalUrl = "https://www.smpopowice.pl/gazeta/2011_11_nasze-popowice-nr_27.pdf", description = Nothing}
ShadowItem {url = Just "https://www.smpopowice.pl/gazeta/2011_12_nasze-popowice-nr_28.pdf", title = "Grudzien 2011 \"Nasze popowice\" nr 28", itype = "periodical", originalDate = Just "Grudzien 2011", creator = Nothing, format = Just "pdf", lang = Just "pol", finalUrl = "https://www.smpopowice.pl/gazeta/2011_12_nasze-popowice-nr_28.pdf", description = Nothing}
ShadowItem {url = Just "https://www.smpopowice.pl/gazeta/2010_03_nasze-popowice-nr_21.pdf", title = "Marzec 2010 \"Nasze popowice\" nr 21", itype = "periodical", originalDate = Just "Marzec 2010", creator = Nothing, format = Just "pdf", lang = Just "pol", finalUrl = "https://www.smpopowice.pl/gazeta/2010_03_nasze-popowice-nr_21.pdf", description = Nothing}
ShadowItem {url = Just "https://www.smpopowice.pl/gazeta/2010_05_nasze-popowice-nr_22.pdf", title = "Maj 2010 \"Nasze popowice\" nr 22", itype = "periodical", originalDate = Just "Maj 2010", creator = Nothing, format = Just "pdf", lang = Just "pol", finalUrl = "https://www.smpopowice.pl/gazeta/2010_05_nasze-popowice-nr_22.pdf", description = Nothing}
ShadowItem {url = Just "https://www.smpopowice.pl/gazeta/2010_09_nasze-popowice-nr_23.pdf", title = "Wrzesien 2010 \"Nasze popowice\" nr 23", itype = "periodical", originalDate = Just "Wrzesien 2010", creator = Nothing, format = Just "pdf", lang = Just "pol", finalUrl = "https://www.smpopowice.pl/gazeta/2010_09_nasze-popowice-nr_23.pdf", description = Nothing}
ShadowItem {url = Just "https://www.smpopowice.pl/gazeta/2010_12_nasze-popowice-nr_24.pdf", title = "Grudzien 2010 \"Nasze popowice\" nr 24", itype = "periodical", originalDate = Just "Grudzien 2010", creator = Nothing, format = Just "pdf", lang = Just "pol", finalUrl = "https://www.smpopowice.pl/gazeta/2010_12_nasze-popowice-nr_24.pdf", description = Nothing}
ShadowItem {url = Just "http://www.smpopowice.pl/gazeta/2009_03_nasze-popowice-nr_16.pdf", title = "2009_03_nasze-popowice-nr_16.pdf", itype = "periodical", originalDate = Just "2009_03", creator = Nothing, format = Just "pdf", lang = Just "pol", finalUrl = "http://www.smpopowice.pl/gazeta/2009_03_nasze-popowice-nr_16.pdf", description = Nothing}
ShadowItem {url = Just "http://www.smpopowice.pl/gazeta/2009_05_nasze-popowice-nr_17.pdf", title = "2009_05_nasze-popowice-nr_17.pdf", itype = "periodical", originalDate = Just "2009_05", creator = Nothing, format = Just "pdf", lang = Just "pol", finalUrl = "http://www.smpopowice.pl/gazeta/2009_05_nasze-popowice-nr_17.pdf", description = Nothing}
ShadowItem {url = Just "http://www.smpopowice.pl/gazeta/2009_07_nasze-popowice-nr_18.pdf", title = "2009_07_nasze-popowice-nr_18.pdf", itype = "periodical", originalDate = Just "2009_07", creator = Nothing, format = Just "pdf", lang = Just "pol", finalUrl = "http://www.smpopowice.pl/gazeta/2009_07_nasze-popowice-nr_18.pdf", description = Nothing}
ShadowItem {url = Just "http://www.smpopowice.pl/gazeta/2009_10_nasze-popowice-nr-19.pdf", title = "2009_10_nasze-popowice-nr-19.pdf", itype = "periodical", originalDate = Just "2009_10", creator = Nothing, format = Just "pdf", lang = Just "pol", finalUrl = "http://www.smpopowice.pl/gazeta/2009_10_nasze-popowice-nr-19.pdf", description = Nothing}
ShadowItem {url = Just "http://www.smpopowice.pl/gazeta/2009_12_nasze-popowice-nr_20.pdf", title = "2009_12_nasze-popowice-nr_20.pdf", itype = "periodical", originalDate = Just "2009_12", creator = Nothing, format = Just "pdf", lang = Just "pol", finalUrl = "http://www.smpopowice.pl/gazeta/2009_12_nasze-popowice-nr_20.pdf", description = Nothing}
ShadowItem {url = Just "http://www.smpopowice.pl/gazeta/2008_02_nasze-popowice-nr_11.pdf", title = "2008_02_nasze-popowice-nr_11.pdf", itype = "periodical", originalDate = Just "2008_02", creator = Nothing, format = Just "pdf", lang = Just "pol", finalUrl = "http://www.smpopowice.pl/gazeta/2008_02_nasze-popowice-nr_11.pdf", description = Nothing}
ShadowItem {url = Just "http://www.smpopowice.pl/gazeta/2008_05_nasze%20popowice-nr_12.pdf", title = "2008_05_nasze popowice-nr_12.pdf", itype = "periodical", originalDate = Just "2008_05", creator = Nothing, format = Just "pdf", lang = Just "pol", finalUrl = "http://www.smpopowice.pl/gazeta/2008_05_nasze%20popowice-nr_12.pdf", description = Nothing}
ShadowItem {url = Just "http://www.smpopowice.pl/gazeta/2008_07_nasze-popowice-nr_13-.pdf", title = "2008_07_nasze-popowice-nr_13-.pdf", itype = "periodical", originalDate = Just "2008_07", creator = Nothing, format = Just "pdf", lang = Just "pol", finalUrl = "http://www.smpopowice.pl/gazeta/2008_07_nasze-popowice-nr_13-.pdf", description = Nothing}
ShadowItem {url = Just "http://www.smpopowice.pl/gazeta/2008_10_nasze-popowice-nr_14.pdf", title = "2008_10_nasze-popowice-nr_14.pdf", itype = "periodical", originalDate = Just "2008_10", creator = Nothing, format = Just "pdf", lang = Just "pol", finalUrl = "http://www.smpopowice.pl/gazeta/2008_10_nasze-popowice-nr_14.pdf", description = Nothing}
ShadowItem {url = Just "http://www.smpopowice.pl/gazeta/2008_12_nasze-popowice-nr_15.pdf", title = "2008_12_nasze-popowice-nr_15.pdf", itype = "periodical", originalDate = Just "2008_12", creator = Nothing, format = Just "pdf", lang = Just "pol", finalUrl = "http://www.smpopowice.pl/gazeta/2008_12_nasze-popowice-nr_15.pdf", description = Nothing}
ShadowItem {url = Just "http://www.smpopowice.pl/gazeta/2007_03_nasze-popowice-nr_06.pdf", title = "2007_03_nasze-popowice-nr_06.pdf", itype = "periodical", originalDate = Just "2007_03", creator = Nothing, format = Just "pdf", lang = Just "pol", finalUrl = "http://www.smpopowice.pl/gazeta/2007_03_nasze-popowice-nr_06.pdf", description = Nothing}
ShadowItem {url = Just "http://www.smpopowice.pl/gazeta/2007_04_nasze%20popowice-nr_07.pdf", title = "2007_04_nasze popowice-nr_07.pdf", itype = "periodical", originalDate = Just "2007_04", creator = Nothing, format = Just "pdf", lang = Just "pol", finalUrl = "http://www.smpopowice.pl/gazeta/2007_04_nasze%20popowice-nr_07.pdf", description = Nothing}
ShadowItem {url = Just "http://www.smpopowice.pl/gazeta/2007_06_nasze%20popowice-nr_08.pdf", title = "2007_06_nasze popowice-nr_08.pdf", itype = "periodical", originalDate = Just "2007_06", creator = Nothing, format = Just "pdf", lang = Just "pol", finalUrl = "http://www.smpopowice.pl/gazeta/2007_06_nasze%20popowice-nr_08.pdf", description = Nothing}
ShadowItem {url = Just "http://www.smpopowice.pl/gazeta/2007_09_nasze%20popowice-nr_09.pdf", title = "2007_09_nasze popowice-nr_09.pdf", itype = "periodical", originalDate = Just "2007_09", creator = Nothing, format = Just "pdf", lang = Just "pol", finalUrl = "http://www.smpopowice.pl/gazeta/2007_09_nasze%20popowice-nr_09.pdf", description = Nothing}
ShadowItem {url = Just "http://www.smpopowice.pl/gazeta/2007_12_nasze%20popowice-nr_10.pdf", title = "2007_12_nasze popowice-nr_10.pdf", itype = "periodical", originalDate = Just "2007_12", creator = Nothing, format = Just "pdf", lang = Just "pol", finalUrl = "http://www.smpopowice.pl/gazeta/2007_12_nasze%20popowice-nr_10.pdf", description = Nothing}
ShadowItem {url = Just "http://www.smpopowice.pl/gazeta/2006_03_nasze-popowice-nr_02.pdf", title = "2006_03_nasze-popowice-nr_02.pdf", itype = "periodical", originalDate = Just "2006_03", creator = Nothing, format = Just "pdf", lang = Just "pol", finalUrl = "http://www.smpopowice.pl/gazeta/2006_03_nasze-popowice-nr_02.pdf", description = Nothing}
ShadowItem {url = Just "http://www.smpopowice.pl/gazeta/2006_05_nasze-popowice-nr_03.pdf", title = "2006_05_nasze-popowice-nr_03.pdf", itype = "periodical", originalDate = Just "2006_05", creator = Nothing, format = Just "pdf", lang = Just "pol", finalUrl = "http://www.smpopowice.pl/gazeta/2006_05_nasze-popowice-nr_03.pdf", description = Nothing}
ShadowItem {url = Just "http://www.smpopowice.pl/gazeta/2006_10_nasze-popowice-nr_04.pdf", title = "2006_10_nasze-popowice-nr_04.pdf", itype = "periodical", originalDate = Just "2006_10", creator = Nothing, format = Just "pdf", lang = Just "pol", finalUrl = "http://www.smpopowice.pl/gazeta/2006_10_nasze-popowice-nr_04.pdf", description = Nothing}
ShadowItem {url = Just "http://www.smpopowice.pl/gazeta/2006_12_nasze-popowice-nr_05.pdf", title = "2006_12_nasze-popowice-nr_05.pdf", itype = "periodical", originalDate = Just "2006_12", creator = Nothing, format = Just "pdf", lang = Just "pol", finalUrl = "http://www.smpopowice.pl/gazeta/2006_12_nasze-popowice-nr_05.pdf", description = Nothing}
ShadowItem {url = Just "http://www.smpopowice.pl/gazeta/2005_12_nasze-popowice-nr_01.pdf", title = "2005_12_nasze-popowice-nr_01.pdf", itype = "periodical", originalDate = Just "2005_12", creator = Nothing, format = Just "pdf", lang = Just "pol", finalUrl = "http://www.smpopowice.pl/gazeta/2005_12_nasze-popowice-nr_01.pdf", description = Nothing}

67
shadow-library.cabal Normal file
View File

@ -0,0 +1,67 @@
name: shadow-library
version: 0.1.0.0
synopsis: Initial project template from stack
description: Please see README.md
homepage: http://github.com/name/project
license: Proprietary
license-file: LICENSE
author: Your name here
maintainer: your.address@example.com
-- copyright:
category: Web
build-type: Simple
-- extra-source-files:
cabal-version: >=1.10
library
hs-source-dirs: .
exposed-modules: ShadowLibrary.Core
build-depends: base >= 4.7 && < 5
, HTTP
, hxt
, hxt-http
, hxt-xpath
, hxt-curl
, MissingH
, monad-logger
, mtl
, network-uri
, regex-pcre
, regex-tdfa
, resourcet
, text
, time
, transformers
, tz
default-language: Haskell2010
-- executable maly-modelarz-exe
-- hs-source-dirs: app
-- main-is: malymodelarz.hs
-- ghc-options: -threaded -rtsopts -with-rtsopts=-N
-- build-depends: base
-- , hxt
-- , hxt-curl
-- , hxt-xpath
-- , MissingH
-- , regex-posix
-- , shadow-library
-- default-language: Haskell2010
executable gazety
hs-source-dirs: app
main-is: gazety.hs
ghc-options: -threaded -rtsopts -with-rtsopts=-N
build-depends: base
, hxt
, hxt-curl
, hxt-xpath
, MissingH
, regex-posix
, shadow-library
default-language: Haskell2010
source-repository head
type: git
location: https://github.com/name/project

5
stack.yaml Normal file
View File

@ -0,0 +1,5 @@
flags: {}
packages:
- '.'
extra-deps: [hxt-xpath-9.1.2.2]
resolver: lts-11.9

19
stack.yaml.lock Normal file
View File

@ -0,0 +1,19 @@
# This file was autogenerated by Stack.
# You should not edit this file by hand.
# For more information, please see the documentation at:
# https://docs.haskellstack.org/en/stable/lock_files
packages:
- completed:
hackage: hxt-xpath-9.1.2.2@sha256:9cd590ae93a04573db8f90fa4094625ebd97dded45da7667c577ce6b38a42900,1999
pantry-tree:
size: 2225
sha256: aee2f75974e868ff429b8ff349a29667536c60397098f5dfedc968d1951511bb
original:
hackage: hxt-xpath-9.1.2.2
snapshots:
- completed:
size: 507596
url: https://raw.githubusercontent.com/commercialhaskell/stackage-snapshots/master/lts/11/9.yaml
sha256: 42f472dbf06482da1b3319241f3e3b3593a45bd7d4f537d2789f21386b9b2ad3
original: lts-11.9