forked from filipg/twilight-library
test
This commit is contained in:
parent
c9457d0168
commit
07cf42c6d8
71
app/bip.hs
Normal file
71
app/bip.hs
Normal file
@ -0,0 +1,71 @@
|
|||||||
|
|
||||||
|
{-# LANGUAGE Arrows, NoMonomorphismRestriction #-}
|
||||||
|
import ShadowLibrary.Core
|
||||||
|
|
||||||
|
import Text.XML.HXT.Core
|
||||||
|
import Text.XML.HXT.XPath
|
||||||
|
import Data.List
|
||||||
|
import Data.List.Utils (replace)
|
||||||
|
|
||||||
|
import Text.Regex.Posix
|
||||||
|
import Text.Printf
|
||||||
|
import System.IO
|
||||||
|
|
||||||
|
|
||||||
|
extractYears = extractLinksWithText "//div[@class='inner clearfix']//div[@class='row']//div[@class='col-sm-6']//ul//li//a" -- pary adres-tytuł
|
||||||
|
runYearsExtractor url = runX $ (arr (const url) >>> setTraceLevel 1 >>> extractYears)
|
||||||
|
|
||||||
|
extractNewFiles = extractLinksWithText "//a[@title='Pobierz plik']" -- pary adres-tytuł
|
||||||
|
runNewFilesExtractor url = runX $ (arr (const url) >>> setTraceLevel 1 >>> extractNewFiles)
|
||||||
|
|
||||||
|
extractOldFiles = extractLinksWithText "//div[@class='content']//div[@class='inner clearfix']//ul//li//a" -- pary adres-tytuł
|
||||||
|
runOldFilesExtractor url = runX $ (arr (const url) >>> setTraceLevel 1 >>> extractOldFiles)
|
||||||
|
|
||||||
|
--367-376
|
||||||
|
extractOldYears = extractLinksWithText "//nav[@class='nav-left']//ul//li[@class='single-article']/a" -- pary adres-tytuł
|
||||||
|
runOldYearsExtractor url = runX $ (arr (const url) >>> setTraceLevel 1 >>> extractOldYears)
|
||||||
|
|
||||||
|
mapToUrl :: ([Char], [Char]) -> [Char]
|
||||||
|
mapToUrl (url, title) = url
|
||||||
|
|
||||||
|
merge [] ys = ys
|
||||||
|
merge (x:xs) ys = x:merge ys xs
|
||||||
|
|
||||||
|
repeatNTimes 0 = return []
|
||||||
|
repeatNTimes n = do
|
||||||
|
let link = "https://bip.um.gdynia.pl/module/Contents/controller/Default/action/loadSubcategory/subcategory_id/" ++ show (377-n)
|
||||||
|
items <- runOldYearsExtractor link
|
||||||
|
let urls = map mapToUrl items
|
||||||
|
recursive <- repeatNTimes (n-1)
|
||||||
|
let results = merge urls recursive
|
||||||
|
return results
|
||||||
|
|
||||||
|
flatten :: [[a]] -> [a]
|
||||||
|
flatten xs = (\z n -> foldr (\x y -> foldr z y x) n xs) (:) []
|
||||||
|
|
||||||
|
getAllPages url = do
|
||||||
|
items <- runYearsExtractor url
|
||||||
|
let urls = map mapToUrl items
|
||||||
|
newFiles <- mapM runNewFilesExtractor urls
|
||||||
|
let fileUrls = map mapToUrl (flatten newFiles)
|
||||||
|
return fileUrls
|
||||||
|
|
||||||
|
getOldFiles = do
|
||||||
|
oldPages <- repeatNTimes 10
|
||||||
|
--let sub = take 2 oldPages
|
||||||
|
oldFiles <- mapM runOldFilesExtractor oldPages
|
||||||
|
let fileUrls = map mapToUrl (flatten oldFiles)
|
||||||
|
return fileUrls
|
||||||
|
|
||||||
|
main = do
|
||||||
|
let start = "https://bip.um.gdynia.pl/wyszukiwarka-uchwal-rady-miasta,7485/wyszukiwarka-uchwal,504923"
|
||||||
|
oldResults <- getOldFiles
|
||||||
|
oldF <- openFile "oldFiles.json" WriteMode
|
||||||
|
hPrint oldF oldResults
|
||||||
|
hClose oldF
|
||||||
|
|
||||||
|
--print (length results)
|
||||||
|
newResults <- getAllPages start
|
||||||
|
newF <- openFile "newFiles.json" WriteMode
|
||||||
|
hPrint newF newResults
|
||||||
|
hClose newF
|
8267
newFiles.json
Normal file
8267
newFiles.json
Normal file
File diff suppressed because it is too large
Load Diff
1921
oldFiles.json
Normal file
1921
oldFiles.json
Normal file
File diff suppressed because it is too large
Load Diff
@ -72,6 +72,18 @@ executable pbsociety
|
|||||||
, shadow-library
|
, shadow-library
|
||||||
default-language: Haskell2010
|
default-language: Haskell2010
|
||||||
|
|
||||||
|
executable bip
|
||||||
|
hs-source-dirs: app
|
||||||
|
main-is: bip.hs
|
||||||
|
ghc-options: -threaded -rtsopts -with-rtsopts=-N
|
||||||
|
build-depends: base
|
||||||
|
, hxt
|
||||||
|
, hxt-xpath
|
||||||
|
, MissingH
|
||||||
|
, regex-posix
|
||||||
|
, shadow-library
|
||||||
|
default-language: Haskell2010
|
||||||
|
|
||||||
source-repository head
|
source-repository head
|
||||||
type: git
|
type: git
|
||||||
location: https://github.com/name/project
|
location: https://github.com/name/project
|
||||||
|
Loading…
Reference in New Issue
Block a user