From 8eedac11d771e114f73487d29deebf5d507da33f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C5=81ukasz=20G=C3=B3reczny?=
Date: Sun, 4 Apr 2021 00:46:19 +0200
Subject: [PATCH] change file

---
Summary of the change (this note is not part of the commit message):
  * extractRecords: rewrites month names to two-digit numbers and spaces
    to "-" in the second component of each record's first element.
  * fixTitle (new): returns the title unchanged when it matches
    "Glos Maszynisty .*" (the regex is not anchored), otherwise prepends
    "Glos Maszynisty ".
  * getDate: now returns String instead of Maybe String; the first
    "[0-9]*-?[0-9]{2}-[0-9]{4}" match is split on "-" and its parts are
    joined in reverse order; "" is returned when nothing matches.

 app/maszynista.hs | 55 ++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 47 insertions(+), 8 deletions(-)

diff --git a/app/maszynista.hs b/app/maszynista.hs
index 622a67e..1cd282a 100644
--- a/app/maszynista.hs
+++ b/app/maszynista.hs
@@ -6,7 +6,8 @@ import Text.XML.HXT.Core
 import Text.XML.HXT.XPath
 -- import Text.XML.HXT.Curl
 import Data.List
-import Data.List.Utils (replace)
+
+import Data.List.Utils (replace, split, join)
 import Text.Regex.Posix
 import Text.Printf
 
@@ -25,7 +26,39 @@ extractRecords = extractLinksWithText "//section[@class='widget widget_zzm_gm_ar
                  >>> first (arr $ first (arr $ second(arr $ replace "ś" "s")))
                  >>> first (arr $ first (arr $ second(arr $ replace "ź" "z")))
                  >>> first (arr $ first (arr $ second(arr $ replace "ł" "l")))
-
+                 >>> first (arr $ second (arr $ replace "Styczen" "01"))
+                 >>> first (arr $ second (arr $ replace "styczen" "01"))
+                 >>> first (arr $ second (arr $ replace "Luty" "02"))
+                 >>> first (arr $ second (arr $ replace "luty" "02"))
+                 >>> first (arr $ second (arr $ replace "Marzec" "03"))
+                 >>> first (arr $ second (arr $ replace "marzec" "03"))
+                 >>> first (arr $ second (arr $ replace "Kwiecien" "04"))
+                 >>> first (arr $ second (arr $ replace "kwiecien" "04"))
+                 >>> first (arr $ second (arr $ replace "Maj" "05"))
+                 >>> first (arr $ second (arr $ replace "maj" "05"))
+                 >>> first (arr $ second (arr $ replace "Czerwiec" "06"))
+                 >>> first (arr $ second (arr $ replace "czerwiec" "06"))
+                 >>> first (arr $ second (arr $ replace "Lipiec" "07"))
+                 >>> first (arr $ second (arr $ replace "lipiec" "07"))
+                 >>> first (arr $ second (arr $ replace "Sierpien" "08"))
+                 >>> first (arr $ second (arr $ replace "sierpien" "08"))
+                 >>> first (arr $ second (arr $ replace "sierpnia" "08"))
+                 >>> first (arr $ second (arr $ replace "Wrzesien" "09"))
+                 >>> first (arr $ second (arr $ replace "wrzesien" "09" ))
+                 >>> first (arr $ second (arr $ replace "wrzesnia" "09" ))
+                 >>> first (arr $ second (arr $ replace "pazdziernika" "10" ))
+                 >>> first (arr $ second (arr $ replace "Pazdziernik" "10" ))
+                 >>> first (arr $ second (arr $ replace "pazdziernik" "10" ))
+                 >>> first (arr $ second (arr $ replace "listopada" "11" ))
+                 >>> first (arr $ second (arr $ replace "Listopad" "11" ))
+                 >>> first (arr $ second (arr $ replace "listopad" "11" ))
+                 >>> first (arr $ second (arr $ replace "Grudzien" "12"))
+                 >>> first (arr $ second (arr $ replace "grudzien" "12"))
+                 >>> first (arr $ second (arr $ replace "grudnia" "12"))
+                 >>> first (arr $ second (arr $ replace " " "-"))
+
+
+
 
 
 -- ... a tutaj te trójki przerabiamy do docelowej struktury ShadowItem
@@ -37,13 +70,19 @@ toShadowItem (((url, articleTitle), articleDatePart), year) =
     format = Just "pdf",
     finalUrl = url
     }
-  where title = articleTitle
+  where title = fixTitle articleTitle
+
+fixTitle :: String -> String
+fixTitle title
+  | title =~ "Glos Maszynisty .*" = title
+  | otherwise = "Glos Maszynisty " ++ title
+
+getDate :: String -> String
+getDate url =
+  case url =~~ ("[0-9]*-?[0-9]{2}-[0-9]{4}" :: String) of
+    Just date -> join "-" (reverse (split "-" date))
+    Nothing -> ""
 
-getDate :: String -> Maybe String
-getDate url =
-  case url =~~ ("[0-9]* ?[a-zA-Z]+ [0-9]{4}" :: String) of
-    Just year -> Just year
-    otherwise -> Nothing
 
 
 main = do