From 0114a102d4424f00128d6a0d8257188cf936195c Mon Sep 17 00:00:00 2001 From: s470623 Date: Wed, 15 Jun 2022 22:27:32 +0200 Subject: [PATCH] title extraction --- app/teatrLalek.hs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/app/teatrLalek.hs b/app/teatrLalek.hs index 575cc52..4347b23 100644 --- a/app/teatrLalek.hs +++ b/app/teatrLalek.hs @@ -17,7 +17,7 @@ extractRecords = extractLinks "//a[contains(@href, '.pdf')]" toShadowItem :: String -> ShadowItem toShadowItem url = - (defaultShadowItem url []) { + (defaultShadowItem url title) { originalDate = Just date, itype = "periodical", format = Just "pdf", @@ -25,6 +25,8 @@ toShadowItem url = } where date = last $ getAllTextMatches $ url =~ "(19[0-9][0-9]|20[0-9][0-9])" :: String + titleToProcess = last $ getAllTextMatches $ url =~ "/[a-zA-Z ]+[-_]" :: String + title = titleToProcess =~ "[a-zA-Z ]+" :: String main = do