title extraction
This commit is contained in:
parent
1f23d94e7c
commit
0114a102d4
@ -17,7 +17,7 @@ extractRecords = extractLinks "//a[contains(@href, '.pdf')]"
|
|||||||
|
|
||||||
toShadowItem :: String -> ShadowItem
|
toShadowItem :: String -> ShadowItem
|
||||||
toShadowItem url =
|
toShadowItem url =
|
||||||
(defaultShadowItem url []) {
|
(defaultShadowItem url title) {
|
||||||
originalDate = Just date,
|
originalDate = Just date,
|
||||||
itype = "periodical",
|
itype = "periodical",
|
||||||
format = Just "pdf",
|
format = Just "pdf",
|
||||||
@ -25,6 +25,8 @@ toShadowItem url =
|
|||||||
}
|
}
|
||||||
where
|
where
|
||||||
date = last $ getAllTextMatches $ url =~ "(19[0-9][0-9]|20[0-9][0-9])" :: String
|
date = last $ getAllTextMatches $ url =~ "(19[0-9][0-9]|20[0-9][0-9])" :: String
|
||||||
|
titleToProcess = last $ getAllTextMatches $ url =~ "/[a-zA-Z ]+[-_]" :: String
|
||||||
|
title = titleToProcess =~ "[a-zA-Z ]+" :: String
|
||||||
|
|
||||||
|
|
||||||
main = do
|
main = do
|
||||||
|
Loading…
Reference in New Issue
Block a user