forked from filipg/twilight-library
title extraction
This commit is contained in:
parent
1f23d94e7c
commit
0114a102d4
@ -17,7 +17,7 @@ extractRecords = extractLinks "//a[contains(@href, '.pdf')]"
|
||||
|
||||
toShadowItem :: String -> ShadowItem
|
||||
toShadowItem url =
|
||||
(defaultShadowItem url []) {
|
||||
(defaultShadowItem url title) {
|
||||
originalDate = Just date,
|
||||
itype = "periodical",
|
||||
format = Just "pdf",
|
||||
@ -25,6 +25,8 @@ toShadowItem url =
|
||||
}
|
||||
where
|
||||
date = last $ getAllTextMatches $ url =~ "(19[0-9][0-9]|20[0-9][0-9])" :: String
|
||||
titleToProcess = last $ getAllTextMatches $ url =~ "/[a-zA-Z ]+[-_]" :: String
|
||||
title = titleToProcess =~ "[a-zA-Z ]+" :: String
|
||||
|
||||
|
||||
main = do
|
||||
|
Loading…
Reference in New Issue
Block a user