spiders
This commit is contained in:
parent
d20baf5bd8
commit
4f05c132fd
@ -4,7 +4,6 @@ from scrapy.linkextractors import LinkExtractor
|
|||||||
import re
|
import re
|
||||||
from items import MovieItem
|
from items import MovieItem
|
||||||
|
|
||||||
from fina_scrap.items import MovieItem
|
|
||||||
|
|
||||||
class FinaSpider(CrawlSpider):
|
class FinaSpider(CrawlSpider):
|
||||||
name = 'repozytorium.fn.org.pl'
|
name = 'repozytorium.fn.org.pl'
|
||||||
@ -27,12 +26,12 @@ class FinaSpider(CrawlSpider):
|
|||||||
|
|
||||||
def parse_item(self, response):
|
def parse_item(self, response):
|
||||||
|
|
||||||
def getMp4(self, text):
|
def getMp4(text):
|
||||||
x = re.findall('file: encodeURI\("(.+?\.mp4)"\)',text)
|
x = re.findall('file: encodeURI\("(.+?\.mp4)"\)',text)
|
||||||
return x
|
return x
|
||||||
|
|
||||||
item = MovieItem()
|
item = MovieItem()
|
||||||
item['title'] = response.xpath('//*[@id="block-fnfilm-fnfilm"]/div/div[1]/div/div/span/text()').get()
|
item['title'] = response.xpath('//*[@id="block-fnfilm-fnfilm"]/div/div[1]/div/div/span/text()').get()
|
||||||
item['mp4'] = self.getMp4(response.xpath('//*[@id="block-fnfilm-fnfilm"]/div/div[2]/div/script/text()').get())
|
item['mp4'] = getMp4(response.xpath('//*[@id="block-fnfilm-fnfilm"]/div/div[2]/div/script/text()').get())
|
||||||
item['url'] = response.url
|
item['url'] = response.url
|
||||||
return item
|
return item
|
||||||
|
Loading…
Reference in New Issue
Block a user