This commit is contained in:
Wojtek 2019-12-11 18:55:46 +00:00
parent d20baf5bd8
commit 4f05c132fd

View File

@ -4,7 +4,6 @@ from scrapy.linkextractors import LinkExtractor
import re
from items import MovieItem
from fina_scrap.items import MovieItem
class FinaSpider(CrawlSpider):
name = 'repozytorium.fn.org.pl'
@ -27,12 +26,12 @@ class FinaSpider(CrawlSpider):
def parse_item(self, response):
    """Build a MovieItem from a crawled response.

    Fills three fields:
      * ``title`` - text of the span selected inside ``block-fnfilm-fnfilm``
        (``None`` when the node is absent).
      * ``mp4``   - list of every ``.mp4`` path passed to ``encodeURI(...)``
        in the inline script of the same block; empty list when the script
        is missing or contains no match.
      * ``url``   - the URL of the response itself.

    Returns:
        The populated MovieItem.
    """

    def get_mp4(text):
        """Return all ``.mp4`` paths found in *text* (a list, maybe empty)."""
        # ``.get()`` yields None when the XPath matches nothing; re.findall
        # would raise TypeError on None, so report "no links" instead.
        if not text:
            return []
        # Raw string: ``\(`` / ``\.`` are regex escapes, not string escapes.
        return re.findall(r'file: encodeURI\("(.+?\.mp4)"\)', text)

    item = MovieItem()
    item['title'] = response.xpath('//*[@id="block-fnfilm-fnfilm"]/div/div[1]/div/div/span/text()').get()
    script_text = response.xpath('//*[@id="block-fnfilm-fnfilm"]/div/div[2]/div/script/text()').get()
    # Plain local call: the helper is a nested function, not a spider method.
    item['mp4'] = get_mp4(script_text)
    item['url'] = response.url
    return item