sequences added

This commit is contained in:
Wojtek 2019-12-11 22:49:43 +00:00
parent f6a9702b3a
commit 52241b5d66
2 changed files with 25 additions and 2 deletions

View File

@ -2,7 +2,7 @@ import scrapy
from scrapy.spiders import CrawlSpider, Rule from scrapy.spiders import CrawlSpider, Rule
from scrapy.linkextractors import LinkExtractor from scrapy.linkextractors import LinkExtractor
import re import re
from items import MovieItem from items import MovieItem, Description, Sequence
class FinaSpider(CrawlSpider): class FinaSpider(CrawlSpider):
@ -37,5 +37,17 @@ class FinaSpider(CrawlSpider):
item['url'] = response.url item['url'] = response.url
#TODO #TODO
#description: #description:
desc = Description()
desc['fullTitle'] = response.xpath('//*[@id="block-fnfilm-fnfilm"]/div/div[3]/div/div[2]/span[2]/text()').get()
desc['desc'] = response.xpath('//*[@id="content"]/div[3]/article/div[1]/div/div[2]/div[1]/div/div[2]/span/p/text()').get()
desc['date'] = response.xpath('//*[@id="block-fnfilm-fnfilm"]/div/div[3]/div/div[3]/span[2]/text()').get()
seq = {}
for row in response.xpath('//*[@id="content"]/div[3]/article/div[1]/div/div[2]/div[2]/div/div/table/tr'):
seq[ row.xpath('td[1]/span/text()').get() ] = row.xpath('td[2]/span/text()').get()
desc['sequence'] = dict(seq)
item['description'] = dict(desc)
return item return item

View File

@ -1,7 +1,18 @@
from scrapy.item import Item, Field from scrapy.item import Item, Field
class Sequence(Item):
    """Scrapy item with one time/value pair.

    NOTE(review): the visible part of the spider imports this class but
    stores the scraped table rows in a plain dict instead of creating
    ``Sequence`` instances -- confirm whether this item is still needed.
    """

    # Presumably the value/description of the entry -- TODO confirm.
    seqVal = Field()
    # Presumably the time marker of the entry -- TODO confirm.
    seqTime = Field()
class Description(Item):
    """Scrapy item grouping the descriptive metadata scraped for one movie.

    The spider fills ``fullTitle``, ``date`` and ``desc`` with text nodes
    selected by XPath, and ``sequence`` with a dict built from table rows
    (first cell text -> second cell text).
    """

    # Text fields taken straight from the page.
    fullTitle = Field()
    date = Field()
    desc = Field()

    # Mapping built from the rows of the sequence table.
    sequence = Field()
class MovieItem(Item):
    """Top-level Scrapy item emitted for one movie page."""

    # Address of the scraped page (the spider assigns ``response.url``).
    url = Field()
    title = Field()
    # NOTE(review): presumably the link to the video file -- confirm
    # against the part of the spider that sets it.
    mp4 = Field()
    # Plain dict holding the scraped description metadata.
    description = Field()