sequences added
This commit is contained in:
parent
f6a9702b3a
commit
52241b5d66
@ -2,7 +2,7 @@ import scrapy
|
||||
from scrapy.spiders import CrawlSpider, Rule
|
||||
from scrapy.linkextractors import LinkExtractor
|
||||
import re
|
||||
from items import MovieItem
|
||||
from items import MovieItem, Description, Sequence
|
||||
|
||||
|
||||
class FinaSpider(CrawlSpider):
|
||||
@ -37,5 +37,17 @@ class FinaSpider(CrawlSpider):
|
||||
item['url'] = response.url
|
||||
#TODO
|
||||
#description:
|
||||
desc = Description()
|
||||
desc['fullTitle'] = response.xpath('//*[@id="block-fnfilm-fnfilm"]/div/div[3]/div/div[2]/span[2]/text()').get()
|
||||
desc['desc'] = response.xpath('//*[@id="content"]/div[3]/article/div[1]/div/div[2]/div[1]/div/div[2]/span/p/text()').get()
|
||||
desc['date'] = response.xpath('//*[@id="block-fnfilm-fnfilm"]/div/div[3]/div/div[3]/span[2]/text()').get()
|
||||
|
||||
seq = {}
|
||||
for row in response.xpath('//*[@id="content"]/div[3]/article/div[1]/div/div[2]/div[2]/div/div/table/tr'):
|
||||
seq[ row.xpath('td[1]/span/text()').get() ] = row.xpath('td[2]/span/text()').get()
|
||||
|
||||
desc['sequence'] = dict(seq)
|
||||
|
||||
item['description'] = dict(desc)
|
||||
|
||||
return item
|
||||
|
@ -1,7 +1,18 @@
|
||||
from scrapy.item import Item, Field
|
||||
|
||||
class Sequence(Item):
    """Scrapy item declaring the two fields ``seqTime`` and ``seqVal``.

    NOTE(review): presumably models a single row of a film's sequence
    table (a time marker and the text shown next to it) -- confirm
    against the code that populates it.
    """

    # Presumably the time/position label of the sequence row -- TODO confirm.
    seqTime = Field()
    # Presumably the content associated with that label -- TODO confirm.
    seqVal = Field()
|
||||
|
||||
class Description(Item):
    """Scrapy item grouping the descriptive metadata of one film page.

    Declares four fields: ``fullTitle``, ``sequence``, ``date`` and
    ``desc``.
    """

    # Full title text of the film.
    fullTitle = Field()
    # Sequence data for the film.
    # NOTE(review): looks like a mapping of sequence label -> text;
    # verify against the spider that fills it.
    sequence = Field()
    # Date text of the film, stored as scraped.
    date = Field()
    # Free-text description of the film.
    desc = Field()
|
||||
|
||||
|
||||
class MovieItem(Item):
    """Top-level Scrapy item produced for one scraped film page.

    Declares four fields: ``url``, ``title``, ``mp4`` and ``description``.
    """

    # Address of the page the item was scraped from.
    url = Field()
    # Title of the film.
    title = Field()
    # NOTE(review): presumably the location of the film's MP4 video --
    # confirm against the spider.
    mp4 = Field()
    # Descriptive metadata of the film.
    # NOTE(review): presumably a plain dict built from a Description
    # item -- verify against the spider.
    description = Field()
|
||||
|
Loading…
Reference in New Issue
Block a user