scrapy.Request 爬中爬

2017-07-12  本文已影响36人  心愿2016

import scrapy
from myproject.items import MyItem
class MySpider(scrapy.Spider):
    """Spider that builds a single item across several chained requests.

    The callbacks form a chain: ``parse`` schedules one request per item
    URL, ``parse_item`` creates the item and schedules a follow-up request
    that carries the item in ``meta``, and ``parse_details`` reads the item
    back from ``response.meta`` and returns it.
    """

    name = 'myspider'
    start_urls = (
        'http://example.com/page1',
        'http://example.com/page2',
    )

    def parse(self, response):
        """Yield one request per item URL, each handled by ``parse_item``."""
        # collect `item_urls`
        # NOTE: template placeholder -- `item_urls` must be assigned from
        # `response` here; as written the name is undefined.
        for item_url in item_urls:
            yield scrapy.Request(item_url, self.parse_item)

    def parse_item(self, response):
        """Create the item and request its details page.

        The partially filled item is attached to the follow-up request via
        ``meta`` so that ``parse_details`` can continue populating it.
        """
        item = MyItem()
        # populate `item` fields
        # and extract item_details_url
        # NOTE: template placeholder -- `item_details_url` must be assigned
        # from `response` here; as written the name is undefined.
        yield scrapy.Request(
            item_details_url,
            self.parse_details,
            meta={'item': item},
        )

    def parse_details(self, response):
        """Return the item that ``parse_item`` stored under ``meta['item']``."""
        item = response.meta['item']
        # populate more `item` fields
        return item
59FC89FB-184B-4CCF-912E-1BD705D233DB.png
上一篇 | 下一篇

猜你喜欢

热点阅读