scrapy signal的用法

2018-09-03  本文已影响0人  Py_Explorer

在与 settings.py 同级的目录下创建一个 py 文件(本例中为 signalceshi.py)

1. 撸代码

#coding=utf-8
from scrapy import signals
# Module-level counter of scraped items, incremented on every item_scraped signal.
hahaha = 0


class QianlongwangSpiderMiddleware(object):
    """Example component that subscribes to several Scrapy signals.

    Scrapy builds the instance through ``from_crawler``, which connects each
    handler below to the crawler's signal manager. The handlers only log,
    print, or bump the module-level ``hahaha`` counter.
    """

    @classmethod
    def from_crawler(cls, crawler):
        """Create the instance and connect its handlers to the crawler signals.

        Args:
            crawler: object exposing ``signals.connect(handler, signal=...)``.

        Returns:
            The newly created instance.
        """
        s = cls()
        crawler.signals.connect(s.spider_opened, signal=signals.spider_opened)
        crawler.signals.connect(s.item_scraped, signal=signals.item_scraped)
        crawler.signals.connect(s.spider_closed, signal=signals.spider_closed)
        crawler.signals.connect(s.spider_error, signal=signals.spider_error)
        crawler.signals.connect(s.spider_idle, signal=signals.spider_idle)
        return s

    def spider_opened(self, spider):
        """Handle ``spider_opened``: sent when the spider starts crawling.

        This signal is typically used to allocate per-spider resources, but
        a handler may do anything.
        """
        spider.logger.info('pa chong kai shi le: %s', spider.name)
        print('start', '1')

    def item_scraped(self, item, response, spider):
        """Handle ``item_scraped``: count one more scraped item."""
        global hahaha
        hahaha += 1

    def spider_closed(self, spider, reason):
        """Handle ``spider_closed``: sent when a spider has been closed.

        This signal can be used to release the per-spider resources taken in
        ``spider_opened``.
        """
        print('-------------------------------all over------------------------------------------')
        print(spider.name, ' closed')

    def spider_error(self, failure, response, spider):
        """Handle ``spider_error``: sent when a spider callback raises."""
        print('spider error')

    def spider_idle(self, spider):
        """Handle ``spider_idle``: print the spider name ten times.

        Per the Scrapy signals documentation, idle means the spider has no
        further requests waiting to be downloaded, no requests scheduled and
        no items being processed in the item pipeline.
        """
        for _ in range(10):
            print(spider.name)

2. 在 settings.py 中把该中间件加到 DOWNLOADER_MIDDLEWARES

# Disable the built-in User-Agent middleware (value None) and register the
# signal-handling class at priority 543. The legacy
# 'scrapy.contrib.downloadermiddleware.*' path was relocated to
# 'scrapy.downloadermiddlewares.*' in Scrapy 1.0 and later removed, so the
# current path is used here.
DOWNLOADER_MIDDLEWARES = {
    'scrapy.downloadermiddlewares.useragent.UserAgentMiddleware': None,
    'news.signalceshi.QianlongwangSpiderMiddleware': 543,
}

官方文档信号供参考

https://scrapy.readthedocs.io/en/latest/topics/signals.html
上一篇下一篇

猜你喜欢

热点阅读