scrapy-redis

2019-01-07 本文已影响0人瘦不下去了

设置去重组件：使用 scrapy-redis 提供的去重组件，而不再使用 Scrapy 框架自带的去重组件。

# Deduplicate requests with the scrapy-redis dupefilter instead of the one
# built into Scrapy.
DUPEFILTER_CLASS = "scrapy_redis.dupefilter.RFPDupeFilter"

# Schedule requests through the scrapy-redis scheduler.
SCHEDULER = "scrapy_redis.scheduler.Scheduler"

# Per the scrapy-redis docs: do not clean up the Redis queues on close, which
# allows a crawl to be paused and resumed.
SCHEDULER_PERSIST = True

# Queue implementation used by the scheduler. A name can only hold one value,
# so exactly one assignment is left active; it is the one that was effective
# when all three were assigned in sequence (the last one). The alternatives
# are kept as comments for reference.
# SCHEDULER_QUEUE_CLASS = "scrapy_redis.queue.SpiderPriorityQueue"  # priority queue
# SCHEDULER_QUEUE_CLASS = "scrapy_redis.queue.SpiderQueue"  # FIFO queue
SCHEDULER_QUEUE_CLASS = "scrapy_redis.queue.SpiderStack"  # LIFO stack

# Item pipelines to enable; per the Scrapy docs, items pass through them in
# ascending order of the integer value.
ITEM_PIPELINES = {
    "example.pipelines.ExamplePipeline": 300,
    # With this pipeline enabled, the items collected by the spiders are all
    # saved into the Redis database.
    "scrapy_redis.pipelines.RedisPipeline": 400,
}

# Minimum severity of messages written to the log.
LOG_LEVEL = "DEBUG"

Introduce an artificial delay to make use of parallelism, to speed up the crawl.
# Delay applied by the downloader between consecutive requests to the same
# website (in seconds, per the Scrapy docs).
DOWNLOAD_DELAY = 1

# Location of the Redis server used by scrapy-redis.
REDIS_HOST = "127.0.0.1"
REDIS_PORT = 6379