Scrapy settings.py 文件配置

2019-01-07  本文已影响0人  changzj

# --- Project identity -------------------------------------------------------
# NOTE(review): the bot name is an empty string here; confirm this is intended.
BOT_NAME = ""

# New spiders are generated into the same package that spiders are loaded from.
NEWSPIDER_MODULE = "downloadmiddlewares.spiders"
SPIDER_MODULES = [NEWSPIDER_MODULE]

# NOTE(review): this is a single space, not an empty string; looks like a
# placeholder. Confirm against the headers/middleware configured elsewhere.
USER_AGENT = " "

# robots.txt rules are not obeyed.
ROBOTSTXT_OBEY = False

# --- Request throttling -----------------------------------------------------
CONCURRENT_REQUESTS = 32
DOWNLOAD_DELAY = 3
CONCURRENT_REQUESTS_PER_DOMAIN = 16
CONCURRENT_REQUESTS_PER_IP = 16

# --- Cookies ----------------------------------------------------------------
COOKIES_ENABLED = False
COOKIES_DEBUG = False

设置日志信息

LOG_ENABLED
默认: True
是否启用logging。

LOG_ENCODING
默认: 'utf-8'
logging使用的编码。

LOG_LEVEL
默认: 'DEBUG'
log的最低级别。可选的级别有:CRITICAL、ERROR、WARNING、INFO、DEBUG。

LOG_FILE
默认: None
logging输出的文件名。如果为None,则使用标准错误输出(standard error)。
# Logging: minimum level is INFO and the log file name is set explicitly.
LOG_LEVEL = "INFO"
LOG_FILE = "TencentSpider.log"

# The telnet console is switched off.
TELNETCONSOLE_ENABLED = False

DEFAULT_REQUEST_HEADERS = {
'User-Agent':'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.26 Safari/537.36 Core/1.63.6788.400 QQBrowser/10.3.2864.400'
}

# Spider middlewares, mapped from dotted class path to order value.
SPIDER_MIDDLEWARES = {
    "downloadmiddlewares.middlewares.DownloadmiddlewaresSpiderMiddleware": 543,
}

# Downloader middlewares, mapped from dotted class path to order value.
DOWNLOADER_MIDDLEWARES = {
    "downloadmiddlewares.middlewares.UserAgentDownloadMiddleware": 543,
}

# Extensions: the telnet console entry is mapped to None.
EXTENSIONS = {
    "scrapy.extensions.telnet.TelnetConsole": None,
}

# Item pipelines, mapped from dotted class path to order value.
ITEM_PIPELINES = {
    "downloadmiddlewares.pipelines.DownloadmiddlewaresPipeline": 300,
}

自动限速扩展(上一个请求和下一个请求之间的时间是不固定的)

# --- AutoThrottle extension -------------------------------------------------
AUTOTHROTTLE_ENABLED = True
AUTOTHROTTLE_START_DELAY = 5
AUTOTHROTTLE_MAX_DELAY = 60
AUTOTHROTTLE_TARGET_CONCURRENCY = 1.0
AUTOTHROTTLE_DEBUG = False

# --- HTTP cache -------------------------------------------------------------
HTTPCACHE_ENABLED = True
HTTPCACHE_STORAGE = "scrapy.extensions.httpcache.FilesystemCacheStorage"
HTTPCACHE_DIR = "httpcache"
HTTPCACHE_EXPIRATION_SECS = 0
HTTPCACHE_IGNORE_HTTP_CODES = []

上一篇 下一篇

猜你喜欢

热点阅读