2019-05-21
2019-05-21 本文已影响0人
无为真君
关于 Python 爬虫学习
还是会犯很多语法小错误，不熟练，折腾了半天。
# coding=utf-8
# 20190521-G
import urllib2
def download(url, user_agent='wswp', num_retries=2):
    """Fetch *url* and return the response body, retrying on 5xx errors.

    Args:
        url: Address of the page to download.
        user_agent: Value sent in the ``User-agent`` request header.
        num_retries: How many more attempts to make when the server
            answers with a 5xx status code.

    Returns:
        The raw response body as returned by ``read()``, or ``None`` when
        the download failed and no retry succeeded.
    """
    print('Downloading:' + url)
    # Set the user agent for this request.
    headers = {'User-agent': user_agent}
    request = urllib2.Request(url, headers=headers)
    # Catch errors raised by the urllib2 module.
    try:
        response = urllib2.urlopen(request)
        try:
            html = response.read()
        finally:
            # Release the connection explicitly instead of relying on
            # garbage collection.
            response.close()
    except urllib2.URLError as e:
        # e.reason is not always a string (for DNS or connection failures
        # it is a socket error instance), so convert it before
        # concatenating; otherwise the handler itself raises TypeError.
        print('Download Error:' + str(e.reason))
        html = None
        if num_retries > 0:
            # 5xx means the problem is on the server side, so a retry may
            # succeed; 4xx means the request itself is bad, so retrying is
            # pointless. Only HTTPError carries a ``code`` attribute, hence
            # the hasattr() check.
            if hasattr(e, 'code') and 500 <= e.code < 600:
                # Recursively retry 5xx HTTP errors.
                return download(url, user_agent, num_retries - 1)
    return html
# URL used to exercise the 5xx retry path of download()
# (httpstat.us/500 presumably replies with HTTP 500 -- confirm).
html1 = 'http://httpstat.us/500'
# Alternative target for an ordinary download:
# html = 'http://www.meetup.com'
print(download(html1))