# The requests library
# The requests.get method
import requests
# Fetch the Baidu home page. The timeout keeps the script from waiting
# forever if the server never answers.
req = requests.get('http://www.baidu.com', timeout=10)
# Decode the body as UTF-8 regardless of what the response headers declare.
req.encoding = 'utf-8'
print(req.status_code)
# The bare expressions below only display a value in an interactive session
# (REPL / notebook); in a plain script they produce no output.
type(req)
req.headers  # response header fields
req.text
import requests
def gethtml(url, timeout=10):
    """Download ``url`` and print the first 1000 characters of its body.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up; passed
            straight to ``requests.get``.

    Returns:
        None. The text preview, or the failure message, goes to stdout.
    """
    try:
        req = requests.get(url, timeout=timeout)
        # Turn HTTP error statuses into an exception so they reach the
        # handler below.
        req.raise_for_status()
    except requests.RequestException:
        # Only network/HTTP failures are reported as crawler errors;
        # programming errors and Ctrl-C are allowed to propagate.
        print('爬虫异常')
    else:
        # Prefer the encoding detected from the body over the one taken
        # from the response headers.
        req.encoding = req.apparent_encoding
        print(req.text[:1000])
# Demo call: run gethtml on a JD.com product page.
url='https://item.jd.com/5089237.html'
gethtml(url)
# Disguise the request as coming from a browser
def gethtml(url, timeout=10):
    """Download ``url`` with a browser-like User-Agent and print a preview.

    Sends a ``User-Agent`` header with the request and prints the first
    100 characters of the response body.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up; passed
            straight to ``requests.get``.

    Returns:
        None. The text preview, or the failure message, goes to stdout.
    """
    head = {'User-Agent': 'Mozilla/5.0 '}
    try:
        req = requests.get(url, headers=head, timeout=timeout)
        # Turn HTTP error statuses into an exception so they reach the
        # handler below.
        req.raise_for_status()
    except requests.RequestException:
        # Only network/HTTP failures are reported as crawler errors;
        # programming errors and Ctrl-C are allowed to propagate.
        print('爬虫异常')
    else:
        # Prefer the encoding detected from the body over the one taken
        # from the response headers.
        req.encoding = req.apparent_encoding
        print(req.text[:100])
# Demo call: fetch the same product page through the gethtml defined just above.
url='https://item.jd.com/5089237.html'
gethtml(url)
# NOTE(review): `req` is assigned inside gethtml, so this module-level `req`
# appears to be the response from the earlier top-level requests.get(...)
# call, not the request sent by gethtml(url) -- confirm which request's
# headers were meant to be inspected.
req.request.headers# view the headers that were sent with the crawler's request
# Add a search keyword to the request
def html(url, keyword='python', timeout=10):
    """Send a keyword search to ``url`` and print the size of the result page.

    The keyword is sent as the ``wd`` query parameter together with a
    browser-like ``User-Agent`` header. The URL of the request is printed
    first, then the length of the response text.

    Args:
        url: Search endpoint to query.
        keyword: Search term placed in the ``wd`` query parameter.
        timeout: Seconds to wait for the server before giving up; passed
            straight to ``requests.get``.

    Returns:
        None. All output, including the failure message, goes to stdout.
    """
    head = {'User-Agent': 'Mozilla/5.0 '}
    kd = {'wd': keyword}
    try:
        r = requests.get(url, params=kd, headers=head, timeout=timeout)
        # Printed before the status check so the URL is visible even when
        # the server answers with an error status.
        print(r.request.url)
        r.raise_for_status()
    except requests.RequestException:
        # Only network/HTTP failures are reported as crawler errors;
        # programming errors and Ctrl-C are allowed to propagate.
        print('爬虫异常')
    else:
        print(len(r.text))
# Demo call: run the keyword search against Baidu's search endpoint.
url='http://www.baidu.com/s'
html(url)
# The JSON data format
import json
# Sample JSON text: one object with four fields.
data = '{"购买方式":"裸机","版本":"6GB+64GB","skuId":7437788,"颜色":"黑"}'

# Decode the JSON text into a Python dict.
jdata = json.loads(data)

# The bare expressions below only display a value in an interactive session
# (REPL / notebook); in a plain script they produce no output.
jdata.keys()
jdata['版本']