Requests库介绍
2019-08-25 本文已影响0人
慕慕她爸
- 什么是Requests
- 是基于urllib3库,用Python编写,采用Apache2 Licensed开源协议的HTTP库
- 比urllib更加方便,可以节约大量的工作,完全满足http的测试需求
- 安装Requests
pip3 install requests
- Requests用法详解
1.简单举例
# Minimal example: fetch a page and inspect the returned Response object.
# Lines marked "sample output" show what the preceding print produced.
import requests

# requests.get() sends an HTTP GET and returns a Response.
response = requests.get('http://www.baidu.com/')
print(type(response))
# sample output: <class 'requests.models.Response'>
print(response.status_code)
# sample output: 200
# .text can be printed directly; no manual decoding step is needed.
print(response.text)
print(response.cookies)
# sample output: <RequestsCookieJar[<Cookie BDORZ=27315 for .baidu.com/>]>
2.各种请求方式
# Each HTTP verb has its own helper, called the same way as requests.get().
requests.post('http://httpbin.org/post')
requests.put('http://httpbin.org/put')
requests.delete('http://httpbin.org/delete')
requests.head('http://httpbin.org/get')
requests.options('http://httpbin.org/get')
- 带参数get
import requests
# GET with the query parameters written directly into the URL.
response = requests.get('http://httpbin.org/get?name=Arise&age=22')
print(response.text)
# GET with query parameters supplied as a dict instead of a hand-built URL.
import requests

data = {
    'name': 'Arise',
    'age': 22,
}
# Send the request to the /get endpoint (not the site root) so the parameters
# passed via `params` reach the same endpoint as a hand-written query string.
response = requests.get('http://httpbin.org/get', params=data)
# Print the body so the result of the request is actually visible.
print(response.text)
- 解析json
import requests
import json
response = requests.get('http://httpbin.org/get')
print(type(response.text))
# Compare decoding the body with json.loads() against calling the response's
# own json() method: print both results, then both result types.
print(json.loads(response.text))
print(response.json())
print(type(json.loads(response.text)))
print(type(response.json()))
- 获取二进制数据
# Fetch a binary resource and compare its text and raw-bytes views.
import requests

response2 = requests.get('https://github.com/favicon.ico')
# Show the types of the two views of the same body.
print(type(response2.text), type(response2.content))
# Use response2 (the object fetched above) for both prints; the attribute
# holding the raw body is spelled `content`.
print(response2.text)
print(response2.content)
将上面的图片下载下来
# Download the icon fetched above and save it to disk.
import requests

response2 = requests.get('https://github.com/favicon.ico')
# 'favicon.ico' is a relative path, so the file is written to the current
# working directory. The with-statement closes the file on exit, so no
# explicit close call is needed.
with open('favicon.ico', 'wb') as f:
    f.write(response2.content)
- 添加headers
import requests
# Without headers the request may get blocked and the crawl fail.
headers = {
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.116 Safari/537.36'
}
# response4 is sent without custom headers, response5 with the User-Agent
# above, so the two printed bodies can be compared.
response4 = requests.get('http://www.zhihu.com/explore')
response5 = requests.get('http://www.zhihu.com/explore',headers = headers)
print(response4.text)
print(response5.text)
- 基本post请求
import requests
# Unlike urllib, the form data needs no manual encoding here.
data ={'name':'Arise','age':22}
response = requests.post('http://httpbin.org/post',data = data)
print(response.text)
或者用json方法来解析
import requests
# Same POST as before, this time with a custom User-Agent header, and the
# response body read through json() instead of text.
data ={'name':'Arise','age':22}
headers = {'User-Agent':'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36'}
response2 = requests.post('http://httpbin.org/post',data = data,headers = headers)
print(response2.json())
8.response属性
# Print the type and value of the commonly used Response attributes.
import requests

response3 = requests.get('http://www.jianshu.com')
# Every line reads from response3, the object fetched just above.
print(type(response3.status_code), response3.status_code)
print(type(response3.headers), response3.headers)
print(type(response3.cookies), response3.cookies)
print(type(response3.url), response3.url)
print(type(response3.history), response3.history)
- 状态码判断
# Two equivalent ways to check that a request succeeded.
import requests

response4 = requests.get('http://www.jianshu.com')

# Form 1: compare against the numeric status code 200 directly
# (other numeric codes can be checked the same way).
if response4.status_code != 200:
    exit()
else:
    print('Requst Successfully')

# Form 2: compare against the named constant requests.codes.ok.
if response4.status_code != requests.codes.ok:
    exit()
else:
    print('Requst Successfully')
- 文件上传
# Upload a file with a multipart POST.
import requests

# Open the previously downloaded GitHub icon in binary mode. The with-block
# keeps the handle open while the request is sent and closes it afterwards.
with open('favicon.ico', 'rb') as icon:
    file = {'file': icon}
    response = requests.post('http://httpbin.org/post', files=file)
print(response.text)
11.获取cookie
# Read the cookies returned with a response.
import requests

response5 = requests.get('http://www.baidu.com')
# Compared with urllib, nothing extra has to be declared to get at cookies.
print(response5.cookies)
# Print each cookie as name=value.
for key, value in response5.cookies.items():
    print(key + '=' + value)
- 会话模拟
import requests
# Set a cookie for the site.
requests.get('http://httpbin.org/cookies/set/number/123456789')
# This request is independent of the one above, so it gets no cookie-related
# information back.
response6 = requests.get('http://httpbin.org/cookies')
print(response6.text)
import requests
# Create a Session object and issue both GET requests through it, so they
# behave like requests sent from the same browser.
s = requests.Session()
s.get('http://httpbin.org/cookies/set/number/123456789')
response = s.get('http://httpbin.org/cookies')
print(response.text)
- 证书验证
import requests
# verify=False on its own still produces a warning; the following two lines
# silence it.
from requests.packages import urllib3
urllib3.disable_warnings()
# The certificate is checked first by default; passing verify=False turns
# that check off so the certificate error is not raised.
response = requests.get('https://www.12306.cn',verify = False)
print(response.status_code)
- 设置代理
import requests
# Proxy to use per URL scheme, passed to the request via `proxies`.
# NOTE(review): the query strings look like real auth tokens for a local
# proxy tool - confirm they are placeholders before publishing.
proxies = {
'http':'http://127.0.0.1:1080/pac?auth=HgT2fpms98njlh9QGpsP&t=201803030916114202',
'https':'https://127.0.0.1:1080/pac?auth=HgT2fpms98njlh9QGpsP&t=201803030916114202',
}
response = requests.get('http://www.taobao.com',proxies = proxies)
print(response.status_code)
- 超时处理
# Give up on a request that takes longer than the given timeout.
import requests
# Timeout is the common base class of ConnectTimeout and ReadTimeout, so
# catching it handles a timeout in either phase of the request.
from requests.exceptions import Timeout

try:
    response = requests.get('http://www.taobao.com', timeout=0.5)
    print(response.status_code)
except Timeout:
    print('TIMEOUT')
- 登录验证
# Access a page protected by HTTP Basic authentication.
import requests
from requests.auth import HTTPBasicAuth

# A plain (user, password) tuple, auth=('user', '123'), is accepted as
# shorthand for HTTPBasicAuth.
# The port is separated from the host by ':' (host:9001).
r = requests.get('http://120.27.34.24:9001', auth=HTTPBasicAuth('user', '123'))
print(r.status_code)