爬虫

python爬虫request库-demo

2018-05-06  本文已影响2人  青铜搬砖工

1 requests.get()方法与response

import requests

# Query-string parameters. requests URL-encodes these automatically, so no
# manual urlencode() into bytes is needed (GET takes params=, POST takes data=).
query = {
    "wd": "中国"
}
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.90 Safari/537.36 2345Explorer/9.3.2.17331',
}
response = requests.get("http://www.baidu.com/s", params=query, headers=headers)

# response.text is a str decoded with a guessed charset (may mojibake);
# response.content is the raw bytes from the wire, decoded explicitly below.
# print(response.url)
# print(response.status_code)
with open("baidu.html", 'w', encoding='utf-8') as file:
    file.write(response.content.decode("utf-8"))


2 requests.post()方法

import requests

# The Referer header is required here; the site rejects Ajax requests without it.
req_headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.90 Safari/537.36 2345Explorer/9.3.2.17331",
    "Referer": "https://www.lagou.com/jobs/list_Android?px=default&city=%E5%B9%BF%E5%B7%9E"
}
# Form fields posted as the request body; requests encodes them automatically.
form = {
    'first': 'true',
    'pn': '1',
    'kd': 'Android'
}
response = requests.post(
    "https://www.lagou.com/jobs/positionAjax.json?px=default&city=%E5%B9%BF%E5%B7%9E&needAddtionalResult=false",
    data=form,
    headers=req_headers,
)
# .json() parses the JSON response body into a Python dict.
print(response.json())

3 代理IP

import requests

# Route plain-HTTP traffic through a proxy: scheme -> "host:port".
proxies = {
    'http': '114.236.97.36:61234'
}
# This endpoint echoes the caller's apparent IP, so it shows the proxy address.
check_url = "http://ip.chinaz.com/getip.aspx"
result = requests.get(check_url, proxies=proxies)
print(result.text)

4 通过session访问需要登录的页面

import requests

login_url = "http://www.renren.com/PLogin.do"
header = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.90 Safari/537.36 2345Explorer/9.3.2.17331'
}
# NOTE(review): credentials are hard-coded for demo purposes only — never
# commit real secrets to source; load them from the environment instead.
data = {
    "email":"15111470603",
    "password":"21111111130"
}
# A Session persists cookies across requests: the login cookies set by the
# POST below are automatically sent with the later GET to a protected page.
# requests.Session() is the documented class; requests.session() is a legacy
# lowercase alias kept only for backwards compatibility.
session = requests.Session()
session.post(login_url, headers=header, data=data)
main_url = "http://www.renren.com/880792860/profile"
response = session.get(main_url)
# Decode the raw bytes explicitly to avoid requests' charset guessing.
with open("renren2.html", 'w', encoding="utf-8") as file:
    file.write(response.content.decode("utf-8"))
上一篇下一篇

猜你喜欢

热点阅读