requests,cookie,代理,证书忽略

2019-03-13  本文已影响0人  Senter

handler和opener.py

import urllib
from urllib import request

# urlopen() is just a special, ready-made opener:
#     urllib.request.urlopen(url)
# Handling cookies or proxies requires a custom opener built from handlers.

# Handler object for plain HTTP.
# Tip: HTTPHandler(debuglevel=1) sets the debug level so that a log of the
# exchange is printed to the console.
http = urllib.request.HTTPHandler()

# Build the opener; build_opener() takes the handler object(s) to use.
opener = urllib.request.build_opener(http)

# Install the opener globally so that later urlopen() calls also go through it.
urllib.request.install_opener(opener)


# Open the URL. The with-block closes the response (and its connection) once
# the body has been printed, instead of leaving it open until interpreter exit.
with opener.open("http://www.baidu.com") as response:
    print(response)
    print(response.read().decode())

# Equivalent call through the globally installed opener:
#     response = request.urlopen('http://www.baidu.com')
#     print(response.read().decode())

cookie.py

from http import cookiejar
from urllib import request

# Collect the cookies a site sets.
# The jar is the container the cookie handler fills in.
cookies = cookiejar.CookieJar()

# Handler that ties the jar to the requests made through the opener.
cookie_handler = request.HTTPCookieProcessor(cookies)

# Opener that routes requests through the cookie handler.
opener = request.build_opener(cookie_handler)

# The body is not needed here; only the cookies are of interest. The with-block
# makes sure the response is closed instead of being left open.
with opener.open("http://www.baidu.com") as response:
    pass

# The jar now holds the cookies received from Baidu.
print(cookies)

for cookie in cookies:
    # cookie.__dict__ lists every attribute if more detail is needed.
    print(cookie.name, ":", cookie.value)

下载cookie.py

import urllib
from http import cookiejar
from urllib import request

# LWPCookieJar is a subclass of FileCookieJar, which in turn is a subclass of
# CookieJar.
filename = "baiducookie.txt"  # file the cookies are stored in
cookies = cookiejar.LWPCookieJar(filename=filename)

# Handler
cookie_handler = request.HTTPCookieProcessor(cookies)

# Opener
opener = request.build_opener(cookie_handler)

# The request is made only to collect cookies, so the body is not read; the
# with-block closes the response instead of leaving it open.
with opener.open("http://www.baidu.com") as response:
    pass

# Alternative: install the opener globally so urlopen() uses it as well:
#     request.install_opener(opener)
#     response = request.urlopen("http://www.baidu.com")

# Save the cookies to `filename`.
cookies.save()

# If that raises an error, try saving discardable and expired cookies too:
#     cookies.save(ignore_discard=True, ignore_expires=True)

重复使用cookie.py

import urllib
from http import cookiejar
from urllib import request

# Start with an empty jar and load the locally stored cookies into it.
cookies = cookiejar.LWPCookieJar()
cookies.load(filename="baiducookie.txt")

cookie_handler = request.HTTPCookieProcessor(cookies)
opener = request.build_opener(cookie_handler)

# Requests made through this opener use the loaded jar. The with-block closes
# the response once the body has been printed.
with opener.open("http://www.baidu.com") as response:
    print(response.read())

代理IP和IP代理池和UA池.py

import random
import urllib
from urllib import request

headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36"
}

# Using a proxy, e.g.:
#     proxy = {'http': "61.135.155.82:443"}

# Using CCProxy (credentials are embedded in the proxy URL).
proxy = {'http': 'http://user1:123456@10.20.154.59:808'}
# Same proxy without credentials:
#     proxy = {'http': 'http://10.20.154.59:808'}


# Proxy IP pool
proxy_list = [
    {'http': "61.135.155.82:443"},
    {'http': "61.183.233.6:54896"},
    {'https': "218.249.45.162:35586"},
    {'https': "14.118.135.10:808"},
    #  ...
]

# UA pool (User-Agent pool)
user_agent_list = [
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.181 Safari/537.36",
    "Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; Touch; rv:11.0) like Gecko",
    "Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A372 Safari/604.1",
    "Mozilla/5.0 (Linux; Android 5.0; SM-G900P Build/LRX21T) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.181 Mobile Safari/537.36"
]

# Set up the proxy.
# To pick a random proxy from the pool instead of the fixed one above:
#     proxy = random.choice(proxy_list)
#     print(proxy)
proxy_handler = request.ProxyHandler(proxies=proxy)
opener = request.build_opener(proxy_handler)


url = "http://www.ifeng.com/"
req = request.Request(url, headers=headers)
# Pick a random User-Agent from the pool for this request.
req.add_header("User-Agent", random.choice(user_agent_list))
# The with-block closes the response once the body has been printed.
with opener.open(req) as res:
    print(res.read().decode())

requests的基本使用.py

import json
import requests

# ---------------------------------------------------------------- GET
# Generic form: requests.request('get', 'url')

response = requests.get('http://www.baidu.com')
print(response)  # the response object, e.g. <Response [200]>
# Things worth inspecting on the response:
#   response.__dict__
#   response.status_code       # 200
#   response.url               # http://www.baidu.com/
#   response.cookies           # cookies
#   response.encoding          # ISO-8859-1
#
# Response data as a string:
#   response.text              # type(response.text) is <class 'str'>
#
# Response data as bytes:
#   response.content
#   response.content.decode()

# Passing parameters with GET
#   option 1: append them to the URL directly,
#             e.g. requests.get('http://www.baidu.com/s?wd=DG')
#   option 2: pass them through `params`
query = {'wd': 'DG'}
response = requests.get('http://www.baidu.com/s', params=query)
# print(response.text)


# --------------------------------------------------------------- POST
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36"
}


# Youdao Translate
wd = input('请输入要翻译的中文:')

url = "http://fanyi.youdao.com/translate?smartresult=dict&smartresult=rule"

# Form fields that do not depend on the user's input.
fixed_fields = {
    "from": "zh-CHS",
    "to": "en",
    "smartresult": "dict",
    "client": "fanyideskweb",
    "salt": "1543306416677",
    "sign": "68fcd812e1290ca9154edd145acffec0",
    "doctype": "json",
    "version": "2.1",
    "keyfrom": "fanyi.web",
    "action": "FY_BY_CLICKBUTTION",
    "typoResult": "false",
}
# The text to translate goes first, followed by the fixed fields.
data = {"i": wd, **fixed_fields}

response = requests.post(url, headers=headers, data=data)
# print(response.text)

# JSON parsing: response.json() is requests' built-in alternative to
# json.loads(response.text).
dic = response.json()
# print(dic)
first_entry = dic['translateResult'][0][0]
result = first_entry['tgt']
print(result)

requests使用代理IP.py

import requests

# Request headers carrying a browser User-Agent string.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36"
}

# Proxy mapping; the credentials are embedded in the proxy URL.
proxy = {'http': 'http://user1:123456@10.20.154.59:808'}

target = 'http://www.ifeng.com'
response = requests.get(target, headers=headers, proxies=proxy)
print(response.text)

requests使用cookie.py

import requests

# Request headers carrying a browser User-Agent string.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36"
}

# Cookies submitted to the server with the request.
cookies = {"QZ_FE_WEBP_SUPPORT": "1", "pgv_pvid": "5307205432"}

response = requests.get('http://www.baidu.com', cookies=cookies, headers=headers)

# Cookies that came back on the response.
res_cookies = response.cookies
print(res_cookies)

# The same cookies converted to a plain dict. Sample output:
# {'H_PS_PSSID': '1452_21091_26350_20718', 'delPer': '0', 'BDSVRTM': '0', 'BD_HOME': '0'}
cookie_dict = requests.utils.dict_from_cookiejar(res_cookies)
print(cookie_dict)

requests使用session.py

import requests

headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36"
}

# Biquge login URL
url = "https://www.biquge5200.cc/u/login.htm"

data = {
    "name": "niejeff",
    "password": "E10ADC3949BA59ABBE56E057F20F883E",
    "autoLogin": 1,
    "autologin": 1,
}

# A session keeps cookies between requests. requests.Session() is used in
# place of the deprecated requests.session() alias, and the with-block closes
# the session once both requests are done.
with requests.Session() as session:
    # After this request the session holds the cookies set by the login response.
    res = session.post(url, data=data, headers=headers)
    # print(res.text)  # sample body noted by the author: {"flag":"success","data":""}

    # NOTE: this banner is printed unconditionally; the login result itself is
    # not checked here.
    print("============= 登录成功后 ==============")
    # Visit the personal centre (the "my bookshelf" page) after logging in:
    # https://www.biquge5200.cc/home/
    url = "https://www.biquge5200.cc/home/"
    res2 = session.get(url, headers=headers)
    print(res2.text)

requests忽略证书.py

import requests

# Request headers carrying a browser User-Agent string.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36"
}

# The `verify` argument:
#   verify=True   the certificate is verified (this is the default)
#   verify=False  certificate verification is skipped; a warning is shown
res = requests.get('https://www.baidu.com', verify=False, headers=headers)
# print(res.text)

# See also: ssl._create_unverified_context()


# auth
# Request to GitHub
auth = ('name', '123456')  # fill in your own username and password

github_url = "https://api.github.com/user"
response = requests.get(github_url, auth=auth)
print(response.text)
上一篇 下一篇

猜你喜欢

热点阅读