python网络爬虫8:Cookies的获取与设置?

2021-01-03  本文已影响0人  0清婉0

# Inspect the attributes of an HTTP response: status code, headers and body.
import urllib.request

url = 'https://www.baidu.com'

# Open the response as a context manager so the underlying connection is
# closed as soon as the status, headers and body have been printed.
with urllib.request.urlopen(url=url) as response:
    print('响应状态码为:', response.status)
    print('响应头所有信息为:', response.getheaders())
    # getheader() returns None when the server did not send that header.
    print('响应头指定信息为:', response.getheader('Accept-Ranges'))
    print('HTML代码如下:\n', response.read().decode('utf-8'))

# Send a POST request: passing `data` to urlopen() turns the request into a POST.
import urllib.request
import urllib.parse

url = 'https://www.httpbin.org/post'

# urlopen() needs the form body as bytes, so URL-encode the fields first
# and then encode the resulting string as UTF-8.
data = urllib.parse.urlencode({'hello': 'python'}).encode('utf-8')

# The context manager closes the connection once the body has been read.
with urllib.request.urlopen(url=url, data=data) as response:
    print(response.read().decode('utf-8'))

# Catch a network timeout so the script can move on to the next task.
import urllib.request
import urllib.error
import socket

url = 'https://www.python.org/'

try:
    # A deliberately tiny timeout (0.1 s) to provoke the timeout path.
    with urllib.request.urlopen(url=url, timeout=0.1) as response:
        print(response.read().decode('utf-8'))
except socket.timeout:
    # A timeout that fires while the body is being read is raised directly
    # as socket.timeout; it is not wrapped in URLError.
    print('当前任务已超时,即将执行下一任务')
except urllib.error.URLError as error:
    # A timeout while connecting arrives wrapped in URLError.reason.
    if isinstance(error.reason, socket.timeout):
        print('当前任务已超时,即将执行下一任务')
    else:
        # Report other failures instead of silently ignoring them.
        print('请求失败:', error.reason)

# Build a complete request with urllib.request.Request
#
# urllib.request.Request(url, data=None, headers={}, origin_req_host=None,
#                        unverifiable=False, method=None)
#
# url:             full URL of the page to request.
# data:            body to send, or None. It must be bytes (or a file-like
#                  object / iterable of bytes), not a dict; URL-encode a dict
#                  and encode the result to bytes first.
# headers:         dictionary of request headers.
# origin_req_host: request-host of the origin transaction (RFC 2965).
# unverifiable:    whether the request is "unverifiable" in the RFC 2965
#                  sense (the user had no option to approve the URL);
#                  defaults to False.
# method:          HTTP method as a string, e.g. 'GET' or 'POST'. When it is
#                  omitted the method is GET if data is None, otherwise POST.
#
# A browser-like User-Agent header makes the request look like it comes from
# a browser, which helps with servers that reject obvious crawlers.
import urllib.request
import urllib.parse

url = 'https://www.baidu.com/'

headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36'}

# The request body has to be bytes: URL-encode the form, then encode as UTF-8.
data = urllib.parse.urlencode({'hello': 'python'}).encode('utf-8')

r = urllib.request.Request(url=url, data=data, headers=headers, method='POST')

# The context manager closes the connection once the body has been read.
with urllib.request.urlopen(r) as response:
    print(response.read().decode('utf-8'))

# Getting and setting cookies
#
# A cookie is a marker the server hands to the client in a response; the
# client sends it back on later requests to the same server.
import urllib.request
import urllib.parse
import http.cookiejar
import json

url = 'https://passport.mingrisoft.com/Login/checkLogin'

# Login form, URL-encoded and converted to bytes as a POST body requires.
data = urllib.parse.urlencode({'username': '***', 'password': '****'}).encode('utf-8')

r = urllib.request.Request(url=url, data=data, method='POST')

# The CookieJar collects every cookie set by responses that pass through
# the opener built around it.
cookie = http.cookiejar.CookieJar()
cookie_processor = urllib.request.HTTPCookieProcessor(cookie)
opener = urllib.request.build_opener(cookie_processor)

# Read the whole body, then release the connection.
with opener.open(r) as reply:
    body = reply.read().decode('utf-8')

try:
    response = json.loads(body)
except json.JSONDecodeError:
    # "Expecting value: line 1 column 1 (char 0)" means the body is empty or
    # is not JSON at all (for example an HTML page). Show what the server
    # actually sent instead of crashing.
    response = None
    print('服务器返回的内容不是JSON:', repr(body[:200]))

# NOTE(review): presumably the endpoint wraps its status text in an object
# such as {"msg": "..."}; the reply schema is not visible here, so confirm
# it against a real response.
if isinstance(response, dict):
    response = response.get('msg')

if response == '登录成功':
    # Login succeeded: list the cookies the server set.
    for i in cookie:
        print(i.name + '=' + i.value)

运行上面最后一段(Cookies 登录)代码时出现如下错误,暂时不清楚原因,请大家帮忙解答。(该错误表示服务器返回的内容为空,或者不是合法的 JSON。)

# raise JSONDecodeError("Expecting value", s, err.value) from None

# json.decoder.JSONDecodeError: Expecting value: line 1 column 1 (char 0)

上一篇下一篇

猜你喜欢

热点阅读