requests库

2019-02-01  本文已影响0人  护国寺小学生

#requests.get方法

import requests

# Fetch the Baidu home page. The timeout keeps the script from blocking
# forever if the server never answers.
req = requests.get('http://www.baidu.com', timeout=10)

# Decode the response body as UTF-8 when it is read through req.text.
req.encoding = 'utf-8'

print(req.status_code)

# The inspections below are printed explicitly: written as bare expressions
# their values are only echoed in an interactive session and are silently
# discarded when this file runs as a script.
print(type(req))
print(req.headers)  # response header information
print(req.text)


import requests

def gethtml(url, timeout=10):
    """Fetch *url* and print the first 1000 characters of the decoded page.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up; passed
            straight to ``requests.get``.

    Returns:
        None. Either the page preview or the failure notice is printed.

    Only request-related failures (connection errors, timeouts, invalid
    URLs, HTTP error statuses) are reported as a crawl failure. Anything
    else, including Ctrl-C, propagates to the caller instead of being
    silently swallowed.
    """
    try:
        req = requests.get(url, timeout=timeout)
        # Turn 4xx/5xx responses into an exception so they reach the handler.
        req.raise_for_status()
        # Decode with the encoding detected from the body content.
        req.encoding = req.apparent_encoding
        print(req.text[:1000])
    except requests.RequestException:
        print('爬虫异常')

# Try the plain fetch (no custom headers) against a JD product page.
url = 'https://item.jd.com/5089237.html'
gethtml(url)


#伪装浏览器访问

def gethtml(url, timeout=10):
    """Fetch *url* with a browser-like User-Agent and print a short preview.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up; passed
            straight to ``requests.get``.

    Returns:
        None. Either the first 100 characters of the page or the failure
        notice is printed.

    Only request-related failures (connection errors, timeouts, invalid
    URLs, HTTP error statuses) are reported as a crawl failure. Anything
    else, including Ctrl-C, propagates to the caller instead of being
    silently swallowed.
    """
    # Sent in place of the library's default User-Agent header.
    head = {'User-Agent': 'Mozilla/5.0 '}
    try:
        req = requests.get(url, headers=head, timeout=timeout)
        # Turn 4xx/5xx responses into an exception so they reach the handler.
        req.raise_for_status()
        # Decode with the encoding detected from the body content.
        req.encoding = req.apparent_encoding
        print(req.text[:100])
    except requests.RequestException:
        print('爬虫异常')

# Run the fetch that sends the custom User-Agent against the same page.
url = 'https://item.jd.com/5089237.html'
gethtml(url)

# Show the headers that were sent with a request. Printed explicitly because
# a bare expression is discarded when this file runs as a script.
# NOTE(review): `req` here is the module-level response created near the top
# of this file. The `req` assigned inside gethtml() is local to that function,
# so these are not the headers of the call just above. Presumably the intent
# was to inspect the custom User-Agent; confirm.
print(req.request.headers)


#加入关键词搜索

def html(url, timeout=10):
    """Run a keyword query against *url* and print the URL and page size.

    The query string ``wd=python`` and a browser-like User-Agent are attached
    to the request. The final request URL is printed first, then the length
    of the response text.

    Args:
        url: Address of the search endpoint to query.
        timeout: Seconds to wait for the server before giving up; passed
            straight to ``requests.get``.

    Returns:
        None. Results, or the failure notice, are printed.

    Only request-related failures (connection errors, timeouts, invalid
    URLs, HTTP error statuses) are reported as a crawl failure. Anything
    else, including Ctrl-C, propagates to the caller instead of being
    silently swallowed.
    """
    head = {'User-Agent': 'Mozilla/5.0 '}
    # Encoded into the query string by requests.
    kd = {'wd': 'python'}
    try:
        r = requests.get(url, params=kd, headers=head, timeout=timeout)
        # The URL actually requested, including the encoded parameters.
        print(r.request.url)
        # Turn 4xx/5xx responses into an exception so they reach the handler.
        r.raise_for_status()
        print(len(r.text))
    except requests.RequestException:
        print('爬虫异常')

# Issue the keyword search against Baidu's search endpoint.
url = 'http://www.baidu.com/s'
html(url)


#json数据格式

import json

# Sample record held as JSON text.
data = '{"购买方式":"裸机","版本":"6GB+64GB","skuId":7437788,"颜色":"黑"}'

# Parse the JSON text into a dict.
jdata = json.loads(data)

# Printed explicitly: written as bare expressions these lookups are only
# echoed in an interactive session and are discarded when run as a script.
print(jdata.keys())
print(jdata['版本'])

上一篇 下一篇

猜你喜欢

热点阅读