Python网抓

2020-10-28  本文已影响0人  五维思考

抓取视频

import requests
import re

from lxml import etree

# Download every video linked from the category list of a listing page.
# "网址" is the placeholder left by the author for the listing-page URL.
res = requests.get("网址")
res_xpath = etree.HTML(res.text)
res_xpath.xpath('/html/head/title/text()')  # absolute path; /text() extracts the text
# res_xpath.xpath('//title/text()')  # '//' matches the node at any depth
for li in res_xpath.xpath('//ul[@id="categoryList"]/li'):
    hrefs = li.xpath('./div/a/@href')
    if not hrefs:
        # List item without a link: nothing to download.
        continue
    url = 'http://www.pearvideo.com/' + hrefs[0]
    # Use a separate name so the listing-page response in `res` is not clobbered.
    page = requests.get(url)
    # re.S lets '.' match newlines, so matches may span several lines.
    videos = re.findall('srcUrl="(.*?)"', page.text, re.S)
    titles = re.findall('<title>(.*?)</title>', page.text, re.S)
    if not videos or not titles:
        # Detail page has no video URL or no title; skip it rather than crash.
        continue
    video = videos[0]
    # Replace characters that are not allowed in file names on common systems.
    title = re.sub(r'[\\/:*?"<>|\r\n]+', '_', titles[0]).strip()
    response = requests.get(video)  # fetch the raw binary data
    with open(title + ".mp4", mode='wb') as f:
        f.write(response.content)
上一篇 下一篇

猜你喜欢

热点阅读