利用lxml解析网站页面
2019-12-07 本文已影响0人
8a8d7f2e842b
from html import unescape
from html.parser import HTMLParser

import requests
from lxml import html
# Fetch a single page, pull out the element whose id is "content", and print
# its HTML markup with character/entity references turned back into text.

# A timeout keeps the script from blocking forever on an unresponsive server.
response = requests.get(
    'https://www.biquge.info/12_12696/5621986.html',
    timeout=10,
)
# Stop on HTTP errors (4xx/5xx) instead of parsing an error page.
response.raise_for_status()

etree = html.etree
# Parse the raw bytes (not response.text). The result is bound to a name that
# does not shadow the imported lxml ``html`` module.
page = etree.HTML(response.content)
content = page.xpath('//*[@id="content"]')
if not content:
    # Same exception type a bare content[0] would raise, with a useful message.
    raise IndexError('no element with id="content" found in the page')

# tostring() yields bytes here, hence the decode() before unescaping.
content_tos = etree.tostring(content[0], pretty_print=True, method='html')
# HTMLParser().unescape() was removed in Python 3.9; the stdlib function
# html.unescape() is its replacement.
content_parse = unescape(content_tos.decode())
print(content_parse)