6-XPath 和 CSS 选择器（cssselect）基本使用
2018-08-10 本文已影响1人
撸撸很乖张
XPath 基本使用
import requests
from lxml import etree
# One-off download step, kept for reference: it fetches the page and saves
# the raw response bytes to the local file that the rest of this script parses.
#
# url = 'http://www.python-requests.org/en/master/'
#
# with requests.get(url) as resp:
#     html = resp.content
#
# with open("python-requests.html",'wb') as f:
#     f.write(html)

# Read the saved page in binary mode, so `html` is bytes rather than str.
with open('python-requests.html', 'rb') as f:
    html = f.read()
print(type(html))

# Parse the bytes with lxml's HTML parser and keep the resulting tree root.
tree = etree.HTML(html)
print(type(tree))

# A more specific alternative that anchors on the section id:
# content = tree.xpath('//*[@id="requests-http-for-humans"]/h1/text()')
# A text() query returns a list with one string per matching text node.
content = tree.xpath('//h1/text()')
print(content[0])

# Link texts of the list entries under the element with id "the-user-guide".
toctree = tree.xpath('//*[@id="the-user-guide"]/div/ul/li/a/text()')
for toc in toctree:
    # print(toc.xpath('string(.)'))
    print(toc)
CSS 选择器（cssselect）基本使用
from lxml import etree
# Read the saved page in binary mode, so `html` is bytes rather than str.
with open('python-requests.html', 'rb') as f:
    html = f.read()
print(type(html))

# Parse the bytes with lxml's HTML parser.
tree = etree.HTML(html)

# Print the page <title> twice: once selected via XPath, once via a CSS
# selector, to show the two query styles side by side.
print(tree.xpath('//html/head/title/text()')[0])
# cssselect() returns elements (not strings), hence the explicit `.text`.
# NOTE: lxml's CSS selector support relies on the separate `cssselect`
# package being installed.
print(tree.cssselect('html > head > title ')[0].text)