爬虫

6-XPath 和 CSS 选择器（cssselect）的基本使用

2018-08-10  本文已影响1人  撸撸很乖张

XPath 基本使用

import requests
from lxml import etree

# url = 'http://www.python-requests.org/en/master/'
#
# with requests.get(url) as resp:
#     html = resp.content
#
# with open("python-requests.html",'wb') as f:
#     f.write(html)

# Load the locally saved copy of the page as raw bytes (the download step
# that produces this file is the commented-out snippet above).
with open('python-requests.html', mode='rb') as page_file:
    html = page_file.read()
print(type(html))

# Parse the bytes into an element tree that can be queried with XPath.
tree = etree.HTML(html)
print(type(tree))

# Text nodes of every <h1> in the document; only the first is shown.
# A narrower alternative is '//*[@id="requests-http-for-humans"]/h1/text()'.
content = tree.xpath('//h1/text()')
print(content[0])

# Link texts of the entries listed under the "the-user-guide" section.
# Because the expression ends in text(), each result is already a string
# and can be printed as-is.
toctree = tree.xpath('//*[@id="the-user-guide"]/div/ul/li/a/text()')
for entry in toctree:
    print(entry)

CSS 选择器（cssselect）基本使用

from lxml import etree

# Read the locally saved page back in as raw bytes.
with open('python-requests.html', mode='rb') as page_file:
    html = page_file.read()
print(type(html))

tree = etree.HTML(html)

# Fetch the page <title> twice -- first through XPath, then through the
# equivalent CSS selector -- so the two outputs can be compared.
# NOTE: per the lxml docs, cssselect() relies on the separate `cssselect`
# package being installed.
xpath_titles = tree.xpath('//html/head/title/text()')
print(xpath_titles[0])
css_titles = tree.cssselect('html > head > title ')
print(css_titles[0].text)
上一篇 下一篇

猜你喜欢

热点阅读