2019-05-04
2019-05-04 本文已影响0人
远见wan230114
import requests
from bs4 import BeautifulSoup
# Import the BeautifulSoup library (bs4) used for HTML parsing
# Fetch the demo page. The timeout keeps the script from hanging forever on a
# stalled connection, and raise_for_status() aborts on an HTTP error response
# instead of parsing the error page as if it were the expected content.
res = requests.get(
    'https://localprod.pandateacher.com/python-manuscript/crawler-html/spider-men5.0.html',
    timeout=10,
)
res.raise_for_status()
html = res.text

# Parse the HTML text into a BeautifulSoup object (a bs4.BeautifulSoup
# instance, not a plain string).
soup = BeautifulSoup(html, 'html.parser')

# Collect every element whose class attribute is "books". The result is a
# bs4.element.ResultSet, which is empty when nothing matches.
items = soup.find_all(class_='books')
if not items:
    # Same exception type that items[0] would raise, but with a useful message.
    raise IndexError("no element with class 'books' found in the page")

# Inside the first "books" element, look up the element with class "title".
# find() returns None when there is no match, so fail with a clear message
# rather than an opaque "'NoneType' object is not subscriptable" error below.
Tag_title = items[0].find(class_='title')
if Tag_title is None:
    raise LookupError(
        "no element with class 'title' found in the first 'books' element"
    )

# Show the whole tag, then the value of its href attribute
# (subscripting a tag raises KeyError if the attribute is absent).
print(Tag_title)
print(Tag_title['href'])