案例21:爬虫取书名、评分、价格
2020-05-03 本文已影响0人
Iphone60Plus
import requests
from bs4 import BeautifulSoup

# Download the landing page. The timeout keeps the script from waiting
# forever on a stalled connection, and raise_for_status() aborts on an HTTP
# error status instead of going on to parse an error page.
res = requests.get('http://books.toscrape.com/', timeout=10)
res.raise_for_status()

# Parse the returned HTML.
soup = BeautifulSoup(res.text, 'html.parser')

# <ul> and <li> tags occur many times in the page, so searching for them
# directly is ambiguous. Narrow down step by step instead: first the
# <ul class="nav nav-list">, then the <ul> nested inside it, and finally
# every <li> below that inner list.
aaa = soup.find('ul', class_="nav nav-list").find('ul').find_all('li')

# Collect the whitespace-stripped text of each <li> into one list and print
# the whole list once.
iii = [li.text.strip() for li in aaa]
print(iii)
import requests
from bs4 import BeautifulSoup

# Download the category listing page. The timeout keeps the script from
# waiting forever on a stalled connection, and raise_for_status() aborts on
# an HTTP error status instead of going on to parse an error page.
res = requests.get(
    'http://books.toscrape.com/catalogue/category/books/travel_2/index.html',
    timeout=10,
)
res.raise_for_status()
soup = BeautifulSoup(res.text, 'html.parser')

# Every <article class="product_pod"> element is treated as one book entry.
article = soup.find_all('article', class_='product_pod')
for item in article:
    # The <a> inside the entry's <h3> carries the full book name in its
    # "title" attribute.
    name = item.find('h3').find('a')['title']
    print(name)

    # The rating is read from the first <p> of the entry: it is the second
    # value in that tag's "class" attribute list.
    score = item.find('p')['class'][1]
    print(score)

    # The price is the text of <p class="price_color">.
    price = item.find('p', class_='price_color')
    # NOTE(review): the slice drops the first character of the price text.
    # Presumably that character is an unwanted leading symbol (a stray
    # character from decoding, or the currency sign) -- confirm against the
    # real output before changing how the response is decoded.
    print(price.text[1:] + '\n')