第一周第二课时
2016-05-21 本文已影响10人
采矿

from bs4 import BeautifulSoup
# Parse the saved homework page and print one dict per item card containing
# the image URL, the title text, the price text, the text of the right-aligned
# paragraph in the ratings block, and the number of filled star icons.

# Open in binary mode so Beautiful Soup detects the document's encoding itself
# instead of depending on the platform's default text encoding.
with open('./1_2_homework_required/index.html', 'rb') as f:
    soup = BeautifulSoup(f, 'lxml')

# Each selector returns a list of matching tags in document order.
pics = soup.select('div.col-md-9 > div > div > div > img')
titles = soup.select('h4 a')
prices = soup.select('h4.pull-right')
views = soup.select('div.ratings p.pull-right')
ratings = soup.select('div.ratings > p:nth-of-type(2)')

# NOTE: zip() stops at the shortest list, so the five selectors are expected
# to return the same number of elements (one per item card).
for pic, title, price, view, rating in zip(pics, titles, prices, views, ratings):
    data = {
        # Dictionary keys are strings: do not drop the quotes around them.
        'pic': pic.get('src'),
        'title': title.get_text(),
        'price': price.get_text(),
        'view': view.get_text(),
        # find_all(name, attrs, recursive, text, **kwargs):
        #   name is the tag name; class_ filters on a CSS class.
        # Matching the single class token 'glyphicon-star' is independent of
        # the order of, or additions to, the tag's other classes.
        # The key name 'start' is kept unchanged so the printed output keeps
        # the same shape.
        'start': len(rating.find_all('span', class_='glyphicon-star')),
    }
    print(data)