Scraping Product Information

2016-08-10  海洋之心_
from bs4 import BeautifulSoup

# Parse the locally saved page; a raw string keeps the backslashes in the Windows path from being treated as escapes.
with open(r'D:\Py\Plan-for-combating-master\week1/1_2/1_2answer_of_homework/index.html', 'r') as wb_data:
    Soup = BeautifulSoup(wb_data, 'lxml')
    # CSS selectors for each field: price, title, review count, image, and star rating.
    prices = Soup.select('body > div:nth-of-type(1) > div > div.col-md-9 > div:nth-of-type(2) > div > div > div.caption > h4.pull-right')
    titles = Soup.select('body > div > div > div.col-md-9 > div > div > div > div.caption > h4 > a')
    reviews = Soup.select('body > div > div > div.col-md-9 > div > div > div > div.ratings > p.pull-right')
    images = Soup.select('body > div > div > div.col-md-9 > div > div > div > img')
    rates = Soup.select('body > div > div > div.col-md-9 > div > div > div > div.ratings > p:nth-of-type(2)')

# The lists were built inside the with block, so the parsed results are still
# available here even though the file has already been closed.
for price, title, review, image, rate in zip(prices, titles, reviews, images, rates):
    data = {
        'price': price.get_text(),
        'title': title.get_text(),
        'review': review.get_text(),
        'image': image.get('src'),
        # Count the spans whose class is exactly "glyphicon glyphicon-star" to get the star rating.
        'rate': len(rate.find_all("span", "glyphicon glyphicon-star"))
    }
    print(data)

Key points

Reference on the difference between CSS3's :nth-child and :nth-of-type selectors: http://www.zhangxinxu.com/wordpress/2011/06/css3%E9%80%89%E6%8B%A9%E5%99%A8nth-child%E5%92%8Cnth-of-type%E4%B9%8B%E9%97%B4%E7%9A%84%E5%B7%AE%E5%BC%82/
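
As a minimal sketch of that difference (not from the original post; it uses a made-up three-child <div> and assumes a BeautifulSoup version whose CSS backend accepts :nth-child):

from bs4 import BeautifulSoup

# Made-up fragment: the <div> has three children, <h1>, <p>, <p>.
html = '<div><h1>title</h1><p>first p</p><p>second p</p></div>'
soup = BeautifulSoup(html, 'lxml')

# :nth-of-type(2) counts only siblings with the same tag, so it matches the 2nd <p>.
print(soup.select('div > p:nth-of-type(2)')[0].get_text())  # second p

# :nth-child(2) counts all siblings, so the 2nd child overall is the 1st <p>.
# (Assumes a BeautifulSoup release new enough to accept :nth-child.)
print(soup.select('div > p:nth-child(2)')[0].get_text())    # first p

Selectors copied from browser developer tools are usually written with :nth-child, which is one reason they may need to be rewritten with :nth-of-type for older BeautifulSoup releases that do not accept :nth-child.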

The first way, opening and closing the file manually:

fs = open("file path", "r")
print(fs.read())
fs.close()

The second way, using a with statement:

with open("file path", "r") as fs:
    print(fs.read())
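
Of the two, the with form is generally preferred: the file is closed automatically when the block exits, even if an exception is raised, so there is no need to remember to call fs.close().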