Amazon
2017-07-04 本文已影响14人
乐小Pi孩_VoV
import requests
from bs4 import BeautifulSoup
url = 'https://www.amazon.com/s/ref=nb_sb_noss?url=search-alias%3Daps&field-keywords=mouse'
# A timeout keeps the script from hanging on a stalled connection, and
# raise_for_status() stops on an HTTP error instead of parsing an error page.
wb_data = requests.get(url, timeout=30)
wb_data.raise_for_status()
soup = BeautifulSoup(wb_data.text, 'lxml')
ex = soup.select('ul#s-results-list-atf > li.s-result-item.celwidget')
# Reference CSS path kept from the original notes (presumably copied from the
# browser's developer tools for a result's title link):
#result_0 > div > div > div > div.a-fixed-left-grid-col.a-col-right > div.a-row.a-spacing-small > div:nth-child(1) > a
# Only the third result (index 2) is inspected; fail with a clear message when
# the page yielded fewer results than that.
if len(ex) < 3:
    raise SystemExit('expected at least 3 search results, found %d' % len(ex))
item = ex[2]
title = item.select('div.a-row.a-spacing-none > a')[0]['title']
price = item.select('span.a-color-base.sx-zero-spacing')[0]['aria-label']
review = item.select('div.a-row.a-spacing-mini > a.a-size-small.a-link-normal.a-text-normal')[0].get_text()
# The second 'span.a-icon-alt' element is used; the first space-separated token
# of its text is taken as the star value.
star = item.select('span.a-icon-alt')[1].get_text().split(' ')[0]
imageUrl = item.select('div.a-row > div > a.a-link-normal.a-text-normal > img')[0]['src']
link = item.select('div.a-row.a-spacing-none > a')[0]['href']
print(title, price, link, star, review, imageUrl)
# coding:utf-8
import os
from bs4 import BeautifulSoup
import requests
# Parse a locally saved copy of the results page. The context manager closes
# the file handle as soon as its bytes have been read.
with open(r'F:\mouse.htm', 'rb') as data:
    data_ = data.read()
soup = BeautifulSoup(data_, 'lxml')
# One element per search result in the results list.
ex = soup.select('ul#s-results-list-atf > li.s-result-item.celwidget')
def getInfo(P):
    """Extract the listing fields from one search-result element.

    Each field is looked up with its own CSS selector. A field whose element
    or attribute is missing is reported as the string ``'None'``. When an
    image URL is found, the image is downloaded through ``down``; if that
    download fails the image URL is also reported as ``'None'``.

    The image URL is printed, and the collected fields are returned.

    Args:
        P: An element exposing ``select(css_selector)`` that returns a list
            of matches supporting ``[attribute]`` and ``get_text()``.

    Returns:
        dict: Keys ``Title``, ``Price``, ``Review``, ``Star``, ``ImageUrl``
        and ``Link``, each mapped to the extracted string or ``'None'``.
    """

    def _extract(getter):
        # A missing element (IndexError) or attribute (KeyError) means the
        # field is absent; anything else is a real error and should surface.
        try:
            return getter()
        except (IndexError, KeyError):
            return 'None'

    title = _extract(
        lambda: P.select('h2.a-size-medium.s-inline.s-access-title.a-text-normal')[0]['data-attribute'])
    price = _extract(
        lambda: P.select('span.a-color-base.sx-zero-spacing')[0]['aria-label'])
    review = _extract(
        lambda: P.select(
            'div.a-row.a-spacing-mini > a.a-size-small.a-link-normal.a-text-normal')[0].get_text())
    # The second 'span.a-icon-alt' match is used; its first token is the value.
    star = _extract(
        lambda: P.select('span.a-icon-alt')[1].get_text().split(' ')[0])
    # Take the first srcset candidate and strip its '1x' descriptor to get the
    # bare URL.
    imageUrl = _extract(
        lambda: P.select('div.a-row > div > a.a-link-normal.a-text-normal > img')[0]['srcset']
        .split(',')[0].split('1x')[0].split(' ')[0])
    link = _extract(
        lambda: P.select('div.a-row.a-spacing-none > a')[0]['href'])

    if imageUrl != 'None':
        # The download is best-effort: network and file-system failures
        # (OSError covers requests' exceptions too) or an unusable URL must
        # not abort the scrape. The URL is reported as 'None' in that case.
        try:
            down(imageUrl)
        except (OSError, IndexError, ValueError):
            imageUrl = 'None'

    data___ = dict(Title=title, Price=price, Review=review, Star=star, ImageUrl=imageUrl, Link=link)
    print(data___['ImageUrl'])
    return data___
def down(url):
    """Download the image at *url* and save it as ``./img/<name>.jpg``.

    ``<name>`` is the last path segment of the URL up to its first dot. The
    ``./img`` directory is created when it does not exist yet. After a
    successful save, ``"<url> => <target>"`` is printed.

    Args:
        url: Address of the image to fetch.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
        OSError: If the target file cannot be written.
    """
    # Using the last path segment works for any host, not just the one
    # image server the name used to be parsed against.
    Img = url.rsplit('/', 1)[-1].split('.')[0]
    target = './img/{}.jpg'.format(Img)
    r = requests.get(url, timeout=30)
    # Do not store an error response body as if it were an image.
    r.raise_for_status()
    os.makedirs('./img', exist_ok=True)
    with open(target, 'wb') as fs:
        fs.write(r.content)
    print('%s => %s' % (url, target))
# Run the field extraction over every result element found on the page.
for result in ex:
    getInfo(result)