作业1

2016-05-21  本文已影响0人  xilixjd

import time

import requests
from bs4 import BeautifulSoup

# A single 58.com listing page, usable as a sample argument for
# get_58_goods_page.
url = (
    'http://bj.58.com/pingbandiannao/25936435448255x.shtml'
    '?psid=110197818191709710732024550'
    '&entinfo=25936435448255_0&iuType=p_0'
)

def get_58_goods_page(url):
    """Fetch one 58.com listing page and print the details found on it.

    The page is downloaded with ``requests`` and parsed by BeautifulSoup
    using the ``lxml`` parser.  Category, title, posting time and price
    elements are selected separately and then paired up with ``zip`` (which
    stops at the shortest selection); one dict is printed per pairing.

    Args:
        url: Address of the listing detail page.  It is also echoed back
            under the ``'url'`` key of every printed dict.

    Returns:
        None.  The extracted records are only printed to stdout.
    """
    wb_data = requests.get(url)
    soup = BeautifulSoup(wb_data.text, 'lxml')

    # Shared selector prefixes.  'sumary' is reproduced exactly as written
    # in the original selectors.
    left_column = 'div.person_add_top.no_ident_top > div.per_ad_left'
    summary_spans = (
        left_column + ' > div.col_sub.sumary > ul > li > div.su_con > span'
    )

    cates = soup.find_all('a', href='http://bj.58.com/pbdn/')
    titles = soup.select(left_column + ' > div.col_sub.mainTitle > h1')
    times = soup.select('ul.mtit_con_left.fl > li.time')
    prices = soup.select(summary_spans)
    locations = soup.select(summary_spans + ' > a')

    # Join however many location links the page has.  One link gives "a",
    # two give "a-b"; zero or three-plus no longer leave the value undefined.
    location_text = '-'.join(link.get_text() for link in locations)

    # The location links are deliberately not zipped in: they describe the
    # page as a whole and are already folded into location_text, so their
    # count must not decide how many records get printed.
    for cate, title, posted, price in zip(cates, titles, times, prices):
        data = {
            'cate': cate.get_text(),
            'title': title.get_text(),
            'time': posted.get_text(),
            'price': price.get_text() + '元',
            'location': location_text,
            'url': url,
        }
        print(data)

# get_58_goods_page(url)

# Index-page URL prefix; number_of_url appends the page number to it.
url2 = 'http://bj.58.com/pbdn/0/pn'

def get_58_index_page(url):
    """Collect listing detail links from one 58.com index page.

    The page is downloaded with ``requests``, parsed by BeautifulSoup using
    the ``lxml`` parser, and every ``a.t`` anchor is inspected.

    Args:
        url: Address of the index (search result) page.

    Returns:
        A list of ``href`` strings, in document order, that contain
        ``'entinfo'`` and contain neither ``'zhineng'`` nor ``'jing'``.
    """
    wb_data = requests.get(url)
    soup = BeautifulSoup(wb_data.text, 'lxml')

    links = []
    for anchor in soup.select('a.t'):
        href = anchor.get('href')
        # Tag.get returns None for an anchor without an href attribute;
        # a substring test on None would raise TypeError.
        if href is None:
            continue
        # NOTE(review): 'zhineng' / 'jing' presumably mark promoted or
        # otherwise unwanted links -- confirm against the live page.
        if 'entinfo' in href and 'zhineng' not in href and 'jing' not in href:
            links.append(href)
    return links

def number_of_url(start, end, url, delay=2):
    """Crawl a range of index pages and print every listing they link to.

    For each page number from ``start`` to ``end`` the index page at
    ``url + str(number)`` is fetched with ``get_58_index_page``; every link
    it returns is then passed to ``get_58_goods_page``.

    Args:
        start: First page number to crawl (inclusive).
        end: Last page number to crawl (inclusive).
        url: Index-page URL prefix to which the page number is appended.
        delay: Seconds to sleep before each listing request.  Defaults to 2,
            the pause that used to be hard-coded.

    Returns:
        None.
    """
    for num in range(start, end + 1):
        index_url = url + str(num)
        for goods_url in get_58_index_page(index_url):
            # Pause between listing requests.
            time.sleep(delay)
            get_58_goods_page(goods_url)

# Crawl index pages 1 and 2 and print every matching listing.
number_of_url(start=1, end=2, url=url2)

上一篇 下一篇

猜你喜欢

热点阅读