第一周大作业- http://bj.58.com/pingban

2016-07-18  本文已影响0人  rdczowh
  1. 结果如下:


  2. 程序源码:

#!/usr/bin/env python
# -*- coding: utf8 -*-
import requests
from bs4 import BeautifulSoup
def get_links_from(who_sells):
    """Collect item-detail URLs from one 58.com tablet-PC listing page.

    Args:
        who_sells: 0 for personal-seller listings, 1 for business listings
            (it becomes a path segment of the listing-page URL).

    Returns:
        list[str]: detail-page URLs with their tracking query strings removed.
    """
    url_base = "http://bj.58.com/pingbandiannao/{}/".format(who_sells)
    r = requests.get(url_base)
    soup = BeautifulSoup(r.text, 'lxml')
    # Each listing row is <tr class="zzinfo">; the anchor in the title cell
    # carries the detail-page href.  Split on '?' to drop tracking parameters.
    return [a.get("href").split('?')[0]
            for a in soup.select('tr.zzinfo td.t  a.t ')]
def _first_text(nodes, default=''):
    """Return the text of the first matched node, or *default* on no match."""
    return nodes[0].text if nodes else default


def download(who_sells):
    """Fetch every listing found by get_links_from and print its fields.

    Args:
        who_sells: 0 -> personal sellers, 1 -> business sellers; also decides
            the '个人'/'公司' label stored under the 'var' key.

    Side effects:
        Performs one HTTP GET per listing and prints a dict per listing.
    """
    for url in get_links_from(who_sells):
        wb_data = requests.get(url)
        soup = BeautifulSoup(wb_data.text, 'lxml')
        data = {
            'domain': _first_text(soup.select('span.crb_i a')),
            'title': soup.title.text.strip(),
            'price': _first_text(soup.select('span.price_now i')),
            # NOTE(review): "palce_li" is copied verbatim from the site's
            # markup — it looks like a typo on their side; verify against
            # the live page before "fixing" it.
            'place': _first_text(soup.select('div.palce_li i')),
            'views': _first_text(soup.select('span.look_time')),
            'var': '个人' if who_sells == 0 else '公司',
        }
        print(data)
def main():
    """Entry point: scrape and print listings posted by personal sellers."""
    download(0)


if __name__ == '__main__':
    main()
  1. 经验总结:
上一篇下一篇

猜你喜欢

热点阅读