# 课时26 在MongoDB中筛选房源
#
# 2016-07-07  本文已影响0人  数据分析学雷
from pymongo import MongoClient
from bs4 import BeautifulSoup
import requests

# Address handed to MongoClient below.
host, port = 'localhost', 27017

# Handles for the server connection, the 'test' database, and the 'sheet'
# collection inside it; the rest of the script reads and writes `sheet`.
client = MongoClient(host, port)
db = client['test']
sheet = db['sheet']

# Search-result page URLs for pages 1 through 3; the page number fills the
# single `{}` placeholder in the template.
urls = list(map('http://bj.xiaozhu.com/search-duanzufang-p{}-0/'.format, range(1, 4)))
print(urls)

# Fetch each search-result page, pull out listing titles and prices, and
# store one document per listing in the `sheet` collection.
for url in urls:
    try:
        # A timeout keeps one stalled connection from hanging the whole run.
        wb_data = requests.get(url, timeout=10)
        # Fail on 4xx/5xx instead of parsing an error page that would
        # silently yield no listings.
        wb_data.raise_for_status()
    except requests.RequestException as exc:
        # Report the failed page and carry on with the remaining ones.
        print('skipping {}: {}'.format(url, exc))
        continue

    soup = BeautifulSoup(wb_data.text, 'lxml')
    titles = soup.select('#page_list > ul > li > div.result_btm_con.lodgeunitname > div > a > span')
    prices = soup.select('#page_list > ul > li > div.result_btm_con.lodgeunitname > span.result_price > i')

    # NOTE(review): zip pairs titles and prices by position and stops at the
    # shorter list; this assumes every listing has exactly one of each.
    for title, price in zip(titles, prices):
        data = {
            'title': title.get_text(),
            'price': price.get_text(),  # stored as text, converted when filtering
        }

        # Print before inserting: insert_one adds an '_id' key to `data`.
        print(data)
        sheet.insert_one(data)
# Separator between the scrape output above and the filtered listing below.
print('----------------------------------------')
# Print every stored record whose price is at least 500.
for record in sheet.find():
    try:
        # Prices are stored as text, so convert before comparing.
        price = int(record['price'])
    except (KeyError, TypeError, ValueError):
        # A document with a missing or non-numeric price cannot be compared;
        # skip it rather than abort the whole listing.
        continue
    if price >= 500:
        print(record)
# 上一篇 下一篇
#
# 猜你喜欢
#
# 热点阅读