Python实战 - 第5节:开始使用MongoDB
2016-11-08 本文已影响0人
辉叔不太萌
笔记
- 连接数据库服务:
client = pymongo.MongoClient('localhost', 27017)
- 创建/访问数据库:
$dbName = client['$dbName']
- 创建/访问数据表(MongoDB 中称为集合,collection):
$tableName = $dbName['$tableName']
- 插入数据:
$tableName.insert_one(data)
- 查询数据:
$tableName.find()
$tableName.find({'$columnName':$columnValue})
# $lt/$lte/$gt/$gte/$ne 依次为 </<=/>/>=/!=
$tableName.find({'$columnName':{'$lte':$value}})
作业
- 爬取租房信息入库
import pymongo
from bs4 import BeautifulSoup
import requests
import time
# MongoDB connect: open the local server, then the 'xiaozhu' database and its
# 'duanzufang' collection, which the detail-page parser inserts into.
client = pymongo.MongoClient('localhost', 27017)
xiaozhu = client['xiaozhu']
duanzufang = xiaozhu['duanzufang']
# URL parse: search-result (list) pages to crawl, page numbers 1 through 3.
urls = ['http://bj.xiaozhu.com/search-duanzufang-p{}-0/'.format(i) for i in range(1, 4)]
def parse_gender(gender_class):
    """Translate the host's gender-icon CSS class into a display label.

    Args:
        gender_class: CSS class name taken from the gender icon element.

    Returns:
        '女' for 'member_ico1', '男' for 'member_ico', and '未知' for any
        other value.
    """
    if gender_class == 'member_ico1':
        return '女'
    if gender_class == 'member_ico':
        return '男'
    # Anything else (including an empty or missing class) is unknown.
    return '未知'
def parse_datail_page(url):
    """Scrape one listing detail page and insert the record into MongoDB.

    Fetches ``url``, extracts the title, address, daily rent, main picture,
    host picture, host name and host gender with CSS selectors, prints the
    resulting dict, inserts it into the ``duanzufang`` collection and then
    sleeps for one second.

    Args:
        url: Address of the detail page to scrape.

    Raises:
        IndexError: If a required element (anything other than the gender
            icon) is not found on the page.
        ValueError: If the rent text cannot be converted to an integer.
        requests.exceptions.RequestException: If the request fails or
            times out.
    """
    # A timeout keeps one stalled connection from hanging the whole crawl.
    web_data = requests.get(url, timeout=10)
    soup = BeautifulSoup(web_data.text, 'lxml')

    titles = soup.select('body > div.wrap.clearfix.con_bg > div.con_l > div.pho_info > h4 > em')
    addrs = soup.select('body > div.wrap.clearfix.con_bg > div.con_l > div.pho_info > p > span')
    rents = soup.select('#pricePart > div.day_l > span')
    pics = soup.select('#curBigImage')
    owner_pics = soup.select('#floatRightBox > div.js_box.clearfix > div.member_pic > a > img')
    owner_names = soup.select('#floatRightBox > div.js_box.clearfix > div.w_240 > h6 > a')

    # The gender icon is optional: when it is absent (or has no class) pass an
    # empty string so parse_gender() yields its "unknown" label instead of the
    # whole listing failing with IndexError/KeyError.
    gender_icons = soup.select('#floatRightBox > div.js_box.clearfix > div.member_pic > div')
    icon_classes = gender_icons[0].get('class') if gender_icons else None
    gender_class = icon_classes[0] if icon_classes else ''

    data = {
        'title': titles[0].get_text(),
        'addr': addrs[0].get_text().strip(),
        # NOTE: the key is spelled 'rant' (not 'rent') on purpose; documents
        # already stored and the query script filter on this exact key.
        'rant': int(rents[0].get_text()),
        'pic': pics[0].get('src'),
        'owner_pic': owner_pics[0].get('src'),
        'owner_name': owner_names[0].get_text(),
        'gender': parse_gender(gender_class),
    }
    print(data)

    # insert to MongoDB
    duanzufang.insert_one(data)
    # Pause between detail pages (presumably to avoid hammering the site).
    time.sleep(1)
def parse_list_page(url):
    """Scrape every listing linked from one search-result page.

    Fetches ``url``, collects the anchors matched by
    ``#page_list > ul > li > a`` and passes each anchor's ``href`` to
    ``parse_datail_page``.

    Args:
        url: Address of a search-result (list) page.

    Raises:
        requests.exceptions.RequestException: If the request fails or
            times out.
    """
    # A timeout keeps one stalled connection from hanging the whole crawl.
    web_data = requests.get(url, timeout=10)
    soup = BeautifulSoup(web_data.text, 'lxml')
    for link in soup.select('#page_list > ul > li > a'):
        href = link.get('href')
        # An anchor without an href has nothing to fetch; skip it rather than
        # handing None to the detail-page parser.
        if href:
            parse_datail_page(href)
# Crawl each search-result page in turn.
for url in urls:
    parse_list_page(url)
- 过滤查询租金大于等于500的房源信息
import pymongo
# Connect to the local MongoDB server and open the 'duanzufang' collection of
# the 'xiaozhu' database.
client = pymongo.MongoClient('localhost', 27017)
xiaozhu = client['xiaozhu']
duanzufang = xiaozhu['duanzufang']

# '$gte' matches documents whose 'rant' value is greater than or equal to 500.
for item in duanzufang.find({'rant': {'$gte': 500}}):
    print(item)