Python实战计划学习笔记0704
2016-07-10 本文已影响0人
个十滴水
实战计划第六天,学习MongoDB数据库的写入和读取。
最终成果是这样的:
[图片:Paste_Image.png]
我的代码:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# The shebang asks `env` to find the `python` interpreter through the PATH
# environment variable, so no absolute interpreter path is hard-coded here.
import pymongo
from bs4 import BeautifulSoup
import requests
# Connect to the MongoDB server on localhost:27017, then take handles to the
# 'xiaozhu' database and its 'sheet_tab' collection used by the functions below.
client = pymongo.MongoClient(host='localhost', port=27017)
xiaozhu = client.get_database('xiaozhu')
sheet_tab = xiaozhu.get_collection('sheet_tab')
def container(url):
    """Scrape one search-result page and store every listing in MongoDB.

    Downloads ``url``, pulls the price, title, detail text and link of each
    listing out of the HTML with CSS selectors, and inserts one document per
    listing into the module-level ``sheet_tab`` collection.

    Args:
        url: Address of the search-result page to fetch.
    """
    # A timeout keeps the crawl from hanging forever on an unresponsive server.
    wb_data = requests.get(url, timeout=10)
    soup = BeautifulSoup(wb_data.text, 'lxml')

    prices = soup.select('#page_list > ul > li > div.result_btm_con.lodgeunitname > span.result_price > i')
    titles = soup.select('#page_list > ul > li > div.result_btm_con.lodgeunitname > div > a > span')
    # The same `em` element supplies both the room type and the address,
    # so it is selected once and split below.
    details = soup.select('#page_list > ul > li > div.result_btm_con.lodgeunitname > div > em')
    links = soup.select('#page_list > ul > li > a')

    # zip() stops at the shortest list, so an incomplete trailing listing is dropped.
    for price, title, detail, link in zip(prices, titles, details, links):
        # The detail text is dash-separated: field 0 is the room type and
        # field 2 is the address.
        parts = [part.strip() for part in detail.get_text().split('-')]
        if len(parts) < 3:
            # Not enough fields to read an address; skip instead of raising IndexError.
            continue
        data = {
            'price': price.get_text(),
            'title': title.get_text(),
            'types': parts[0],
            # Key spelling 'adress' is kept so already-stored documents stay consistent.
            'adress': parts[2],
            'link': link.get('href'),
        }
        sheet_tab.insert_one(data)
def find_fangzi(min_price=500):
    """Print every document in ``sheet_tab`` priced at or above ``min_price``.

    The stored ``price`` value is converted with ``int()`` before comparing,
    so the filtering happens in Python rather than in the database query.

    Args:
        min_price: Lowest price (inclusive) a listing must have to be
            printed. Defaults to 500.
    """
    for listing in sheet_tab.find():
        try:
            price = int(listing['price'])
        except (KeyError, TypeError, ValueError):
            # A document without a usable integer price cannot be compared;
            # skip it rather than abort the whole scan.
            continue
        if price >= min_price:
            print(listing)
# Search-result URLs for page numbers 0, 1 and 2.
urls = list(map('http://bj.xiaozhu.com/search-duanzufang-p{}-0/'.format, range(3)))

# Scrape each page into MongoDB, then print the listings that pass the price filter.
for url in urls:
    container(url)

find_fangzi()
总结和问题
- strip()去除抓取到的字符串首尾的空白字符(空格、换行等),不会去掉字符串中间的空格
- 模块化编程--习惯性地把功能封装成函数(注意:使用函数本身并不等同于面向对象编程)