week2_1_homework-due
## Grab listing info from foshan.xiaozhu.com
```
import requests
import pymongo
from bs4 import BeautifulSoup  # fixed: original read "from bs4 importBeautifulSoup"

# MongoDB connection: database "xiaozhu_fs", one collection for daily-rent listings.
client = pymongo.MongoClient('localhost', 27017)
xiaozhu_fs = client['xiaozhu_fs']
sheet_daily_rent = xiaozhu_fs['sheet_daily_rent']
def client_get_info(info):
    """Insert one scraped listing document into the daily-rent collection.

    `info` is the dict built by get_page_info().
    """
    # Original body was not indented and so was a syntax error.
    sheet_daily_rent.insert_one(info)
def get_page_links(page_nums=3):
    """Collect listing detail-page URLs from the first `page_nums` search pages.

    Returns a list of href strings scraped from the paginated search results.
    """
    page_links = []
    # range end is page_nums + 1 so the default of 3 really fetches pages 1-3;
    # the original `range(1, page_nums)` stopped one page short.
    for each_num in range(1, page_nums + 1):
        full_url = 'http://foshan.xiaozhu.com/search-duanzufang-p{}-0/'.format(str(each_num))
        wb_data = requests.get(full_url)
        soup = BeautifulSoup(wb_data.text, 'lxml')
        for link in soup.select('#page_list > ul > li > a'):
            page_links.append(link.get('href'))
    return page_links
def print_gender(class_name):
    """Map the landlord avatar's CSS class name to a gender string.

    Returns 'male' for 'member_boy_ico', 'female' for 'member_girl_ico',
    and None for any other class.
    """
    # Guard clauses replace the original's nested else/if pyramid.
    if class_name == 'member_boy_ico':
        return 'male'
    if class_name == 'member_girl_ico':
        return 'female'
    return None
def get_page_info(page_nums=3):
    """Scrape each listing detail page and store its info in MongoDB.

    Fetches the detail URLs via get_page_links(), extracts the listing
    fields with CSS selectors, and inserts one document per listing.
    """
    urls = get_page_links(page_nums)
    for url in urls:
        wb_data = requests.get(url)
        soup = BeautifulSoup(wb_data.text, 'lxml')
        # The first CSS class on the landlord avatar span encodes gender.
        gender = soup.select('#floatRightBox > div > div > h6 > span')[0].get('class')[0]
        data = {
            'title': soup.title.text,
            'address': soup.select('.pr5')[0].text,
            'daily-price': soup.select('.day_l')[0].text,
            'landlord_name': soup.select('a.lorder_name')[0].text,
            'gender': print_gender(gender),
            'landlord_info': list(soup.select('p.col_green')[0].stripped_strings),
        }
        # Persist the listing; this call was commented out in the original,
        # which left the function building `data` and discarding it.
        client_get_info(data)
if __name__ == '__main__':
    # Uncomment to (re-)scrape before printing what is stored:
    # get_page_info()
    for item in sheet_daily_rent.find():
        print(item)
```