Python 爬虫学习 爬取房租——麻瓜编程
编程课程链接:https://www.gitbook.com/book/mugglecoding/qa/details
课程名称:第三节练习项目:爬取租房信息
总结:
1、for循环,如果只有一个变量(只遍历一个列表),就不要用 zip,直接遍历这个列表即可。
例如 for a, b in zip(list_a, list_b): 这个是可以的。
例如 for a in zip(list_a): 这个是不可以的,因为每次取到的是只含一个元素的元组,而不是元素本身;应该直接写 for a in list_a:。(注意 as 是 Python 关键字,不能用作变量名。)
2、代码中的逗号、冒号、括号必须使用英文(半角)符号,用中文(全角)符号一律会报语法错误。
3、初学者,每一次拷贝出 selector css 路径后,最好都打印一下看看。
4、def 用来定义函数。函数的书写顺序有要求:程序执行到调用语句时,被调用的函数必须已经定义好,所以函数定义要写在调用它的代码上面。这一点不太方便,顺序不对时就只能把代码剪切、粘贴来调整位置。
from bs4 import BeautifulSoup
import requests
def get_loder_sex(class_name):
    """Map the landlord gender icon's CSS classes to a gender label.

    Args:
        class_name: Value of the icon element's ``class`` attribute. The
            caller passes the result of ``Tag.get("class")``, which is
            normally a list of class names; a whitespace-separated string
            or a missing value (``None``) is tolerated as well.

    Returns:
        '男' when the ``member_ico`` class is present, otherwise '女'.
    """
    if isinstance(class_name, str):
        # Split first so that 'member_ico1' is never matched as a substring
        # of the string being searched.
        classes = class_name.split()
    elif isinstance(class_name, (list, tuple, set, frozenset)):
        classes = class_name
    else:
        # Missing attribute (None) or an unexpected type: no classes at all.
        classes = ()
    # Membership rather than equality with ['member_ico'], so an additional
    # class on the same element does not flip the result.
    return '男' if 'member_ico' in classes else '女'
def get_links(url, timeout=10):
    """Download a listing page and process every detail link found on it.

    Each anchor matched by ``#page_list > ul > li > a`` has its ``href``
    passed to ``get_detail_info``. Nothing is returned.

    Args:
        url: Address of the listing page to download.
        timeout: Seconds ``requests`` waits for the server before giving
            up, so a stalled connection cannot block the crawl forever.
    """
    web_data = requests.get(url, timeout=timeout)
    soup = BeautifulSoup(web_data.text, 'lxml')
    for link in soup.select('#page_list > ul > li > a'):
        href = link.get("href")
        if not href:
            # An anchor without a target cannot be fetched; skip it.
            continue
        get_detail_info(href)
def get_detail_info(url, timeout=10):
    """Download one detail page and print the fields scraped from it.

    The page is queried with one CSS selector per field; the matches are
    combined position by position and each combination is printed as a
    dict with the keys ``title``, ``address``, ``price``, ``image``,
    ``owners``, ``name`` and ``sex``. Nothing is returned.

    Args:
        url: Address of the detail page to download.
        timeout: Seconds ``requests`` waits for the server before giving
            up, so a stalled connection cannot block the crawl forever.
    """
    web_data = requests.get(url, timeout=timeout)
    soup = BeautifulSoup(web_data.text, 'lxml')

    titles = soup.select('h4 > em')
    addresss = soup.select(' p > span.pr5')
    prices = soup.select(' div.day_l > span')
    imgs = soup.select('#curBigImage')
    owners = soup.select('div.js_box.clearfix > div.member_pic > a > img')
    sexs = soup.select(' div.js_box.clearfix > div.member_pic > div')
    names = soup.select('#floatRightBox > div.js_box.clearfix > div.w_240 > h6 > a')

    # zip stops at the shortest result list, so a page on which any one
    # selector matches nothing prints no record at all.
    for title, address, price, img, owner, name, sex in zip(
            titles, addresss, prices, imgs, owners, names, sexs):
        data = {
            'title': title.get_text(),
            "address": address.get_text(),
            "price": price.get_text(),
            "image": img.get("src"),
            "owners": owner.get("src"),
            "name": name.get_text(),
            # The icon's class list is translated into a gender label.
            "sex": get_loder_sex(sex.get("class")),
        }
        print(data)
# Listing pages to crawl: the page number is substituted into the URL for
# numbers 1 through 9 (range's upper bound of 10 is exclusive).
urls = [
    'http://bj.xiaozhu.com/search-duanzufang-p{}-0/'.format(number)
    for number in range(1, 10)
]

# Process the listing pages one after another.
for single_url in urls:
    get_links(single_url)
'''