爬取人人贷网站
2017-05-10 本文已影响36人
异想派
# -*- coding: UTF-8 -*-
# Scrapes the loan listing page of renrendai.com and prints it as a table.
import bs4
import sys
# Python 2 only: reload() makes sys.setdefaultencoding available again so the
# implicit str/unicode conversions below default to UTF-8.
reload(sys)
sys.setdefaultencoding("utf-8")
import requests
from bs4 import BeautifulSoup
import json
# Address of the listing page that main() downloads.
url="https://www.renrendai.com/loan#page-1"
def gethtml(url, timeout=10):
    """Download a page and return its body as text.

    Args:
        url: Address to request with HTTP GET.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout the request may block indefinitely.

    Returns:
        The response text, or ``None`` when the request fails or the server
        answers with an error status (a message is printed in that case).
    """
    try:
        r = requests.get(url, timeout=timeout)
        # Turn 4xx/5xx answers into an exception so they share the failure path.
        r.raise_for_status()
    except requests.RequestException:
        # Only network/HTTP failures are expected here; a bare ``except`` would
        # also hide KeyboardInterrupt and programming errors.
        print("连接失败")
        return None
    return r.text
def parsehtml(html):
    """Extract loan records from the listing page source.

    The loan data is read from the JSON text held by every element whose id
    is ``loan-list-rsp``; the entries are taken from ``data -> loans``.

    Args:
        html: Page source as text. ``None`` or an empty string (for example
            after a failed download) yields an empty result.

    Returns:
        A list of ``[loanId, title, amount, interest, months, startTime]``
        lists, one per loan, in the order they appear in the page.

    Raises:
        ValueError: If the embedded text is not valid JSON.
        KeyError: If the JSON lacks ``data``/``loans`` or a loan lacks one of
            the expected fields.
    """
    if not html:
        # Nothing was downloaded, so there is nothing to parse.
        return []

    fields = ("loanId", "title", "amount", "interest", "months", "startTime")
    soup = BeautifulSoup(html, "html.parser")
    mess = []
    # Calling the soup object is shorthand for find_all().
    for tag in soup(id="loan-list-rsp"):  # each tag is a bs4.element.Tag
        raw = tag.string
        if raw is None:
            # Without this guard str(None) would hand the literal "None" to
            # the JSON parser and fail with a misleading error.
            continue
        loans = json.loads(str(raw))["data"]["loans"]
        for loan in loans:
            mess.append([loan[key] for key in fields])
    return mess
def showinfo(num, mess):
    """Print a header row followed by at most ``num`` loan records.

    Args:
        num: Maximum number of records to print. Values below one print only
            the header.
        mess: Sequence of records; the first six items of each record are
            printed as loan id, title, amount, interest, term and start time.

    Returns:
        None. The table is written to standard output, columns separated by
        tabs and each value centred in a width of 10.
    """
    tplt = "{:^10}\t{:^10}\t{:^10}\t{:^10}\t{:^10}\t{:^10}"
    # Parenthesised single-argument print works both as the Python 2 statement
    # and as the Python 3 function.
    print(tplt.format("订单标的", "借款标题", "金额", "利息", "期限", "募资时间"))
    # Slice instead of indexing 0..num-1 so a list shorter than ``num`` does
    # not raise IndexError; max() keeps a negative ``num`` printing nothing.
    for cc in mess[:max(num, 0)]:
        print(tplt.format(cc[0], cc[1], cc[2], cc[3], cc[4], cc[5]))
def main():
    """Download the loan listing page, parse it and print up to 20 loans."""
    html = gethtml(url)
    if html is None:
        # gethtml returns None after reporting a failed request; parsing
        # None would only raise a second, less helpful error.
        return
    mess = parsehtml(html)
    # Cap the row count at what was actually parsed so a short page cannot
    # overrun the list.
    showinfo(min(20, len(mess)), mess)


# Run the scraper when the file is executed.
main()