100行代码做一个浓缩版知乎

2017-08-21  本文已影响0人  不知伯乐
100行代码做一个浓缩版知乎
100行代码做一个浓缩版知乎
#-*-coding:utf8;-*-
#qpy:2
#qpy:console
import requests,os,json,time,re

import warnings
warnings.filterwarnings("ignore")
# Load the HTML file stored on the device; the context manager makes sure
# the file handle is closed as soon as the content has been read.
with open('/sdcard/html', 'r') as _html_file:
    html = _html_file.read()

# HTTP headers sent with the API request in get_json; the User-Agent is an
# iPhone Mobile Safari string.
head = {
    'X-Requested-With': 'mark.via.gp',
    'User-Agent': 'Mozilla/5.0 (iPhone; U; CPU iPhone OS 4_0 like Mac OS X; en-us) AppleWebKit/532.9 (KHTML, like Gecko) Version/6.0.5 Mobile/8A93 Safari/5531.22.77',
}
#获取整个回答的json数据
#返回一个dict，包含问题标题、答主名字、答主签名、点赞数和回答内容
#offset位移量
def get_json(offset,ids,page):
    """Fetch one page of answers for a Zhihu question and collect their fields.

    Args:
        offset: value sent as the ``offset`` query parameter.
        ids: question id inserted into the API URL path.
        page: value sent as the ``limit`` query parameter.

    Returns:
        A dict with these keys:
            'title'        -- title of the question (taken from the first answer)
            'name_list'    -- author name of each answer
            'head_list'    -- author headline of each answer
            'up_list'      -- vote-up count of each answer
            'content_list' -- UTF-8 encoded answer content with image tags
                              replaced by the placeholder text

    Raises:
        IndexError: if the response's 'data' list is empty, because the
            question title is read from its first element.
    """
    url = ('''https://www.zhihu.com/api/v4/questions/''' + str(ids)
           + '''/answers?include=data%5B%2A%5D.is_normal%2Cadmin_closed_comment%2Creward_info%2Cis_collapsed%2Cannotation_action%2Cannotation_detail%2Ccollapse_reason%2Cis_sticky%2Ccollapsed_by%2Csuggest_edit%2Ccomment_count%2Ccan_comment%2Ccontent%2Ceditable_content%2Cvoteup_count%2Creshipment_settings%2Ccomment_permission%2Ccreated_time%2Cupdated_time%2Creview_info%2Cquestion%2Cexcerpt%2Crelationship.is_authorized%2Cis_author%2Cvoting%2Cis_thanked%2Cis_nothelp%2Cupvoted_followees%3Bdata%5B%2A%5D.mark_infos%5B%2A%5D.url%3Bdata%5B%2A%5D.author.follower_count%2Cbadge%5B%3F%28type%3Dbest_answerer%29%5D.topics&limit='''
           + str(page) + '''&offset=''' + str(offset) + '''&sort_by=default''')

    session = requests.Session()
    # NOTE(review): the login cookie is hard-coded in the source; consider
    # loading it from configuration instead of publishing it with the script.
    session.cookies['z_c0']='Mi4xWnVtakFnQUFBQUFBSUVJU0doMDNEQmNBQUFCaEFsVk5HX20zV1FDeU5uV2pHU2RQTTl2RnhDdllCbVE2WVNfSHdB|1502637083|5297b0bc003ddf3f9ab8767c3a9f276b61be180a'

    # NOTE(review): verify=False turns off TLS certificate verification.
    response = session.get(url, headers=head, verify=False)
    js = json.loads(response.text)
    answers = js['data']

    # The title is read from the first answer, so an empty page raises IndexError.
    question_title = answers[0]['question']['title']

    # The published listing had an empty pattern here, which makes re.sub
    # insert the placeholder at every position of the content rather than
    # replacing anything.
    # NOTE(review): an <img ...> tag pattern is assumed to be what was
    # intended (the placeholder means "picture") -- confirm.
    img_tag = re.compile(r'<img[^>]*>')

    name_list = []
    content_list = []
    up_list = []
    head_list = []
    for item in answers:
        author = item['author']
        up_list.append(item['voteup_count'])
        name_list.append(author['name'])
        head_list.append(author['headline'])
        content_list.append(img_tag.sub('图片', item['content'].encode('utf8')))

    return {
        'up_list': up_list,
        'content_list': content_list,
        'name_list': name_list,
        'title': question_title,
        'head_list': head_list,
    }
#数据持久化函数
#dt为信息dict（结构同get_json的返回值），其中：
#content_list回答的文本内容
#name_list答主的名字
#up_list点赞数
#title问题的标题
#fm为/sdcard下存放结果的目录名
def download(dt,fm):
   filename=re.sub(r'[\?/\\:\ *"<>|]','',dt['title'])
   print filename.encode('utf8')
   if False==os.path.exists('/sdcard/%s/%s'%(fm,filename)):
       os.mkdir("/sdcard/%s/%s"%(fm,filename))  
   i=0
   print len(dt['name_list'])
   print  len(dt['content_list'])
   while i',r.content))
 for item in u:
 
     question(item)
     global qq
     print '开始读取下一个问题'
     qq=0
 print '#'*10+str(i)
上一篇 下一篇

猜你喜欢

热点阅读