小爬虫

2017-11-30  本文已影响0人  Python野路子

注:本人菜鸟一枚,自学 Python,在此记录学习过程中的所学所思,希望自己有所收获。

__author__ = 'juehuan'
import requests
import json
import time


class cldata():
    """Page through the portal's ``getXkzsList`` endpoint and dump rows to a file.

    Creating an instance opens ``食品.txt`` for writing (truncating any
    existing file) and writes the header line.  Call :meth:`close` when done,
    or use the instance as a context manager so the file is always closed.
    """

    def __init__(self, timeout=30):
        """Set up request parameters and open the output file.

        Args:
            timeout: Seconds to wait for each HTTP request before giving up.
        """
        self.url = 'http://125.35.6.84:81/xk/itownet/portalAction.do?method=getXkzsList'
        self.header = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36'}
        # NOTE(review): passed as ``cookies=``, this dict sends a cookie that is
        # literally named "Cookie"; requests expects {name: value}, e.g.
        # {'JSESSIONID': '...'}.  Left unchanged because it is unclear which of
        # the two hard-coded session ids (if any) the server needs -- confirm.
        self.cookie = {
            'Cookie': 'JSESSIONID=02EF275A008A3ADEE700B04B543AE18E; JSESSIONID=A7FC968A8F43B9DA2EABBA61D72602CC'}
        self.timeout = timeout
        self.f = open('食品.txt', 'w', encoding='utf-8')
        self.f.write('企业名称 | 许可证编号 | 发证机关 | 有效期至 | 发证日期\n')

    def __enter__(self):
        """Return the scraper itself so it can be used in a ``with`` block."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the output file; exceptions from the block are not suppressed."""
        self.close()
        return False

    def get_cfda(self, page):
        """Fetch one result page and write each of its records to the file.

        Args:
            page: Page number sent as the ``page`` form field.

        Raises:
            requests.RequestException: On connection failure, timeout, or a
                non-2xx HTTP status.
            ValueError: If the response body is not valid JSON.
            KeyError: If the decoded JSON has no ``list`` entry.
        """
        self.data = {
            'on': True,
            'page': page,
            'pageSize': 15,
            'productName': '',
            'conditionType': 1,
            'applyname': '',
            'applysn': ''
        }
        # A timeout keeps one stalled connection from hanging the whole crawl.
        self.html = requests.post(self.url, data=self.data, headers=self.header,
                                  cookies=self.cookie, timeout=self.timeout)
        # Fail with a clear HTTP error instead of an opaque JSON decode error.
        self.html.raise_for_status()
        datas = self.html.json()
        for i in datas['list']:
            self.write_cfda(i)

    def write_cfda(self, company):
        """Write one record as a ``|``-separated line matching the header.

        Best-effort: a record with missing fields, or a failed write, is
        reported on stdout and skipped rather than aborting the crawl.

        Args:
            company: Mapping for one record from the response's ``list``.
        """
        try:
            # The second column is the licence number, not the company name
            # again.  NOTE(review): 'PRODUCT_SN' is believed to be this
            # endpoint's licence-number key -- confirm against a live response.
            self.f.write('%s | %s | %s | %s | %s\n' % (
                company['EPS_NAME'], company['PRODUCT_SN'], company['QF_MANAGER_NAME'],
                company['XK_DATE'], company['XC_DATE']))
        except (KeyError, TypeError, OSError, ValueError):
            # KeyError/TypeError: malformed record; OSError/ValueError: the
            # write failed or the file is already closed.
            print('写入错误')

    def close(self):
        """Close the output file."""
        self.f.close()


if __name__ == '__main__':
    # Crawl pages 1..270 and report the elapsed time.
    # perf_counter() is monotonic, so the measurement cannot be skewed by
    # system clock adjustments during the (long) crawl.
    start_time = time.perf_counter()
    cfda = cldata()
    try:
        for i in range(1, 271):
            cfda.get_cfda(i)
    finally:
        # Always release the output file, even if a request fails mid-crawl.
        cfda.close()
    end_time = time.perf_counter()
    print('total time :', end_time - start_time)

总结:涉及面向对象(self)、爬虫 requests 的 POST 方式、列表等知识点。

上一篇下一篇

猜你喜欢

热点阅读