python技巧

使用requests库小案例一

2018-08-23  本文已影响0人  陆_志东
import requests
import logging
import json
# Configure the root logger to emit DEBUG-and-above records, rendered like
# "2018-07-16 16:29:20 test.py[line:36] INFO <message>".
# To log to a file instead of the console, additionally pass
# filename='log/log.txt' and filemode='w'.
logging.basicConfig(
    datefmt='%Y-%m-%d %H:%M:%S',
    format='%(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s',
    level=logging.DEBUG,
)
# The empty name selects the root logger.
logger_root = logging.getLogger("")


def read_file(path="./oid.txt"):
    """Yield each non-blank line of a UTF-8 text file, stripped of whitespace.

    Args:
        path: File to read. Defaults to ``./oid.txt``.

    Yields:
        str: One stripped, non-empty line at a time, in file order.
    """
    with open(path, "r", encoding="utf-8", newline="\n") as f:
        for line in f:
            data = line.strip()
            # Skip blank lines instead of stopping at them: a stray empty line
            # in the middle of the file must not hide the entries after it.
            if data:
                yield data


def post_http(post_data, company_url, timeout=30):
    """POST ``post_data`` to ``company_url`` and return the decoded JSON body.

    Args:
        post_data: Request body, passed to ``requests.post`` as ``data``.
        company_url: URL to post to.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a single stalled connection would block the run forever.

    Returns:
        The parsed JSON value, or ``None`` when the request fails, the status
        code is not 200, or the body is not valid JSON.
    """
    try:
        response = requests.post(company_url, data=post_data, timeout=timeout)
    except requests.RequestException:
        # Treat transport errors like any other failed request so one bad
        # call does not abort the whole batch.
        logger_root.exception("POST %s failed", company_url)
        return None
    if response.status_code != 200:
        logger_root.info(response.status_code)
        logger_root.info(response)
        return None
    try:
        return json.loads(response.text)
    except ValueError:
        # json.JSONDecodeError is a ValueError subclass.
        logger_root.exception("POST %s returned a non-JSON body", company_url)
        return None


def save_file(data_list):
    """Append the strings in ``data_list`` to ``./result.txt``, one per line."""
    with open("./result.txt", "a", encoding="utf-8", newline="\n") as out:
        joined = "\n".join(data_list)
        # The trailing newline is required: without it, the last line of this
        # batch and the first line of the next append would share one line.
        out.write(joined + "\n")



def start():
    """Look up the companies and industries for every oid and save the results.

    For each oid yielded by ``read_file()``, POST to the company endpoint and
    split the ``"companies"`` field of the reply on ``"|"``. Each company name
    is then POSTed to the industry endpoint, and its ``"l1_domain_name"`` is
    recorded. One JSON line per oid, of the form
    ``{oid: [{"company": ..., "industry": ...}, ...]}``, is buffered and
    flushed through ``save_file`` every 100 oids, with a final flush for the
    remainder.
    """
    data_list = list()
    post_data = json.dumps(["companies"],ensure_ascii=False).encode("utf-8")
    company_url = "http://114.55.103.126/yq/tool/fetch_news?oid={}"
    industry_url = "http://114.55.103.126/yq/tool/fetch_industry"

    for i, oid in enumerate(read_file(), start=1):
        row_list = list()
        row_data = {oid: row_list}
        url = company_url.format(oid)
        company_list = post_http(post_data,url)
        if company_list:
            company_list = company_list["companies"].split("|")
            logger_root.info("程序执行了 {} 次 oid 获取=========================================".format(i))
            for company in company_list:
                body_data = {"name": company}
                industry_name = post_http(json.dumps(body_data,ensure_ascii=False).encode("utf-8"),industry_url)
                if industry_name is None:
                    # Failed lookup: leave this company out of the row.
                    continue
                industry_name = industry_name.get("l1_domain_name",None)
                row_list.append({"company":company,"industry":industry_name})
        # A row is written even when the company lookup failed (empty list).
        data_list.append(json.dumps(row_data,ensure_ascii=False))
        if i % 100 == 0:
            # Floor division keeps the batch counter an integer ("1", not "1.0").
            logger_root.info("存储 file {} 次 =============================================".format(i // 100))
            save_file(data_list)
            data_list = list()

    # Flush whatever is left over from the last partial batch.
    if data_list:
        save_file(data_list)





if __name__ == '__main__':
    start()
上一篇下一篇

猜你喜欢

热点阅读