你脑海的意识流与你分享青春和爱情的故事代码改变世界

python爬取百度地图迁徙-城内出行强度

2020-02-26  本文已影响0人  AI信仰者

百度地图迁徙平台的链接为:http://qianxi.baidu.com/
建议尽早爬取数据,该平台以后可能会关闭。
代码如下:

import csv
import json
import os
import random
import time
from urllib import request
import requests

from utils.read_write import readTXT, writeOneJSON

os.chdir(r'D:\data\人口数据\百度迁徙大数据\全国城市省份市内流入流出')


def requerts_url(url):
    """Fetch *url* with a GET request using the module-level ``headers``.

    Args:
        url: The address to request.

    Returns:
        The ``requests`` response object, or ``None`` when the request
        failed (connection error, timeout, invalid URL, ...). The failing
        URL is printed so it can be retried later.
    """
    try:
        return requests.get(url, timeout=100, headers=headers)
    except requests.RequestException:
        # Only swallow request-level failures; a bare ``except`` would also
        # hide KeyboardInterrupt and genuine programming errors.
        print(url)
        return None


# Browser-like User-Agent sent with every request.
headers = {"User-agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 "
                         "(KHTML, like Gecko) Chrome/49.0.2623.221 Safari/537.36 SE 2.X MetaSr 1.0"}
opener = request.build_opener()
# OpenerDirector reads ``addheaders`` (a list of (name, value) tuples).
# The previous ``add_headers`` attribute was never consulted, so the custom
# User-Agent was silently ignored for urllib requests.
opener.addheaders = list(headers.items())
# Make this opener the default for subsequent urllib.request.urlopen calls.
request.install_opener(opener)

# Crawl the intra-city travel intensity history for each city in the code
# table, saving the raw JSON and a date-sorted CSV per city.

# NOTE(review): not used anywhere in the visible script; kept in case other
# code relies on the name.
date_list = []
# Raw string: the path value is unchanged, but the backslashes are no longer
# parsed as (invalid) escape sequences such as ``\p`` or ``\d``.
lines = readTXT(r'D:\project\jianguiyuan\data\BaiduMap_cityCode_1102.txt')
# Index 0 is skipped (presumably a header row - confirm against the file).
# The upper bound is capped so a shorter table does not raise IndexError.
for i in range(1, min(389, len(lines))):
    # Column 0 is used as the city id in the URL, column 1 in the file names.
    obj = lines[i].split(',')
    firsturl = "http://huiyan.baidu.com/migration/internalflowhistory.jsonp?dt=city&" \
               "id=" + obj[0] + "&date=20200226&callback=jsonp"
    print(firsturl)
    # Random 0-6 second pause between requests.
    time.sleep(random.randint(0, 6))
    response = requerts_url(firsturl)
    if response is None:
        # The request failed (the URL was already printed by requerts_url);
        # skip this city instead of aborting the whole crawl.
        continue
    # Drop the first 6 characters and the last one: with callback=jsonp the
    # payload is expected to be wrapped as ``jsonp(...)``.
    r = response.text[6:-1]
    writeOneJSON(r, "城内出行强度_" + obj[1] + ".json")
    data_dict = json.loads(r)  # parse the JSON text into a dict
    if data_dict['errmsg'] != 'SUCCESS':
        continue
    data_list = data_dict['data']['list']
    with open("城内出行强度_" + obj[1] + ".csv", "w+", newline="") as csv_file:
        writer = csv.writer(csv_file)
        writer.writerow(["日期", "出行强度"])
        # One (date, intensity) row per entry, ordered by date key.
        writer.writerows(sorted(data_list.items()))

其中涉及到的文件请点击以下链接下载:
https://download.csdn.net/user/qq_30803353/uploads
https://download.csdn.net/download/qq_30803353/12192963
read_write.py 文件链接

上一篇下一篇

猜你喜欢

热点阅读