python爬取百度地图迁徙-城内出行强度
2020-02-26 本文已影响0人
AI信仰者
百度地图迁徙链接为 :http://qianxi.baidu.com/
建议尽早爬取数据,以后可能会关闭
代码为:
import csv
import json
import os
import random
import time
from urllib import request
import requests
from utils.read_write import readTXT, writeOneJSON
# All per-city output files (JSON and CSV) are written into this directory.
os.chdir(r'D:\data\人口数据\百度迁徙大数据\全国城市省份市内流入流出')
def requerts_url(url):
    """Fetch *url* with the module-level browser headers.

    Returns the ``requests.Response`` on success, or ``None`` when the
    request fails; the failing URL is printed so it can be retried later.
    """
    try:
        # Generous timeout: the migration endpoint can be slow to answer.
        return requests.get(url, timeout=100, headers=headers)
    except requests.RequestException:
        # Narrowed from a bare ``except:`` so programming errors and
        # KeyboardInterrupt are no longer silently swallowed.
        print(url)
        return None
# Browser-like User-Agent so the endpoint does not reject scripted requests.
headers = {"User-agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 "
                         "(KHTML, like Gecko) Chrome/49.0.2623.221 Safari/537.36 SE 2.X MetaSr 1.0"}
# Install a urllib opener carrying the same User-Agent.  The correct
# attribute is ``addheaders`` — a list of (name, value) tuples; the original
# ``opener.add_headers = [headers]`` set a nonexistent attribute, which
# urllib silently ignored, so requests went out with the default UA.
opener = request.build_opener()
opener.addheaders = list(headers.items())
request.install_opener(opener)
# City-code table: one city per line, "cityId,cityName,...".  Row 0 is
# assumed to be a header, hence the 1..388 range below — TODO confirm
# against the actual BaiduMap_cityCode_1102.txt layout.
lines = readTXT(r'D:\project\jianguiyuan\data\BaiduMap_cityCode_1102.txt')
for i in range(1, 389):
    obj = lines[i].split(',')
    # Baidu Huiyan JSONP endpoint for in-city travel-intensity history.
    firsturl = "http://huiyan.baidu.com/migration/internalflowhistory.jsonp?dt=city&" \
               "id=" + obj[0] + "&date=20200226&callback=jsonp"
    print(firsturl)
    # Random 0-6 s pause between requests to avoid hammering the endpoint.
    time.sleep(random.randint(0, 6))
    response = requerts_url(firsturl)
    if response is None:
        continue  # request failed; the URL was already printed for retry
    # Strip the JSONP wrapper "jsonp(...)" to recover the raw JSON payload.
    r = response.text[6:-1]
    writeOneJSON(r, "城内出行强度_" + obj[1] + ".json")
    data_dict = json.loads(r)
    # .get avoids a KeyError on malformed / error responses.
    if data_dict.get('errmsg') == 'SUCCESS':
        # Mapping of date string -> travel-intensity value (presumably
        # yyyymmdd keys — verify against a sample response).
        data_list = data_dict['data']['list']
        with open("城内出行强度_" + obj[1] + ".csv", "w+", newline="") as csv_file:
            writer = csv.writer(csv_file)
            writer.writerow(["日期", "出行强度"])
            # Sort by date key so rows come out chronologically.
            writer.writerows(sorted([day, value] for day, value in data_list.items()))
其中涉及到的文件请点击以下链接下载:
https://download.csdn.net/user/qq_30803353/uploads
https://download.csdn.net/download/qq_30803353/12192963
read_write.py文件链接