【node爬虫】通过高德接口爬取地址的经纬度
2021-11-15 本文已影响0人
牛老师讲GIS
概述
通过地址获取经纬度在GIS中我们称之为地理编码,与之对应的就是通过经纬度获取地址,被称为逆地理编码。不论是地理编码还是逆地理编码,在我们实际的工作、学习中都会有很多的应用场景,本文讲述的是在node环境下,通过高德API实现经纬度数据的获取。
效果
实现
在本示例中,首先将地址数据导入到数据库中,同时将获取到的经纬度数据存储在数据库中。实现代码如下:
const pgConfig = require('./model/pgConfig');
const pg = require('pg');
const pool = new pg.Pool(pgConfig);
let request = require("./utils/request");
const chalk = require('chalk');
// API keys applied for from Amap, indexed 0..n-1 so one can be picked by a
// random integer. Several keys are configured so that requests are spread
// across them, reducing the chance of an account being blocked.
// The values below are placeholders.
const aKey = {
0: '申请的key1',
1: '申请的key2',
2: '申请的key3'
}
// Amap web-service geocoding endpoint (address -> coordinates).
const aUrl = 'https://restapi.amap.com/v3/geocode/geo'
/**
 * Return a random integer.
 *
 * - `getRandom()`          -> integer in [80, 200] (the parameter defaults)
 * - `getRandom(max)`       -> integer in [1, max]
 * - `getRandom(min, max)`  -> integer in [min, max]
 *
 * The previous implementation switched on `arguments.length`, so a call with
 * no arguments fell into the `default` branch and always returned 0 — the
 * declared defaults were never used.
 *
 * @param {number} [minNum=80] lower bound (or upper bound when it is the only argument)
 * @param {number} [maxNum=200] upper bound, inclusive
 * @return {number}
 */
function getRandom(minNum = 80, maxNum = 200) {
  // `arguments.length` is the only way to tell `getRandom(5)` apart from
  // `getRandom(5, 200)` without changing the signature; the single-argument
  // form keeps its historical meaning of "1..minNum".
  if (arguments.length === 1) {
    return Math.floor(Math.random() * minNum) + 1;
  }
  // Math.floor (instead of parseInt on a number) keeps the distribution
  // uniform even when the range includes negative values.
  return Math.floor(Math.random() * (maxNum - minNum + 1)) + minNum;
}
/**
 * Fetch the response body for a URL through the shared `request` helper.
 *
 * The helper's promise is returned directly. The previous version wrapped it
 * in `new Promise` and only forwarded the fulfilment, so a failed request left
 * the returned promise pending forever (and produced an unhandled rejection).
 *
 * @param url
 * @return {Promise<unknown>} settles exactly as `request(url)` settles
 */
function getJson(url) {
  return request(url);
}
/**
 * Geocode an address through the Amap geocoding endpoint.
 *
 * @param {string} address free-text address appended to the query string as-is
 *   (the `request` helper applies `encodeURI` to the whole URL)
 * @return {Promise<string|undefined>} the `location` field of the first
 *   geocode result, or `undefined` when there is no result or the request /
 *   parsing failed (failures are logged, never thrown)
 */
async function getAddressLonLat(address) {
  try {
    // Pick a key uniformly from however many keys are configured instead of
    // assuming there are exactly three.
    const keys = Object.values(aKey);
    const key = keys[getRandom(0, keys.length - 1)];
    const urlFull = `${aUrl}?address=${address}&key=${key}`;

    const body = await getJson(urlFull);
    // The body may arrive as a Buffer, hence toString() before parsing.
    const res = JSON.parse(body.toString());

    // NOTE(review): per the Amap docs a status other than 1 means the call was
    // rejected (bad key, quota exceeded, ...) — surface it instead of treating
    // it like an address with no match. Confirm field names against the API.
    if (res.status !== undefined && String(res.status) !== '1') {
      console.warn(`高德接口返回错误: ${res.info}`);
    }

    const geocodes = res['geocodes'];
    if (geocodes && geocodes.length > 0) {
      return geocodes[0].location;
    }
  } catch (e) {
    console.debug(e);
  }
  return undefined;
}
/**
 * Geocode every row of `table` whose `lonlat` column is empty and write the
 * coordinates back, then terminate the process.
 *
 * Flow: select the pending ids, then for each id (sequentially, with a random
 * pause from `getRandom()` between rows) read `reg_location` + `company_name`,
 * geocode the concatenated text via `getAddressLonLat`, and collect the
 * result. Every 20 rows, and after the last row, progress is printed and the
 * collected results are written to the database.
 *
 * Differences from the previous callback-chain version:
 * - rows at positions 20, 40, ... are no longer skipped (the flush step used
 *   to advance the index without geocoding that row);
 * - the pending batch is cleared after each flush instead of growing forever
 *   and re-updating earlier rows;
 * - a failed or empty row query is logged and skipped instead of stalling;
 * - values are passed as query parameters rather than spliced into SQL;
 * - the process exits with code 0 on success (it used to exit with 1).
 *
 * @param {string} table table name; interpolated into the SQL text because
 *   identifiers cannot be bound as parameters, so it must be a trusted value
 */
function startSpider(table) {
  const BATCH_SIZE = 20;
  console.time(table);

  const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

  // Look up one row's address text and geocode it; undefined means "nothing to store".
  const geocodeRow = async (id) => {
    if (id === undefined || id === null) return undefined;
    let res;
    try {
      res = await pool.query(
        `select reg_location as loc, company_name as name from ${table} where id = $1;`,
        [id]
      );
    } catch (err) {
      console.error('数据库查询错误');
      return undefined;
    }
    const row = res.rows[0];
    if (!row) return undefined;
    // join('') renders null/undefined columns as empty strings.
    return getAddressLonLat([row.loc, row.name].join(''));
  };

  // Persist the collected coordinates; a failed row is logged and the rest continue.
  const flushBatch = async (batch) => {
    for (const { id, lonlat } of batch) {
      try {
        await pool.query(`UPDATE ${table} SET lonlat = $1 WHERE id = $2;`, [lonlat, id]);
      } catch (err) {
        console.error(`${table}----------数据库更新错误`);
      }
    }
  };

  const run = async () => {
    let rows;
    try {
      // Only rows that have no coordinates yet.
      ({ rows } = await pool.query(`select id from ${table} where lonlat = '' order by id;`));
    } catch (err) {
      console.error('数据库查询错误');
      return;
    }

    const count = rows.length;
    console.log(chalk.red(`----------开始处理${table},共${count}条记录----------`));

    let batch = [];
    for (let i = 0; i < count; i++) {
      // Random pause between requests to avoid hammering the API.
      await sleep(getRandom());

      const { id } = rows[i];
      const lonlat = await geocodeRow(id);
      if (lonlat) batch.push({ id, lonlat });

      const processed = i + 1;
      if (processed % BATCH_SIZE === 0 || processed === count) {
        console.log(`${table}:----------${processed}`);
        await flushBatch(batch);
        batch = [];
      }
    }

    console.timeEnd(table);
    console.log(chalk.green(`----------${table}结束处理----------`));
    process.exit(0); // finished successfully
  };

  run().catch((err) => {
    console.error(err);
    process.exit(1);
  });
}
// Entry point: verify the database is reachable, then start the crawl.
pool.connect((isError, client, release) => {
  if (isError) {
    console.error(chalk.red('数据库连接错误'));
    // Do not start the spider without a database; report failure to the shell.
    process.exitCode = 1;
    return;
  }
  // The client was only checked out to test connectivity; hand it back so it
  // does not occupy a pool slot for the lifetime of the process.
  release();
  startSpider('company_2006');
});
request
代码如下:
const request = require("request");
/**
 * Promise wrapper around the callback-style `request` function.
 *
 * @param {string} url address to fetch; passed through `encodeURI` before use
 * @param {object} [options] extra `request` options; they override the
 *   defaults below (including `headers`, which is replaced wholesale), but
 *   `url` always comes from the first argument. The object is not modified.
 * @return {Promise<*>} resolves with the response body when the status code is
 *   200 (with `encoding: null` the body is not decoded to a string — callers
 *   in this project call `toString()` on it); rejects with the transport error,
 *   or with an Error for any other status code
 * @throws {Error} synchronously, when `url` is not a non-empty string
 */
function handleRequestByPromise(url, options = {}) {
  // Validate before encoding: encodeURI(undefined) would yield "undefined"
  // and slip past an empty-string check.
  if (typeof url !== "string" || url === "") {
    throw new Error("请求的url地址不正确");
  }

  const op = Object.assign(
    {},
    {
      method: "GET",
      encoding: null,
      // The option name is `headers`; the former `header` key was ignored,
      // so these defaults were never sent.
      // NOTE(review): the Referer looks like a leftover from another crawler;
      // callers can override it through options.headers.
      headers: {
        "User-Agent":
          "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36",
        Referer: "https://www.meituri.com"
      }
    },
    options,
    { url: encodeURI(url) }
  );

  return new Promise(function(resolve, reject) {
    request(op, (err, response, body) => {
      if (err) {
        reject(err);
        return;
      }
      if (response && response.statusCode === 200) {
        resolve(body);
      } else {
        reject(new Error(`请求${url}失败!`));
      }
    });
  });
}
module.exports = handleRequestByPromise
pgConfig
代码如下:
// Connection settings passed to `new pg.Pool(...)`.
// host / user / database / password are placeholders to be replaced with real values.
const config = {
host: 'ip',
user: 'user',
database: 'database',
password: 'password',
port: 5432,
// Additional (pool-level) settings
max: 40, // maximum number of connections in the pool
idleTimeoutMillis: 3000, // maximum time a connection may stay idle: 3s
};
module.exports = config;
说明:
- aKey 设置了多个,是为了防止账户被封;
- 在获取经纬度数据的时候设置了80-200ms的随机延迟,也是为了防止账户被封;