osmpbfreader.h
2018-01-18 本文已影响0人
微雨旧时歌丶
对osmpbfreader.h 库做了修改,增加了metainfo;
osmpbfreader.h头文件:
/*
Copyright (c) 2012, Canal TP
All rights reserved.
*/
#pragma once
// C / POSIX system headers
#include <netinet/in.h> // network-byte-order conversion (ntohl)
#include <stdint.h>     // fixed-width integer types
#include <zlib.h>       // pbf blobs may be zlib-compressed
// C++ standard library
#include <algorithm>
#include <cstdlib>      // exit
#include <cstring>      // memcpy
#include <fstream>
#include <iomanip>
#include <iostream>
#include <map>
#include <string>
#include <utility>      // std::move
#include <vector>
// 描述低级blob存储的头文件
#include <osmpbf/fileformat.pb.h>
// 描述高级OSM对象的头文件
#include <osmpbf/osmformat.pb.h>
// Largest serialized blob header accepted, in bytes.
constexpr int max_blob_header_size = 64 * 1024; // 64 kB
// Largest uncompressed blob accepted, in bytes.
constexpr int max_uncompressed_blob_size = 32 * 1024 * 1024; // 32 MB
// Resolution used when converting longitude/latitude between double and integer form.
constexpr int lonlat_resolution = 1000 * 1000 * 1000;
namespace CanalTP {
// Colored one-line loggers: msg (information), warn (warning), fatal (error).
// Usage: replace a std::cout statement with a temporary of one of these types,
// e.g.  msg() << "parsing started";  -- the line break is appended automatically.
//
// msg: constructing it switches std::cout to green, every streamed value is
// forwarded to std::cout, and destroying it resets the color and ends the line.
struct msg {
    msg()
    {
        std::cout << "\033[32m";
    }

    template<typename Value>
    msg & operator<<(const Value & value)
    {
        std::cout << value;
        return *this;
    }

    ~msg()
    {
        std::cout << "\033[0m" << std::endl;
    }
};
// debug: default-colored logger, equivalent to  std::cout << ... << std::endl;
// The constructor and the destructor both emit the "reset attributes" escape.
struct debug {
    debug()
    {
        std::cout << "\033[0m";
    }

    template<typename Value>
    debug & operator<<(const Value & value)
    {
        std::cout << value;
        return *this;
    }

    ~debug()
    {
        std::cout << "\033[0m" << std::endl;
    }
};
// warn: yellow logger. Switches std::cout to yellow on construction, forwards
// streamed values, then resets the color and ends the line on destruction.
struct warn {
    warn()
    {
        std::cout << "\033[33m";
    }

    template<typename Value>
    warn & operator<<(const Value & value)
    {
        std::cout << value;
        return *this;
    }

    ~warn()
    {
        std::cout << "\033[0m" << std::endl;
    }
};
// fatal: red logger that terminates the program. Streamed values are forwarded
// to std::cout; the destructor resets the color, ends the line and then calls
// exit(1), so a statement of the form  fatal() << "...";  never returns.
struct fatal {
    fatal()
    {
        std::cout << "\033[31m";
    }

    template<typename Value>
    fatal & operator<<(const Value & value)
    {
        std::cout << value;
        return *this;
    }

    ~fatal()
    {
        std::cout << "\033[0m" << std::endl;
        exit(1);
    }
};
// Tags: the key/value pairs attached to one object.
using Tags = std::map<std::string, std::string>;
// Builds the tag map of one object by resolving its parallel keys/vals index
// arrays against the string table of the enclosing primitive block.
//
// object    - any message exposing keys_size()/keys(i)/vals_size()/vals(i);
//             taken by const reference so the message is not copied per call.
// primblock - the block whose string table the indices refer to.
// Returns the key -> value map; when a key occurs more than once the last
// value wins. Only index pairs present in BOTH arrays are read, so a message
// whose vals array is shorter than its keys array cannot cause an
// out-of-range access.
template<typename T>
Tags get_tags(const T & object, const OSMPBF::PrimitiveBlock &primblock){
    Tags result;
    const OSMPBF::StringTable & strings = primblock.stringtable();
    const int pair_count = std::min(object.keys_size(), object.vals_size());
    for(int i = 0; i < pair_count; ++i){
        result[strings.s(object.keys(i))] = strings.s(object.vals(i));
    }
    return result;
}
// Info: the metadata of one object (node / way / relation).
struct Info {
    int32_t     version{0};
    int64_t     timestamp{0};
    int64_t     changeset{0};
    int32_t     uid{0};
    // uint32_t user_sid;
    std::string user_name;      // looked up in the string table via user_sid
    bool        visible{true};
};
// Converts the protobuf Info message of a node/way/relation into an Info
// value (dense nodes carry their metadata differently and are not handled
// here).
//
// INFO      - the protobuf metadata message; taken by const reference so it is
//             not copied per call.
// primblock - the block whose string table user_sid refers to.
// Returns the populated Info.
//
// Declared inline because this is a non-template function defined in a
// header: without it, including the header from two translation units
// produces a multiple-definition link error.
inline Info get_Info(const OSMPBF::Info & INFO, const OSMPBF::PrimitiveBlock &primblock) {
    Info info;
    info.version = INFO.version();
    info.timestamp = INFO.timestamp();
    info.changeset = INFO.changeset();
    info.uid = INFO.uid();
    // Resolve the user name only when the index really exists in the string
    // table; otherwise user_name stays empty instead of reading out of range.
    const OSMPBF::StringTable & strings = primblock.stringtable();
    if(static_cast<int64_t>(INFO.user_sid()) < static_cast<int64_t>(strings.s_size()))
        info.user_name = strings.s(INFO.user_sid());
    // info.user_sid = INFO.user_sid();
    // An absent `visible` field means "visible"; reading the field
    // unconditionally would yield the proto2 default (false) and mark every
    // object of a non-history file as deleted.
    if(INFO.has_visible())
        info.visible = INFO.visible();
    return info;
}
// One member referenced by a Relation object.
struct Reference {
    // Default member initializers give a default-constructed Reference
    // well-defined contents instead of indeterminate values.
    OSMPBF::Relation::MemberType member_type = OSMPBF::Relation::NODE; // type of the referenced member
    uint64_t member_id = 0;                                            // ID of the referenced member
    std::string role;                                                  // role of the referenced member

    Reference() {}

    // role is taken by value and moved into place, so a temporary argument is
    // not copied a second time.
    Reference(OSMPBF::Relation::MemberType member_type, uint64_t member_id, std::string role) :
        member_type(member_type), member_id(member_id), role(std::move(role))
    {}
};
// A relation has several referenced members; they are kept in a vector.
using References = std::vector<Reference>;
// Main entry point of the library (forward declaration; the definition
// follows the Parser template below).
// filename - path of the .osm.pbf file to read.
// visitor  - object that receives the parsed nodes, ways and relations.
template<typename Visitor>
void read_osm_pbf(const std::string & filename, Visitor & visitor);
template<typename Visitor>
struct Parser {
private:
Visitor & visitor;
std::ifstream file;
char* buffer;
char* unpack_buffer;
bool finished;
public:
// 构造函数
Parser(const std::string & filename, Visitor & visitor)
: visitor(visitor), file(filename.c_str(), std::ios::binary ), finished(false)
{ // 打开文件、开辟缓冲区
if(!file.is_open())
fatal() << "Unable to open the file " << filename;
buffer = new char[max_uncompressed_blob_size];
unpack_buffer = new char[max_uncompressed_blob_size];
msg() << "Reading the file" << filename;
}
// 析构函数
~Parser(){ // 释放缓冲区空间
delete[] buffer;
delete[] unpack_buffer;
google::protobuf::ShutdownProtobufLibrary();
}
// 主解析函数
void parse(){
while(!this->file.eof() && !finished) {
OSMPBF::BlobHeader header = this->read_header();
if(!this->finished){
int32_t sz = this->read_blob(header);
if(header.type() == "OSMData") {
this->parse_primitiveblock(sz);
}
else if(header.type() == "OSMHeader"){
// -----待设计, 对OSMHeader数据的解析-----
}
else {
warn() << " unknown blob type: " << header.type();
}
}
}
}
// 1. 解析头 (BlobHeader)
OSMPBF::BlobHeader read_header(){
int32_t sz;
OSMPBF::BlobHeader result;
// read the first 4 bytes of the file, this is the size of the blob-header
if( !file.read((char*)&sz, 4) ){
msg() << "We finished reading the file";
this->finished = true;
return result;
}
sz = ntohl(sz);// convert the size from network byte-order to host byte-order
if(sz > max_blob_header_size)
fatal() << "blob-header-size is bigger then allowed " << sz << " > " << max_blob_header_size;
this->file.read(this->buffer, sz);
if(!this->file.good())
fatal() << "unable to read blob-header from file";
// parse the blob-header from the read-buffer
if(!result.ParseFromArray(this->buffer, sz))
fatal() << "unable to parse blob header";
return result;
}
// 2. 解析Blob (包括 OSMHeader 和 OSMData,这里只解析了 OSMData)
int32_t read_blob(const OSMPBF::BlobHeader & header){
OSMPBF::Blob blob;
// size of the following blob
int32_t sz = header.datasize();
if(sz > max_uncompressed_blob_size)
fatal() << "blob-size is bigger then allowed";
if(!this->file.read(buffer, sz))
fatal() << "unable to read blob from file";
if(!blob.ParseFromArray(this->buffer, sz))
fatal() << "unable to parse blob";
// if the blob has uncompressed data
if(blob.has_raw()) {
// size of the blob-data
sz = blob.raw().size();
// check that raw_size is set correctly
if(sz != blob.raw_size())
warn() << " reports wrong raw_size: " << blob.raw_size() << " bytes";
memcpy(unpack_buffer, buffer, sz);
return sz;
}
if(blob.has_zlib_data()) {
sz = blob.zlib_data().size();
z_stream z;
z.next_in = (unsigned char*) blob.zlib_data().c_str();
z.avail_in = sz;
z.next_out = (unsigned char*) unpack_buffer;
z.avail_out = blob.raw_size();
z.zalloc = Z_NULL;
z.zfree = Z_NULL;
z.opaque = Z_NULL;
if(inflateInit(&z) != Z_OK) {
fatal() << "failed to init zlib stream";
}
if(inflate(&z, Z_FINISH) != Z_STREAM_END) {
fatal() << "failed to inflate zlib stream";
}
if(inflateEnd(&z) != Z_OK) {
fatal() << "failed to deinit zlib stream";
}
return z.total_out;
}
if(blob.has_lzma_data()) {
fatal() << "lzma-decompression is not supported";
}
return 0;
}
void parse_primitiveblock(int32_t sz) {
OSMPBF::PrimitiveBlock primblock;
if(!primblock.ParseFromArray(this->unpack_buffer, sz))
fatal() << "unable to parse primitive block";
for(int i = 0, l = primblock.primitivegroup_size(); i < l; i++) {
OSMPBF::PrimitiveGroup pg = primblock.primitivegroup(i);
// Simple Nodes
for(int i = 0; i < pg.nodes_size(); ++i) {
OSMPBF::Node n = pg.nodes(i);
double lon = 0.000000001 * (primblock.lon_offset() + (primblock.granularity() * n.lon())) ;
double lat = 0.000000001 * (primblock.lat_offset() + (primblock.granularity() * n.lat())) ;
visitor.node_callback(n.id(), lon, lat, get_tags(n, primblock),get_Info(n.info(),primblock));
}
// Dense Nodes
if(pg.has_dense()) {
OSMPBF::DenseNodes dn = pg.dense();
uint64_t id = 0;
double lon = 0;
double lat = 0;
int current_kv = 0;
Info dninfo; // densenode info;
uint32_t user_sid;
for(int i = 0; i < dn.id_size(); ++i) { // 共id_size个 densenode
id += dn.id(i); // id、 lon、 lat 的处理方法是累加
lon += 0.000000001 * (primblock.lon_offset() + (primblock.granularity() * dn.lon(i)));
lat += 0.000000001 * (primblock.lat_offset() + (primblock.granularity() * dn.lat(i)));
Tags tags; // 标签的处理方法是记录 keys_vals 的序号(从0开始),
// 每个 densenode的keys_vals 成对出现,中间以 0字节分割
while (current_kv < dn.keys_vals_size() && dn.keys_vals(current_kv) != 0){ // 0字节分割,之前的字符串全部属于一个densenode
uint64_t key = dn.keys_vals(current_kv);
uint64_t val = dn.keys_vals(current_kv + 1);
std::string key_string = primblock.stringtable().s(key);
std::string val_string = primblock.stringtable().s(val);
current_kv += 2;
tags[key_string] = val_string;
}
++current_kv; // 越过 0字节,开始下一个 densenode 的 keys_vals
// DenseInfo 的获取
dninfo.version = dn.denseinfo().version(i); // not delta coded
dninfo.timestamp += dn.denseinfo().timestamp(i);
dninfo.changeset +=dn.denseinfo().changeset(i);
dninfo.uid +=dn.denseinfo().uid(i);
user_sid += dn.denseinfo().user_sid(i);
dninfo.user_name = primblock.stringtable().s(user_sid);
// dninfo.visible = dn.denseinfo().visible(); // question remains ?????
// End DenseInfo 的获取
visitor.node_callback(id, lon, lat, tags, dninfo); //未修改
}
}
// Ways
for(int i = 0; i < pg.ways_size(); ++i) {
OSMPBF::Way w = pg.ways(i);
uint64_t ref = 0;
std::vector<uint64_t> refs;
for(int j = 0; j < w.refs_size(); ++j){
ref += w.refs(j);
refs.push_back(ref);
}
uint64_t id = w.id();
visitor.way_callback(id, get_tags(w, primblock), refs, get_Info(w.info(),primblock));
}
// Relations
for(int i=0; i < pg.relations_size(); ++i){
OSMPBF::Relation rel = pg.relations(i);
uint64_t id = 0;
References refs;
for(int l = 0; l < rel.memids_size(); ++l){
id += rel.memids(l);
refs.push_back(Reference(rel.types(l), id, primblock.stringtable().s(rel.roles_sid(l))));
}
visitor.relation_callback(rel.id(), get_tags(rel, primblock), refs, get_Info(rel.info(),primblock));
}
}
}
};
// Reads the whole file `filename` and reports every object in it to `visitor`.
// A Parser is created for the file, run once and destroyed again before the
// function returns.
template<typename Visitor>
void read_osm_pbf(const std::string & filename, Visitor & visitor){
    Parser<Visitor>(filename, visitor).parse();
}
}
打印为debug格式的程序 ShowAll.cc:
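/*
To build this file (-lz is required because the reader inflates zlib-compressed
blobs; -std=c++11 because the header uses in-class member initializers and auto):
g++ -O2 -std=c++11 -o showall ShowAll.cc -losmpbf -lprotobuf -lz
To run it:
./showall path_to_your_data.osm.pbf
*/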
#include "osmpbfreader改.h"
#include <ctime>
#include <algorithm> //for iterator
#include <map>
using namespace CanalTP;
// We need to define a visitor with three methods that will be called while the file is read
struct Counter {
// Three integers count how many times each object type occurs
uint64_t n_ID;
uint64_t w_ID;
uint64_t r_ID; //objects' ID
void printNode(uint64_t node_id, double lon, double lat, const Tags & tags);
// Counter() : nodes(0), ways(0), relations(0) {}
// This method is called every time a Node is read
void node_callback(uint64_t node_id, double lon, double lat, const Tags & tags,const Info & Ninfo){
std::cout<<"node "<<node_id<<"\t("<<lon<<", "<<lat<<")\n";
std::cout<<" version:\t" << Ninfo.version;
if (Ninfo.visible) std::cout<<" visible\n";
else std::cout<<" invisible\n";
std::cout<<" Changeset:\t"<< Ninfo.changeset << std::endl;
std::cout<<" timestamp:\t"<< Ninfo.timestamp <<std::endl;
std::cout<<" user:\t" << Ninfo.uid <<" " << Ninfo.user_name <<std::endl;
std::cout<<" Tags:\t"<<tags.size()<<std::endl;
// tags.sort();
for (auto it = tags.begin(); it != tags.end(); ++it) {
std::cout <<" \""<< it->first <<"\"\t"
<<"=\""<< it->second <<"\"" << std::endl;
}
std::cout<<"\n";
}
// This method is called every time a Way is read
// refs is a vector that contains the reference to the nodes that compose the way
void way_callback(uint64_t way_id, const Tags &tags, const std::vector<uint64_t> &refs, const Info & Winfo){
/*
std::cout<<"way "<<way_id<<"\n";
std::cout<<" version:\t" << Winfo.version;
if (Winfo.visible) std::cout<<" visible\n";
else std::cout<<" invisible\n";
std::cout<<" Changeset:\t"<< Winfo.changeset << std::endl;
std::cout<<" timestamp:\t"<< Winfo.timestamp <<std::endl;
std::cout<<" user:\t" << Winfo.uid <<" " << Winfo.user_name <<std::endl;
std::cout<<" Tags:\t"<<tags.size()<<std::endl;
//tags.sort();
for (auto it = tags.begin(); it != tags.end(); ++it) { //cout tags
std::cout <<" \""<< it->first <<"\"\t"
<<"=\""<< it->second <<"\"" << std::endl;
}
int cnt = refs.size();
std::cout<<" nodes:\t"<<cnt<<std::endl;
for (int i=0;i<cnt;++i) { //cout references
std::cout <<" "<<i<<":\t"<< refs[i] << std::endl;
}
std::cout<<"\n";
*/
}
// This method is called every time a Relation is read
// refs is a vector of pair corresponding of the relation type (Node, Way, Relation) and the reference to the object
void relation_callback(uint64_t relation_id, const Tags &tags, const References &refs, const Info &Rinfo){
/*
std::cout<<"relation "<<relation_id<<"\n";
std::cout<<" Tags:\t"<<tags.size()<<std::endl;
//tags.sort();
for (auto it = tags.begin(); it != tags.end(); ++it) { //cout tags
std::cout <<" \""<< it->first <<"\"\t"
<<"=\""<< it->second <<"\"" << std::endl;
}
int cnt = refs.size();
std::cout<<" members:\t"<<cnt<<std::endl;
for (int i=0;i<cnt;++i) { //cout references
std::cout <<" "<<i<<":\t";
switch (refs[i].member_type){
case 0: std::cout<<"node\t"; break;
case 1: std::cout<<"way\t"; break;
case 2: std::cout<<"relation\t"; break;
}
std::cout<<refs[i].member_id<<"\t\""<<refs[i].role<<"\"\n";
}
std::cout<<"\n";
*/
}
};
// Program entry: expects exactly one argument, the path of the .osm.pbf file.
// Reads the file with a Counter visitor and reports the processor time used.
// Returns 1 after printing a usage line when the argument count is wrong,
// 0 otherwise.
int main(int argc, char** argv) {
    const bool have_one_argument = (argc == 2);
    if(!have_one_argument) {
        std::cout << "Usage: " << argv[0] << " file_to_read.osm.pbf" << std::endl;
        return 1;
    }

    // Let's read that file !
    const clock_t start = clock(); //count running time
    Counter counter;
    read_osm_pbf(argv[1], counter);
    // std::cout << "We read " << counter.nodes << " nodes, " << counter.ways << " ways and " << counter.relations << " relations" << std::endl;
    const clock_t finish = clock(); //finish count time

    const double totaltime = (double)(finish-start)/CLOCKS_PER_SEC;
    msg() << "Total time:"<<totaltime<<" seconds."; //"msg" is a structure defined in the header,used to cout an msg.
    return 0;
}