osmpbfreader.h

2018-01-18  本文已影响0人  微雨旧时歌丶

对 osmpbfreader.h 库做了修改:在解析 node/way/relation 时增加了元信息(metainfo,即 version、timestamp、changeset、uid、user_name、visible)的读取。


osmpbfreader.h头文件:

/*
Copyright (c) 2012, Canal TP
All rights reserved.
*/
#pragma once
#include <netinet/in.h> // network-byte-order conversion functions (ntohl)
#include <stdint.h> // fixed-width integer types and macros
#include <zlib.h> // pbf blobs may be zlib-compressed

#include <algorithm>
#include <cstdlib> // std::exit
#include <cstring> // std::memcpy
#include <fstream>
#include <iomanip>
#include <iostream>
#include <map>
#include <string>
#include <utility> // std::move
#include <vector>

// 描述低级blob存储的头文件
#include <osmpbf/fileformat.pb.h>
// 描述高级OSM对象的头文件
#include <osmpbf/osmformat.pb.h>
// Maximum size of a blob header, in bytes
const int max_blob_header_size = 64 * 1024; // 64 kB
// Maximum size of an uncompressed blob, in bytes
const int max_uncompressed_blob_size = 32 * 1024 * 1024; // 32 MB
// Longitude/latitude resolution for converting between double and int
// (not referenced by the code visible in this header)
const int lonlat_resolution = 1000 * 1000 * 1000; 

namespace CanalTP {
// Formatted console output: msg (information), warn (warning), fatal (error; consider renaming to err).
// Usage: write msg() (or one of the others) wherever std::cout would be used for on-screen output.
// Example: msg() << "start parsing the file";   a line break is appended automatically.

// msg: one line of output wrapped in the ANSI escape "\033[32m" (green).
struct msg {
    // Emit the colour escape before any payload is written.
    msg() {
        std::cout << "\033[32m";
    }

    // Forward any streamable value to std::cout; returns *this so calls can be chained.
    template<typename Value>
    msg & operator<<(const Value & value) {
        std::cout << value;
        return *this;
    }

    // Reset the terminal attributes, end the line and flush when the temporary dies.
    ~msg() {
        std::cout << "\033[0m" << std::endl;
    }
};
// debug: uncoloured output, equivalent to std::cout << ... << std::endl;
struct debug {
    // Start from the reset escape so no earlier colour leaks into the line.
    debug() {
        std::cout << "\033[0m";
    }

    // Forward any streamable value to std::cout; returns *this so calls can be chained.
    template<typename Value>
    debug & operator<<(const Value & value) {
        std::cout << value;
        return *this;
    }

    // Reset the terminal attributes, end the line and flush when the temporary dies.
    ~debug() {
        std::cout << "\033[0m" << std::endl;
    }
};
// warn: one line of output wrapped in the ANSI escape "\033[33m" (yellow).
struct warn {
    // Emit the colour escape before any payload is written.
    warn() {
        std::cout << "\033[33m";
    }

    // Forward any streamable value to std::cout; returns *this so calls can be chained.
    template<typename Value>
    warn & operator<<(const Value & value) {
        std::cout << value;
        return *this;
    }

    // Reset the terminal attributes, end the line and flush when the temporary dies.
    ~warn() {
        std::cout << "\033[0m" << std::endl;
    }
};
// fatal: one line of output wrapped in the ANSI escape "\033[31m" (red).
// The destructor terminates the process with exit status 1, so a statement such as
//     fatal() << "reason";
// never returns to its caller: the message is completed and flushed first, then
// std::exit runs at the end of the full expression.
struct fatal {
    // Emit the colour escape before any payload is written.
    fatal() {std::cout << "\033[31m";}

    // Forward any streamable value to std::cout; returns *this so calls can be chained.
    template<typename T>fatal & operator<<(const T & t){ std::cout << t; return *this;}

    // Reset the colour, end the line (std::endl flushes), then exit.
    // std::exit is spelled out and <cstdlib> is included explicitly instead of
    // relying on another header to declare ::exit.
    ~fatal() {std::cout << "\033[0m" << std::endl; std::exit(1);}
};


// Tags: the key/value pairs of one object
typedef std::map<std::string, std::string> Tags;

// Builds the Tags of one object.
//   object    - any message exposing keys_size()/keys(i) and vals_size()/vals(i),
//               where each entry is an index into the block's string table.
//               Taken by const reference so the message is not copied per call.
//   primblock - the block whose string table resolves those indices.
// Returns a map from key string to value string; when a key occurs more than
// once the last value wins.
// Terminates through fatal() when an index points outside the string table.
template<typename T>
Tags get_tags(const T & object, const OSMPBF::PrimitiveBlock &primblock){
    const OSMPBF::StringTable & table = primblock.stringtable();
    const uint64_t table_size = static_cast<uint64_t>(table.s_size());
    // keys and vals are parallel arrays; never read past the shorter one.
    const int pair_count = std::min(object.keys_size(), object.vals_size());

    Tags result;
    for(int i = 0; i < pair_count; ++i){
        const uint64_t key = object.keys(i);
        const uint64_t val = object.vals(i);
        if(key >= table_size || val >= table_size)
            fatal() << "tag references a string table entry that does not exist";
        result[table.s(static_cast<int>(key))] = table.s(static_cast<int>(val));
    }
    return result;
}
// Info: the metadata of one object (node / way / relation).
// Every field has a default, so a default-constructed Info is fully initialized.
struct Info {
    int32_t version{0};
    int64_t timestamp{0};
    int64_t changeset{0};
    int32_t uid{0};
    // The numeric user_sid is not stored; only the name it resolves to in the
    // string table is kept.
    std::string user_name;
    bool visible{true};
};

// Converts the info message of one object into an Info (not used for dense nodes).
//   INFO      - the object's OSMPBF::Info message, taken by const reference.
//   primblock - the block whose string table resolves INFO.user_sid().
// Returns the populated Info.
//
// Declared inline because this header defines it: without inline, including the
// header from two translation units produces a multiple-definition link error.
inline Info get_Info(const OSMPBF::Info & INFO, const OSMPBF::PrimitiveBlock &primblock) {
    Info info;
    info.version = INFO.version();
    info.timestamp = INFO.timestamp();
    info.changeset = INFO.changeset();
    info.uid = INFO.uid();

    // Resolve the user name only when the index exists; otherwise the name stays empty.
    const OSMPBF::StringTable & table = primblock.stringtable();
    const uint32_t user_sid = INFO.user_sid();
    if(user_sid < static_cast<uint32_t>(table.s_size()))
        info.user_name = table.s(static_cast<int>(user_sid));

    // An absent visible field reads as false through the accessor, which would
    // report ordinary objects as deleted. Keep Info's default (true) unless the
    // field is really present.
    if(INFO.has_visible())
        info.visible = INFO.visible();
    return info;
}

// Relation对象 的参考成员
struct Reference {
    OSMPBF::Relation::MemberType member_type; // 参考成员的类型
    uint64_t member_id; // 参考成员的 ID
    std::string role; // 参考成员的 role

    Reference() {}
    Reference(OSMPBF::Relation::MemberType member_type, uint64_t member_id, std::string role) :
        member_type(member_type), member_id(member_id), role(role)
    {}
};
// 一个 relation 有多个参考成员,以向量表示
typedef std::vector<Reference> References;

// Main function: declared here and defined after Parser further down.
// Builds a Parser<Visitor> for filename and calls parse() on it; the parser
// reports what it reads through the visitor's node_callback, way_callback and
// relation_callback.
template<typename Visitor>
void read_osm_pbf(const std::string & filename, Visitor & visitor);

template<typename Visitor>
struct Parser {
private:
    Visitor & visitor;
    std::ifstream file;
    char* buffer;
    char* unpack_buffer;
    bool finished;
public:
    // 构造函数
    Parser(const std::string & filename, Visitor & visitor)
        : visitor(visitor), file(filename.c_str(), std::ios::binary ), finished(false)
    {   // 打开文件、开辟缓冲区
        if(!file.is_open())
            fatal() << "Unable to open the file " << filename;
        buffer = new char[max_uncompressed_blob_size];
        unpack_buffer = new char[max_uncompressed_blob_size];
        msg() << "Reading the file" << filename;
    }
    // 析构函数
    ~Parser(){  // 释放缓冲区空间
        delete[] buffer;
        delete[] unpack_buffer;
        google::protobuf::ShutdownProtobufLibrary();
    }
    // 主解析函数
    void parse(){
        while(!this->file.eof() && !finished) {
            OSMPBF::BlobHeader header = this->read_header();
            if(!this->finished){
                int32_t sz = this->read_blob(header);
                if(header.type() == "OSMData") {
                    this->parse_primitiveblock(sz);
                }
                else if(header.type() == "OSMHeader"){
                    // -----待设计, 对OSMHeader数据的解析-----
                }
                else {
                    warn() << "  unknown blob type: " << header.type();
                }
            }
        }
    }
    // 1. 解析头 (BlobHeader)
    OSMPBF::BlobHeader read_header(){
        int32_t sz;
        OSMPBF::BlobHeader result;

        // read the first 4 bytes of the file, this is the size of the blob-header
        if( !file.read((char*)&sz, 4) ){
            msg() << "We finished reading the file";
            this->finished = true;
            return result;
        }

        sz = ntohl(sz);// convert the size from network byte-order to host byte-order

        if(sz > max_blob_header_size)
            fatal() << "blob-header-size is bigger then allowed " << sz << " > " << max_blob_header_size;

        this->file.read(this->buffer, sz);
        if(!this->file.good())
            fatal() << "unable to read blob-header from file";

        // parse the blob-header from the read-buffer
        if(!result.ParseFromArray(this->buffer, sz))
            fatal() << "unable to parse blob header";
        return result;
    }
    // 2. 解析Blob (包括 OSMHeader 和 OSMData,这里只解析了 OSMData)
    int32_t read_blob(const OSMPBF::BlobHeader & header){
        OSMPBF::Blob blob;
        // size of the following blob
        int32_t sz = header.datasize();

        if(sz > max_uncompressed_blob_size)
            fatal() << "blob-size is bigger then allowed";

        if(!this->file.read(buffer, sz))
            fatal() << "unable to read blob from file";
        if(!blob.ParseFromArray(this->buffer, sz))
            fatal() << "unable to parse blob";

        // if the blob has uncompressed data
        if(blob.has_raw()) {
            // size of the blob-data
            sz = blob.raw().size();

            // check that raw_size is set correctly
            if(sz != blob.raw_size())
                warn() << "  reports wrong raw_size: " << blob.raw_size() << " bytes";

            memcpy(unpack_buffer, buffer, sz);
            return sz;
        }


        if(blob.has_zlib_data()) {
            sz = blob.zlib_data().size();

            z_stream z;
            z.next_in   = (unsigned char*) blob.zlib_data().c_str();
            z.avail_in  = sz;
            z.next_out  = (unsigned char*) unpack_buffer;
            z.avail_out = blob.raw_size();
            z.zalloc    = Z_NULL;
            z.zfree     = Z_NULL;
            z.opaque    = Z_NULL;

            if(inflateInit(&z) != Z_OK) {
                fatal() << "failed to init zlib stream";
            }
            if(inflate(&z, Z_FINISH) != Z_STREAM_END) {
                fatal() << "failed to inflate zlib stream";
            }
            if(inflateEnd(&z) != Z_OK) {
                fatal() << "failed to deinit zlib stream";
            }
            return z.total_out;
        }

        if(blob.has_lzma_data()) {
            fatal() << "lzma-decompression is not supported";
        }
        return 0;
    }

    void parse_primitiveblock(int32_t sz) {
        OSMPBF::PrimitiveBlock primblock;
        if(!primblock.ParseFromArray(this->unpack_buffer, sz))
            fatal() << "unable to parse primitive block";

        for(int i = 0, l = primblock.primitivegroup_size(); i < l; i++) {
            OSMPBF::PrimitiveGroup pg = primblock.primitivegroup(i);

            // Simple Nodes
            for(int i = 0; i < pg.nodes_size(); ++i) {
                OSMPBF::Node n = pg.nodes(i);

                double lon = 0.000000001 * (primblock.lon_offset() + (primblock.granularity() * n.lon())) ;
                double lat = 0.000000001 * (primblock.lat_offset() + (primblock.granularity() * n.lat())) ;

                visitor.node_callback(n.id(), lon, lat, get_tags(n, primblock),get_Info(n.info(),primblock));
            }

            // Dense Nodes
            if(pg.has_dense()) {
                OSMPBF::DenseNodes dn = pg.dense();
                uint64_t id = 0;
                double lon = 0;
                double lat = 0;

                int current_kv = 0;

                Info dninfo; // densenode info;
                uint32_t user_sid;

                for(int i = 0; i < dn.id_size(); ++i) {  // 共id_size个 densenode
                    id += dn.id(i); // id、 lon、 lat 的处理方法是累加
                    lon +=  0.000000001 * (primblock.lon_offset() + (primblock.granularity() * dn.lon(i)));
                    lat +=  0.000000001 * (primblock.lat_offset() + (primblock.granularity() * dn.lat(i)));

                    Tags tags; // 标签的处理方法是记录 keys_vals 的序号(从0开始),
                               // 每个 densenode的keys_vals 成对出现,中间以 0字节分割
                    while (current_kv < dn.keys_vals_size() && dn.keys_vals(current_kv) != 0){ // 0字节分割,之前的字符串全部属于一个densenode
                        uint64_t key = dn.keys_vals(current_kv);
                        uint64_t val = dn.keys_vals(current_kv + 1);
                        std::string key_string = primblock.stringtable().s(key);
                        std::string val_string = primblock.stringtable().s(val);
                        current_kv += 2;
                        tags[key_string] = val_string;
                    }
                    ++current_kv; // 越过 0字节,开始下一个 densenode 的 keys_vals 
                    // DenseInfo 的获取
                    dninfo.version = dn.denseinfo().version(i); // not delta coded
                    dninfo.timestamp += dn.denseinfo().timestamp(i);
                    dninfo.changeset +=dn.denseinfo().changeset(i);
                    dninfo.uid +=dn.denseinfo().uid(i);

                    user_sid += dn.denseinfo().user_sid(i);
                    dninfo.user_name = primblock.stringtable().s(user_sid);

                    // dninfo.visible = dn.denseinfo().visible(); // question remains ?????
                    // End DenseInfo 的获取

                    visitor.node_callback(id, lon, lat, tags, dninfo);  //未修改
                }
            }
            // Ways
            for(int i = 0; i < pg.ways_size(); ++i) {
                OSMPBF::Way w = pg.ways(i);

                uint64_t ref = 0;
                std::vector<uint64_t> refs;
                for(int j = 0; j < w.refs_size(); ++j){
                    ref += w.refs(j);
                    refs.push_back(ref);
                }
                uint64_t id = w.id();

                visitor.way_callback(id, get_tags(w, primblock), refs, get_Info(w.info(),primblock));
            }

            // Relations
            for(int i=0; i < pg.relations_size(); ++i){
                OSMPBF::Relation rel = pg.relations(i);
                uint64_t id = 0;
                References refs;

                for(int l = 0; l < rel.memids_size(); ++l){
                    id += rel.memids(l);
                    refs.push_back(Reference(rel.types(l), id, primblock.stringtable().s(rel.roles_sid(l))));
                }

                visitor.relation_callback(rel.id(), get_tags(rel, primblock), refs, get_Info(rel.info(),primblock));    
            }
        }
    }
};

template<typename Visitor>
void read_osm_pbf(const std::string & filename, Visitor & visitor){
    Parser<Visitor> p(filename, visitor);
    p.parse();
}

}

打印为debug格式的程序 ShowAll.cc:

/*
To build this file (the header uses zlib to inflate compressed blobs, hence -lz):
g++ -std=c++11 -O2 -o showall ShowAll.cc -losmpbf -lprotobuf -lz

To run it:
./showall path_to_your_data.osm.pbf
*/

#include "osmpbfreader改.h"
#include <ctime>
#include <algorithm> //for iterator
#include <map>
using namespace CanalTP;

// We need to define a visitor with three methods that will be called while the file is read
struct Counter {
    // Three integers count how many times each object type occurs
    uint64_t n_ID;
    uint64_t w_ID;
    uint64_t r_ID; //objects' ID
    
    void printNode(uint64_t node_id, double lon, double lat, const Tags & tags);

    // Counter() : nodes(0), ways(0), relations(0) {}

    // This method is called every time a Node is read
    void node_callback(uint64_t node_id, double lon, double lat, const Tags & tags,const Info & Ninfo){
        std::cout<<"node "<<node_id<<"\t("<<lon<<", "<<lat<<")\n";  
        std::cout<<"  version:\t" << Ninfo.version;
        if (Ninfo.visible) std::cout<<"  visible\n";
        else std::cout<<"  invisible\n";    
        std::cout<<"  Changeset:\t"<< Ninfo.changeset << std::endl;
        std::cout<<"  timestamp:\t"<< Ninfo.timestamp <<std::endl;
        std::cout<<"  user:\t" << Ninfo.uid <<"  " << Ninfo.user_name <<std::endl;
        std::cout<<"  Tags:\t"<<tags.size()<<std::endl;
    // tags.sort();
        for (auto it = tags.begin(); it != tags.end(); ++it) {
            std::cout <<"    \""<< it->first <<"\"\t" 
                <<"=\""<< it->second <<"\"" << std::endl;
        }
        std::cout<<"\n";
    }

    // This method is called every time a Way is read
    // refs is a vector that contains the reference to the nodes that compose the way
    void way_callback(uint64_t way_id, const Tags &tags, const std::vector<uint64_t> &refs, const Info & Winfo){
/*
        std::cout<<"way "<<way_id<<"\n";
        std::cout<<"  version:\t" << Winfo.version;
        if (Winfo.visible) std::cout<<"  visible\n";
        else std::cout<<"  invisible\n";    
        std::cout<<"  Changeset:\t"<< Winfo.changeset << std::endl;
        std::cout<<"  timestamp:\t"<< Winfo.timestamp <<std::endl;
        std::cout<<"  user:\t" << Winfo.uid <<"  " << Winfo.user_name <<std::endl;
        
        std::cout<<"  Tags:\t"<<tags.size()<<std::endl;
        //tags.sort();
        for (auto it = tags.begin(); it != tags.end(); ++it) { //cout tags
            std::cout <<"    \""<< it->first <<"\"\t" 
                <<"=\""<< it->second <<"\"" << std::endl;
        }   
        int cnt = refs.size();
        std::cout<<"  nodes:\t"<<cnt<<std::endl;
        for (int i=0;i<cnt;++i) { //cout references
            std::cout <<"    "<<i<<":\t"<< refs[i] << std::endl;
        }
        std::cout<<"\n";
*/
    }

    // This method is called every time a Relation is read
    // refs is a vector of pair corresponding of the relation type (Node, Way, Relation) and the reference to the object
    void relation_callback(uint64_t relation_id, const Tags &tags, const References &refs, const Info &Rinfo){
/*
        std::cout<<"relation "<<relation_id<<"\n";      
        std::cout<<"  Tags:\t"<<tags.size()<<std::endl;
        //tags.sort();
        for (auto it = tags.begin(); it != tags.end(); ++it) { //cout tags
            std::cout <<"    \""<< it->first <<"\"\t" 
                <<"=\""<< it->second <<"\"" << std::endl;
        }   
        int cnt = refs.size();
        std::cout<<"  members:\t"<<cnt<<std::endl;
        for (int i=0;i<cnt;++i) { //cout references
            std::cout <<"    "<<i<<":\t";
            switch (refs[i].member_type){
                case 0: std::cout<<"node\t"; break;
                case 1: std::cout<<"way\t"; break;
                case 2: std::cout<<"relation\t"; break;
            }

            std::cout<<refs[i].member_id<<"\t\""<<refs[i].role<<"\"\n";
        }   
    std::cout<<"\n";    
*/ 
    }
};


// Entry point: expects exactly one argument, the .osm.pbf file to read.
// Returns 1 after printing the usage line when the argument count is wrong,
// 0 after the file has been read and the elapsed time has been printed.
int main(int argc, char** argv) {
    if(argc != 2) {
        std::cout << "Usage: " << argv[0] << " file_to_read.osm.pbf" << std::endl;
        return 1;
    }

    // clock() is sampled immediately before and after the read.
    const clock_t started = clock();

    Counter counter;
    read_osm_pbf(argv[1], counter);
    // std::cout << "We read " << counter.nodes << " nodes, " << counter.ways << " ways and " << counter.relations << " relations" << std::endl;

    const clock_t stopped = clock();
    const double totaltime = static_cast<double>(stopped - started) / CLOCKS_PER_SEC;

    // msg is the helper struct from the header; it prints one coloured line.
    msg() << "Total time:" << totaltime << " seconds.";
    return 0;
}
上一篇下一篇

猜你喜欢

热点阅读