darkNet YOLOv4 + labelme 目标检测任务半自动标注

本篇文章针对的是 darkNet YOLOv4 目标检测类任务的数据半自动标注问题,具体的流程是:先用 darkNet 读取图片并进行预测,再把预测结果转换为 labelme 格式的 json 标注文件,之后即可在 labelme 中打开并人工检查、修正。


1. darkNet 读取图片预测

1.1 打包darknet



1.2 配置新项目

2. 预测结果转换为labelme格式

2.1 说明

darknet yolo 检测出来的结果是用 std::vector<bbox_t> 格式存储的,bbox_t 是结构体,在 yolo_v2_class.hpp 中定义如下:

// One detection as stored in the std::vector<bbox_t> returned by the detector
// (quoted from yolo_v2_class.hpp).
struct bbox_t {
    unsigned int x, y, w, h;       // (x,y) - top-left corner, (w, h) - width & height of bounded box
    float prob;                    // confidence - probability that the object was found correctly
    unsigned int obj_id;           // class of object - from range [0, classes-1]
    unsigned int track_id;         // tracking id for video (0 - untracked, 1 - inf - tracked object)
    unsigned int frames_counter;   // counter of frames on which the object was detected
    float x_3d, y_3d, z_3d;        // center of object (in Meters) if ZED 3D Camera is used
};


2.2 转换函数

int resultWriteToJson(const std::string jsonPath, const std::string imagePath, const int imgH, const int imgW, const std::vector<bbox_t> &result)
    //jsonPath:      json file  abspath,
    //imagePath:     labelme contain the image path,(write to json)

    std::ofstream out(jsonPath, std::ios::out);//std::ios::app add to  bottom of the file
    if (!out.is_open())
        std::cout << "cant open the " << jsonPath << "!\n";
        return -1;

    // write the json table head 
    out << "{\n" << "\"version\":\"4.5.7\",\n";
    out << "\"flags\" : {},\n";
    out << "\"shapes\" : [\n";

    for (int i = 0; i < result.size(); i++)
        bbox_t box = result[i];
        out << "{\n";
        out << "\"label\":" << "\"" << box.obj_id << "\",\n";
        out << "\"points\":[\n";
        out << "[\n" << box.x << ",\n" << box.y << "\n],\n";
        out << "[\n" << box.x + box.w << ",\n" << box.y + box.h << "\n]\n";
        out << "],\n";
        out << "\"group_id\":null,\n";
        out << "\"shape_type\":\"rectangle\",\n";
        out << "\"flags\":{}\n";
        out << "}";
        if (i != result.size() - 1) out << ",\n";//最后一个}后面没有逗号","

    out << "],\n";
    out << "\"imagePath\" :" << "\"" << imagePath << "\",\n";
    out << "\"imageData\" :" << "null,\n";
    out << "\"imageHeight\":" << imgH << ",\n";
    out << "\"imageWidth\":" << imgW << "\n";
    out << "}\n";

    return 0;

2.3 转换示例



3. 完整源码


#include <cstddef>
#include <fstream>
#include <iostream>
#include <string>
#include <vector>

#include "yolo_v2_class.hpp"    // imported functions from DLL
#include "opencv.hpp"

// Draws the detections on `img` and shows the image in a window named "results".
//
// A detection is skipped when it lies within 10 pixels of any image border.
// For every detection that is drawn, a yellow rectangle and the numeric class
// id are rendered and the box geometry is printed to stdout.
//
// Returns 0 after the window has been shown, -1 if `img` or `results` is empty.
int  drawResults(cv::Mat img, std::vector<bbox_t> &results)
{
    if (img.empty())
    {
        std::cout << "drawResults: the image is empty\n";
        return -1;
    }
    if (results.empty())
    {
        std::cout << "drawResults: the results vector is empty\n";
        return -1;
    }

    const int img_w = img.cols;
    const int img_h = img.rows;
    const int expd = 10;  // margin (pixels) a box must keep from every border

    for (const auto &r : results)
    {
        // Work in signed ints so the margin arithmetic cannot wrap around.
        const int left = static_cast<int>(r.x);
        const int top = static_cast<int>(r.y);
        const int width = static_cast<int>(r.w);
        const int height = static_cast<int>(r.h);

        const bool nearBorder = left - expd <= 0 || left + width + expd >= img_w ||
                                top - expd <= 0 || top + height + expd >= img_h;
        if (nearBorder) continue;

        cv::rectangle(img, cv::Rect(left, top, width, height), cv::Scalar(0, 255, 255), 2);
        const std::string className = std::to_string(r.obj_id);
        cv::putText(img, className, cv::Point(left, top - 5), cv::FONT_HERSHEY_COMPLEX_SMALL, 2, cv::Scalar(0, 0, 255), 5);
        std::cout << "x:" << r.x << " ,y:" << r.y << "w:" << r.w << "h:" << r.h << std::endl;
    }

    cv::namedWindow("results", 0);
    cv::imshow("results", img);
    // HighGUI only paints the window while waitKey() runs; block until a key is pressed.
    cv::waitKey(0);
    return 0;
}

// Returns the detections that keep a margin of more than 5 pixels from every
// border of `mat_img`; detections touching or closer to a border are dropped.
// `results` itself is not modified.
std::vector<bbox_t> selectResults(cv::Mat &mat_img, std::vector<bbox_t> &results)
{
    const int img_w = mat_img.cols;
    const int img_h = mat_img.rows;
    const int expd = 5;  // margin (pixels) a box must keep from every border

    std::vector<bbox_t> selectedResults;
    selectedResults.reserve(results.size());
    for (const auto &r : results)
    {
        // Work in signed ints so the margin arithmetic cannot wrap around.
        const int left = static_cast<int>(r.x);
        const int top = static_cast<int>(r.y);
        const int right = left + static_cast<int>(r.w);
        const int bottom = top + static_cast<int>(r.h);

        const bool nearBorder = left - expd <= 0 || right + expd >= img_w ||
                                top - expd <= 0 || bottom + expd >= img_h;
        if (nearBorder) continue;

        // Keep the detection; without this the function always returned an empty vector.
        selectedResults.push_back(r);
    }
    return selectedResults;
}

int resultWriteToJson(const std::string jsonPath, const std::string imagePath, const int imgH, const int imgW, const std::vector<bbox_t> &result)
    //jsonPath:      json file  abspath,
    //imagePath:     labelme contain the image path,(write to json)

    // a labelme json format annotation
  "version": "4.5.7",
  "flags": {},
  "shapes": [
      "label": "0",
      "points": [
      "group_id": null,
      "shape_type": "rectangle",
      "flags": {}
      "label": "1",
      "points": [
      "group_id": null,
      "shape_type": "rectangle",
      "flags": {}
  "imagePath": "000000012.bmp",
  "imageData": null,
  "imageHeight": 2000,
  "imageWidth": 2400

    std::ofstream out(jsonPath, std::ios::out);//std::ios::app add to  bottom of the file
    if (!out.is_open())
        std::cout << "cant open the " << jsonPath << "!\n";
        return -1;

    // write the json table head 
    out << "{\n" << "\"version\":\"4.5.7\",\n";
    out << "\"flags\" : {},\n";
    out << "\"shapes\" : [\n";

    for (int i = 0; i < result.size(); i++)
        bbox_t box = result[i];
        out << "{\n";
        out << "\"label\":" << "\"" << box.obj_id << "\",\n";
        out << "\"points\":[\n";
        out << "[\n" << box.x << ",\n" << box.y << "\n],\n";
        out << "[\n" << box.x + box.w << ",\n" << box.y + box.h << "\n]\n";
        out << "],\n";
        out << "\"group_id\":null,\n";
        out << "\"shape_type\":\"rectangle\",\n";
        out << "\"flags\":{}\n";
        out << "}";
        if (i != result.size() - 1) out << ",\n";//最后一个}后面没有逗号","

    out << "],\n";
    out << "\"imagePath\" :" << "\"" << imagePath << "\",\n";
    out << "\"imageData\" :" << "null,\n";
    out << "\"imageHeight\":" << imgH << ",\n";
    out << "\"imageWidth\":" << imgW << "\n";
    out << "}\n";

    return 0;

 int  demo1()
    std::string rootPath = "D:/mydoc/VS-proj/SMTDetector/x64/Release/";
    //label name file path
    std::string  names_file = rootPath + "data/SMTDetector.names";
    //config file path
    std::string  cfg_file = rootPath + "cfg/SMTDetector.cfg";
    //weights file path
    std::string  weights_file = rootPath + "model/SMTDetector.weights";
    //image file path
    //std::string imagePath = rootPath + "data/del/0-5.bmp";
    std::string imagePath = "K:\\imageData\\SMTdataset\\image\\000000001.bmp";

    //init the detector
    Detector detector(cfg_file, weights_file);

    cv::Mat img = cv::imread(imagePath);
    if (img.empty())
        std::cout << "the image is empty\n";
        return -1;

    std::vector<bbox_t> results = detector.detect(img);
    results = selectResults(img, results);

    //visualize the results
    drawResults(img, results);

    resultWriteToJson("aaaa.json", "0-1.bmp", img.rows, img.cols, results);

    return 0;

 int  demo2()
     std::string rootPath = "D:/mydoc/VS-proj/SMTDetector/x64/Release/";
     //label name file path
     std::string  names_file = rootPath + "data/SMTDetector.names";
     //config file path
     std::string  cfg_file = rootPath + "cfg/SMTDetector.cfg";
     //weights file path
     std::string  weights_file = rootPath + "model/SMTDetector.weights";
     //image file path list
     std::string imageFolder = rootPath + "data/del";

     std::vector<cv::String> imageList;
     cv::glob(imageFolder, imageList);

     //init the detector
     Detector detector(cfg_file, weights_file);

     int num = 0;
     for (auto &r : imageList)
         cv::Mat img = cv::imread(r);
         std::cout << "imagepath:" << r << std::endl;
         if (img.empty())
             std::cout << "the image is empty\n";

         std::vector<bbox_t> results = detector.detect(img);
         std::vector<bbox_t> ss = selectResults(img, results);
         num += results.size();
         std::cout << "number of thu:" << ss.size() << std::endl;
         //visualize the results
         drawResults(img, ss);

     std::cout << "the total num:" << num << std::endl;

     return 0;

 int  demo3()
     std::string rootPath = "K:/model/SMTDetector/";
     //label name file path
     std::string  names_file = rootPath + "names/SMTDetector.names";
     //config file path
     std::string  cfg_file = rootPath + "cfg/SMTDetector.cfg";
     //weights file path
     std::string  weights_file = rootPath + "model/SMTDetector.weights";
     //image file path list
     std::string imageFolder = "K:\\imageData\\SMTdataset\\smi";

     std::vector<cv::String> imageList;
     cv::glob(imageFolder, imageList);

     //init the detector
     Detector detector(cfg_file, weights_file);

     int num = 0;
     for (auto &r : imageList)
         cv::Mat img = cv::imread(r);
         std::cout << "imagepath:" << r << std::endl;
         if (img.empty())
             std::cout << "the image is empty\n";

         std::vector<bbox_t> results = detector.detect(img);
         results = selectResults(img, results);
         num += results.size();
         //std::cout << "number of thu:" << results.size() << std::endl;

         int index = r.find_last_of("\\");
         std::string imageName = r.substr(index + 1,-1);
         std::string jsonName = imageName.substr(0, imageName.find_last_of(".")) + ".json";
         //std::cout << "json:" << jsonName << "\t image:" << imageName << "\n";
         resultWriteToJson(imageFolder+"\\"+jsonName, imageName, img.rows, img.cols, results);

     std::cout << "the total num:" << num << std::endl;

     return 0;

 // Program entry point. No demo is invoked here; call demo1(), demo2() or
 // demo3() from this function to run one of them.
 int main()
 {
     return 0;
 }
