
Deep Learning Object Detection: YOLO-v3 Object Detection on Windows

2018-08-29  侠之大者_7d3f

Preface

Deep-learning-based object detection has become increasingly popular and achieves high accuracy. This article uses YOLO-v3 for object detection. YOLO-v3 is built on the darknet framework, which is written in pure C and does not depend on any other third-party libraries, so it is noticeably more developer-friendly than caffe (the author had to compile caffe several times before succeeding). This article compiles YOLO-v3 into a dynamic link library (DLL) on Windows and tests its detection performance.


Development Environment

The process of compiling the dynamic link library (.dll) is not covered again here; anyone familiar with C++ who has built OpenCV should find it easy. The tests in this article use the CPU-only version; the author has compiled both the CPU-only and GPU versions of the YOLO-v3 DLL (if you need the DLL, please leave a like to show support).
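For reference, the test program in this article links against that DLL from Visual Studio. Below is a minimal sketch of the setup; the import-library name yolo_cpp_dll.lib is an assumption taken from the darknet Windows build and may differ in your own build.

#include <iostream>
#include <yolo_v2_class.hpp>                  // Detector interface exported by the DLL
#pragma comment(lib, "yolo_cpp_dll.lib")      // MSVC-specific: link the DLL's import library

int main() {
    // Constructing a Detector confirms that the DLL loads and the model files are found.
    Detector detector("darknet-master\\cfg\\yolov3.cfg", "yolov3.weights");
    std::cout << "network input: " << detector.get_net_width() << "x"
              << detector.get_net_height() << std::endl;
    return 0;
}

At run time, the DLL itself (and, for the GPU build, the CUDA/cuDNN DLLs) must be next to the executable or on the PATH.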


Yolo-v3

Running the COCO-trained YOLO-v3 model requires three files:

  1. yolov3.cfg, the YOLO-v3 network structure (configuration) file
  2. yolov3.weights, the trained YOLO-v3 weights (trained on the COCO dataset)
  3. coco.names, the list of object class names in the COCO dataset
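The test program below hard-codes the paths to these files and assumes exactly 80 class names. As an illustrative alternative (a hedged sketch, not part of the original program), coco.names can be read line by line without fixing the count:

#include <fstream>
#include <string>
#include <vector>

// Read one class name per line from coco.names (80 lines for the COCO model,
// but the count is not hard-coded here).
static std::vector<std::string> loadClassNames(const std::string& path) {
    std::vector<std::string> names;
    std::ifstream in(path);
    for (std::string line; std::getline(in, line); )
        if (!line.empty()) names.push_back(line);
    return names;
}

The complete test program used in this article follows.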
#include <iostream>
#include <fstream>
#include <opencv2/opencv.hpp>
#include <yolo\include\yolo_v2_class.hpp>

using namespace std;
using namespace cv;

const string CFG_FILE = "darknet-master\\cfg\\yolov3.cfg";
const string WEIGHT_FILE = "yolov3.weights";
const string COCO_NAMES = "darknet-master\\cfg\\coco.names";


// A detected object: class id, confidence, bounding box and class name.
class Object
{
public:
    Object();
    Object(int id, float confidence, Rect rect, String name);
    ~Object();

public:
    int id;
    float confidence;
    Rect rect;
    String name;
};

Object::Object() {
}

Object::Object(int id, float confidence, Rect rect, String name) {
    this->id = id;
    this->confidence = confidence;
    this->rect = rect;
    this->name = name;
}

Object::~Object() {
}


int main() {

    //-------------------------- Create a YOLO detector ---------------------------
    Detector yolo_detector(CFG_FILE, WEIGHT_FILE);

    // Read the class names file (80 classes for COCO)
    vector<String> classNames;
    ifstream fileIn(COCO_NAMES, ios::in);
    if (!fileIn.is_open()) {
        cerr << "failed to load coco.names!" << endl;
        return -1;
    }
    for (int i = 0; i < 80; i++) {
        char temp1[100];
        fileIn.getline(temp1, 100);
        string temp2(temp1);
        classNames.push_back(String(temp2));
    }

    //--------------------------- Load the input image -----------------------------------
    auto image = Detector::load_image("7.jpg");
    cout << "image width = " << image.w << endl
        << "image height = " << image.h << endl
        << "image channels = " << image.c << endl;

    //----------------------------- Run detection ---------------------------------------
    TickMeter t;
    t.start();
    auto res = yolo_detector.detect(image);
    t.stop();
    cout << "YOLO-v3 detection time = " << t.getTimeSec() << " sec" << endl;
    Detector::free_image(image);    // the image_t returned by load_image must be freed

    //---------------------------- Parse the detections ---------------------------------
    vector<Object> detectObjects;
    for (auto& i : res) {
        int id = i.obj_id;
        float confidence = i.prob;
        String name = classNames[id];
        Rect rect = Rect{ static_cast<int>(i.x), static_cast<int>(i.y), static_cast<int>(i.w), static_cast<int>(i.h) };

        detectObjects.push_back(Object{ id, confidence, rect, name });
    }

    //---------------------------- Draw the results --------------------------------------
    Mat im_src = imread("7.jpg");
    for (auto& i : detectObjects) {
        rectangle(im_src, i.rect, Scalar(0, 255, 255), 2);
        putText(im_src, i.name, i.rect.tl(), FONT_HERSHEY_PLAIN, 1.8, Scalar(255, 0, 0), 2);
    }

    imshow("yolo-v3", im_src);
    waitKey(0);
    return 0;
}

The Detector interface exported by the DLL is declared in yolo_v2_class.hpp (excerpt):

class Detector {
    std::shared_ptr<void> detector_gpu_ptr;
    std::deque<std::vector<bbox_t>> prev_bbox_vec_deque;
    const int cur_gpu_id;
public:
    float nms = .4;
    bool wait_stream;

    YOLODLL_API Detector(std::string cfg_filename, std::string weight_filename, int gpu_id = 0);
    YOLODLL_API ~Detector();

    YOLODLL_API std::vector<bbox_t> detect(std::string image_filename, float thresh = 0.2, bool use_mean = false);
    YOLODLL_API std::vector<bbox_t> detect(image_t img, float thresh = 0.2, bool use_mean = false);
    static YOLODLL_API image_t load_image(std::string image_filename);
    static YOLODLL_API void free_image(image_t m);
    YOLODLL_API int get_net_width() const;
    YOLODLL_API int get_net_height() const;

    YOLODLL_API std::vector<bbox_t> tracking_id(std::vector<bbox_t> cur_bbox_vec, bool const change_history = true, 
                                                int const frames_story = 10, int const max_dist = 150);

    std::vector<bbox_t> detect_resized(image_t img, int init_w, int init_h, float thresh = 0.2, bool use_mean = false)
    {
        if (img.data == NULL)
            throw std::runtime_error("Image is empty");
        auto detection_boxes = detect(img, thresh, use_mean);
        float wk = (float)init_w / img.w, hk = (float)init_h / img.h;
        for (auto &i : detection_boxes) i.x *= wk, i.w *= wk, i.y *= hk, i.h *= hk;
        return detection_boxes;
    }

#ifdef OPENCV
    std::vector<bbox_t> detect(cv::Mat mat, float thresh = 0.2, bool use_mean = false)
    {
        if(mat.data == NULL)
            throw std::runtime_error("Image is empty");
        auto image_ptr = mat_to_image_resize(mat);
        return detect_resized(*image_ptr, mat.cols, mat.rows, thresh, use_mean);
    }

    std::shared_ptr<image_t> mat_to_image_resize(cv::Mat mat) const
    {
        if (mat.data == NULL) return std::shared_ptr<image_t>(NULL);
        cv::Mat det_mat;
        cv::resize(mat, det_mat, cv::Size(get_net_width(), get_net_height()));
        return mat_to_image(det_mat);
    }

    static std::shared_ptr<image_t> mat_to_image(cv::Mat img_src)
    {
        cv::Mat img;
        cv::cvtColor(img_src, img, cv::COLOR_RGB2BGR);
        std::shared_ptr<image_t> image_ptr(new image_t, [](image_t *img) { free_image(*img); delete img; });
        std::shared_ptr<IplImage> ipl_small = std::make_shared<IplImage>(img);
        *image_ptr = ipl_to_image(ipl_small.get());
        return image_ptr;
    }

private:

    static image_t ipl_to_image(IplImage* src)
    {
        unsigned char *data = (unsigned char *)src->imageData;
        int h = src->height;
        int w = src->width;
        int c = src->nChannels;
        int step = src->widthStep;
        image_t out = make_image_custom(w, h, c);
        int count = 0;

        for (int k = 0; k < c; ++k) {
            for (int i = 0; i < h; ++i) {
                int i_step = i*step;
                for (int j = 0; j < w; ++j) {
                    out.data[count++] = data[i_step + j*c + k] / 255.;
                }
            }
        }

        return out;
    }

    static image_t make_empty_image(int w, int h, int c)
    {
        image_t out;
        out.data = 0;
        out.h = h;
        out.w = w;
        out.c = c;
        return out;
    }

    static image_t make_image_custom(int w, int h, int c)
    {
        image_t out = make_empty_image(w, h, c);
        out.data = (float *)calloc(h*w*c, sizeof(float));
        return out;
    }

#endif  // OPENCV

};
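The image_t and bbox_t types used above are declared in the same header. Their layout is roughly the following (reconstructed from memory of the darknet repository; field order and exact types may differ slightly between versions):

struct image_t {
    int h;              // image height
    int w;              // image width
    int c;              // number of channels (3 for RGB)
    float *data;        // planar float pixel data, values in [0, 1]
};

struct bbox_t {
    unsigned int x, y, w, h;   // top-left corner and size of the bounding box, in pixels
    float prob;                // confidence that the detection is correct
    unsigned int obj_id;       // class index (used above to look up coco.names)
    unsigned int track_id;     // tracking id assigned by tracking_id(), 0 if untracked
};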

Constructing the detector:

YOLODLL_API Detector(std::string cfg_filename, std::string weight_filename, int gpu_id = 0);

Inputs: the network configuration file (.cfg), the trained weights file (.weights), and gpu_id, which selects the GPU to use.
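A minimal construction example (the explicit gpu_id and nms values are illustrative; in the CPU-only build the gpu_id should have no effect):

// Construct the detector; the public nms member (see the header above) controls
// the non-maximum-suppression threshold used when overlapping boxes are merged.
Detector detector("darknet-master\\cfg\\yolov3.cfg", "yolov3.weights", 0 /* gpu_id */);
detector.nms = 0.4f;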

Loading the input image:
static YOLODLL_API image_t load_image(std::string image_filename);

Input: the image file name.
This is a static method that loads the image from disk and converts it into the image_t tensor format expected by the network.
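A short usage sketch (assuming detector has already been constructed; the 0.5 threshold is an arbitrary example value):

// load_image returns an image_t whose float buffer is allocated by the DLL;
// it must be released with free_image once detection is done.
image_t img = Detector::load_image("7.jpg");
std::vector<bbox_t> boxes = detector.detect(img, 0.5f /* confidence threshold */);
Detector::free_image(img);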


[Test images: 0.jpg, 1.jpg, 2.jpg, 3.jpg, 4.jpg, 5.jpg]

Test Results

[Detection results on the test images]

Judging from these test results, YOLO-v3's accuracy is impressive: even fairly small objects are detected, and its accuracy is better than that of MobileNet-SSD (v1).


End

This article mainly covered a quick test of YOLO-v3 on the Windows platform. The design of the YOLO network architecture and the training of YOLO models will be covered in detail in a future post. Thanks to 甜心 for the strong support.
