使用 C++ 和 OpenCV 推理 YOLOv5 模型

2023-11-21 本文已影响0人 leon0514

预处理

/**
 * Resize the source image into the network input size while keeping its
 * aspect ratio, padding the uncovered area with the constant value 114.
 *
 * @param src_image  Input image; its cols/rows define the source size.
 * @param dst_image  Receives an image_height_ x image_width_ CV_8UC3 image.
 * @param d2i        Caller-provided buffer of 6 floats that is wrapped as a
 *                   2x3 matrix and handed to cv::invertAffineTransform as the
 *                   destination, i.e. it ends up holding the network-input ->
 *                   source-image transform.
 */
void YoloV5::warpAffine(cv::Mat& src_image, cv::Mat& dst_image, float *d2i)
{
    const int src_w = src_image.cols;
    const int src_h = src_image.rows;

    // Use the smaller of the two axis ratios so the whole image fits.
    const float ratio_w = image_width_ / static_cast<float>(src_w);
    const float ratio_h = image_height_ / static_cast<float>(src_h);
    const float scale   = std::min(ratio_w, ratio_h);

    // Translation along one axis that centers the scaled image inside the
    // destination extent.
    const auto centering_offset = [scale](int src_extent, int dst_extent) {
        return static_cast<float>((-scale * src_extent + dst_extent + scale - 1) * 0.5);
    };

    // Row-major 2x3 matrix mapping source-image -> network-input coordinates.
    float i2d[6] = {
        scale, 0.0f,  centering_offset(src_w, image_width_),
        0.0f,  scale, centering_offset(src_h, image_height_)
    };

    // Both headers wrap existing buffers (no copy), so the inverse is written
    // straight into the caller's d2i array.
    cv::Mat forward(2, 3, CV_32F, i2d);
    cv::Mat inverse(2, 3, CV_32F, d2i);
    cv::invertAffineTransform(forward, inverse);

    dst_image.create(image_height_, image_width_, CV_8UC3);
    cv::warpAffine(src_image,
                   dst_image,
                   forward,
                   dst_image.size(),
                   cv::INTER_LINEAR,
                   cv::BORDER_CONSTANT,
                   cv::Scalar::all(114));
}

后处理

/**
 * Decode the first network output into detections and apply per-class NMS.
 *
 * outs[0] is read as a table of size[1] rows by size[2] columns of floats,
 * each row laid out as [cx, cy, width, height, objectness, class scores...].
 * NOTE(review): assumes outs[0] is a continuous CV_32F blob -- confirm
 * against the exported model.
 *
 * @param outs             Network outputs; only outs[0] is read.
 * @param result_boxes     Surviving boxes are appended, highest confidence first.
 * @param d2i              Row-major 2x3 affine matrix (6 floats) mapping
 *                         network-input coordinates to source-image coordinates.
 *                         Only the scale and translation entries are used.
 * @param src_image_width  Currently unused.
 * @param src_image_height Currently unused.
 */
void YoloV5::decode(std::vector<cv::Mat>& outs,
    BoxArray& result_boxes,
    float *d2i,
    int src_image_width,
    int src_image_height)
{
    (void)src_image_width;
    (void)src_image_height;

    // Nothing to decode without a 3-D output blob.
    if (outs.empty() || outs[0].empty() || outs[0].dims < 3)
    {
        return;
    }

    const int rows        = outs[0].size[1];
    const int cols        = outs[0].size[2];
    const int num_classes = cols - 5;
    if (rows <= 0 || num_classes <= 0)
    {
        // Without at least one class score, pclass[label] would read past the row.
        return;
    }

    const float* predict = reinterpret_cast<const float*>(outs[0].data);

    BoxArray boxes;
    for (int i = 0; i < rows; ++i)
    {
        const float* pitem  = predict + i * cols;
        const float objness = pitem[4];
        if (objness < confidence_threshold_)
        {
            continue;
        }

        const float* pclass    = pitem + 5;
        const int label        = static_cast<int>(std::max_element(pclass, pclass + num_classes) - pclass);
        const float confidence = pclass[label] * objness;
        if (confidence < confidence_threshold_)
        {
            continue;
        }

        const float cx          = pitem[0];
        const float cy          = pitem[1];
        const float half_width  = pitem[2] * 0.5f;
        const float half_height = pitem[3] * 0.5f;

        // Map back to source-image scale with the inverse transform:
        // x uses d2i[0]/d2i[2], y uses d2i[4]/d2i[5].
        const float left   = (cx - half_width)  * d2i[0] + d2i[2];
        const float top    = (cy - half_height) * d2i[4] + d2i[5];
        const float right  = (cx + half_width)  * d2i[0] + d2i[2];
        const float bottom = (cy + half_height) * d2i[4] + d2i[5];
        boxes.emplace_back(left, top, right, bottom, confidence, label);
    }

    // Highest confidence first, so each kept box suppresses weaker overlaps.
    std::sort(boxes.begin(), boxes.end(),
              [](const Box& a, const Box& b) { return a.confidence > b.confidence; });

    std::vector<char> suppressed(boxes.size(), 0);
    result_boxes.reserve(result_boxes.size() + boxes.size());

    // Intersection-over-union computed entirely in float so sub-pixel
    // coordinates are not truncated.
    const auto iou = [](const Box& a, const Box& b) -> float {
        const float cross_left   = std::max(a.left, b.left);
        const float cross_top    = std::max(a.top, b.top);
        const float cross_right  = std::min(a.right, b.right);
        const float cross_bottom = std::min(a.bottom, b.bottom);

        const float cross_area = std::max(0.0f, cross_right - cross_left)
                               * std::max(0.0f, cross_bottom - cross_top);
        if (cross_area <= 0.0f)
        {
            return 0.0f;
        }

        const float area_a = std::max(0.0f, a.right - a.left) * std::max(0.0f, a.bottom - a.top);
        const float area_b = std::max(0.0f, b.right - b.left) * std::max(0.0f, b.bottom - b.top);
        const float union_area = area_a + area_b - cross_area;
        if (union_area <= 0.0f)
        {
            return 0.0f;
        }
        return cross_area / union_area;
    };

    for (std::size_t i = 0; i < boxes.size(); ++i)
    {
        if (suppressed[i])
        {
            continue;
        }

        const Box& ibox = boxes[i];
        result_boxes.push_back(ibox);
        for (std::size_t j = i + 1; j < boxes.size(); ++j)
        {
            if (suppressed[j])
            {
                continue;
            }

            const Box& jbox = boxes[j];
            // Suppression only applies between boxes of the same class.
            if (ibox.class_id == jbox.class_id && iou(ibox, jbox) >= nms_threshold_)
            {
                suppressed[j] = 1;
            }
        }
    }
}

整体代码
头文件（yolov5.hpp）

#ifndef YOLOV5_HPP
#define YOLOV5_HPP

#include <string>
#include <vector>

#include <opencv2/opencv.hpp>
#include <opencv2/dnn.hpp>

namespace yolo
{

/// Axis-aligned detection box with its confidence score and class index.
/// All members are zero-initialized, so a default-constructed Box never holds
/// indeterminate values.
struct Box{
    float left = 0.0f;
    float top = 0.0f;
    float right = 0.0f;
    float bottom = 0.0f;
    float confidence = 0.0f;
    int class_id = 0;

    Box() = default;

    /// Builds a box from its corner coordinates, confidence and class index.
    Box(float left, float top, float right, float bottom, float confidence, int class_id)
    :left(left), top(top), right(right), bottom(bottom), confidence(confidence), class_id(class_id)
    {}
};

/// Collection of detection boxes.
using BoxArray = std::vector<Box>;

class YoloV5{

public:
    YoloV5(std::string model_path, float confidence_threshold, float nms_threshold, int image_width, int image_height)
        :confidence_threshold_(confidence_threshold),
        nms_threshold_(nms_threshold),
        image_width_(image_width),
        image_height_(image_height)
    {
        net_ = cv::dnn::readNet(model_path);
    }
    BoxArray detect(cv::Mat image);

private:
    void warpAffine(cv::Mat& src_image, cv::Mat& dst_image, float *d2i);

    void decode(std::vector<cv::Mat>& outs,
        BoxArray& result_boxes,
        float *d2i,
        int src_image_width,
        int src_image_height);

private:
    cv::dnn::Net net_;
    float confidence_threshold_;
    float nms_threshold_;
    int image_width_;
    int image_height_;

};

}


#endif

源文件（yolov5.cpp）

#include "yolov5.hpp"

#include <algorithm>

namespace yolo
{
/**
 * Resize the source image into the network input size while keeping its
 * aspect ratio, padding the uncovered area with the constant value 114.
 *
 * @param src_image  Input image; its cols/rows define the source size.
 * @param dst_image  Receives an image_height_ x image_width_ CV_8UC3 image.
 * @param d2i        Caller-provided buffer of 6 floats that is wrapped as a
 *                   2x3 matrix and handed to cv::invertAffineTransform as the
 *                   destination, i.e. it ends up holding the network-input ->
 *                   source-image transform.
 */
void YoloV5::warpAffine(cv::Mat& src_image, cv::Mat& dst_image, float *d2i)
{
    const int src_w = src_image.cols;
    const int src_h = src_image.rows;

    // Use the smaller of the two axis ratios so the whole image fits.
    const float ratio_w = image_width_ / static_cast<float>(src_w);
    const float ratio_h = image_height_ / static_cast<float>(src_h);
    const float scale   = std::min(ratio_w, ratio_h);

    // Translation along one axis that centers the scaled image inside the
    // destination extent.
    const auto centering_offset = [scale](int src_extent, int dst_extent) {
        return static_cast<float>((-scale * src_extent + dst_extent + scale - 1) * 0.5);
    };

    // Row-major 2x3 matrix mapping source-image -> network-input coordinates.
    float i2d[6] = {
        scale, 0.0f,  centering_offset(src_w, image_width_),
        0.0f,  scale, centering_offset(src_h, image_height_)
    };

    // Both headers wrap existing buffers (no copy), so the inverse is written
    // straight into the caller's d2i array.
    cv::Mat forward(2, 3, CV_32F, i2d);
    cv::Mat inverse(2, 3, CV_32F, d2i);
    cv::invertAffineTransform(forward, inverse);

    dst_image.create(image_height_, image_width_, CV_8UC3);
    cv::warpAffine(src_image,
                   dst_image,
                   forward,
                   dst_image.size(),
                   cv::INTER_LINEAR,
                   cv::BORDER_CONSTANT,
                   cv::Scalar::all(114));
}

/**
 * Decode the first network output into detections and apply per-class NMS.
 *
 * outs[0] is read as a table of size[1] rows by size[2] columns of floats,
 * each row laid out as [cx, cy, width, height, objectness, class scores...].
 * NOTE(review): assumes outs[0] is a continuous CV_32F blob -- confirm
 * against the exported model.
 *
 * @param outs             Network outputs; only outs[0] is read.
 * @param result_boxes     Surviving boxes are appended, highest confidence first.
 * @param d2i              Row-major 2x3 affine matrix (6 floats) mapping
 *                         network-input coordinates to source-image coordinates.
 *                         Only the scale and translation entries are used.
 * @param src_image_width  Currently unused.
 * @param src_image_height Currently unused.
 */
void YoloV5::decode(std::vector<cv::Mat>& outs,
    BoxArray& result_boxes,
    float *d2i,
    int src_image_width,
    int src_image_height)
{
    (void)src_image_width;
    (void)src_image_height;

    // Nothing to decode without a 3-D output blob.
    if (outs.empty() || outs[0].empty() || outs[0].dims < 3)
    {
        return;
    }

    const int rows        = outs[0].size[1];
    const int cols        = outs[0].size[2];
    const int num_classes = cols - 5;
    if (rows <= 0 || num_classes <= 0)
    {
        // Without at least one class score, pclass[label] would read past the row.
        return;
    }

    const float* predict = reinterpret_cast<const float*>(outs[0].data);

    BoxArray boxes;
    for (int i = 0; i < rows; ++i)
    {
        const float* pitem  = predict + i * cols;
        const float objness = pitem[4];
        if (objness < confidence_threshold_)
        {
            continue;
        }

        const float* pclass    = pitem + 5;
        const int label        = static_cast<int>(std::max_element(pclass, pclass + num_classes) - pclass);
        const float confidence = pclass[label] * objness;
        if (confidence < confidence_threshold_)
        {
            continue;
        }

        const float cx          = pitem[0];
        const float cy          = pitem[1];
        const float half_width  = pitem[2] * 0.5f;
        const float half_height = pitem[3] * 0.5f;

        // Map back to source-image scale with the inverse transform:
        // x uses d2i[0]/d2i[2], y uses d2i[4]/d2i[5].
        const float left   = (cx - half_width)  * d2i[0] + d2i[2];
        const float top    = (cy - half_height) * d2i[4] + d2i[5];
        const float right  = (cx + half_width)  * d2i[0] + d2i[2];
        const float bottom = (cy + half_height) * d2i[4] + d2i[5];
        boxes.emplace_back(left, top, right, bottom, confidence, label);
    }

    // Highest confidence first, so each kept box suppresses weaker overlaps.
    std::sort(boxes.begin(), boxes.end(),
              [](const Box& a, const Box& b) { return a.confidence > b.confidence; });

    std::vector<char> suppressed(boxes.size(), 0);
    result_boxes.reserve(result_boxes.size() + boxes.size());

    // Intersection-over-union computed entirely in float so sub-pixel
    // coordinates are not truncated.
    const auto iou = [](const Box& a, const Box& b) -> float {
        const float cross_left   = std::max(a.left, b.left);
        const float cross_top    = std::max(a.top, b.top);
        const float cross_right  = std::min(a.right, b.right);
        const float cross_bottom = std::min(a.bottom, b.bottom);

        const float cross_area = std::max(0.0f, cross_right - cross_left)
                               * std::max(0.0f, cross_bottom - cross_top);
        if (cross_area <= 0.0f)
        {
            return 0.0f;
        }

        const float area_a = std::max(0.0f, a.right - a.left) * std::max(0.0f, a.bottom - a.top);
        const float area_b = std::max(0.0f, b.right - b.left) * std::max(0.0f, b.bottom - b.top);
        const float union_area = area_a + area_b - cross_area;
        if (union_area <= 0.0f)
        {
            return 0.0f;
        }
        return cross_area / union_area;
    };

    for (std::size_t i = 0; i < boxes.size(); ++i)
    {
        if (suppressed[i])
        {
            continue;
        }

        const Box& ibox = boxes[i];
        result_boxes.push_back(ibox);
        for (std::size_t j = i + 1; j < boxes.size(); ++j)
        {
            if (suppressed[j])
            {
                continue;
            }

            const Box& jbox = boxes[j];
            // Suppression only applies between boxes of the same class.
            if (ibox.class_id == jbox.class_id && iou(ibox, jbox) >= nms_threshold_)
            {
                suppressed[j] = 1;
            }
        }
    }
}

/**
 * Run the full pipeline on one image: preprocess with warpAffine, forward the
 * network, then decode the outputs into boxes.
 *
 * @param image  Input image; its cols/rows are forwarded to decode.
 * @return Boxes produced by decode.
 */
BoxArray YoloV5::detect(cv::Mat image)
{
    // Preprocess: resized image plus the inverse transform for decoding.
    cv::Mat letterboxed;
    float inverse_affine[6];
    warpAffine(image, letterboxed, inverse_affine);

    // Build the input blob: scale pixels by 1/255, zero mean,
    // swapRB = true, crop = false.
    const cv::Size input_size(image_width_, image_height_);
    auto input_blob = cv::dnn::blobFromImage(letterboxed,
                                             1 / 255.0,
                                             input_size,
                                             cv::Scalar(0, 0, 0),
                                             true,
                                             false);
    net_.setInput(input_blob);

    // Collect every unconnected output layer.
    std::vector<cv::Mat> outputs;
    net_.forward(outputs, net_.getUnconnectedOutLayersNames());

    BoxArray detections;
    decode(outputs, detections, inverse_affine, image.cols, image.rows);
    return detections;
}

}

C++ opencv 推理yolov5模型

猜你喜欢

热点阅读