C++ opencv 推理yolov5模型
2023-11-21 本文已影响0人
leon0514
- 预处理
void YoloV5::warpAffine(cv::Mat& src_image, cv::Mat& dst_image, float *d2i)
{
int src_image_width = src_image.cols;
int src_image_height = src_image.rows;
float scale_x = image_width_ / (float)src_image_width;
float scale_y = image_height_ / (float)src_image_height;
float scale = std::min(scale_x, scale_y);
float i2d[6];
i2d[0] = scale;
i2d[1] = 0;
i2d[2] = (-scale * src_image_width + image_width_ + scale - 1) * 0.5;
i2d[3] = 0;
i2d[4] = scale;
i2d[5] = (-scale * src_image_height + image_height_ + scale - 1) * 0.5;
cv::Mat m2x3_i2d(2, 3, CV_32F, i2d);
cv::Mat m2x3_d2i(2, 3, CV_32F, d2i);
cv::invertAffineTransform(m2x3_i2d, m2x3_d2i);
dst_image.create(image_height_, image_width_, CV_8UC3);
cv::warpAffine(src_image, dst_image, m2x3_i2d, dst_image.size(), cv::INTER_LINEAR, cv::BORDER_CONSTANT, cv::Scalar::all(114));
}
- 后处理
void YoloV5::decode(std::vector<cv::Mat>& outs,
BoxArray& result_boxes,
float *d2i,
int src_image_width,
int src_image_height)
{
BoxArray boxes;
int cols = outs[0].size[2];
int rows = outs[0].size[1];
float* predict = (float*)outs[0].data;
int num_classes = cols - 5;
for(int i = 0; i < rows; ++i)
{
float* pitem = predict + i * cols;
float objness = pitem[4];
if (objness < confidence_threshold_)
{
continue;
}
float* pclass = pitem + 5;
int label = std::max_element(pclass, pclass + num_classes) - pclass;
float prob = pclass[label];
float confidence = prob * objness;
if(confidence < confidence_threshold_)
{
continue;
}
float cx = pitem[0];
float cy = pitem[1];
float width = pitem[2];
float height = pitem[3];
// 通过反变换恢复到图像尺度
float left = (cx - width * 0.5) * d2i[0] + d2i[2];
float top = (cy - height * 0.5) * d2i[0] + d2i[5];
float right = (cx + width * 0.5) * d2i[0] + d2i[2];
float bottom = (cy + height * 0.5) * d2i[0] + d2i[5];
boxes.emplace_back(left, top, right, bottom, confidence, label);
}
std::sort(boxes.begin(), boxes.end(), [](Box& a, Box& b){return a.confidence > b.confidence;});
std::vector<bool> remove_flags(boxes.size());
result_boxes.reserve(result_boxes.size());
auto iou = [](const Box& a, const Box& b){
int cross_left = std::max(a.left, b.left);
int cross_top = std::max(a.top, b.top);
int cross_right = std::min(a.right, b.right);
int cross_bottom = std::min(a.bottom, b.bottom);
int cross_area = std::max(0, cross_right - cross_left) * std::max(0, cross_bottom - cross_top);
int union_area = std::max(0.f, a.right - a.left) * std::max(0.f, a.bottom - a.top)
+ std::max(0.f, b.right - b.left) * std::max(0.f, b.bottom - b.top) - cross_area;
if(cross_area == 0 || union_area == 0) return 0.0f;
return 1.0f * cross_area / union_area;
};
for(int i = 0; i < boxes.size(); ++i)
{
if(remove_flags[i]) continue;
auto& ibox = boxes[i];
result_boxes.emplace_back(ibox);
for (int j = i + 1; j < boxes.size(); ++j)
{
if (remove_flags[j]) continue;
auto& jbox = boxes[j];
if (ibox.class_id == jbox.class_id)
{
// class matched
if (iou(ibox, jbox) >= nms_threshold_)
remove_flags[j] = true;
}
}
}
}
- 整体代码
hpp
#include <vector>
#include <opencv2/opencv.hpp>
#include <opencv2/dnn.hpp>
namespace yolo
{
struct Box{
float left, top, right, bottom, confidence;
int class_id;
Box() = default;
Box(float left, float top, float right, float bottom, float confidence, int class_id)
:left(left), top(top), right(right), bottom(bottom), confidence(confidence), class_id(class_id)
{}
};
typedef std::vector<Box> BoxArray;
class YoloV5{
public:
YoloV5(std::string model_path, float confidence_threshold, float nms_threshold, int image_width, int image_height)
:confidence_threshold_(confidence_threshold),
nms_threshold_(nms_threshold),
image_width_(image_width),
image_height_(image_height)
{
net_ = cv::dnn::readNet(model_path);
}
BoxArray detect(cv::Mat image);
private:
void warpAffine(cv::Mat& src_image, cv::Mat& dst_image, float *d2i);
void decode(std::vector<cv::Mat>& outs,
BoxArray& result_boxes,
float *d2i,
int src_image_width,
int src_image_height);
private:
cv::dnn::Net net_;
float confidence_threshold_;
float nms_threshold_;
int image_width_;
int image_height_;
};
}
#endif
cpp
#include "yolov5.hpp"
#include <algorithm>
namespace yolo
{
void YoloV5::warpAffine(cv::Mat& src_image, cv::Mat& dst_image, float *d2i)
{
int src_image_width = src_image.cols;
int src_image_height = src_image.rows;
float scale_x = image_width_ / (float)src_image_width;
float scale_y = image_height_ / (float)src_image_height;
float scale = std::min(scale_x, scale_y);
float i2d[6];
i2d[0] = scale;
i2d[1] = 0;
i2d[2] = (-scale * src_image_width + image_width_ + scale - 1) * 0.5;
i2d[3] = 0;
i2d[4] = scale;
i2d[5] = (-scale * src_image_height + image_height_ + scale - 1) * 0.5;
cv::Mat m2x3_i2d(2, 3, CV_32F, i2d);
cv::Mat m2x3_d2i(2, 3, CV_32F, d2i);
cv::invertAffineTransform(m2x3_i2d, m2x3_d2i);
dst_image.create(image_height_, image_width_, CV_8UC3);
cv::warpAffine(src_image, dst_image, m2x3_i2d, dst_image.size(), cv::INTER_LINEAR, cv::BORDER_CONSTANT, cv::Scalar::all(114));
}
void YoloV5::decode(std::vector<cv::Mat>& outs,
BoxArray& result_boxes,
float *d2i,
int src_image_width,
int src_image_height)
{
BoxArray boxes;
int cols = outs[0].size[2];
int rows = outs[0].size[1];
float* predict = (float*)outs[0].data;
int num_classes = cols - 5;
for(int i = 0; i < rows; ++i)
{
float* pitem = predict + i * cols;
float objness = pitem[4];
if (objness < confidence_threshold_)
{
continue;
}
float* pclass = pitem + 5;
int label = std::max_element(pclass, pclass + num_classes) - pclass;
float prob = pclass[label];
float confidence = prob * objness;
if(confidence < confidence_threshold_)
{
continue;
}
float cx = pitem[0];
float cy = pitem[1];
float width = pitem[2];
float height = pitem[3];
// 通过反变换恢复到图像尺度
float left = (cx - width * 0.5) * d2i[0] + d2i[2];
float top = (cy - height * 0.5) * d2i[0] + d2i[5];
float right = (cx + width * 0.5) * d2i[0] + d2i[2];
float bottom = (cy + height * 0.5) * d2i[0] + d2i[5];
boxes.emplace_back(left, top, right, bottom, confidence, label);
}
std::sort(boxes.begin(), boxes.end(), [](Box& a, Box& b){return a.confidence > b.confidence;});
std::vector<bool> remove_flags(boxes.size());
result_boxes.reserve(result_boxes.size());
auto iou = [](const Box& a, const Box& b){
int cross_left = std::max(a.left, b.left);
int cross_top = std::max(a.top, b.top);
int cross_right = std::min(a.right, b.right);
int cross_bottom = std::min(a.bottom, b.bottom);
int cross_area = std::max(0, cross_right - cross_left) * std::max(0, cross_bottom - cross_top);
int union_area = std::max(0.f, a.right - a.left) * std::max(0.f, a.bottom - a.top)
+ std::max(0.f, b.right - b.left) * std::max(0.f, b.bottom - b.top) - cross_area;
if(cross_area == 0 || union_area == 0) return 0.0f;
return 1.0f * cross_area / union_area;
};
for(int i = 0; i < boxes.size(); ++i)
{
if(remove_flags[i]) continue;
auto& ibox = boxes[i];
result_boxes.emplace_back(ibox);
for (int j = i + 1; j < boxes.size(); ++j)
{
if (remove_flags[j]) continue;
auto& jbox = boxes[j];
if (ibox.class_id == jbox.class_id)
{
// class matched
if (iou(ibox, jbox) >= nms_threshold_)
remove_flags[j] = true;
}
}
}
}
BoxArray YoloV5::detect(cv::Mat image)
{
float d2i[6];
cv::Mat affine_image;
warpAffine(image, affine_image, d2i);
std::vector<cv::Mat> outs;
auto blob = cv::dnn::blobFromImage(affine_image, 1 / 255.0, cv::Size(image_width_, image_height_), cv::Scalar(0, 0, 0), true, false);
net_.setInput(blob);
net_.forward(outs, net_.getUnconnectedOutLayersNames());
BoxArray result;
decode(outs, result, d2i, image.cols, image.rows);
return result;
}
}