Opencv CUDA应用 图像峰值查找
图像处理中,经常通过距离变换对粘连物体图像进行分割。而距离变换常常通过峰值查找确定物体图像对质心。查找单通道图像局部峰值对方法,Matlab工具箱中有imregionalmax,python库skimage中feature.peak_local_max,但是OpenCV中没有相关查找局部峰值对函数。论坛中有人讨论使用滑动窗口,通过图像分割成几个局部,查找局部中对全局峰值来确定局部峰值,但是误差可想而知而且对于峰值出现为一个等值平面的话,是否能够找出峰值就取决于窗的大小。
论文《Morphological Grayscale Reconstruction in Image Analysis:
Applications and Effcient Algorithms》中提出了一种通过形态学灰度重建(Morphological Grayscale Reconstruction)对方法来求取局部峰值,Matlab中就是应用该方法实现得,详见imreconstruct说明中对参考文献,正是该篇论文。使用原图像数值偏移-1作为mask,对原图像进行形态学灰度重建,得到重建图。然后用原图像减去重建图就能得求出峰值的位置(大于零处)。
形态学灰度重建(图源于论文)
论文中多种形态学灰度重建算法,其中,运用并行计算的方法如下图。NG表示对应像素点对邻近像素,可取4邻近或8邻近。本文使用8邻近对算法进行实现。
并行算法实现(图源于论文)
使用CUDA对上述算法进行实现,对圆形粘连图像进行距离变换后查找峰值。(该算法的非CUDA实现,详见 【图像处理】灰度图、亮度峰值极值查找)图像效果如下:
原分割图像 距离变换 峰值查找源代码:
imregionmax.cu
#include "cuda.h"
#include "cuda_runtime.h"
#include "cuda_runtime_api.h"
#include "device_functions.h"
#include "opencv2/core/core.hpp"
#include "opencv2/highgui/highgui.hpp"
#include "opencv2/opencv.hpp"
#include "stdio.h"
#include <vector>
#include <iostream>
using namespace cv;
using namespace std;
#define Accuracy 0
typedef unsigned char eleType;
__global__ void DilationStep(eleType *k,eleType *j,unsigned int total)
{
unsigned int x = blockIdx.x*blockDim.x + threadIdx.x;
unsigned int y = blockIdx.y*blockDim.y + threadIdx.y;
unsigned int offset = x + y*blockDim.x*gridDim.x;
unsigned int width = blockDim.x*gridDim.x;
unsigned int heigth = blockDim.y*gridDim.y;
if(offset > total) return;
unsigned int left,right,top,bottom;
left = offset -1;
right = offset+1;
if (x==0) left++;
if (x==width-1) right--;
top = offset - width;
bottom = offset + width;
if (y==0) top += width;
if (y==heigth-1) bottom -= width;
eleType max = j[offset];
if(j[left] - max > Accuracy) max = j[left];
if(j[right] - max > Accuracy) max = j[right];
if(j[bottom]- max > Accuracy) max = j[bottom];
if(j[top] - max > Accuracy) max = j[top];
unsigned int leftbottom,lefttop,righttop,rightbottom;
leftbottom = bottom - 1;
if(x==0) leftbottom++;
rightbottom = bottom + 1;
if(x==width-1) rightbottom--;
lefttop = top - 1;
if(x==0) lefttop++;
righttop = top + 1;
if(x==width-1) righttop--;
if(j[leftbottom] - max > Accuracy) max = j[leftbottom];
if(j[rightbottom] - max > Accuracy) max = j[rightbottom];
if(j[lefttop] - max > Accuracy) max = j[lefttop];
if(j[righttop] - max > Accuracy) max = j[righttop];
k[offset] =max;
}
__global__ void PointwiseMinimum(eleType *I,eleType *J,eleType *K,unsigned int total)
{
unsigned int x = blockIdx.x*blockDim.x + threadIdx.x;
unsigned int y = blockIdx.y*blockDim.y + threadIdx.y;
unsigned int offset = x + y*blockDim.x*gridDim.x;
if(I[offset] - K[offset] <Accuracy)
J[offset] = I[offset];
else
J[offset] = K[offset];
}
#define DIM 16
Mat imregionmax(const Mat *src,eleType h)
{
Mat LocMax = src->clone();
int width = src->cols;
int height = src->rows;
Mat Imask = src->clone();
Mat Jmasker = Imask - h;
Mat K = Jmasker.clone();
Mat Tmp = src->clone();
eleType *Jmasker_dev,*Imask_dev,*K_dev;
cudaMalloc((void**)&Jmasker_dev,width*height*sizeof(eleType));
cudaMemcpy(Jmasker_dev,Jmasker.data,width*height*sizeof(eleType),cudaMemcpyHostToDevice);
cudaMalloc((void**)&Imask_dev,width*height*sizeof(eleType));
cudaMemcpy(Imask_dev,Imask.data,width*height*sizeof(eleType),cudaMemcpyHostToDevice);
cudaMalloc((void**)&K_dev,width*height*sizeof(eleType));
cudaError_t cudaStatus;
while(1)
{
dim3 blocks(64,64);
dim3 threads((blocks.x+width-1)/blocks.x,(blocks.y+height-1)/blocks.y);
DilationStep<<<blocks,threads>>>(K_dev,Jmasker_dev,width*height);
cudaDeviceSynchronize();
cudaStatus= cudaGetLastError();
if (cudaStatus != cudaSuccess)
{
fprintf(stderr, "addKernel launch failed: %s\n", cudaGetErrorString(cudaStatus));
return LocMax;
}
cudaMemcpy(K.data,K_dev,width*height*sizeof(eleType),cudaMemcpyDeviceToHost);
PointwiseMinimum<<<blocks,threads>>>(Imask_dev,Jmasker_dev,K_dev,width*height);
if (cudaStatus != cudaSuccess)
{
fprintf(stderr, "addKernel launch failed: %s\n", cudaGetErrorString(cudaStatus));
return LocMax;
}
cudaMemcpy(Tmp.data,Jmasker_dev,width*height*sizeof(eleType),cudaMemcpyDeviceToHost);
if (memcmp(Tmp.data,Jmasker.data,width*height*sizeof(eleType))==0) break;
else cudaMemcpy(Jmasker.data,Jmasker_dev,width*height*sizeof(eleType),cudaMemcpyDeviceToHost);
}
cudaFree(Imask_dev);
cudaFree(Jmasker_dev);
cudaFree(K_dev);
LocMax = (Imask-Jmasker>0);
return LocMax;
}
main.cpp
#include <iostream>
#include "opencv2/core/core.hpp"
#include "opencv2/highgui/highgui.hpp"
#include "opencv2/opencv.hpp"
using namespace std;
using namespace cv;
typedef unsigned char eleType;
Mat imregionmax(const Mat *src,eleType h);
void LabShow(char wname[20],Mat src,Mat Label,Size offset)
{
Mat sImg = src.clone();
if(sImg.channels()<3)
cvtColor(sImg,sImg,COLOR_GRAY2RGB);
int font_face = cv::FONT_HERSHEY_COMPLEX;
double font_scale = 1;
for(int x=0;x<Label.cols;x++)
{
for(int y=0;y<Label.cols;y++)
{
Point pos;
pos.x=x+offset.width;
pos.y=y+offset.height;
if(Label.at<uchar>(y,x))
putText(sImg,"x",pos,font_face,font_scale,Scalar(0,0,255),3,8);
}
}
imshow(wname,sImg);
}
int main(int argc, char *argv[])
{
Mat src(Size(1024,1024),CV_8UC1);
src.setTo(0);
circle(src,Point(200,200),100,Scalar(255),-1);
circle(src,Point(300,300),100,Scalar(255),-1);
circle(src,Point(400,600),100,Scalar(255),-1);
circle(src,Point(550,600),100,Scalar(255),-1);
namedWindow("input", CV_WINDOW_NORMAL);
imshow("input",src);
Mat dis=Mat(src.size(),CV_32FC1);
distanceTransform(src,dis,CV_DIST_L2,3);
normalize(dis,dis,1.0,0.0,NORM_MINMAX);
dis = dis *255;dis.convertTo(dis,CV_8U);
namedWindow("distance Transfrom", CV_WINDOW_NORMAL);
imshow("distance Transfrom",dis);
namedWindow("peeks", CV_WINDOW_NORMAL);
Mat peeks = imregionmax(&dis,1);
LabShow("peeks",dis,peeks,Size(0,0));
waitKey(0);
return 0;
}
源码下载:
GitHub
参考:
Morphological grayscale reconstruction in image analysis: applications and efficient algorithms
python数字图像处理(19):骨架提取与分水岭算法