gpu版本相位相关
2021-07-26 本文已影响0人
寽虎非虫003
零、序言
opencv
提供了有相位相关的计算代码,但是只有cpu
的版本,速度虽然不错,但是也不满足需求,于是想做gpu
版本的;
一、cpu版本
cv::Point2d cv::phaseCorrelate(InputArray _src1, InputArray _src2, InputArray _window, double* response)
{
CV_INSTRUMENT_REGION();
Mat src1 = _src1.getMat();
Mat src2 = _src2.getMat();
Mat window = _window.getMat();
CV_Assert( src1.type() == src2.type());
CV_Assert( src1.type() == CV_32FC1 || src1.type() == CV_64FC1 );
CV_Assert( src1.size == src2.size);
if(!window.empty())
{
CV_Assert( src1.type() == window.type());
CV_Assert( src1.size == window.size);
}
int M = getOptimalDFTSize(src1.rows);
int N = getOptimalDFTSize(src1.cols);
Mat padded1, padded2, paddedWin;
if(M != src1.rows || N != src1.cols)
{
copyMakeBorder(src1, padded1, 0, M - src1.rows, 0, N - src1.cols, BORDER_CONSTANT, Scalar::all(0));
copyMakeBorder(src2, padded2, 0, M - src2.rows, 0, N - src2.cols, BORDER_CONSTANT, Scalar::all(0));
if(!window.empty())
{
copyMakeBorder(window, paddedWin, 0, M - window.rows, 0, N - window.cols, BORDER_CONSTANT, Scalar::all(0));
}
}
else
{
padded1 = src1;
padded2 = src2;
paddedWin = window;
}
Mat FFT1, FFT2, P, Pm, C;
// perform window multiplication if available
if(!paddedWin.empty())
{
// apply window to both images before proceeding...
multiply(paddedWin, padded1, padded1);
multiply(paddedWin, padded2, padded2);
}
// execute phase correlation equation
// Reference: http://en.wikipedia.org/wiki/Phase_correlation
dft(padded1, FFT1, DFT_REAL_OUTPUT);
dft(padded2, FFT2, DFT_REAL_OUTPUT);
mulSpectrums(FFT1, FFT2, P, 0, true);
magSpectrums(P, Pm);
divSpectrums(P, Pm, C, 0, false); // FF* / |FF*| (phase correlation equation completed here...)
idft(C, C); // gives us the nice peak shift location...
fftShift(C); // shift the energy to the center of the frame.
// locate the highest peak
Point peakLoc;
minMaxLoc(C, NULL, NULL, NULL, &peakLoc);
// get the phase shift with sub-pixel accuracy, 5x5 window seems about right here...
Point2d t;
t = weightedCentroid(C, peakLoc, Size(5, 5), response);
// max response is M*N (not exactly, might be slightly larger due to rounding errors)
if(response)
*response /= M*N;
// adjust shift relative to image center...
Point2d center((double)padded1.cols / 2.0, (double)padded1.rows / 2.0);
return (center - t);
}
但是上面这个函数依赖于4个没有导出的私有函数
namespace cv
{
static void magSpectrums( InputArray _src, OutputArray _dst)
{
Mat src = _src.getMat();
int depth = src.depth(), cn = src.channels(), type = src.type();
int rows = src.rows, cols = src.cols;
int j, k;
CV_Assert( type == CV_32FC1 || type == CV_32FC2 || type == CV_64FC1 || type == CV_64FC2 );
if(src.depth() == CV_32F)
_dst.create( src.rows, src.cols, CV_32FC1 );
else
_dst.create( src.rows, src.cols, CV_64FC1 );
Mat dst = _dst.getMat();
dst.setTo(0);//Mat elements are not equal to zero by default!
bool is_1d = (rows == 1 || (cols == 1 && src.isContinuous() && dst.isContinuous()));
if( is_1d )
cols = cols + rows - 1, rows = 1;
int ncols = cols*cn;
int j0 = cn == 1;
int j1 = ncols - (cols % 2 == 0 && cn == 1);
if( depth == CV_32F )
{
const float* dataSrc = src.ptr<float>();
float* dataDst = dst.ptr<float>();
size_t stepSrc = src.step/sizeof(dataSrc[0]);
size_t stepDst = dst.step/sizeof(dataDst[0]);
if( !is_1d && cn == 1 )
{
for( k = 0; k < (cols % 2 ? 1 : 2); k++ )
{
if( k == 1 )
dataSrc += cols - 1, dataDst += cols - 1;
dataDst[0] = dataSrc[0]*dataSrc[0];
if( rows % 2 == 0 )
dataDst[(rows-1)*stepDst] = dataSrc[(rows-1)*stepSrc]*dataSrc[(rows-1)*stepSrc];
for( j = 1; j <= rows - 2; j += 2 )
{
dataDst[j*stepDst] = (float)std::sqrt((double)dataSrc[j*stepSrc]*dataSrc[j*stepSrc] +
(double)dataSrc[(j+1)*stepSrc]*dataSrc[(j+1)*stepSrc]);
}
if( k == 1 )
dataSrc -= cols - 1, dataDst -= cols - 1;
}
}
for( ; rows--; dataSrc += stepSrc, dataDst += stepDst )
{
if( is_1d && cn == 1 )
{
dataDst[0] = dataSrc[0]*dataSrc[0];
if( cols % 2 == 0 )
dataDst[j1] = dataSrc[j1]*dataSrc[j1];
}
for( j = j0; j < j1; j += 2 )
{
dataDst[j] = (float)std::sqrt((double)dataSrc[j]*dataSrc[j] + (double)dataSrc[j+1]*dataSrc[j+1]);
}
}
}
else
{
const double* dataSrc = src.ptr<double>();
double* dataDst = dst.ptr<double>();
size_t stepSrc = src.step/sizeof(dataSrc[0]);
size_t stepDst = dst.step/sizeof(dataDst[0]);
if( !is_1d && cn == 1 )
{
for( k = 0; k < (cols % 2 ? 1 : 2); k++ )
{
if( k == 1 )
dataSrc += cols - 1, dataDst += cols - 1;
dataDst[0] = dataSrc[0]*dataSrc[0];
if( rows % 2 == 0 )
dataDst[(rows-1)*stepDst] = dataSrc[(rows-1)*stepSrc]*dataSrc[(rows-1)*stepSrc];
for( j = 1; j <= rows - 2; j += 2 )
{
dataDst[j*stepDst] = std::sqrt(dataSrc[j*stepSrc]*dataSrc[j*stepSrc] +
dataSrc[(j+1)*stepSrc]*dataSrc[(j+1)*stepSrc]);
}
if( k == 1 )
dataSrc -= cols - 1, dataDst -= cols - 1;
}
}
for( ; rows--; dataSrc += stepSrc, dataDst += stepDst )
{
if( is_1d && cn == 1 )
{
dataDst[0] = dataSrc[0]*dataSrc[0];
if( cols % 2 == 0 )
dataDst[j1] = dataSrc[j1]*dataSrc[j1];
}
for( j = j0; j < j1; j += 2 )
{
dataDst[j] = std::sqrt(dataSrc[j]*dataSrc[j] + dataSrc[j+1]*dataSrc[j+1]);
}
}
}
}
static void divSpectrums( InputArray _srcA, InputArray _srcB, OutputArray _dst, int flags, bool conjB)
{
Mat srcA = _srcA.getMat(), srcB = _srcB.getMat();
int depth = srcA.depth(), cn = srcA.channels(), type = srcA.type();
int rows = srcA.rows, cols = srcA.cols;
int j, k;
CV_Assert( type == srcB.type() && srcA.size() == srcB.size() );
CV_Assert( type == CV_32FC1 || type == CV_32FC2 || type == CV_64FC1 || type == CV_64FC2 );
_dst.create( srcA.rows, srcA.cols, type );
Mat dst = _dst.getMat();
CV_Assert(dst.data != srcA.data); // non-inplace check
CV_Assert(dst.data != srcB.data); // non-inplace check
bool is_1d = (flags & DFT_ROWS) || (rows == 1 || (cols == 1 &&
srcA.isContinuous() && srcB.isContinuous() && dst.isContinuous()));
if( is_1d && !(flags & DFT_ROWS) )
cols = cols + rows - 1, rows = 1;
int ncols = cols*cn;
int j0 = cn == 1;
int j1 = ncols - (cols % 2 == 0 && cn == 1);
if( depth == CV_32F )
{
const float* dataA = srcA.ptr<float>();
const float* dataB = srcB.ptr<float>();
float* dataC = dst.ptr<float>();
float eps = FLT_EPSILON; // prevent div0 problems
size_t stepA = srcA.step/sizeof(dataA[0]);
size_t stepB = srcB.step/sizeof(dataB[0]);
size_t stepC = dst.step/sizeof(dataC[0]);
if( !is_1d && cn == 1 )
{
for( k = 0; k < (cols % 2 ? 1 : 2); k++ )
{
if( k == 1 )
dataA += cols - 1, dataB += cols - 1, dataC += cols - 1;
dataC[0] = dataA[0] / (dataB[0] + eps);
if( rows % 2 == 0 )
dataC[(rows-1)*stepC] = dataA[(rows-1)*stepA] / (dataB[(rows-1)*stepB] + eps);
if( !conjB )
for( j = 1; j <= rows - 2; j += 2 )
{
double denom = (double)dataB[j*stepB]*dataB[j*stepB] +
(double)dataB[(j+1)*stepB]*dataB[(j+1)*stepB] + (double)eps;
double re = (double)dataA[j*stepA]*dataB[j*stepB] +
(double)dataA[(j+1)*stepA]*dataB[(j+1)*stepB];
double im = (double)dataA[(j+1)*stepA]*dataB[j*stepB] -
(double)dataA[j*stepA]*dataB[(j+1)*stepB];
dataC[j*stepC] = (float)(re / denom);
dataC[(j+1)*stepC] = (float)(im / denom);
}
else
for( j = 1; j <= rows - 2; j += 2 )
{
double denom = (double)dataB[j*stepB]*dataB[j*stepB] +
(double)dataB[(j+1)*stepB]*dataB[(j+1)*stepB] + (double)eps;
double re = (double)dataA[j*stepA]*dataB[j*stepB] -
(double)dataA[(j+1)*stepA]*dataB[(j+1)*stepB];
double im = (double)dataA[(j+1)*stepA]*dataB[j*stepB] +
(double)dataA[j*stepA]*dataB[(j+1)*stepB];
dataC[j*stepC] = (float)(re / denom);
dataC[(j+1)*stepC] = (float)(im / denom);
}
if( k == 1 )
dataA -= cols - 1, dataB -= cols - 1, dataC -= cols - 1;
}
}
for( ; rows--; dataA += stepA, dataB += stepB, dataC += stepC )
{
if( is_1d && cn == 1 )
{
dataC[0] = dataA[0] / (dataB[0] + eps);
if( cols % 2 == 0 )
dataC[j1] = dataA[j1] / (dataB[j1] + eps);
}
if( !conjB )
for( j = j0; j < j1; j += 2 )
{
double denom = (double)(dataB[j]*dataB[j] + dataB[j+1]*dataB[j+1] + eps);
double re = (double)(dataA[j]*dataB[j] + dataA[j+1]*dataB[j+1]);
double im = (double)(dataA[j+1]*dataB[j] - dataA[j]*dataB[j+1]);
dataC[j] = (float)(re / denom);
dataC[j+1] = (float)(im / denom);
}
else
for( j = j0; j < j1; j += 2 )
{
double denom = (double)(dataB[j]*dataB[j] + dataB[j+1]*dataB[j+1] + eps);
double re = (double)(dataA[j]*dataB[j] - dataA[j+1]*dataB[j+1]);
double im = (double)(dataA[j+1]*dataB[j] + dataA[j]*dataB[j+1]);
dataC[j] = (float)(re / denom);
dataC[j+1] = (float)(im / denom);
}
}
}
else
{
const double* dataA = srcA.ptr<double>();
const double* dataB = srcB.ptr<double>();
double* dataC = dst.ptr<double>();
double eps = DBL_EPSILON; // prevent div0 problems
size_t stepA = srcA.step/sizeof(dataA[0]);
size_t stepB = srcB.step/sizeof(dataB[0]);
size_t stepC = dst.step/sizeof(dataC[0]);
if( !is_1d && cn == 1 )
{
for( k = 0; k < (cols % 2 ? 1 : 2); k++ )
{
if( k == 1 )
dataA += cols - 1, dataB += cols - 1, dataC += cols - 1;
dataC[0] = dataA[0] / (dataB[0] + eps);
if( rows % 2 == 0 )
dataC[(rows-1)*stepC] = dataA[(rows-1)*stepA] / (dataB[(rows-1)*stepB] + eps);
if( !conjB )
for( j = 1; j <= rows - 2; j += 2 )
{
double denom = dataB[j*stepB]*dataB[j*stepB] +
dataB[(j+1)*stepB]*dataB[(j+1)*stepB] + eps;
double re = dataA[j*stepA]*dataB[j*stepB] +
dataA[(j+1)*stepA]*dataB[(j+1)*stepB];
double im = dataA[(j+1)*stepA]*dataB[j*stepB] -
dataA[j*stepA]*dataB[(j+1)*stepB];
dataC[j*stepC] = re / denom;
dataC[(j+1)*stepC] = im / denom;
}
else
for( j = 1; j <= rows - 2; j += 2 )
{
double denom = dataB[j*stepB]*dataB[j*stepB] +
dataB[(j+1)*stepB]*dataB[(j+1)*stepB] + eps;
double re = dataA[j*stepA]*dataB[j*stepB] -
dataA[(j+1)*stepA]*dataB[(j+1)*stepB];
double im = dataA[(j+1)*stepA]*dataB[j*stepB] +
dataA[j*stepA]*dataB[(j+1)*stepB];
dataC[j*stepC] = re / denom;
dataC[(j+1)*stepC] = im / denom;
}
if( k == 1 )
dataA -= cols - 1, dataB -= cols - 1, dataC -= cols - 1;
}
}
for( ; rows--; dataA += stepA, dataB += stepB, dataC += stepC )
{
if( is_1d && cn == 1 )
{
dataC[0] = dataA[0] / (dataB[0] + eps);
if( cols % 2 == 0 )
dataC[j1] = dataA[j1] / (dataB[j1] + eps);
}
if( !conjB )
for( j = j0; j < j1; j += 2 )
{
double denom = dataB[j]*dataB[j] + dataB[j+1]*dataB[j+1] + eps;
double re = dataA[j]*dataB[j] + dataA[j+1]*dataB[j+1];
double im = dataA[j+1]*dataB[j] - dataA[j]*dataB[j+1];
dataC[j] = re / denom;
dataC[j+1] = im / denom;
}
else
for( j = j0; j < j1; j += 2 )
{
double denom = dataB[j]*dataB[j] + dataB[j+1]*dataB[j+1] + eps;
double re = dataA[j]*dataB[j] - dataA[j+1]*dataB[j+1];
double im = dataA[j+1]*dataB[j] + dataA[j]*dataB[j+1];
dataC[j] = re / denom;
dataC[j+1] = im / denom;
}
}
}
}
static void fftShift(InputOutputArray _out)
{
Mat out = _out.getMat();
if(out.rows == 1 && out.cols == 1)
{
// trivially shifted.
return;
}
std::vector<Mat> planes;
split(out, planes);
int xMid = out.cols >> 1;
int yMid = out.rows >> 1;
bool is_1d = xMid == 0 || yMid == 0;
if(is_1d)
{
int is_odd = (xMid > 0 && out.cols % 2 == 1) || (yMid > 0 && out.rows % 2 == 1);
xMid = xMid + yMid;
for(size_t i = 0; i < planes.size(); i++)
{
Mat tmp;
Mat half0(planes[i], Rect(0, 0, xMid + is_odd, 1));
Mat half1(planes[i], Rect(xMid + is_odd, 0, xMid, 1));
half0.copyTo(tmp);
half1.copyTo(planes[i](Rect(0, 0, xMid, 1)));
tmp.copyTo(planes[i](Rect(xMid, 0, xMid + is_odd, 1)));
}
}
else
{
int isXodd = out.cols % 2 == 1;
int isYodd = out.rows % 2 == 1;
for(size_t i = 0; i < planes.size(); i++)
{
// perform quadrant swaps...
Mat q0(planes[i], Rect(0, 0, xMid + isXodd, yMid + isYodd));
Mat q1(planes[i], Rect(xMid + isXodd, 0, xMid, yMid + isYodd));
Mat q2(planes[i], Rect(0, yMid + isYodd, xMid + isXodd, yMid));
Mat q3(planes[i], Rect(xMid + isXodd, yMid + isYodd, xMid, yMid));
if(!(isXodd || isYodd))
{
Mat tmp;
q0.copyTo(tmp);
q3.copyTo(q0);
tmp.copyTo(q3);
q1.copyTo(tmp);
q2.copyTo(q1);
tmp.copyTo(q2);
}
else
{
Mat tmp0, tmp1, tmp2 ,tmp3;
q0.copyTo(tmp0);
q1.copyTo(tmp1);
q2.copyTo(tmp2);
q3.copyTo(tmp3);
tmp0.copyTo(planes[i](Rect(xMid, yMid, xMid + isXodd, yMid + isYodd)));
tmp3.copyTo(planes[i](Rect(0, 0, xMid, yMid)));
tmp1.copyTo(planes[i](Rect(0, yMid, xMid, yMid + isYodd)));
tmp2.copyTo(planes[i](Rect(xMid, 0, xMid + isXodd, yMid)));
}
}
}
merge(planes, out);
}
static Point2d weightedCentroid(InputArray _src, cv::Point peakLocation, cv::Size weightBoxSize, double* response)
{
Mat src = _src.getMat();
int type = src.type();
CV_Assert( type == CV_32FC1 || type == CV_64FC1 );
int minr = peakLocation.y - (weightBoxSize.height >> 1);
int maxr = peakLocation.y + (weightBoxSize.height >> 1);
int minc = peakLocation.x - (weightBoxSize.width >> 1);
int maxc = peakLocation.x + (weightBoxSize.width >> 1);
Point2d centroid;
double sumIntensity = 0.0;
// clamp the values to min and max if needed.
if(minr < 0)
{
minr = 0;
}
if(minc < 0)
{
minc = 0;
}
if(maxr > src.rows - 1)
{
maxr = src.rows - 1;
}
if(maxc > src.cols - 1)
{
maxc = src.cols - 1;
}
if(type == CV_32FC1)
{
const float* dataIn = src.ptr<float>();
dataIn += minr*src.cols;
for(int y = minr; y <= maxr; y++)
{
for(int x = minc; x <= maxc; x++)
{
centroid.x += (double)x*dataIn[x];
centroid.y += (double)y*dataIn[x];
sumIntensity += (double)dataIn[x];
}
dataIn += src.cols;
}
}
else
{
const double* dataIn = src.ptr<double>();
dataIn += minr*src.cols;
for(int y = minr; y <= maxr; y++)
{
for(int x = minc; x <= maxc; x++)
{
centroid.x += (double)x*dataIn[x];
centroid.y += (double)y*dataIn[x];
sumIntensity += dataIn[x];
}
dataIn += src.cols;
}
}
if(response)
*response = sumIntensity;
sumIntensity += DBL_EPSILON; // prevent div0 problems...
centroid.x /= sumIntensity;
centroid.y /= sumIntensity;
return centroid;
}
}
二、修改的gpu版本
Point2d phaseCorrelate_gpu(Mat &src1, Mat &src2, InputArray &_window, double* response)
{
//CV_INSTRUMENT_REGION();
//Mat src1 = _src1.getMat();
//Mat src2 = _src2.getMat();
//cuda::GpuMat window = _window.getGpuMat();
Mat window = _window.getMat();
//检查
CV_Assert(src1.type() == src2.type());
CV_Assert(src1.type() == CV_32FC1 || src1.type() == CV_64FC1);
CV_Assert(src1.size == src2.size);
if (!window.empty())
{
CV_Assert(src1.type() == window.type());
CV_Assert(src1.size == window.size);
//CV_Assert(src1.cols == window.cols);
//CV_Assert(src1.rows == window.rows);
}
//因为要进行离散傅立叶变换,所以为了提高效率,就要得到最佳的图像尺寸
int M = getOptimalDFTSize(src1.rows);
int N = getOptimalDFTSize(src1.cols);
//准备扩充边界
/*cuda::Gpu*/Mat padded1, padded2, paddedWin;
if (M != src1.rows || N != src1.cols)
{
//这儿使用cuda的时间比不使用更慢
copyMakeBorder(src1, padded1, 0, M - src1.rows, 0, N - src1.cols, BORDER_CONSTANT, Scalar::all(0));
copyMakeBorder(src2, padded2, 0, M - src2.rows, 0, N - src2.cols, BORDER_CONSTANT, Scalar::all(0));
if (!window.empty())
{
cuda::copyMakeBorder(window, paddedWin, 0, M - window.rows, 0, N - window.cols, BORDER_CONSTANT, Scalar::all(0));
}
}
else
{
padded1 = src1;
padded2 = src2;
paddedWin = window;
}
// perform window multiplication if available
//执行步骤1,两幅输入图像分别与窗函数逐点相乘
if (!window.empty()/*paddedWin.empty()*/)
{
// apply window to both images before proceeding...
cuda::multiply(paddedWin, padded1, padded1);
cuda::multiply(paddedWin, padded2, padded2);
}
cuda::GpuMat gPadded1, gPadded2, gFFT1, gFFT2, gP, gPm,gC;
gPadded1.upload(padded1);
gPadded2.upload(padded2);
//执行步骤2,分别对两幅图像取傅立叶变换
cuda::dft(gPadded2, gFFT2, padded2.size(), 0);
cuda::dft(gPadded1, gFFT1, gPadded1.size(),0);
//执行步骤3
//计算互功率谱的分子部分,即公式3中的分子,其中P为输出结果,true表示的是对FF2取共轭,所以得到的结果为:P=FFT1×FFT2*,mulSpectrums函数为通用函数
cv::cuda::mulSpectrums(gFFT1, gFFT2, gP, 0, true);
private::cuda::magSpectrums(gP, gPm);////计算互功率谱的分母部分
//计算互功率谱
private::cuda::divSpectrums(gP, gPm, gC, 0, false); // FF* / |FF*| (phase correlation equation completed here...)
//执行步骤4,傅立叶逆变换
cv::cuda::dft(gC, gC, cv::Size(2*(gC.cols-1),gC.rows), DFT_INVERSE| DFT_REAL_OUTPUT); // gives us the nice peak shift location...
Mat C;
gC.download(C);
//平移处理
private::fftShift(C); // shift the energy to the center of the frame.
// locate the highest peak
Point peakLoc;
cuda::minMaxLoc(C, NULL, NULL, NULL, &peakLoc);
// get the phase shift with sub-pixel accuracy, 5x5 window seems about right here...
Point2d t;
t = private::weightedCentroid(C, peakLoc, Size(5, 5), response);
// max response is M*N (not exactly, might be slightly larger due to rounding errors)
if (response)
*response /= M * N;
// adjust shift relative to image center...
Point2d center((double)padded1.cols / 2.0, (double)padded1.rows / 2.0);
return (center - t);
}
修改了两个私有的函数
namespace private
{
namespace cuda
{
//计算模长矩阵
C_G__ void cu_magSpectrums_kernel(cv::cuda::PtrStepSz<float2> cu_src, cv::cuda::PtrStepSz<float1> cu_dst)
{
unsigned int x = blockDim.x * blockIdx.x + threadIdx.x;
unsigned int y = blockDim.y * blockIdx.y + threadIdx.y;
if (x < cu_src.cols && y < cu_src.rows)
{
cu_dst(y, x).x = (float)std::sqrt(pow((double)cu_src(y,x).x,2) + pow((double)cu_src(y,x).y,2));
}
}
//复数矩阵逐元素除法,分母已经是只有实数了
C_G__ void cu_divSpectrums_kernel_1(cv::cuda::PtrStepSz<float2> cu_srcA, cv::cuda::PtrStepSz<float1> cu_srcB, cv::cuda::PtrStepSz<float2> cu_dst)
{
unsigned int x = blockDim.x * blockIdx.x + threadIdx.x;
unsigned int y = blockDim.y * blockIdx.y + threadIdx.y;
float eps = FLT_EPSILON; // prevent div0 problems
if (x < cu_srcA.cols && y < cu_srcB.rows)
{
cu_dst(y, x).x = (float)((double)cu_srcA(y, x).x / (double)(cu_srcB(y, x).x + eps));
cu_dst(y, x).y = (float)((double)cu_srcA(y, x).y / (double)(cu_srcB(y, x).x+eps));
}
}
//复数矩阵逐元素除法,分母也是复数
C_G__ void cu_divSpectrums_kernel_2(cv::cuda::PtrStepSz<float2> cu_srcA, cv::cuda::PtrStepSz<float2> cu_srcB, cv::cuda::PtrStepSz<float2> cu_dst, bool conjB)
{
unsigned int x = blockDim.x * blockIdx.x + threadIdx.x;
unsigned int y = blockDim.y * blockIdx.y + threadIdx.y;
float eps = FLT_EPSILON; // prevent div0 problems
if (x < cu_srcA.cols && y < cu_srcB.rows)
{
double denom = (double)(std::pow(cu_srcB(y, x).x, 2) + std::pow(cu_srcB(y, x).y, 2) + eps);
if (!conjB)
{
double real = (double)(cu_srcA(y, x).x*cu_srcB(y, x).x + cu_srcA(y, x).y*cu_srcB(y, x).y);//实部乘实部,虚部乘虚部,加起来是实部
double imag = (double)(cu_srcA(y, x).y*cu_srcB(y, x).x - cu_srcA(y, x).x*cu_srcB(y, x).y);
cu_dst(y, x).x = (float)(real / denom);
cu_dst(y, x).y = (float)(imag/denom);
}
else
{
double real = (double)(cu_srcA(y, x).x*cu_srcB(y, x).x - cu_srcA(y, x).y*cu_srcB(y, x).y);//实部乘实部,虚部乘虚部,加起来是实部
double imag = (double)(cu_srcA(y, x).y*cu_srcB(y, x).x + cu_srcA(y, x).x*cu_srcB(y, x).y);
cu_dst(y, x).x = (float)(real / denom);
cu_dst(y, x).y = (float)(imag / denom);
}
}
}
//计算模长
void magSpectrums(cv::cuda::GpuMat &_src, cv::cuda::GpuMat &_dst)
{
//检查
int nChannels = _src.channels();
CV_Assert(nChannels == 2);
CV_Assert(_src.cols != 0 && _src.rows != 0);
//设置
dim3 block(32, 32);//块大小
dim3 grid((_src.cols + block.x - 1) / block.x, (_src.rows + block.y - 1) / block.y);//块数量
//_dst = cv::cuda::GpuMat(_src.size(), CV_32FC(nChannels));
_dst = cv::cuda::GpuMat(_src.size(), CV_32FC1);
cu_magSpectrums_kernel << <grid, block >> > (_src, _dst);
return;
}
//复数矩阵逐元素除法
void divSpectrums(cv::cuda::GpuMat &_srcA, cv::cuda::GpuMat &_srcB, cv::cuda::GpuMat &_dst, int flags, bool conjB)
{
//检查
int nChannelsA = _srcA.channels();
int nChannelsB = _srcB.channels();
CV_Assert(_srcA.size() != cv::Size(0, 0));
CV_Assert(nChannelsA == 2);
CV_Assert(nChannelsB == 1 || nChannelsB == 2);
CV_Assert(_srcA.cols != 0 && _srcA.rows != 0);
//设置
dim3 block(32, 32);//块大小
dim3 grid((_srcA.cols + block.x - 1) / block.x, (_srcA.rows + block.y - 1) / block.y);//块数量
_dst = cv::cuda::GpuMat(_srcA.size(), CV_32FC(nChannelsA));
switch (nChannelsB)
{
case 1:
cu_divSpectrums_kernel_1 << <grid, block >> > (_srcA, _srcB, _dst);
break;
case 2:
cu_divSpectrums_kernel_2 << <grid, block >> > (_srcA, _srcB, _dst, conjB);
break;
default:
break;
}
return;
}
}
}