OpenCV源码分析(三):高斯模糊
注:为方便分析和解释,文章中的代码是OpenCV源码的简化版,会略去源码中一些如断言,OpenCL,硬件加速等代码。因此详情最好参看源码。
先放一张高斯模糊的效果图,原图750x1024,高斯核大小为5x5,sigma为5.
高斯滤波原理可参看其他资料,本文不再赘述。
简而言之,高斯滤波是另一种平滑滤波,它和盒式滤波的不同在于,它是一种加权平均,离中心像素越近,权重越大,因此它比盒式滤波效果更好,但也更耗时,权重的值符合正态分布:
OpenCV的高斯模糊接口定义为:
void cv::GaussianBlur( InputArray _src,
OutputArray _dst,
Size ksize,
double sigma1,
double sigma2,
int borderType )
其中_src和_dst分别为输入图像和输出图像数据,为Mat类型;ksize为核(也叫滤波窗口,模板)的大小;sigma1和sigma2分别为横轴和纵轴的的值;borderType为边界扩展类型。
接下来看下GaussianBlur函数的实现:
void cv::GaussianBlur( InputArray _src, OutputArray _dst, Size ksize,
double sigma1, double sigma2,
int borderType )
{
int type = _src.type();
Size size = _src.size();
_dst.create( size, type ); //根据src的大小和类型创建
//源码中这里会判断是否有OpenCL,若有则使用OpenCL的接口执行,在此省略了
int sdepth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
//源码中此处会使用并行方式执行高斯滤波,并返回。本文为分析方便,将它略去了...
Mat kx, ky;
//生成高斯核
createGaussianKernels(kx, ky, type, ksize, sigma1, sigma2);
Mat src = _src.getMat();
Mat dst = _dst.getMat();
Point ofs;
Size wsz(src.cols, src.rows);
if(!(borderType & BORDER_ISOLATED))
src.locateROI( wsz, ofs );
//使用可分滤波执行
sepFilter2D(src, dst, sdepth, kx, ky, Point(-1, -1), 0, borderType);
}
createGaussianKernels函数的实现为:
template <typename T>
static void createGaussianKernels( T & kx, T & ky, int type, Size &ksize,
double sigma1, double sigma2 )
{
int depth = CV_MAT_DEPTH(type);
if( sigma2 <= 0 )
sigma2 = sigma1;
// automatic detection of kernel size from sigma
if( ksize.width <= 0 && sigma1 > 0 ) //当输入核的宽小于0时的处理
ksize.width = cvRound(sigma1*(depth == CV_8U ? 3 : 4)*2 + 1)|1;
if( ksize.height <= 0 && sigma2 > 0 ) //同上
ksize.height = cvRound(sigma2*(depth == CV_8U ? 3 : 4)*2 + 1)|1;
sigma1 = std::max( sigma1, 0. ); //sigma1要大于0
sigma2 = std::max( sigma2, 0. ); //同上
getGaussianKernel( ksize.width, sigma1, std::max(depth, CV_32F), kx );
if( ksize.height == ksize.width && std::abs(sigma1 - sigma2) < DBL_EPSILON )
ky = kx; //如果参数一样,则直接拷贝x轴的和给y轴
else //否则另外生成y轴的核
getGaussianKernel( ksize.height, sigma2, std::max(depth, CV_32F), ky );
}
getGaussianKernel函数如下:
static void getGaussianKernel(int n, double sigma, int ktype, Mat& res) { res = getGaussianKernel(n, sigma, ktype); }
cv::Mat cv::getGaussianKernel( int n, double sigma, int ktype )
{
//这个高斯核的一个近似的表
const int SMALL_GAUSSIAN_SIZE = 7;
static const float small_gaussian_tab[][SMALL_GAUSSIAN_SIZE] =
{
{1.f},
{0.25f, 0.5f, 0.25f},
{0.0625f, 0.25f, 0.375f, 0.25f, 0.0625f},
{0.03125f, 0.109375f, 0.21875f, 0.28125f, 0.21875f, 0.109375f, 0.03125f}
};
//当sigma小于或等于0,且核的大小为奇数,且小于7时,用上面的表的数据构造核
const float* fixed_kernel = n % 2 == 1 && n <= SMALL_GAUSSIAN_SIZE && sigma <= 0 ?
small_gaussian_tab[n>>1] : 0;
//核内的数据必须是浮点型
CV_Assert( ktype == CV_32F || ktype == CV_64F );
Mat kernel(n, 1, ktype);
float* cf = kernel.ptr<float>();
double* cd = kernel.ptr<double>();
//若sigma小于等于0,则用公式((n-1)*0.5 - 1)*0.3 + 0.8近似
double sigmaX = sigma > 0 ? sigma : ((n-1)*0.5 - 1)*0.3 + 0.8;
double scale2X = -0.5/(sigmaX*sigmaX);
double sum = 0;
int i;
for( i = 0; i < n; i++ )
{
double x = i - (n-1)*0.5;
//用查表或正态分布公式求值
double t = fixed_kernel ? (double)fixed_kernel[i] : std::exp(scale2X*x*x);
if( ktype == CV_32F ) //若是32位浮点数
{
cf[i] = (float)t;
sum += cf[i];
}
else //否则是64位浮点数
{
cd[i] = t;
sum += cd[i];
}
}
sum = 1./sum;
for( i = 0; i < n; i++ ) //最后还要除以他们的和
{
if( ktype == CV_32F )
cf[i] = (float)(cf[i]*sum);
else
cd[i] *= sum;
}
return kernel;
}
假如我们输入的ksize==5,sigma==5的话,我们生成的核的5个值为0.19205、0.203926、0.2.80、0.203926、0.19205,可见它是对称的。
分别生成了x轴和y轴的kernel后,将调用sepFilter2D,sepFilter2D是可分滤波器,它的意思是,有些二维滤波器可以用两个一维滤波器构造,如f2(x,y)是一个二维滤波器,它等于f1在x、y处值的积:f2(x,y) = f1(x)f1(y)。可分滤波器比直接用二维滤波器更有效率,原理可参见《Fundamentals of Computer Graphics》第9.3节。高斯滤波器和盒式滤波器都是可分滤波器,sepFilter2D函数如下:
void cv::sepFilter2D( InputArray _src, OutputArray _dst, int ddepth,
InputArray _kernelX, InputArray _kernelY, Point anchor,
double delta, int borderType )
{
Mat src = _src.getMat(), kernelX = _kernelX.getMat(), kernelY = _kernelY.getMat();
if( ddepth < 0 )
ddepth = src.depth();
_dst.create( src.size(), CV_MAKETYPE(ddepth, src.channels()) );
Mat dst = _dst.getMat();
Point ofs;
Size wsz(src.cols, src.rows);
if( (borderType & BORDER_ISOLATED) == 0 )
src.locateROI( wsz, ofs );
//isContinuous应该表示Mat内的内存是否连续,若连续则直接执行赋值运算符,否则使用clone,具体详情尚未验证
Mat contKernelX = kernelX.isContinuous() ? kernelX : kernelX.clone();
Mat contKernelY = kernelY.isContinuous() ? kernelY : kernelY.clone();
hal::sepFilter2D(src.type(), dst.type(), kernelX.type(),
src.data, src.step, dst.data, dst.step,
dst.cols, dst.rows, wsz.width, wsz.height, ofs.x, ofs.y,
contKernelX.data, kernelX.cols + kernelX.rows - 1,
contKernelY.data, kernelY.cols + kernelY.rows - 1,
anchor.x, anchor.y, delta, borderType & ~BORDER_ISOLATED);
}
继续看hal::sepFilter2D,
void sepFilter2D(int stype, int dtype, int ktype,
uchar* src_data, size_t src_step, uchar* dst_data, size_t dst_step,
int width, int height, int full_width, int full_height,
int offset_x, int offset_y,
uchar * kernelx_data, int kernelx_len,
uchar * kernely_data, int kernely_len,
int anchor_x, int anchor_y, double delta, int borderType)
{
bool res = replacementSepFilter(stype, dtype, ktype,
src_data, src_step, dst_data, dst_step,
width, height, full_width, full_height,
offset_x, offset_y,
kernelx_data, kernelx_len,
kernely_data, kernely_len,
anchor_x, anchor_y, delta, borderType);
if (res) //我也不知道是啥,反正为false
return;
//真正调用这个
ocvSepFilter(stype, dtype, ktype,
src_data, src_step, dst_data, dst_step,
width, height, full_width, full_height,
offset_x, offset_y,
kernelx_data, kernelx_len,
kernely_data, kernely_len,
anchor_x, anchor_y, delta, borderType);
}
ocvSepFilter如下
static void ocvSepFilter(int stype, int dtype, int ktype,
uchar* src_data, size_t src_step, uchar* dst_data, size_t dst_step,
int width, int height, int full_width, int full_height,
int offset_x, int offset_y,
uchar * kernelx_data, int kernelx_len,
uchar * kernely_data, int kernely_len,
int anchor_x, int anchor_y, double delta, int borderType)
{
Mat kernelX(Size(kernelx_len, 1), ktype, kernelx_data);
Mat kernelY(Size(kernely_len, 1), ktype, kernely_data);
Ptr<FilterEngine> f = createSeparableLinearFilter(stype, dtype, kernelX, kernelY,
Point(anchor_x, anchor_y),
delta, borderType & ~BORDER_ISOLATED);
Mat src(Size(width, height), stype, src_data, src_step);
Mat dst(Size(width, height), dtype, dst_data, dst_step);
f->apply(src, dst, Size(full_width, full_height), Point(offset_x, offset_y));
};
从代码中我们看到,和盒式滤波器一样,它也是创建FilterEngine,并调用apply来执行的。createSeparableLinearFilter如下:
cv::Ptr<cv::FilterEngine> cv::createSeparableLinearFilter(
int _srcType, int _dstType,
InputArray __rowKernel, InputArray __columnKernel,
Point _anchor, double _delta,
int _rowBorderType, int _columnBorderType,
const Scalar& _borderValue )
{
Mat _rowKernel = __rowKernel.getMat(), _columnKernel = __columnKernel.getMat();
_srcType = CV_MAT_TYPE(_srcType); //_srcType为16(CV_8UC3),即元素为1uchar类型,3通道
_dstType = CV_MAT_TYPE(_dstType); //同上
int sdepth = CV_MAT_DEPTH(_srcType), ddepth = CV_MAT_DEPTH(_dstType);
int cn = CV_MAT_CN(_srcType);
CV_Assert( cn == CV_MAT_CN(_dstType) );
int rsize = _rowKernel.rows + _rowKernel.cols - 1;
int csize = _columnKernel.rows + _columnKernel.cols - 1;
if( _anchor.x < 0 ) //若anchor小于0则设在核的中心
_anchor.x = rsize/2;
if( _anchor.y < 0 )
_anchor.y = csize/2;
//rtype和ctype都是5(CV_32F),即核的元素类型是32位浮点型
int rtype = getKernelType(_rowKernel,
_rowKernel.rows == 1 ? Point(_anchor.x, 0) : Point(0, _anchor.x));
int ctype = getKernelType(_columnKernel,
_columnKernel.rows == 1 ? Point(_anchor.y, 0) : Point(0, _anchor.y));
Mat rowKernel, columnKernel;
//精度至少也要是32位浮点型
int bdepth = std::max(CV_32F,std::max(sdepth, ddepth));
int bits = 0;
if( sdepth == CV_8U &&
((rtype == KERNEL_SMOOTH+KERNEL_SYMMETRICAL &&
ctype == KERNEL_SMOOTH+KERNEL_SYMMETRICAL &&
ddepth == CV_8U) ||
((rtype & (KERNEL_SYMMETRICAL+KERNEL_ASYMMETRICAL)) &&
(ctype & (KERNEL_SYMMETRICAL+KERNEL_ASYMMETRICAL)) &&
(rtype & ctype & KERNEL_INTEGER) &&
ddepth == CV_16S)) ) //判断为真时,会把核转换成32位整型(并放大256倍),这么做的原因还不太清楚,猜测可能是整型比浮点型计算快吧...
{
bdepth = CV_32S;
bits = ddepth == CV_8U ? 8 : 0;
_rowKernel.convertTo( rowKernel, CV_32S, 1 << bits );
_columnKernel.convertTo( columnKernel, CV_32S, 1 << bits );
bits *= 2;
_delta *= (1 << bits);
}
else
{
if( _rowKernel.type() != bdepth )
_rowKernel.convertTo( rowKernel, bdepth );
else
rowKernel = _rowKernel;
if( _columnKernel.type() != bdepth )
_columnKernel.convertTo( columnKernel, bdepth );
else
columnKernel = _columnKernel;
}
int _bufType = CV_MAKETYPE(bdepth, cn);
//实际上是SymmRowSmallFilter或RowFilter类型
Ptr<BaseRowFilter> _rowFilter = getLinearRowFilter(
_srcType, _bufType, rowKernel, _anchor.x, rtype);
//实际上是SymmColumnFilter或SymmColumnSmallFilter类型
Ptr<BaseColumnFilter> _columnFilter = getLinearColumnFilter(
_bufType, _dstType, columnKernel, _anchor.y, ctype, _delta, bits );
//创建FilterEngine
return Ptr<FilterEngine>( new FilterEngine(Ptr<BaseFilter>(), _rowFilter, _columnFilter,
_srcType, _dstType, _bufType, _rowBorderType, _columnBorderType, _borderValue ));
}
再回到ocvSepFilter函数,它调用FilterEngine的apply开始执行,它和盒式滤波器是一样的,如下:
int FilterEngine::proceed(unsigned char* src, int srcstep, int count,
unsigned char* dst, int dststep)
{
const int *btab = &_borderTab[0];
int esz = 3, btab_esz = _borderElemSize;
unsigned char** brows = &_rows[0];
int bufRows = (int)_rows.size();
int cn = 3;
int width = _roi._width, kwidth = _ksize._width;
int kheight = _ksize._height, ay = _anchor._y;
int dx1 = _dx1, dx2 = _dx2;
int width1 = _roi._width + kwidth - 1;
int xofs1 = std::min(_roi._x, _anchor._x);
bool isSep = isSeparable();
bool makeBorder = (dx1 > 0 || dx2 > 0) && _rowBorderType != BORDER_CONSTANT;
int dy = 0, i = 0;
src -= xofs1*esz;
count = std::min(count, remainingInputRows());
for(;; dst += dststep*i, dy += i)
{
int dcount = bufRows - ay - _startY - _rowCount + _roi._y;
dcount = dcount > 0 ? dcount : bufRows - kheight + 1;
dcount = std::min(dcount, count);
count -= dcount;
for( ; dcount-- > 0; src += srcstep )
{
int bi = (_startY - _startY0 + _rowCount) % bufRows;
unsigned char* brow = alignPtr(&_ringBuf[0], VEC_ALIGN) + bi*_bufStep; //用来存储dst数据
unsigned char* row = isSep ? &_srcRow[0] : brow; //用来存储src数据
if( ++_rowCount > bufRows )
{
--_rowCount;
++_startY;
}
//注意row+dx1*esz指向的数据才是src的数据,row[0]的数据是左边界扩展而来的数据
memcpy( row + dx1*esz, src, (width1 - dx2 - dx1)*esz );
if( makeBorder ) //是否需要补边
{
if( btab_esz*(int)sizeof(int) == esz )
{
const int* isrc = (const int*)src;
int* irow = (int*)row;
for( i = 0; i < dx1*btab_esz; i++ )
irow[i] = isrc[btab[i]];
for( i = 0; i < dx2*btab_esz; i++ )
irow[i + (width1 - dx2)*btab_esz] = isrc[btab[i+dx1*btab_esz]];
}
else
{
for( i = 0; i < dx1*esz; i++ ) //扩展左边界的数据
row[i] = src[btab[i]];
for( i = 0; i < dx2*esz; i++ ) //扩展右边界的数据
row[i + (width1 - dx2)*esz] = src[btab[i+dx1*esz]];
}
}
if( isSep ) {
(*_rowFilter)(row, brow, width, 3); //对一行像素row进行滤波,结果存到brow(_ringBuf)中
}
}
int max_i = std::min(bufRows, _roi._height - (_dstY + dy) + (kheight - 1));
for( i = 0; i < max_i; i++ )
{ //扩展上边界
int srcY = borderInterpolate(_dstY + dy + i + _roi._y - ay,
_wholeSize._height, _columnBorderType);
if( srcY < 0 ) // can happen only with constant border type
brows[i] = alignPtr(&_constBorderRow[0], VEC_ALIGN);
else
{
if( srcY >= _startY + _rowCount )
break;
int bi = (srcY - _startY0) % bufRows;
brows[i] = alignPtr(&_ringBuf[0], VEC_ALIGN) + bi*_bufStep; //注意brows[0]到brows[1]是补边得到的
}
}
if( i < kheight )
break;
i -= kheight - 1;
if( isSeparable() )
(*_columnFilter)((const unsigned char**)brows, dst, dststep, i, _roi._width*cn); //纵向滤波,输出到dst(也是最终输出)
else
(*_filter2D)((const unsigned char**)brows, dst, dststep, i, _roi._width, cn);
}
_dstY += dy;
return dy;
}
和之前盒式滤波器唯一的区别是 (_rowFilter)(row, brow, width, 3)和(_columnFilter)((const unsigned char*)brows, dst, dststep, i, _roi._widthcn)两个滤波的实现不一样。假如我们使用的参数ksize==5,sigma1和sigma2都等于5,则_rowFilter实际上是SymmRowSmallFilter类型,_columnFilter是SymmColumnFilter类型,先看横向滤波SymmColumnSmallFilter的()操作符的实现:
void operator()(const uchar* src, uchar* dst, int width, int cn)
{
//DT,ST都是模板类型,ST是输入数据类型(也就是图像数据类型),本例中为uchar,DT是输出数据类型,为int
int ksize2 = this->ksize/2, ksize2n = ksize2*cn;
const DT* kx = this->kernel.template ptr<DT>() + ksize2;
bool symmetrical = (this->symmetryType & KERNEL_SYMMETRICAL) != 0;
DT* D = (DT*)dst;
//vecOp是仿函数,调用的是SymmRowSmallVec_8u32s类的()操作符,它实际上就是把src的char数据转换为int类型的dst数据
int i = this->vecOp(src, dst, width, cn), j, k; //返回的i是src被移动的字节数
const ST* S = (const ST*)src + i + ksize2n; //src要加上i才是原来的src,加上ksize2n是因为src开始有补边数据
width *= cn;
if( symmetrical )
{
if( this->ksize == 1 && kx[0] == 1 )
{
//省略...
}
else if( this->ksize == 3 )
{
//省略...
}
else if( this->ksize == 5 ) //假设ksize是5
{
DT k0 = kx[0], k1 = kx[1], k2 = kx[2];
if( k0 == -2 && k1 == 0 && k2 == 1 )
for( ; i <= width - 2; i += 2, S += 2 )
{
DT s0 = -2*S[0] + S[-cn*2] + S[cn*2];
DT s1 = -2*S[1] + S[1-cn*2] + S[1+cn*2];
D[i] = s0; D[i+1] = s1;
}
else
for( ; i <= width - 2; i += 2, S += 2 ) //一般跳到这里
{ //每次处理两组数据,从左到右滑动窗口,加权就和,放到s0和s1中
DT s0 = S[0]*k0 + (S[-cn] + S[cn])*k1 + (S[-cn*2] + S[cn*2])*k2;
DT s1 = S[1]*k0 + (S[1-cn] + S[1+cn])*k1 + (S[1-cn*2] + S[1+cn*2])*k2;
D[i] = s0; D[i+1] = s1;
}
}
for( ; i < width; i++, S++ )
{
DT s0 = kx[0]*S[0];
for( k = 1, j = cn; k <= ksize2; k++, j += cn )
s0 += kx[k]*(S[j] + S[-j]);
D[i] = s0;
}
}
else
{
//省略...
}
}
int symmetryType;
};
接下来看SymmColumnFilter的处理,
void operator()(const uchar** src, uchar* dst, int dststep, int count, int width)
{
int ksize2 = this->ksize/2;
const ST* ky = this->kernel.template ptr<ST>() + ksize2;
int i, k;
bool symmetrical = (symmetryType & KERNEL_SYMMETRICAL) != 0;
ST _delta = this->delta;
CastOp castOp = this->castOp0;
src += ksize2; //跳过补边数据
if( symmetrical )
{
for( ; count--; dst += dststep, src++ )
{
DT* D = (DT*)dst;
i = (this->vecOp)(src, dst, width);
#if CV_ENABLE_UNROLLED
//省略...
#endif
for( ; i < width; i++ )
{ //纵向加权求和
ST s0 = ky[0]*((const ST*)src[0])[i] + _delta;
for( k = 1; k <= ksize2; k++ )
s0 += ky[k]*(((const ST*)src[k])[i] + ((const ST*)src[-k])[i]);
D[i] = castOp(s0); //防溢出
}
}
}
else
{
//省略...
}
}