图像金字塔

2020-06-07 本文已影响0人原上的小木屋

使用差分金字塔提取高频成分DOG（Difference of Gaussian）

差分金字塔，DOG（Difference of Gaussian）金字塔是在高斯金字塔的基础上构建起来的，其实生成高斯金字塔的目的就是为了构建DOG金字塔。

import cv2
import numpy as np
import matplotlib.pyplot as plt#照例导入这三个库
# Grayscale
def BGR2GRAY(img):
    """Convert a BGR image to a single-channel grayscale image.

    Args:
        img: Array whose last axis holds the channels in B, G, R order.

    Returns:
        Array without the channel axis holding the weighted sum
        0.2126 * R + 0.7152 * G + 0.0722 * B.
    """
    blue = img[..., 0]
    green = img[..., 1]
    red = img[..., 2]
    return 0.2126 * red + 0.7152 * green + 0.0722 * blue
# Bi-Linear interpolation
def bl_interpolate(img, ax=1., ay=1.):
    """Resize an image with bilinear interpolation.

    Args:
        img: 2-D (H, W) or 3-D (H, W, C) array of pixel values.
        ax: Horizontal scale factor; the output width is int(ax * W).
        ay: Vertical scale factor; the output height is int(ay * H).

    Returns:
        The resized image as a uint8 array, values clipped to [0, 255].
    """
    H, W = img.shape[:2]
    # Size of the resized image (truncated, not rounded).
    aH = int(ay * H)
    aW = int(ax * W)
    # Row and column index of every output pixel, both shaped (aH, aW).
    # The row grid must be reshaped to (aH, aW); reshaping to (aW, -1) only
    # works for square outputs.
    y = np.arange(aH).repeat(aW).reshape(aH, aW)
    x = np.tile(np.arange(aW), (aH, 1))
    # Map output coordinates back into the source image.
    y = y / ay
    x = x / ax
    # Top-left neighbour, clamped so that the +1 neighbour stays in range.
    # The builtin int replaces np.int, which was removed in NumPy 1.24.
    ix = np.clip(np.floor(x).astype(int), 0, max(W - 2, 0))
    iy = np.clip(np.floor(y).astype(int), 0, max(H - 2, 0))
    # For a 1-pixel-wide/high source the +1 neighbour is the pixel itself.
    ix1 = np.minimum(ix + 1, W - 1)
    iy1 = np.minimum(iy + 1, H - 1)
    # Fractional offsets from the top-left neighbour. Because of the clamp
    # above these can exceed 1 at the far edge when upscaling, so the border
    # is linearly extrapolated and then clipped below.
    dx = x - ix
    dy = y - iy
    if img.ndim > 2:
        # Add a trailing axis so the weights broadcast over the channels.
        dx = dx[..., np.newaxis]
        dy = dy[..., np.newaxis]
    # Weighted sum of the four neighbours.
    out = ((1 - dx) * (1 - dy) * img[iy, ix]
           + dx * (1 - dy) * img[iy, ix1]
           + (1 - dx) * dy * img[iy1, ix]
           + dx * dy * img[iy1, ix1])
    out = np.clip(out, 0, 255)
    return out.astype(np.uint8)
# Read image
# Load the image and promote it to float so the grayscale weights are not
# applied to uint8 data. np.float was removed in NumPy 1.24; np.float64 is the
# type it used to alias.
img = cv2.imread("img11.png")
if img is None:
    # cv2.imread reports an unreadable or missing file by returning None.
    raise FileNotFoundError("img11.png")
img = img.astype(np.float64)
gray = BGR2GRAY(img)
# Shrink to half size, then enlarge back by a factor of two.
out = bl_interpolate(gray.astype(np.float32), ax=0.5, ay=0.5)
out = bl_interpolate(out, ax=2., ay=2.)
# Whatever the round trip lost is the high-frequency (edge) content.
# Both image dimensions must be even, otherwise the resized image is smaller
# than `gray` and this subtraction fails.
out = np.abs(out - gray)
# Stretch to [0, 255]; skip the division when the difference is all zero.
peak = out.max()
if peak > 0:
    out = out / peak * 255
out = out.astype(np.uint8)
# Show and save the result.
cv2.imshow("result", out)
cv2.waitKey(0)
cv2.imwrite("out.jpg", out)

高斯金字塔

高斯金字塔是用于生成不同分辨率图像的一种方法。其基本做法是利用插值法将图像依次缩小为原图的 1/2、1/4、1/8……；之后还可以再利用插值将缩小后的图像放大回原来的大小并与原图做减法，这样一来，图像的边缘（高频）部分就被提取出来了。高斯金字塔的思想也被用于提高图像清晰度的超分辨率成像（Super-Resolution）深度学习方法。

import cv2
import numpy as np
import matplotlib.pyplot as plt
# Grayscale
def BGR2GRAY(img):
    """Collapse a BGR image into one grayscale channel.

    Args:
        img: Array whose last axis stores the channels as B, G, R.

    Returns:
        The weighted channel sum 0.2126 * R + 0.7152 * G + 0.0722 * B,
        with the channel axis removed.
    """
    blue, green, red = img[..., 0], img[..., 1], img[..., 2]
    weighted = 0.2126 * red + 0.7152 * green + 0.0722 * blue
    return weighted
# Bi-Linear interpolation
def bl_interpolate(img, ax=1., ay=1.):
    """Resize an image with bilinear interpolation.

    Args:
        img: 2-D (H, W) or 3-D (H, W, C) array of pixel values.
        ax: Horizontal scale factor; the output width is int(ax * W).
        ay: Vertical scale factor; the output height is int(ay * H).

    Returns:
        The resized image as a uint8 array, values clipped to [0, 255].
    """
    H, W = img.shape[:2]
    # Size of the resized image (truncated, not rounded).
    aH = int(ay * H)
    aW = int(ax * W)
    # Row and column index of every output pixel, both shaped (aH, aW).
    # The row grid must be reshaped to (aH, aW); reshaping to (aW, -1) only
    # works for square outputs.
    y = np.arange(aH).repeat(aW).reshape(aH, aW)
    x = np.tile(np.arange(aW), (aH, 1))
    # Map output coordinates back into the source image.
    y = y / ay
    x = x / ax
    # Top-left neighbour, clamped so that the +1 neighbour stays in range.
    # The builtin int replaces np.int, which was removed in NumPy 1.24.
    ix = np.clip(np.floor(x).astype(int), 0, max(W - 2, 0))
    iy = np.clip(np.floor(y).astype(int), 0, max(H - 2, 0))
    # For a 1-pixel-wide/high source the +1 neighbour is the pixel itself.
    ix1 = np.minimum(ix + 1, W - 1)
    iy1 = np.minimum(iy + 1, H - 1)
    # Fractional offsets from the top-left neighbour. Because of the clamp
    # above these can exceed 1 at the far edge when upscaling, so the border
    # is linearly extrapolated and then clipped below.
    dx = x - ix
    dy = y - iy
    if img.ndim > 2:
        # Add a trailing axis so the weights broadcast over the channels.
        dx = dx[..., np.newaxis]
        dy = dy[..., np.newaxis]
    # Weighted sum of the four neighbours.
    out = ((1 - dx) * (1 - dy) * img[iy, ix]
           + dx * (1 - dy) * img[iy, ix1]
           + (1 - dx) * dy * img[iy1, ix]
           + dx * dy * img[iy1, ix1])
    out = np.clip(out, 0, 255)
    return out.astype(np.uint8)
# make image pyramid制作图像金字塔
def make_pyramid(gray):
    """Build a six-level image pyramid from a grayscale image.

    Args:
        gray: Grayscale image; it becomes level 0 unchanged.

    Returns:
        A list of six images: `gray` followed by `gray` resized with
        bl_interpolate by 1/2, 1/4, 1/8, 1/16 and 1/32.
    """
    # Five reduced resolutions plus the original make six images in total.
    factors = [2. ** level for level in range(1, 6)]
    reduced = [bl_interpolate(gray, ax=1. / f, ay=1. / f) for f in factors]
    return [gray] + reduced
# Read image
# Load the image and promote it to float for the grayscale conversion.
# np.float was removed in NumPy 1.24; np.float64 is the type it used to alias.
img = cv2.imread("img11.png")
if img is None:
    # cv2.imread reports an unreadable or missing file by returning None.
    raise FileNotFoundError("img11.png")
img = img.astype(np.float64)
gray = BGR2GRAY(img)
# Build the pyramid, then save and plot each level side by side.
pyramid = make_pyramid(gray)
for i, level in enumerate(pyramid):
    # The file name carries the reduction factor of the level (1, 2, 4, ...).
    cv2.imwrite("out_{}.jpg".format(2 ** i), level.astype(np.uint8))
    plt.subplot(1, len(pyramid), i + 1)
    plt.imshow(level, cmap='gray')
    plt.axis('off')
    plt.xticks(color="None")
    plt.yticks(color="None")
plt.show()

显著图

显著图是将一幅图像中容易吸引人眼注意的部分突出表现出来的图像。
虽然现在通常使用深度学习的方法计算显著图，但是一开始人们是用图像的RGB成分或者HSV成分创建高斯金字塔，并通过求差来得到显著图的。
算法原理
我们使用双线性插值将图像分别缩小至 $\frac{1}{2}$ 、 $\frac{1}{4}$ 、 $\frac{1}{8}$ 、 $\frac{1}{16}$ 、 $\frac{1}{32}$ ，再用双线性插值放大回原来的尺寸，连同原图一共得到6层。
将得到的金字塔（我们将金字塔的各层分别编号为0,1,2,3,4,5，其中0为原图）中选取若干对层求差的绝对值，下文代码使用的是 (0,1)、(0,3)、(0,5)、(1,4)、(2,3)、(3,5) 这六对。
将第2步中求得的差分全部相加，并正规化至[0,255]。
需要注意的是，图像的宽和高最好都是32（即 $2^5$ ）的整数倍，因为图像最多要缩小到 $\frac{1}{32}$ 再放大32倍，而缩放后的尺寸会向下取整，否则变换前后的尺寸不一致，无法求差。

import cv2
import numpy as np
import matplotlib.pyplot as plt
# Grayscale
def BGR2GRAY(img):
    """Reduce a BGR image to grayscale using fixed channel weights.

    Args:
        img: Array whose last axis stores the channels as B, G, R.

    Returns:
        Array without the channel axis, computed as
        0.2126 * R + 0.7152 * G + 0.0722 * B.
    """
    red_part = 0.2126 * img[..., 2]
    green_part = 0.7152 * img[..., 1]
    blue_part = 0.0722 * img[..., 0]
    return red_part + green_part + blue_part
# Bi-Linear interpolation
def bl_interpolate(img, ax=1., ay=1.):
    """Resize an image with bilinear interpolation.

    Args:
        img: 2-D (H, W) or 3-D (H, W, C) array of pixel values.
        ax: Horizontal scale factor; the output width is int(ax * W).
        ay: Vertical scale factor; the output height is int(ay * H).

    Returns:
        The resized image as a uint8 array, values clipped to [0, 255].
    """
    H, W = img.shape[:2]
    # Size of the resized image (truncated, not rounded).
    aH = int(ay * H)
    aW = int(ax * W)
    # Row and column index of every output pixel, both shaped (aH, aW).
    # The row grid must be reshaped to (aH, aW); reshaping to (aW, -1) only
    # works for square outputs.
    y = np.arange(aH).repeat(aW).reshape(aH, aW)
    x = np.tile(np.arange(aW), (aH, 1))
    # Map output coordinates back into the source image.
    y = y / ay
    x = x / ax
    # Top-left neighbour, clamped so that the +1 neighbour stays in range.
    # The builtin int replaces np.int, which was removed in NumPy 1.24.
    ix = np.clip(np.floor(x).astype(int), 0, max(W - 2, 0))
    iy = np.clip(np.floor(y).astype(int), 0, max(H - 2, 0))
    # For a 1-pixel-wide/high source the +1 neighbour is the pixel itself.
    ix1 = np.minimum(ix + 1, W - 1)
    iy1 = np.minimum(iy + 1, H - 1)
    # Fractional offsets from the top-left neighbour. Because of the clamp
    # above these can exceed 1 at the far edge when upscaling, so the border
    # is linearly extrapolated and then clipped below.
    dx = x - ix
    dy = y - iy
    if img.ndim > 2:
        # Add a trailing axis so the weights broadcast over the channels.
        dx = dx[..., np.newaxis]
        dy = dy[..., np.newaxis]
    # Weighted sum of the four neighbours.
    out = ((1 - dx) * (1 - dy) * img[iy, ix]
           + dx * (1 - dy) * img[iy, ix1]
           + (1 - dx) * dy * img[iy1, ix]
           + dx * dy * img[iy1, ix1])
    out = np.clip(out, 0, 255)
    return out.astype(np.uint8)
# make image pyramid
def make_pyramid(gray):
    """Build six progressively blurrier versions of a grayscale image.

    Level 0 is `gray` itself. Each further level is `gray` shrunk with
    bl_interpolate by 1/2, 1/4, 1/8, 1/16 or 1/32 and then enlarged again by
    the inverse factor, stored as float32.

    Args:
        gray: Grayscale image.

    Returns:
        A list of six images, from sharpest to blurriest.
    """
    def shrink_and_restore(factor):
        # Down-scale, then up-scale by the same factor to discard detail.
        small = bl_interpolate(gray, ax=1. / factor, ay=1. / factor)
        restored = bl_interpolate(small, ax=factor, ay=factor)
        return restored.astype(np.float32)

    return [gray] + [shrink_and_restore(2. ** level) for level in range(1, 6)]
# make saliency map
def saliency_map(pyramid):
    """Combine pyramid level differences into a saliency map.

    Args:
        pyramid: Sequence of at least six equally shaped 2-D images.

    Returns:
        A float32 array with the shape of pyramid[0]: the sum of absolute
        differences of selected level pairs, scaled so its maximum is 255.
        If all differences are zero the map is returned as all zeros.
    """
    H, W = pyramid[0].shape
    out = np.zeros((H, W), dtype=np.float32)
    # Pairs of pyramid levels whose absolute differences are accumulated.
    level_pairs = ((0, 1), (0, 3), (0, 5), (1, 4), (2, 3), (3, 5))
    for first, second in level_pairs:
        out += np.abs(pyramid[first] - pyramid[second])
    # Scale to [0, 255]. Dividing by a zero maximum would fill the map with
    # NaN, so leave an all-zero (or empty) map untouched.
    peak = out.max() if out.size else 0
    if peak > 0:
        out = out / peak * 255
    return out
# Read image
# Load the image and promote it to float for the grayscale conversion.
# np.float was removed in NumPy 1.24; np.float64 is the type it used to alias.
img = cv2.imread("img11.png")
if img is None:
    # cv2.imread reports an unreadable or missing file by returning None.
    raise FileNotFoundError("img11.png")
img = img.astype(np.float64)
# Grayscale
gray = BGR2GRAY(img)
# Pyramid of progressively blurrier copies of the grayscale image.
pyramid = make_pyramid(gray)
# Pyramid -> saliency map, converted to uint8 for display and saving.
out = saliency_map(pyramid)
out = out.astype(np.uint8)
# Show and save the result.
cv2.imshow("result", out)
cv2.waitKey(0)
cv2.imwrite("out.jpg", out)

图像金字塔

使用差分金字塔提取高频成分DOG（Difference of Gaussian）

高斯金字塔

显著图

猜你喜欢

热点阅读