python日常学习

【平均、感知、差异】哈希算法 +余弦+直方图距离筛选相似帧

2021-10-07  本文已影响0人  小小杨树

一.构造【平均、感知、差异】哈希算法 +余弦+直方图距离 特征获取方式

1.计算平均哈希算法相似度(ahash)
# 正则化图像
def regularizeImage(img, size = (8, 8)):
    """Normalise an image for average hashing.

    Args:
        img: Image object exposing ``resize`` (whose result exposes
            ``convert``), e.g. a PIL image.
        size: Target ``(width, height)`` passed to ``resize``.

    Returns:
        The resized image converted to mode ``'L'`` (greyscale).
    """
    shrunk = img.resize(size)
    return shrunk.convert('L')

# 计算hash值
def getHashCode(img, size = (8, 8)):
    """Compute the average-hash bit list of an image.

    Pixels are read with ``img.getpixel((i, j))`` for ``i`` in
    ``range(size[0])`` (outer loop) and ``j`` in ``range(size[1])``
    (inner loop). Each pixel becomes 1 when it is strictly greater than
    the mean of all sampled pixels, otherwise 0.

    Args:
        img: Image object exposing ``getpixel``; pixel values must be
            numeric (e.g. a mode ``'L'`` PIL image).
        size: Extent of the sampled area.

    Returns:
        A list of ``size[0] * size[1]`` integers, each 0 or 1.
    """
    samples = [
        img.getpixel((i, j))
        for i in range(size[0])
        for j in range(size[1])
    ]
    threshold = sum(samples) / len(samples)
    return [1 if value > threshold else 0 for value in samples]

# 比较hash值
def compHashCode(hc1, hc2):
    """Count the positions at which two hash codes hold equal values.

    The codes are walked in lockstep with ``zip``, so comparison stops at
    the end of the shorter one.

    Returns:
        The number of matching positions as an int.
    """
    return sum(1 for left, right in zip(hc1, hc2) if left == right)

# 计算平均哈希算法相似度
def calaHashSimilarity(img1, img2):
    """Return the average-hash similarity of two images.

    Both images are normalised with ``regularizeImage``, hashed with
    ``getHashCode`` and compared with ``compHashCode``.

    Returns:
        The number of hash positions on which the two images agree.
    """
    small1, small2 = regularizeImage(img1), regularizeImage(img2)
    code1, code2 = getHashCode(small1), getHashCode(small2)
    return compHashCode(code1, code2)
2.感知哈希计算(phash)
import math
import unittest

# 正则化图像
def regularizeImage(img, size = (32, 32)):
    """Normalise an image for perceptual hashing.

    Args:
        img: Image object exposing ``resize`` (whose result exposes
            ``convert``), e.g. a PIL image.
        size: Target ``(width, height)`` passed to ``resize``.

    Returns:
        The resized image converted to mode ``'L'`` (greyscale).
    """
    resized = img.resize(size)
    grey = resized.convert('L')
    return grey

# 获得图像像素矩阵
def getMatrix(img):
    """Read an image into a row-major list-of-lists pixel matrix.

    Args:
        img: Image object exposing ``size`` (indexed as ``size[0]`` columns
            and ``size[1]`` rows) and ``getpixel((x, y))``.

    Returns:
        A list with ``size[1]`` rows; each row holds ``size[0]`` pixel
        values, so ``result[y][x] == img.getpixel((x, y))``.
    """
    dims = img.size
    return [
        [img.getpixel((x, y)) for x in range(dims[0])]
        for y in range(dims[1])
    ]

# 计算系数矩阵
def getCoefficient(length):
    """Build the ``length`` x ``length`` DCT coefficient matrix.

    Row 0 is filled with ``1 / sqrt(length)``. For ``i >= 1``, entry
    ``[i][j]`` is ``sqrt(2 / length) * cos(i * pi * (j + 0.5) / length)``.

    Args:
        length: Side length of the square matrix.

    Returns:
        The coefficient matrix as a list of lists of floats.
    """
    dc_term = 1.0 / math.sqrt(length)
    rows = [[dc_term for _ in range(length)]]
    rows.extend(
        [
            math.sqrt(2.0 / length) * math.cos(i * math.pi * (j + 0.5) / length)
            for j in range(0, length)
        ]
        for i in range(1, length)
    )
    return rows

# 计算矩阵转置
def getTranspose(matrix):
    """Return the transpose of a list-of-lists matrix.

    The previous implementation indexed ``matrix[j][i]`` while looping over
    the source's own row/column ranges, so it only worked for square
    matrices and raised ``IndexError`` otherwise. This version handles any
    rectangular matrix; results for square input are unchanged.

    Args:
        matrix: Sequence of equally long rows.

    Returns:
        A new list of lists where ``result[i][j] == matrix[j][i]``.
        An empty matrix yields an empty list.
    """
    # zip(*rows) yields the columns of the input, i.e. the rows of the transpose.
    return [list(column) for column in zip(*matrix)]

# 计算矩阵乘法
def getMultiply(matrix1, matrix2):
    """Multiply two list-of-lists matrices (``matrix1 @ matrix2``).

    The previous implementation took the output column count from
    ``matrix2[i]`` (the row matching the current row of ``matrix1``), which
    raised ``IndexError`` whenever ``matrix1`` had more rows than
    ``matrix2``. The column count now comes from the first row of
    ``matrix2``, so any pair of compatible rectangular matrices works.

    Args:
        matrix1: Left operand, ``m`` rows of ``k`` numbers.
        matrix2: Right operand, ``k`` rows of ``n`` numbers.

    Returns:
        An ``m`` x ``n`` list of lists of floats.

    Raises:
        IndexError: If a row of ``matrix1`` is longer than the number of
            rows in ``matrix2``.
    """
    n_cols = len(matrix2[0]) if matrix2 else 0
    product = []
    for left_row in matrix1:
        out_row = []
        for j in range(n_cols):
            # Accumulate explicitly from 0.0 so results are always floats and
            # the summation order matches the original implementation.
            acc = 0.0
            for h, left_value in enumerate(left_row):
                acc += left_value * matrix2[h][j]
            out_row.append(acc)
        product.append(out_row)
    return product

# 计算DCT
def DCT(matrix):
    """Compute the 2-D discrete cosine transform of a square matrix.

    With ``A`` the coefficient matrix from ``getCoefficient``, the 2-D
    transform is ``A * matrix * A^T``. The previous implementation computed
    ``A * matrix`` into a temporary but then returned ``matrix * A^T``,
    i.e. it transformed along one axis only and threw the other half away.

    Args:
        matrix: Square list-of-lists matrix of numbers.

    Returns:
        The transformed matrix as a list of lists of floats.
    """
    length = len(matrix)
    A = getCoefficient(length)
    AT = getTranspose(A)
    # First transform along one axis, then feed that result (not the raw
    # input) into the second multiplication.
    temp = getMultiply(A, matrix)
    DCT_matrix = getMultiply(temp, AT)
    return DCT_matrix

# 提取左上角8*8子矩阵并转化为list
def submatrix_list(matrix, size = (8, 8)):
    """Flatten the top-left corner of a matrix into a list.

    Args:
        matrix: List-of-lists matrix indexed as ``matrix[row][col]``.
        size: ``(rows, cols)`` extent of the corner to extract.

    Returns:
        The ``size[0] * size[1]`` corner values in row-major order.

    Raises:
        IndexError: If the matrix is smaller than ``size``.
    """
    return [
        matrix[row][col]
        for row in range(size[0])
        for col in range(size[1])
    ]

# 计算hash值
def getHashCode(sub_list):
    """Turn a list of numbers into a bit list by thresholding at the mean.

    Args:
        sub_list: Non-empty list of numbers (here, DCT coefficients).

    Returns:
        A list of the same length holding 1 where the value is strictly
        greater than the mean of ``sub_list`` and 0 elsewhere.
    """
    mean = sum(sub_list) / len(sub_list)
    return [1 if value > mean else 0 for value in sub_list]

# 比较hash值
def compHashCode(hc1, hc2):
    """Return how many aligned positions of two hash codes are equal.

    Only the overlapping prefix is compared when the codes differ in
    length, because the pairs come from ``zip``.
    """
    return len([None for bit1, bit2 in zip(hc1, hc2) if bit1 == bit2])

# 计算感知哈希算法相似度
def calpHashSimilarity(img1, img2):
    """Return the perceptual-hash similarity of two images.

    Each image goes through the same pipeline: ``regularizeImage`` ->
    ``getMatrix`` -> ``DCT`` -> ``submatrix_list`` -> ``getHashCode``.
    The two resulting hash codes are compared with ``compHashCode``.

    Returns:
        The number of hash positions on which the two images agree.
    """
    pipeline = (regularizeImage, getMatrix, DCT, submatrix_list, getHashCode)
    first, second = img1, img2
    # Apply each stage to both images before moving on to the next stage.
    for stage in pipeline:
        first, second = stage(first), stage(second)
    return compHashCode(first, second)

3.获取图像直方图距离

# 正则化图像
def regularizeImage(img, size = (256, 256)):
    """Normalise an image for histogram comparison.

    Args:
        img: Image object exposing ``resize`` (whose result exposes
            ``convert``), e.g. a PIL image.
        size: Target ``(width, height)`` passed to ``resize``.

    Returns:
        The resized image converted to mode ``'RGB'``.
    """
    scaled = img.resize(size)
    return scaled.convert('RGB')

# 分块图像4x4
def splitImage(img, part_size = (64, 64)):
    """Cut an image into tiles of ``part_size``.

    Tiles are produced column by column: the horizontal offset advances in
    the outer loop and the vertical offset in the inner loop.

    Args:
        img: Image object exposing ``size`` as ``(width, height)`` and
            ``crop(box)`` whose result exposes ``copy()``.
        part_size: ``(tile_width, tile_height)``.

    Returns:
        A list of copied crops, one per tile.
    """
    width, height = img.size
    tile_w, tile_h = part_size
    return [
        img.crop((left, top, left + tile_w, top + tile_h)).copy()
        for left in range(0, width, tile_w)
        for top in range(0, height, tile_h)
    ]

# 利用单块图片的直方图距离计算相似度
def calSingleHistogramSimilarity(hg1, hg2):
    """Score how alike two histograms are, bin by bin.

    Equal bins contribute 1; unequal bins contribute
    ``1 - |x1 - x2| / max(x1, x2)``. The score is the mean contribution
    over all bins.

    Args:
        hg1: First histogram (sequence of numbers).
        hg2: Second histogram, same length as ``hg1``.

    Returns:
        The mean per-bin similarity.

    Raises:
        Exception: If the two histograms differ in length.
    """
    if len(hg1) != len(hg2):
        raise Exception('样本点个数不一样')
    total = 0
    for bin1, bin2 in zip(hg1, hg2):
        if bin1 == bin2:
            total += 1
            continue
        total += 1 - float(abs(bin1 - bin2) / max(bin1, bin2))
    return total / len(hg1)

# 利用分块图片的直方图距离计算相似度
def calMultipleHistogramSimilarity(img1, img2):
    """Average the per-tile histogram similarity of two images.

    Both images are tiled with ``splitImage``; matching tiles are compared
    with ``calSingleHistogramSimilarity`` on their ``histogram()`` output.

    The previous implementation always divided by 16, which is only the
    tile count for 256x256 input with the default 64x64 tiles. The mean is
    now taken over the number of tile pairs actually compared, so the
    result for 256x256 input is unchanged and other sizes are no longer
    mis-scaled.

    Args:
        img1: First image, accepted by ``splitImage``.
        img2: Second image, accepted by ``splitImage``.

    Returns:
        The mean tile similarity as a float; ``0.0`` when there are no
        tiles to compare.
    """
    total = 0
    block_count = 0
    for sub_img1, sub_img2 in zip(splitImage(img1), splitImage(img2)):
        total += calSingleHistogramSimilarity(sub_img1.histogram(), sub_img2.histogram())
        block_count += 1
    if block_count == 0:
        # Matches the original result (0 / 16.0) for images that yield no tiles.
        return 0.0
    return float(total / block_count)

4.差异哈希算法(dhash)

# 正则化图像
def regularizeImage(img, size=(9, 8)):
    """Normalise an image for difference hashing.

    Args:
        img: Image object exposing ``resize`` (whose result exposes
            ``convert``), e.g. a PIL image.
        size: Target ``(width, height)`` passed to ``resize``.

    Returns:
        The resized image converted to mode ``'L'`` (greyscale).
    """
    reduced = img.resize(size)
    return reduced.convert('L')

# 计算hash值
def getHashCode(img, size = (9, 8)):
    """Compute the difference-hash bit list of an image.

    For every ``x`` in ``range(size[0] - 1)`` (outer loop) and ``y`` in
    ``range(size[1])`` (inner loop), the pixel at ``(x, y)`` is compared
    with its right-hand neighbour at ``(x + 1, y)``.

    Args:
        img: Image object exposing ``getpixel`` with comparable pixel
            values.
        size: ``(width, height)`` of the area to scan.

    Returns:
        A list of ``(size[0] - 1) * size[1]`` integers: 1 where the pixel
        is strictly greater than its right neighbour, otherwise 0.
    """
    return [
        1 if img.getpixel((x, y)) > img.getpixel((x + 1, y)) else 0
        for x in range(size[0] - 1)
        for y in range(size[1])
    ]

# 比较hash值
def compHashCode(hc1, hc2):
    """Count equal entries between two hash codes, position by position.

    Pairs are formed with ``zip``; trailing entries of the longer code are
    ignored.
    """
    agreeing = [pair for pair in zip(hc1, hc2) if pair[0] == pair[1]]
    return len(agreeing)

# 计算差异哈希算法相似度
def caldHashSimilarity(img1, img2):
    """Return the difference-hash similarity of two images.

    Both images are normalised with ``regularizeImage``, hashed with
    ``getHashCode`` and compared with ``compHashCode``.

    Returns:
        The number of hash positions on which the two images agree.
    """
    grey1, grey2 = regularizeImage(img1), regularizeImage(img2)
    bits1, bits2 = getHashCode(grey1), getHashCode(grey2)
    return compHashCode(bits1, bits2)

5.余弦计算(co)

from PIL import Image
from numpy import average, linalg, dot


def get_thumbnail(image, size=(608, 608), greyscale=False):
    """Resize an image to a fixed size, optionally converting to greyscale.

    ``Image.ANTIALIAS`` was removed in Pillow 10, so the original call
    raised ``AttributeError`` on current Pillow releases.
    ``Image.LANCZOS`` selects the same resampling filter and is available
    on both old and new versions.

    Args:
        image: PIL image to resize.
        size: Target ``(width, height)``.
        greyscale: When true, convert the resized image to mode ``'L'``.

    Returns:
        The resized (and possibly converted) image.
    """
    image = image.resize(size, Image.LANCZOS)
    if greyscale:
        image = image.convert('L')
    return image


def image_similarity_vectors_via_numpy(image1, image2):
    """Return the cosine similarity between two images.

    Each image is resized with ``get_thumbnail`` and flattened into a
    vector holding ``average(pixel)`` for every entry of ``getdata()``.
    The result is the dot product of the two vectors after each is divided
    by its L2 norm.

    Args:
        image1: First image, accepted by ``get_thumbnail``.
        image2: Second image, accepted by ``get_thumbnail``.

    Returns:
        The dot product of the two normalised pixel vectors.
    """
    thumbnails = (get_thumbnail(image1), get_thumbnail(image2))
    vectors = [
        [average(pixel_tuple) for pixel_tuple in thumb.getdata()]
        for thumb in thumbnails
    ]
    a, b = vectors
    a_norm = linalg.norm(a, 2)
    b_norm = linalg.norm(b, 2)
    return dot(a / a_norm, b / b_norm)

二.封装【平均、感知、差异】哈希算法 + 余弦 + 直方图距离这五类相似度的统一调用接口(三种哈希算法的结果为两张图哈希值相同位数占 64 位的比例)。这五大类计算方法封装完毕后可方便我们后期调用;为了后面使用方便,这里统一使用 PIL(Image.open)读取图像。

import histogram as htg
import aHash as ah
import pHash as ph
import dHash as dh
import co


def dsh(img1, img2):
    """Return the difference-hash similarity of two images as a ratio.

    The count returned by ``dh.caldHashSimilarity`` is divided by 64 and
    rounded to three decimal places.
    """
    matching = dh.caldHashSimilarity(img1, img2)
    return round(matching / 64, 3)


def cin(img1, img2):
    """Return the cosine similarity of two images, rounded to 3 decimals.

    The value comes from ``co.image_similarity_vectors_via_numpy``.
    """
    return round(co.image_similarity_vectors_via_numpy(img1, img2), 3)


def psh(img1, img2):
    """Return the perceptual-hash similarity of two images as a ratio.

    The count returned by ``ph.calpHashSimilarity`` is divided by 64 and
    rounded to three decimal places.
    """
    matching = ph.calpHashSimilarity(img1, img2)
    ratio = matching / 64
    return round(ratio, 3)


def ash(img1, img2):
    """Return the average-hash similarity of two images as a ratio.

    The count returned by ``ah.calaHashSimilarity`` is divided by 64 and
    rounded to three decimal places.
    """
    return round((ah.calaHashSimilarity(img1, img2)) / 64, 3)


def ham(img1, img2):
    """Return the histogram similarity of two images, rounded to 3 decimals.

    Both images are normalised with ``htg.regularizeImage`` and then scored
    with ``htg.calMultipleHistogramSimilarity``.
    """
    prepared1 = htg.regularizeImage(img1)
    prepared2 = htg.regularizeImage(img2)
    score = htg.calMultipleHistogramSimilarity(prepared1, prepared2)
    return round(score, 3)

三.通过这五大类特征筛选相似帧,阈值(代码中为 0.98)可以任意设置,视自己情况而定。

import adhp
import os
from PIL import Image
import shutil

def found_same_img(path, dest_dir='D:/wys/image/1/', threshold=0.98):
    """Copy every image that is near-identical to the next one in a folder.

    Each file in ``path`` is compared with the file that follows it. When
    all five metrics (difference hash, cosine, perceptual hash, average
    hash, histogram) exceed ``threshold``, the first file of the pair is
    copied to ``dest_dir``.

    Changes from the previous version:
      * a directory containing a single file no longer raises IndexError;
      * paths are built with ``os.path.join`` so ``path`` does not need a
        trailing separator;
      * file names are sorted, because ``os.listdir`` returns them in
        arbitrary order and "the next image" was otherwise undefined;
      * images are closed after each comparison instead of being leaked.

    Args:
        path: Directory containing the images to scan.
        dest_dir: Directory that receives copies of the similar images.
        threshold: Exclusive lower bound every metric must exceed.
    """
    img_paths = [os.path.join(path, name) for name in sorted(os.listdir(path))]
    # Evaluated in this order; all() stops at the first metric that fails,
    # mirroring the short-circuit of the original `and` chain.
    metrics = (adhp.dsh, adhp.cin, adhp.psh, adhp.ash, adhp.ham)
    for current_path, next_path in zip(img_paths, img_paths[1:]):
        with Image.open(current_path) as img1, Image.open(next_path) as img2:
            is_similar = all(metric(img1, img2) > threshold for metric in metrics)
        if is_similar:
            shutil.copy(current_path, dest_dir)


# Script entry point: scan one hard-coded folder for similar adjacent images.
if __name__ == '__main__':
    path = 'D:/wys/image/2/'  # directory to scan; change this to your own image folder
    found_same_img(path)     # run the similarity filter on that directory

上一篇 下一篇

猜你喜欢

热点阅读