【平均、感知、差异】哈希算法 +余弦+直方图距离筛选相似帧
2021-10-07 本文已影响0人
小小杨树
一.构造【平均、感知、差异】哈希算法 +余弦+直方图距离 特征获取方式
1.计算平均哈希算法相似度(ahash)
# 正则化图像
def regularizeImage(img, size=(8, 8)):
    """Shrink *img* to *size* and convert it to 8-bit greyscale.

    Args:
        img: Object exposing ``resize(size)`` whose result exposes
            ``convert(mode)`` (presumably a PIL image -- confirm with caller).
        size: Target size passed straight to ``resize``; defaults to the
            8x8 grid the average-hash code samples.

    Returns:
        Whatever ``convert('L')`` returns for the resized image.
    """
    shrunk = img.resize(size)
    grey = shrunk.convert('L')
    return grey
# 计算hash值
def getHashCode(img, size=(8, 8)):
    """Compute the average-hash bit list of an image.

    Every pixel in the ``size[0]`` x ``size[1]`` grid is read with
    ``img.getpixel((i, j))`` (first coordinate varies slowest) and compared
    against the mean of all sampled pixels.

    Args:
        img: Object exposing ``getpixel((i, j))`` returning a number.
        size: Extent of the grid to sample.

    Returns:
        A list of ``size[0] * size[1]`` ints: 1 where the pixel is strictly
        greater than the mean, otherwise 0.

    Raises:
        ZeroDivisionError: If the grid is empty.
    """
    samples = [
        img.getpixel((x, y))
        for x in range(size[0])
        for y in range(size[1])
    ]
    threshold = sum(samples) / len(samples)
    return [1 if value > threshold else 0 for value in samples]
# 比较hash值
def compHashCode(hc1, hc2):
    """Count the positions at which two hash codes agree.

    The codes are walked in lockstep with ``zip``, so any surplus elements
    of the longer one are ignored.

    Returns:
        Number of index positions where ``hc1`` and ``hc2`` compare equal.
    """
    return sum(1 for left, right in zip(hc1, hc2) if left == right)
# 计算平均哈希算法相似度
def calaHashSimilarity(img1, img2):
    """Return the average-hash similarity of two images.

    Each image is normalised with ``regularizeImage``, hashed with
    ``getHashCode`` and the two hashes are compared with ``compHashCode``.

    Returns:
        The number of matching hash bits as reported by ``compHashCode``.
    """
    codes = [getHashCode(regularizeImage(img)) for img in (img1, img2)]
    return compHashCode(codes[0], codes[1])
2.感知哈希计算(phash)
import math
import unittest
# 正则化图像
def regularizeImage(img, size=(32, 32)):
    """Resize *img* to *size* and convert it to greyscale for the pHash DCT.

    Args:
        img: Object exposing ``resize(size)`` whose result exposes
            ``convert(mode)`` (presumably a PIL image -- confirm with caller).
        size: Target size handed to ``resize``; 32x32 by default.

    Returns:
        The result of ``convert('L')`` on the resized image.
    """
    resized = img.resize(size)
    return resized.convert('L')
# 获得图像像素矩阵
def getMatrix(img):
    """Read an image into a row-major list-of-lists pixel matrix.

    Args:
        img: Object with a ``size`` attribute indexed as ``(size[0], size[1])``
            and a ``getpixel((x, y))`` method.

    Returns:
        A list of ``size[1]`` rows, each holding ``size[0]`` pixel values, so
        that ``result[y][x] == img.getpixel((x, y))``.
    """
    dims = img.size
    width, height = dims[0], dims[1]
    return [
        [img.getpixel((x, y)) for x in range(width)]
        for y in range(height)
    ]
# 计算系数矩阵
def getCoefficient(length):
    """Build the ``length`` x ``length`` DCT coefficient matrix.

    Row 0 is constant ``1 / sqrt(length)``.  Row ``i`` (``i >= 1``), column
    ``j`` is ``sqrt(2 / length) * cos(i * pi * (j + 0.5) / length)``.

    Args:
        length: Side length of the square matrix.

    Returns:
        The matrix as a list of ``length`` rows of floats.

    Raises:
        ZeroDivisionError: If ``length`` is 0.
    """
    dc_weight = 1.0 / math.sqrt(length)
    rows = [[dc_weight] * length]
    for freq in range(1, length):
        rows.append([
            math.sqrt(2.0 / length) * math.cos(freq * math.pi * (pos + 0.5) / length)
            for pos in range(length)
        ])
    return rows
# 计算矩阵转置
def getTranspose(matrix):
    """Return the transpose of a list-of-lists matrix.

    The previous implementation looped over ``len(matrix)`` output rows while
    indexing ``matrix[j][i]``, which is only valid for square input.  Using
    ``zip(*matrix)`` yields one output row per input column, so rectangular
    matrices are handled as well; results for square matrices are unchanged.

    Args:
        matrix: Sequence of equally long rows.

    Returns:
        A new list of lists where ``result[j][i] == matrix[i][j]``.  An empty
        matrix yields an empty list.
    """
    return [list(column) for column in zip(*matrix)]
# 计算矩阵乘法
def getMultiply(matrix1, matrix2):
    """Multiply two list-of-lists matrices.

    The previous version took the output column count from ``matrix2[i]``,
    where ``i`` is a row index of ``matrix1``; that raised ``IndexError``
    whenever ``matrix1`` had more rows than ``matrix2``.  The column count is
    now read from the first row of ``matrix2``.

    Args:
        matrix1: Left operand, rows of length ``k``.
        matrix2: Right operand with at least ``k`` rows.

    Returns:
        The product as a list of rows of floats (accumulation starts at 0.0).

    Raises:
        IndexError: If a row of ``matrix1`` is longer than ``matrix2`` has rows.
    """
    column_count = len(matrix2[0]) if matrix2 else 0
    product = []
    for row in matrix1:
        product.append([
            sum((row[h] * matrix2[h][j] for h in range(len(row))), 0.0)
            for j in range(column_count)
        ])
    return product
# 计算DCT
def DCT(matrix):
    """Compute the 2-D DCT of a square matrix as ``A * matrix * A^T``.

    ``A`` is the coefficient matrix from ``getCoefficient``.  The previous
    version computed ``A * matrix`` into a temporary and then discarded it,
    returning ``matrix * A^T`` -- i.e. only a 1-D transform along each row.
    The intermediate product is now carried into the second multiplication.

    Args:
        matrix: Square list-of-lists matrix; its row count fixes the size of
            ``A``, so the column count must equal the row count.

    Returns:
        The transformed matrix as a list of rows of floats.
    """
    length = len(matrix)
    A = getCoefficient(length)
    AT = getTranspose(A)
    # Transform along columns first (A * M), then along rows (... * A^T).
    column_transformed = getMultiply(A, matrix)
    return getMultiply(column_transformed, AT)
# 计算左上角8*8并转化为list
def submatrix_list(matrix, size=(8, 8)):
    """Flatten the top-left corner of *matrix* into a single list.

    Args:
        matrix: Indexable as ``matrix[row][col]``.
        size: ``(rows, cols)`` of the corner to extract.

    Returns:
        The ``size[0] * size[1]`` corner values in row-major order.
    """
    return [
        matrix[row][col]
        for row in range(size[0])
        for col in range(size[1])
    ]
# 计算hash值
def getHashCode(sub_list):
    """Turn a list of numbers into hash bits by thresholding at the mean.

    Args:
        sub_list: Non-empty sequence of numbers.

    Returns:
        A list with 1 where the value is strictly greater than the mean of
        ``sub_list`` and 0 elsewhere.

    Raises:
        ZeroDivisionError: If ``sub_list`` is empty.
    """
    threshold = sum(sub_list) / len(sub_list)
    return [1 if value > threshold else 0 for value in sub_list]
# 比较hash值
def compHashCode(hc1, hc2):
    """Return how many aligned entries of two hash codes are equal.

    Comparison stops at the end of the shorter code because the pairs come
    from ``zip``.
    """
    agreeing = [1 for first, second in zip(hc1, hc2) if first == second]
    return len(agreeing)
# 计算感知哈希算法相似度
def calpHashSimilarity(img1, img2):
    """Return the perceptual-hash similarity of two images.

    For each image the pipeline is: ``regularizeImage`` -> ``getMatrix`` ->
    ``DCT`` -> ``submatrix_list`` -> ``getHashCode``.  The two resulting
    codes are then compared with ``compHashCode``.

    Returns:
        The number of matching hash bits as reported by ``compHashCode``.
    """
    codes = []
    for img in (img1, img2):
        spectrum = DCT(getMatrix(regularizeImage(img)))
        codes.append(getHashCode(submatrix_list(spectrum)))
    return compHashCode(codes[0], codes[1])
3.获取图像直方图距离
# 正则化图像
def regularizeImage(img, size=(256, 256)):
    """Resize *img* to *size* and convert it to RGB for histogram comparison.

    Args:
        img: Object exposing ``resize(size)`` whose result exposes
            ``convert(mode)`` (presumably a PIL image -- confirm with caller).
        size: Target size handed to ``resize``; 256x256 by default.

    Returns:
        The result of ``convert('RGB')`` on the resized image.
    """
    resized = img.resize(size)
    return resized.convert('RGB')
# 分块图像4x4
def splitImage(img, part_size=(64, 64)):
    """Cut an image into tiles of ``part_size``.

    Tiles are produced column by column: the left edge advances in the outer
    loop and the top edge in the inner loop.  Each tile is obtained with
    ``img.crop(box).copy()`` where ``box`` is ``(left, top, left + pw,
    top + ph)``.

    Args:
        img: Object with a two-element ``size`` and a ``crop(box)`` method.
        part_size: ``(width, height)`` of each tile.

    Returns:
        The list of tile copies.
    """
    width, height = img.size
    tile_w, tile_h = part_size
    return [
        img.crop((left, top, left + tile_w, top + tile_h)).copy()
        for left in range(0, width, tile_w)
        for top in range(0, height, tile_h)
    ]
# 利用单块图片的直方图距离计算相似度
def calSingleHistogramSimilarity(hg1, hg2):
    """Return the mean per-bin similarity of two histograms.

    A bin pair scores 1 when the counts are equal, otherwise
    ``1 - |x1 - x2| / max(x1, x2)``.  The result is the average over all bins.

    Changes from the previous version: the accumulator no longer shadows the
    builtin ``sum``; a length mismatch raises ``ValueError`` (still an
    ``Exception`` subclass, so existing handlers keep working); and two empty
    histograms return 1.0 instead of failing with ``ZeroDivisionError``.

    Args:
        hg1: Sequence of bin counts.
        hg2: Sequence of bin counts, same length as ``hg1``.

    Returns:
        Mean bin similarity; 1.0 when both histograms are empty.

    Raises:
        ValueError: If the histograms have different lengths.
    """
    bin_count = len(hg1)
    if bin_count != len(hg2):
        raise ValueError('样本点个数不一样')
    if bin_count == 0:
        # No bins differ, so treat the histograms as identical.
        return 1.0
    total = 0
    for x1, x2 in zip(hg1, hg2):
        if x1 != x2:
            total += 1 - abs(x1 - x2) / max(x1, x2)
        else:
            # Also covers the 0/0 case, which would otherwise divide by zero.
            total += 1
    return total / bin_count
# 利用分块图片的直方图距离计算相似度
def calMultipleHistogramSimilarity(img1, img2):
    """Return the mean histogram similarity over corresponding image tiles.

    Both images are tiled with ``splitImage`` and each pair of tiles is
    scored with ``calSingleHistogramSimilarity`` on their ``histogram()``
    output.

    The previous version always divided by 16.0, which is only correct when
    ``splitImage`` yields exactly 16 tiles (256x256 input with the default
    64x64 tiles).  The divisor is now the number of tile pairs actually
    compared, so the result is unchanged for that case and remains a true
    average for any other image size.

    Returns:
        The average tile similarity as a float; 0.0 when there are no tiles.
    """
    scores = [
        calSingleHistogramSimilarity(tile1.histogram(), tile2.histogram())
        for tile1, tile2 in zip(splitImage(img1), splitImage(img2))
    ]
    if not scores:
        return 0.0
    return float(sum(scores) / len(scores))
4.差异哈希算法(dhash)
# 正则化图像
def regularizeImage(img, size=(9, 8)):
    """Resize *img* to *size* and convert it to greyscale for the dHash.

    Args:
        img: Object exposing ``resize(size)`` whose result exposes
            ``convert(mode)`` (presumably a PIL image -- confirm with caller).
        size: Target size handed to ``resize``; 9x8 by default, one column
            wider than the 8x8 grid of differences the hash produces.

    Returns:
        The result of ``convert('L')`` on the resized image.
    """
    resized = img.resize(size)
    return resized.convert('L')
# 计算hash值
def getHashCode(img, size=(9, 8)):
    """Compute the difference-hash bit list of an image.

    For every ``i`` in ``range(size[0] - 1)`` and ``j`` in ``range(size[1])``
    the pixel at ``(i, j)`` is compared with its neighbour at ``(i + 1, j)``.

    Args:
        img: Object exposing ``getpixel((i, j))`` returning a comparable value.
        size: Extent of the pixel grid; the first component is one larger
            than the number of comparisons made along that axis.

    Returns:
        A list of ``(size[0] - 1) * size[1]`` ints: 1 where the pixel is
        strictly greater than its neighbour, otherwise 0.
    """
    bits = []
    for x in range(size[0] - 1):
        for y in range(size[1]):
            here = img.getpixel((x, y))
            neighbour = img.getpixel((x + 1, y))
            bits.append(1 if here > neighbour else 0)
    return bits
# 比较hash值
def compHashCode(hc1, hc2):
    """Count matching positions between two hash codes.

    Only positions present in both codes are considered (``zip`` semantics).

    Returns:
        The number of positions where the two codes hold equal values.
    """
    matches = 0
    for pair in zip(hc1, hc2):
        if pair[0] == pair[1]:
            matches = matches + 1
    return matches
# 计算差异哈希算法相似度
def caldHashSimilarity(img1, img2):
    """Return the difference-hash similarity of two images.

    Each image is normalised with ``regularizeImage`` and hashed with
    ``getHashCode``; the hashes are compared with ``compHashCode``.

    Returns:
        The number of matching hash bits as reported by ``compHashCode``.
    """
    first_code, second_code = (
        getHashCode(regularizeImage(img)) for img in (img1, img2)
    )
    return compHashCode(first_code, second_code)
5.余弦计算(co)
from PIL import Image
from numpy import average, linalg, dot
def get_thumbnail(image, size=(608, 608), greyscale=False):
    """Resize *image* to *size*, optionally converting it to greyscale.

    ``Image.ANTIALIAS`` was deprecated and then removed in Pillow 10, so the
    previous call raises ``AttributeError`` on current Pillow releases.
    ``Image.LANCZOS`` names the same resampling filter and is available in
    both old and new releases.

    Args:
        image: PIL image to resize.
        size: Target ``(width, height)``.
        greyscale: When true, the resized image is converted to mode ``'L'``.

    Returns:
        The resized (and possibly converted) image.
    """
    image = image.resize(size, Image.LANCZOS)
    if greyscale:
        image = image.convert('L')
    return image
def image_similarity_vectors_via_numpy(image1, image2):
    """Return the cosine similarity of two images' per-pixel averages.

    Both images are first resized with ``get_thumbnail`` (default size, no
    greyscale conversion).  Each pixel from ``getdata()`` is reduced to the
    average of its components, giving one vector per image; the result is
    the dot product of the two vectors after dividing each by its L2 norm.

    Returns:
        The dot product of the two normalised vectors.
    """
    vectors = []
    norms = []
    for thumb in (get_thumbnail(image1), get_thumbnail(image2)):
        intensities = [average(pixel) for pixel in thumb.getdata()]
        vectors.append(intensities)
        norms.append(linalg.norm(intensities, 2))
    first, second = vectors
    first_norm, second_norm = norms
    # Dividing a list by a numpy scalar yields an element-wise scaled array.
    return dot(first / first_norm, second / second_norm)
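二.构造【平均、感知、差异】哈希算法 + 余弦 + 直方图距离计算汉明距离(相似度)的统一调用方式。将上面五类计算方法分别保存为 aHash.py、pHash.py、dHash.py、co.py、histogram.py 之后,就可以在同一个模块(下文以 adhp 导入)中统一调用,方便后期使用。原文称统一使用 OpenCV 读取图像,但本文示例代码实际是通过 PIL 的 Image.open 读取图像的。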
import histogram as htg
import aHash as ah
import pHash as ph
import dHash as dh
import co
def dsh(img1, img2):
    """Return the difference-hash similarity of two images as a ratio.

    The value from ``dh.caldHashSimilarity`` is divided by 64 (presumably
    the hash length in bits -- confirm against the dHash module) and rounded
    to three decimal places.
    """
    matching = dh.caldHashSimilarity(img1, img2)
    return round(matching / 64, 3)
def cin(img1, img2):
    """Return the cosine similarity of two images, rounded to 3 decimals.

    Delegates to ``co.image_similarity_vectors_via_numpy``.
    """
    return round(co.image_similarity_vectors_via_numpy(img1, img2), 3)
def psh(img1, img2):
    """Return the perceptual-hash similarity of two images as a ratio.

    The value from ``ph.calpHashSimilarity`` is divided by 64 (presumably
    the hash length in bits -- confirm against the pHash module) and rounded
    to three decimal places.
    """
    matching = ph.calpHashSimilarity(img1, img2)
    return round(matching / 64, 3)
def ash(img1, img2):
    """Return the average-hash similarity of two images as a ratio.

    The value from ``ah.calaHashSimilarity`` is divided by 64 (presumably
    the hash length in bits -- confirm against the aHash module) and rounded
    to three decimal places.
    """
    matching = ah.calaHashSimilarity(img1, img2)
    return round(matching / 64, 3)
def ham(img1, img2):
    """Return the histogram-distance similarity of two images.

    Both images are normalised with ``htg.regularizeImage`` and then scored
    with ``htg.calMultipleHistogramSimilarity``; the score is rounded to
    three decimal places.
    """
    normalised = [htg.regularizeImage(img) for img in (img1, img2)]
    score = htg.calMultipleHistogramSimilarity(normalised[0], normalised[1])
    return round(score, 3)
三.通过这五大类特征筛选相似帧:只有当五项相似度全部超过阈值(示例中为 0.98)时,才判定相邻两帧相似;阈值可以任意设置,视自己情况而定。
import adhp
import os
from PIL import Image
import shutil
def found_same_img(path, dest='D:/wys/image/1/', threshold=0.98):
    """Copy every frame that is near-identical to the frame that follows it.

    Files in *path* are compared pairwise with their successor.  A frame is
    copied to *dest* only when all five metrics (difference hash, cosine,
    perceptual hash, average hash, histogram distance) exceed *threshold*.

    Changes from the previous version:
      * paths are built with ``os.path.join`` so *path* no longer needs a
        trailing separator;
      * the listing is sorted, because ``os.listdir`` returns entries in
        arbitrary order and "adjacent frame" is otherwise undefined;
      * a directory with a single file no longer raises ``IndexError``;
      * images are closed after each comparison;
      * the destination and threshold are parameters whose defaults keep the
        original behaviour.

    Args:
        path: Directory containing the frames to scan.
        dest: Directory (or file path) handed to ``shutil.copy``.
        threshold: Every metric must be strictly greater than this value.
    """
    frame_paths = [os.path.join(path, name) for name in sorted(os.listdir(path))]
    # Evaluated lazily and in this order, so a cheap failing metric skips
    # the more expensive ones that follow it.
    metrics = (adhp.dsh, adhp.cin, adhp.psh, adhp.ash, adhp.ham)
    for current_path, next_path in zip(frame_paths, frame_paths[1:]):
        with Image.open(current_path) as img1, Image.open(next_path) as img2:
            is_duplicate = all(metric(img1, img2) > threshold for metric in metrics)
        if is_duplicate:
            shutil.copy(current_path, dest)
# Script entry point: scan one hard-coded directory for near-duplicate frames.
if __name__ == '__main__':
    path = 'D:/wys/image/2/'  # Edit this to point at the directory to scan.
    found_same_img(path)  # Run the scan on that directory.