# 最小哈希签名代码 (MinHash signature code)
# 2018-12-14 本文已影响0人
# mxylulu
import numpy as np
# Initial value for every signature entry; dp() lowers the entries with min().
N = 100

# 0/1 matrix read by dp(): one row per element, one column per set
# (the columns are reported as S1..S4 by compare()).
start = [
    [1, 0, 0, 1],
    [0, 0, 1, 0],
    [0, 1, 0, 1],
    [1, 0, 1, 1],
    [0, 0, 1, 0],
]

# Signature matrix: one row per hash function, one column per set.
list_2 = [[N] * 4 for _ in range(2)]
def h1(n=5):
    """Return the values of the first hash function for rows ``0 .. n-1``.

    Row index ``i`` is mapped to ``(i + 1) % n``.

    Args:
        n: Number of rows, also used as the modulus. Defaults to 5, the
            value that was previously hard-coded.

    Returns:
        A list of ``n`` ints; entry ``i`` is the hash of row ``i``.
        An empty list when ``n`` is 0.
    """
    return [(i + 1) % n for i in range(n)]
def h2(n=5):
    """Return the values of the second hash function for rows ``0 .. n-1``.

    Row index ``i`` is mapped to ``(3 * i + 1) % n``.

    Args:
        n: Number of rows, also used as the modulus. Defaults to 5, the
            value that was previously hard-coded.

    Returns:
        A list of ``n`` ints; entry ``i`` is the hash of row ``i``.
        An empty list when ``n`` is 0.
    """
    # NOTE: when n is a multiple of 3 the values repeat (e.g. n=3 gives
    # [1, 1, 1]), so the result is not a permutation of the row indices.
    return [(3 * i + 1) % n for i in range(n)]
# Evaluate both hash functions once; each yields one value per row.
h_1, h_2 = h1(), h2()

# Working matrix: the columns of `start` followed by one column per hash
# function (h_1 first, then h_2).
list_1 = np.column_stack((start, h_1, h_2))
def dp(matrix=None, signatures=None):
    """Fill the min-hash signature matrix in place.

    Each row of ``matrix`` holds one 0/1 membership flag per set, followed
    by one hash value per hash function. For every set whose flag is 1 in a
    row, each signature entry of that set is lowered to the row's
    corresponding hash value if that value is smaller.

    Args:
        matrix: Rows laid out as described above. Defaults to the
            module-level ``list_1``.
        signatures: Signature matrix to update, one row per hash function
            and one column per set. Defaults to the module-level
            ``list_2``.

    Returns:
        None. ``signatures`` is modified in place.
    """
    if matrix is None:
        matrix = list_1
    if signatures is None:
        signatures = list_2
    if len(signatures) == 0:
        return

    # The signature width fixes how many leading columns are set flags;
    # everything after them in a row is a hash value.
    num_sets = len(signatures[0])
    for row in matrix:
        hash_values = row[num_sets:]
        for j in range(num_sets):
            if row[j] == 1:
                for sig_row, value in zip(signatures, hash_values):
                    sig_row[j] = min(sig_row[j], value)
def compare(a, b, list_2):
    """Estimate and print the similarity of sets ``a`` and ``b``.

    Calls ``dp()`` to fill the signatures, then reports the fraction of
    rows in ``list_2`` in which the two sets' columns hold the same value.
    Also prints the module-level ``list_1`` and the ``list_2`` argument.

    Args:
        a: 1-based index of the first set.
        b: 1-based index of the second set.
        list_2: Signature matrix, one row per hash function.

    Returns:
        The estimated similarity as a float in ``[0, 1]``.
    """
    # NOTE: dp() takes no arguments here and updates the module-level
    # signature matrix, so the estimate is only meaningful when the
    # `list_2` argument is that same object (as in the call in this file).
    dp()

    # Count over every signature row so the numerator always matches the
    # len(list_2) denominator, rather than assuming exactly two rows.
    matches = sum(1 for row in list_2 if row[a - 1] == row[b - 1])
    similarity = matches / len(list_2)

    print(list_1)
    print(list_2)
    print("S" + str(a) + "和S" + str(b) + "的文本相似度估计是" + str(similarity))
    return similarity
# Script entry: estimate the similarity of sets S1 and S3.
compare(a=1, b=3, list_2=list_2)