自定义皮尔森相似度计算公式
2018-10-11 本文已影响0人
龙小江i
# data
# Rating table: user name -> {title: score}. Every score in this table lies
# between 1.0 and 5.0. Users have rated different subsets of titles, so any
# pairwise comparison must first restrict itself to the titles both users share.
users = {
    "小明": {
        "中国合伙人": 5.0, "太平轮": 3.0, "荒野猎人": 4.5, "老炮儿": 5.0,
        "我的少女时代": 3.0, "夏洛特烦恼": 4.5, "火星救援": 5.0,
    },
    "小红": {
        "小时代4": 4.0, "荒野猎人": 3.0, "我的少女时代": 5.0,
        "夏洛特烦恼": 5.0, "火星救援": 3.0, "后会无期": 3.0,
    },
    "小阳": {
        "小时代4": 2.0, "中国合伙人": 5.0, "我的少女时代": 3.0,
        "老炮儿": 5.0, "夏洛特烦恼": 4.5, "速度与激情7": 5.0,
    },
    "小四": {
        "小时代4": 5.0, "中国合伙人": 3.0, "我的少女时代": 4.0, "匆匆那年": 4.0,
        "速度与激情7": 3.5, "火星救援": 3.5, "后会无期": 4.5,
    },
    "六爷": {
        "小时代4": 2.0, "中国合伙人": 4.0, "荒野猎人": 4.5,
        "老炮儿": 5.0, "我的少女时代": 2.0,
    },
    "小李": {
        "荒野猎人": 5.0, "盗梦空间": 5.0, "我的少女时代": 3.0, "速度与激情7": 5.0,
        "蚁人": 4.5, "老炮儿": 4.0, "后会无期": 3.5,
    },
    "隔壁老王": {
        "荒野猎人": 5.0, "中国合伙人": 4.0, "我的少女时代": 1.0,
        "Phoenix": 5.0, "甄嬛传": 4.0, "The Strokes": 5.0,
    },
    "邻村小芳": {
        "小时代4": 4.0, "我的少女时代": 4.5, "匆匆那年": 4.5,
        "甄嬛传": 2.5, "The Strokes": 3.0,
    },
}
# pearson
def pearson_sim(target_user, other_user, ratings=None):
    """Return the Pearson correlation between two users' ratings.

    Only the items rated by both users take part in the computation. The
    single-pass form of the coefficient is used:

        r = (n*Sxy - Sx*Sy) / (sqrt(n*Sxx - Sx^2) * sqrt(n*Syy - Sy^2))

    Args:
        target_user: Key of the first user in the rating table.
        other_user: Key of the second user in the rating table.
        ratings: Mapping ``user -> {item: score}``. When omitted, the
            module-level ``users`` table is used, which keeps the original
            two-argument call working.

    Returns:
        A float in [-1, 1] (up to rounding). ``0.0`` is returned when the
        correlation is undefined: the users share no items, or at least one
        of them gave the same score to every shared item.

    Raises:
        KeyError: If either user is missing from the rating table.
    """
    from math import sqrt

    if ratings is None:
        # Fall back to the global table used by the original implementation.
        ratings = users

    target_ratings = ratings[target_user]
    other_ratings = ratings[other_user]

    n = 0
    sum_x = 0
    sum_y = 0
    sum_xy = 0
    sum_x2 = 0
    sum_y2 = 0
    for item, x in other_ratings.items():
        if item not in target_ratings:
            continue
        y = target_ratings[item]
        n += 1
        sum_x += x
        sum_y += y
        sum_xy += x * y
        sum_x2 += x ** 2
        sum_y2 += y ** 2

    numerator = n * sum_xy - sum_x * sum_y
    var_x = n * sum_x2 - sum_x ** 2
    var_y = n * sum_y2 - sum_y ** 2

    # Mathematically both terms are >= 0, but float rounding can push a
    # zero variance slightly below zero, which would make sqrt() raise.
    # Treat that the same as an exact zero: the correlation is undefined.
    if var_x <= 0 or var_y <= 0:
        return 0.0

    denominator = sqrt(var_x) * sqrt(var_y)
    if denominator == 0:
        # Guards against underflow of the product of two tiny square roots.
        return 0.0
    return numerator / denominator
# sim
# Rank every other user by Pearson similarity to the target user.
target = '六爷'
sim = [(pearson_sim(user, target), user) for user in users if user != target]
# reverse=True sorts in descending order, so the most similar users come
# first; ties on the score fall back to comparing the user names.
sim.sort(reverse=True)
# Keep the three most similar users. A bare `sim[:3]` expression is only
# echoed in an interactive session, so store and print the result explicitly.
top3 = sim[:3]
print(top3)