推荐系统9:MF推荐
1.LFM推荐
思路和ALS算法类似,区别在于,ALS利用坐标下降法,LFM利用梯度下降法
假设: 评分矩阵 $R_{m,n}$,m个用户对n个物品评分
$r_{u,i}$:用户u对物品i的评分
$R_{m,n} = P_{m,F} \cdot Q_{F,n}$:R是两个矩阵的乘积
P:每一行代表一个用户对各隐因子的喜欢程度
Q:每一列代表一个物品在各个隐因子上的概率分布
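使预测值 $\hat{r}_{ui} = \sum_{f=1}^{F} p_{uf}\, q_{fi}$ 尽可能和真实评分 $r_{ui}$ 相近,
防止过拟合,加入正则项:$\frac{\lambda}{2}\left(\lVert p_u \rVert^2 + \lVert q_i \rVert^2\right)$
即损失函数为:$L = \frac{1}{2}\sum_{(u,i)} \left(r_{ui} - \hat{r}_{ui}\right)^2 + \frac{\lambda}{2}\sum_{u} \lVert p_u \rVert^2 + \frac{\lambda}{2}\sum_{i} \lVert q_i \rVert^2$
采用梯度下降法,在t+1轮迭代中,P和Q的值分别是(其中 $e_{ui} = r_{ui} - \hat{r}_{ui}$,$\alpha$ 为学习率):
$p_{uf}^{(t+1)} = p_{uf}^{(t)} + \alpha\left(e_{ui}\, q_{fi}^{(t)} - \lambda\, p_{uf}^{(t)}\right)$
$q_{fi}^{(t+1)} = q_{fi}^{(t)} + \alpha\left(e_{ui}\, p_{uf}^{(t)} - \lambda\, q_{fi}^{(t)}\right)$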
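随机梯度下降虽然没有严密的理论证明,但实践经验表明,它通常比传统梯度下降法需要更少的迭代次数就可以收敛
(下式中 $e_{ui} = r_{ui} - \hat{r}_{ui}$ 为预测误差,$N(u)$ 为用户u评过分的物品集合)
传统梯度:$\frac{\partial L}{\partial p_{uf}} = -\sum_{i \in N(u)} e_{ui}\, q_{fi} + \lambda\, p_{uf}$
随机梯度:$\frac{\partial L}{\partial p_{uf}} \approx -e_{ui}\, q_{fi} + \lambda\, p_{uf}$
计算随机梯度时,只利用用户u对一个物品的评分,而不是利用用户u的所有评分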
LFM推荐demo:
# coding:utf-8
import random
import math
# Python 2/3 compatibility: probe for the ``xrange`` builtin and, when the
# lookup raises NameError, bind the name ``xrange`` to ``range`` instead.
try:
    xrange
except NameError:
    # Python 3 compat
    xrange = range
class LFM(object):
    """Latent factor model (plain matrix factorisation) trained with SGD.

    The rating of ``user`` for ``item`` is approximated by the dot product of
    the user factor vector ``P[user]`` and the item factor vector ``Q[item]``,
    both lists of length ``F``.
    """

    def __init__(self, rating_data, F, alpha=0.1, lmbd=0.1, max_iter=500):
        """Store the hyper-parameters and randomly initialise ``P`` and ``Q``.

        Args:
            rating_data: list of ``(user, [(item, rating), ...])`` pairs.
            F: number of latent factors.
            alpha: initial learning rate used by ``train``.
            lmbd: L2 regularisation weight.
            max_iter: number of passes over ``rating_data`` made by ``train``.
        """
        self.F = F
        self.P = dict()
        self.Q = dict()
        self.alpha = alpha
        self.lmbd = lmbd
        self.max_iter = max_iter
        self.rating_data = rating_data
        # Randomly initialise one factor vector per user and per distinct item.
        for user, rates in self.rating_data:
            self.P[user] = self._random_vector()
            for item, _ in rates:
                if item not in self.Q:
                    self.Q[item] = self._random_vector()

    def _random_vector(self):
        """Return ``F`` values, each ``random.random() / sqrt(F)``."""
        scale = math.sqrt(self.F)
        return [random.random() / scale for _ in range(self.F)]

    def train(self):
        """Fit ``P`` and ``Q`` with stochastic gradient descent.

        Makes ``max_iter`` passes over ``rating_data``. The step size starts
        at ``self.alpha`` and is multiplied by 0.9 after every pass.
        """
        # Decay a local copy: overwriting self.alpha would leave the model
        # with a vanishing learning rate and turn a later train() call into
        # a no-op.
        lr = self.alpha
        for _ in range(self.max_iter):
            for user, rates in self.rating_data:
                p_u = self.P[user]
                for item, rui in rates:
                    q_i = self.Q[item]
                    err_ui = rui - self.predict(user, item)
                    for f in range(self.F):
                        # Read both old values first so the Q step uses the
                        # same P value the error was computed with.
                        p_uf, q_if = p_u[f], q_i[f]
                        p_u[f] = p_uf + lr * (err_ui * q_if - self.lmbd * p_uf)
                        q_i[f] = q_if + lr * (err_ui * p_uf - self.lmbd * q_if)
            lr *= 0.9  # shrink the step size after every pass

    def predict(self, user, item):
        """Return the predicted rating of ``user`` for ``item``.

        Raises:
            KeyError: if ``user`` or ``item`` did not occur in ``rating_data``.
        """
        return sum(p * q for p, q in zip(self.P[user], self.Q[item]))
if __name__ == '__main__':
    # Toy data set: users A, B, C and items a, b, c, d.
    rate_A = [('a', 1.0), ('b', 1.0)]
    rate_B = [('b', 1.0), ('c', 1.0)]
    rate_C = [('c', 1.0), ('d', 1.0)]
    rating_data = [('A', rate_A), ('B', rate_B), ('C', rate_C)]

    lfm = LFM(rating_data, 2)
    lfm.train()

    # Print the predicted preference of user A for every item.
    for item in ('a', 'b', 'c', 'd'):
        print(item, lfm.predict('A', item))
运行代码,推荐结果:
a 0.6772442553573618
b 0.7600624403943927
c 0.9328792453570258
d 0.7089159198323267
2.SVD推荐
LFM没有考虑客观的“偏置”,所以带偏置的LFM称为SVD
偏置:事物固有的,不受外界影响的属性
• 𝜇:训练集中所有评分的平均值
• 𝑏𝑢:用户偏置,代表一个用户评分的平均值
• 𝑏𝑖:物品偏置,代表一个物品被评分的平均值
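预测公式:$\hat{r}_{ui} = \mu + b_u + b_i + \sum_{f=1}^{F} p_{uf}\, q_{fi}$
更新方法(其中 $e_{ui} = r_{ui} - \hat{r}_{ui}$,$\alpha$ 为学习率,$\lambda$ 为正则化系数):
$b_u \leftarrow b_u + \alpha\left(e_{ui} - \lambda\, b_u\right)$
$b_i \leftarrow b_i + \alpha\left(e_{ui} - \lambda\, b_i\right)$
$p_{uf} \leftarrow p_{uf} + \alpha\left(e_{ui}\, q_{fi} - \lambda\, p_{uf}\right)$
$q_{fi} \leftarrow q_{fi} + \alpha\left(e_{ui}\, p_{uf} - \lambda\, q_{fi}\right)$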
SVD实现demo
# coding:utf-8
__author__ = "orisun"
import random
import math
# Python 2/3 compatibility: probe for the ``xrange`` builtin and, when the
# lookup raises NameError, bind the name ``xrange`` to ``range`` instead.
try:
    xrange
except NameError:
    # Python 3 compat
    xrange = range
class BiasLFM(object):
    """Latent factor model with global, user and item bias terms.

    The prediction is ``dot(P[user], Q[item]) + bu[user] + bi[item] + mu``,
    where ``mu`` is the mean of all ratings in ``rating_data``.
    """

    def __init__(self, rating_data, F, alpha=0.1, lmbd=0.1, max_iter=500):
        """Store the hyper-parameters and initialise all model parameters.

        Args:
            rating_data: list of ``(user, [(item, rating), ...])`` pairs.
            F: number of latent factors.
            alpha: initial learning rate used by ``train``.
            lmbd: L2 regularisation weight.
            max_iter: number of passes over ``rating_data`` made by ``train``.
        """
        self.F = F
        self.P = dict()
        self.Q = dict()
        self.bu = dict()
        self.bi = dict()
        self.alpha = alpha
        self.lmbd = lmbd
        self.max_iter = max_iter
        self.rating_data = rating_data
        self.mu = 0.0
        # Random factor vectors, zero biases; mu accumulates the rating sum.
        cnt = 0
        for user, rates in self.rating_data:
            self.P[user] = self._random_vector()
            self.bu[user] = 0
            cnt += len(rates)
            for item, rate in rates:
                self.mu += rate
                if item not in self.Q:
                    self.Q[item] = self._random_vector()
                self.bi[item] = 0
        # With no ratings at all the mean is undefined; keep mu at 0.0
        # instead of dividing by zero.
        if cnt:
            self.mu /= cnt

    def _random_vector(self):
        """Return ``F`` values, each ``random.random() / sqrt(F)``."""
        scale = math.sqrt(self.F)
        return [random.random() / scale for _ in range(self.F)]

    def train(self):
        """Fit ``P``, ``Q``, ``bu`` and ``bi`` with stochastic gradient descent.

        Makes ``max_iter`` passes over ``rating_data``. The step size starts
        at ``self.alpha`` and is multiplied by 0.9 after every pass.
        """
        # Decay a local copy: overwriting self.alpha would leave the model
        # with a vanishing learning rate and turn a later train() call into
        # a no-op.
        lr = self.alpha
        for _ in range(self.max_iter):
            for user, rates in self.rating_data:
                p_u = self.P[user]
                for item, rui in rates:
                    q_i = self.Q[item]
                    err_ui = rui - self.predict(user, item)
                    self.bu[user] += lr * (err_ui - self.lmbd * self.bu[user])
                    self.bi[item] += lr * (err_ui - self.lmbd * self.bi[item])
                    for f in range(self.F):
                        # Read both old values first so the Q step uses the
                        # same P value the error was computed with.
                        p_uf, q_if = p_u[f], q_i[f]
                        p_u[f] = p_uf + lr * (err_ui * q_if - self.lmbd * p_uf)
                        q_i[f] = q_if + lr * (err_ui * p_uf - self.lmbd * q_if)
            lr *= 0.9  # shrink the step size after every pass

    def predict(self, user, item):
        """Return the predicted rating of ``user`` for ``item``.

        Raises:
            KeyError: if ``user`` or ``item`` did not occur in ``rating_data``.
        """
        interaction = sum(p * q for p, q in zip(self.P[user], self.Q[item]))
        return interaction + self.bu[user] + self.bi[item] + self.mu
if __name__ == '__main__':
    # Toy data set: users A, B, C and items a, b, c, d.
    rate_A = [('a', 1.0), ('b', 1.0)]
    rate_B = [('b', 1.0), ('c', 1.0)]
    rate_C = [('c', 1.0), ('d', 1.0)]
    rating_data = [('A', rate_A), ('B', rate_B), ('C', rate_C)]

    lfm = BiasLFM(rating_data, 2)
    lfm.train()

    # Print the predicted preference of user A for every item.
    for item in ('a', 'b', 'c', 'd'):
        print(item, lfm.predict('A', item))
运行代码,推荐结果:
a 1.0112206656603693
b 0.9885043037157129
c 0.9868790391421494
d 1.00612285421106
3.SVD++推荐
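SVD++:任何用户只要对物品i有过评分,无论评分多少,已经在一定程度上反映了用户对各个隐因子的喜好程度 $y_i = (y_{i1}, y_{i2}, \ldots, y_{iF})$,y是物品携带的属性
预测公式:$\hat{r}_{ui} = \mu + b_u + b_i + \sum_{f=1}^{F} \left(p_{uf} + \frac{1}{\sqrt{|N(u)|}} \sum_{j \in N(u)} y_{jf}\right) q_{fi}$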
• 𝑁(𝑢):用户u评价过的物品集合
• 𝑏𝑢:用户偏置,代表一个用户评分的平均值
• 𝑏𝑖:物品偏置,代表一个物品被评分的平均值
SVD++推荐demo
# coding:utf-8
__author__ = "orisun"
import random
import math
# Python 2/3 compatibility: probe for the ``xrange`` builtin and, when the
# lookup raises NameError, bind the name ``xrange`` to ``range`` instead.
try:
    xrange
except NameError:
    # Python 3 compat
    xrange = range
class SVDPP(object):
    """SVD++ model: biased latent factors plus implicit-feedback factors.

    Every item carries an extra factor vector ``Y[item]``. The prediction is
    ``dot(P[user] + sum(Y[j] for j in rated) / sqrt(len(rated)), Q[item])
    + bu[user] + bi[item] + mu``, where ``rated`` is the user's rating
    history and ``mu`` is the mean of all ratings in ``rating_data``.
    """

    def __init__(self, rating_data, F, alpha=0.1, lmbd=0.1, max_iter=500):
        """Store the hyper-parameters and initialise all model parameters.

        Args:
            rating_data: list of ``(user, [(item, rating), ...])`` pairs.
            F: number of latent factors.
            alpha: initial learning rate used by ``train``.
            lmbd: L2 regularisation weight.
            max_iter: number of passes over ``rating_data`` made by ``train``.
        """
        self.F = F
        self.P = dict()
        self.Q = dict()
        self.Y = dict()
        self.bu = dict()
        self.bi = dict()
        self.alpha = alpha
        self.lmbd = lmbd
        self.max_iter = max_iter
        self.rating_data = rating_data
        self.mu = 0.0
        # Random P, Q and Y vectors, zero biases; mu accumulates the rating sum.
        cnt = 0
        for user, rates in self.rating_data:
            self.P[user] = self._random_vector()
            self.bu[user] = 0
            cnt += len(rates)
            for item, rate in rates:
                self.mu += rate
                if item not in self.Q:
                    self.Q[item] = self._random_vector()
                if item not in self.Y:
                    self.Y[item] = self._random_vector()
                self.bi[item] = 0
        # With no ratings at all the mean is undefined; keep mu at 0.0
        # instead of dividing by zero.
        if cnt:
            self.mu /= cnt

    def _random_vector(self):
        """Return ``F`` values, each ``random.random() / sqrt(F)``."""
        scale = math.sqrt(self.F)
        return [random.random() / scale for _ in range(self.F)]

    def _implicit_factors(self, rated_items):
        """Return ``sum(Y[j]) / sqrt(len(rated_items))`` as a list of length F.

        An empty history contributes nothing, so a zero vector is returned
        instead of dividing by ``sqrt(0)``.
        """
        if not rated_items:
            return [0.0] * self.F
        totals = [0.0] * self.F
        for rated, _ in rated_items:
            y_j = self.Y[rated]
            for f in range(self.F):
                totals[f] += y_j[f]
        norm = math.sqrt(1.0 * len(rated_items))
        return [total / norm for total in totals]

    def _score(self, user, item, implicit):
        """Return the prediction given a precomputed implicit-factor vector."""
        p_u = self.P[user]
        q_i = self.Q[item]
        interaction = sum((p_u[f] + implicit[f]) * q_i[f] for f in range(self.F))
        return interaction + self.bu[user] + self.bi[item] + self.mu

    def train(self):
        """Fit ``P``, ``Q``, ``Y``, ``bu`` and ``bi`` with SGD.

        Makes ``max_iter`` passes over ``rating_data``. The step size starts
        at ``self.alpha`` and is multiplied by 0.9 after every pass. Users
        with an empty rating list are skipped.
        """
        # Decay a local copy: overwriting self.alpha would leave the model
        # with a vanishing learning rate and turn a later train() call into
        # a no-op.
        lr = self.alpha
        for _ in range(self.max_iter):
            for user, rates in self.rating_data:
                if not rates:
                    # Nothing to learn from, and 1 / sqrt(0) is undefined.
                    continue
                # Y is only modified after the rating loop below, so the
                # implicit term is computed once per user rather than once
                # per rating.
                implicit = self._implicit_factors(rates)
                inv_norm = 1.0 / math.sqrt(1.0 * len(rates))
                # Accumulates err_ui * Q[item] over the user's ratings; it
                # drives the Y update after the loop.
                s = [0.0] * self.F
                p_u = self.P[user]
                for item, rui in rates:
                    q_i = self.Q[item]
                    err_ui = rui - self._score(user, item, implicit)
                    self.bu[user] += lr * (err_ui - self.lmbd * self.bu[user])
                    self.bi[item] += lr * (err_ui - self.lmbd * self.bi[item])
                    for f in range(self.F):
                        # Read both old values first so the Q step uses the
                        # same P value the error was computed with.
                        p_uf, q_if = p_u[f], q_i[f]
                        s[f] += q_if * err_ui
                        p_u[f] = p_uf + lr * (err_ui * q_if - self.lmbd * p_uf)
                        q_i[f] = q_if + lr * (
                            err_ui * (p_uf + implicit[f]) - self.lmbd * q_if)
                for item, _ in rates:
                    y_j = self.Y[item]
                    for f in range(self.F):
                        y_j[f] += lr * (s[f] * inv_norm - self.lmbd * y_j[f])
            lr *= 0.9  # shrink the step size after every pass

    def predict(self, user, item, ratedItems):
        """Return the predicted rating of ``user`` for ``item``.

        Args:
            user: user key present in ``rating_data``.
            item: item key present in ``rating_data``.
            ratedItems: the user's history as ``[(item, rating), ...]``; only
                the item keys are read. An empty history is allowed and
                contributes no implicit-feedback term.

        Raises:
            KeyError: if ``user``, ``item`` or an item in ``ratedItems`` did
                not occur in ``rating_data``.
        """
        return self._score(user, item, self._implicit_factors(ratedItems))
if __name__ == '__main__':
    # Toy data set: users A, B, C and items a, b, c, d.
    rate_A = [('a', 1.0), ('b', 1.0)]
    rate_B = [('b', 1.0), ('c', 1.0)]
    rate_C = [('c', 1.0), ('d', 1.0)]
    rating_data = [('A', rate_A), ('B', rate_B), ('C', rate_C)]

    lfm = SVDPP(rating_data, 2)
    lfm.train()

    # Print the predicted preference of user A for every item, passing A's
    # own rating history as the implicit-feedback set.
    for item in ('a', 'b', 'c', 'd'):
        print(item, lfm.predict('A', item, rate_A))
运行代码,推荐结果:
a 1.0006164975499188
b 0.994332724556376
c 1.0139922754595898
d 0.9916958194602059