Implementing a Naive Bayes Model in Pure NumPy
2019-05-29
_龙雀
Naive Bayes learns the mechanism by which the data are generated, so it is a generative model.
Learning process: the idea of the model is to estimate the joint distribution P(X, Y) from the training set, i.e. to compute the prior probabilities and the conditional probabilities.
How to compute them: by maximum likelihood estimation, or by Bayesian estimation (the smoothed version of the same counts).
Prediction process: assign a new input to the class with the largest posterior probability.
Why is it valid to predict this way?
It can be shown that assigning a sample to the class with the maximum posterior probability is equivalent to minimizing the expected risk under 0-1 loss.
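For reference, these are the smoothed (Bayesian) estimates that the code below uses, in the usual notation: N is the number of training samples, K the number of classes, S_j the number of distinct values of feature j, a_{jl} one of those values, and λ ≥ 0 the smoothing constant (λ = 0 recovers maximum likelihood estimation, λ = 1 is Laplace smoothing):

\[
\hat P(Y = c_k) = \frac{\sum_{i=1}^{N} I(y_i = c_k) + \lambda}{N + K\lambda},
\qquad
\hat P(X^{(j)} = a_{jl} \mid Y = c_k) = \frac{\sum_{i=1}^{N} I(x_i^{(j)} = a_{jl},\, y_i = c_k) + \lambda}{\sum_{i=1}^{N} I(y_i = c_k) + S_j\lambda}
\]

The prediction rule then picks the class that maximizes the unnormalized posterior:

\[
\hat y = \arg\max_{c_k} \hat P(Y = c_k) \prod_{j} \hat P(X^{(j)} = x^{(j)} \mid Y = c_k)
\]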
import numpy as np
from collections import Counter


class Navie_Bayes():
    def __init__(self, lmbda=0.2):
        # lmbda > 0 gives the smoothed (Bayesian) estimates; lmbda = 0 reduces to maximum likelihood
        self.lmbda = lmbda
        self.p_y = {}      # prior probabilities P(Y = c)
        self.p_x0_y = {}   # conditional probabilities P(X0 = v | Y = c)
        self.p_x1_y = {}   # conditional probabilities P(X1 = v | Y = c)

    def fit(self, X, Y):
        y_counts = Counter(Y)
        class_number = len(y_counts)
        # prior: (#samples in class + lambda) / (N + K * lambda)
        for c in y_counts:
            self.p_y[c] = (y_counts[c] + self.lmbda) / (X.shape[0] + class_number * self.lmbda)
        # joint counts of (feature value, class): the conditionals must be counted per class,
        # not over the whole feature column
        x0y_counts = Counter(zip(X[:, 0], Y))
        x1y_counts = Counter(zip(X[:, 1], Y))
        x0_values, x1_values = set(X[:, 0]), set(X[:, 1])
        # conditional: (#(value, class) + lambda) / (#class + S_j * lambda),
        # where S_j is the number of distinct values of feature j
        for c in y_counts:
            for v in x0_values:
                self.p_x0_y[(v, c)] = (x0y_counts[(v, c)] + self.lmbda) / (y_counts[c] + len(x0_values) * self.lmbda)
            for v in x1_values:
                self.p_x1_y[(v, c)] = (x1y_counts[(v, c)] + self.lmbda) / (y_counts[c] + len(x1_values) * self.lmbda)

    def predict(self, new_x):
        # unnormalized posterior for each class: prior * product of per-feature conditionals
        p0 = self.p_y[-1] * self.p_x0_y[(new_x[0], -1)] * self.p_x1_y[(new_x[1], -1)]
        p1 = self.p_y[1] * self.p_x0_y[(new_x[0], 1)] * self.p_x1_y[(new_x[1], 1)]
        return -1 if p0 > p1 else 1
if __name__ == '__main__':
    # 15 training samples with two features (the second one categorical S/M/L); labels are -1 / 1
    X = np.array([[1, 'S'], [1, 'M'], [1, 'M'], [1, 'S'], [1, 'S'],
                  [2, 'S'], [2, 'M'], [2, 'M'], [2, 'L'], [2, 'L'],
                  [3, 'L'], [3, 'M'], [3, 'M'], [3, 'L'], [3, 'L']])
    y = np.array([-1, -1, 1, 1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, -1])
    navies_bayes = Navie_Bayes()
    navies_bayes.fit(X, y)
    x_new = np.array([2, 'S'])
    y_pred = navies_bayes.predict(x_new)
    print('My Navie_Bayes predict %d' % y_pred)
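As a quick sanity check (a minimal sketch, assuming the same 15-sample dataset and the same λ = 0.2 smoothing as above), the two unnormalized posteriors for x = (2, S) can be worked out by hand; the term for class -1 is the larger one, so the script should print -1:

lmbda = 0.2
# P(Y=-1) * P(X0=2 | Y=-1) * P(X1=S | Y=-1); 6 of the 15 samples have label -1
p_neg = (6 + lmbda) / (15 + 2 * lmbda) * (2 + lmbda) / (6 + 3 * lmbda) * (3 + lmbda) / (6 + 3 * lmbda)
# P(Y=1) * P(X0=2 | Y=1) * P(X1=S | Y=1); 9 of the 15 samples have label 1
p_pos = (9 + lmbda) / (15 + 2 * lmbda) * (3 + lmbda) / (9 + 3 * lmbda) * (1 + lmbda) / (9 + 3 * lmbda)
print(p_neg, p_pos)  # roughly 0.065 vs 0.025, so the predicted class is -1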