kNN-k近邻算法思想
2019-07-11 本文已影响0人
_PatrickStar
(笔记)
#!/usr/bin/env python
# -*- coding:utf-8 -*-
import numpy as np
import matplotlib.pyplot as plt
from math import sqrt
from collections import Counter
# Toy training set copied from a tutorial: ten 2-D samples, one [x, y] row each.
raw_data_X = [
    [3.393533211, 2.331273381],
    [3.110073483, 1.781539638],
    [1.343808831, 3.368360954],
    [3.582294042, 4.679179110],
    [2.280362439, 2.866990263],
    [7.423436942, 4.696522875],
    [5.745051997, 3.533989803],
    [9.172168622, 2.511101045],
    [7.792783481, 3.424088941],
    [7.939820817, 0.791637231],
]
# Class label of each row above, in the same order: five 0s followed by five 1s.
raw_data_y = [0] * 5 + [1] * 5

# NumPy versions of the raw lists so boolean masks and array arithmetic work.
X_train = np.asarray(raw_data_X)
y_train = np.asarray(raw_data_y)

# The new, unlabeled point that the rest of the script classifies.
a = np.asarray([8.093607318, 3.365731514])
# Draw one scatter series per class. `y_train == label` is a boolean mask that
# keeps only the rows of X_train carrying that label; column 0 of the selected
# rows is the x coordinate and column 1 is the y coordinate.
for label, colour in ((0, 'r'), (1, 'g')):
    class_points = X_train[y_train == label]
    plt.scatter(class_points[:, 0], class_points[:, 1], color=colour)

# The query point is drawn in blue so it stands out from both classes.
plt.scatter(*a, color='b')
plt.show()
def _euclidean_distance(point, other):
    """Return the Euclidean distance between two coordinate arrays."""
    delta = point - other
    return sqrt(np.sum(delta ** 2))


# Number of nearest neighbours that take part in the vote.
k = 6

# Distance from every training sample to the query point `a`
# (an explicit for-loop with append would work just as well).
distances = [_euclidean_distance(row, a) for row in X_train]

# argsort returns the sample indices ordered from closest to farthest.
nearest = np.argsort(distances)

# Labels of the k closest samples -> [1, 1, 1, 1, 1, 0] for this data set.
attr_y = list(y_train[nearest[:k]])

# Tally how often each label occurs -> Counter({1: 5, 0: 1}).
votes = Counter(attr_y)

# most_common(1) yields a one-element list of (label, count) pairs, here
# [(1, 5)]; the label of that single pair is the prediction.
predict, _winning_count = votes.most_common(1)[0]
print(predict)  # 1
输出结果:
image.png