kNN-k邻近算法思想

2019-07-11  本文已影响0人  _PatrickStar

(笔记)

#!/usr/bin/env python
# -*- coding:utf-8 -*-
import numpy as np
import matplotlib.pyplot as plt
from math import sqrt
from collections import Counter

# Toy dataset copied from a tutorial: ten 2-D samples, one [x, y] pair per row.
raw_data_X = [
    [3.393533211, 2.331273381],
    [3.110073483, 1.781539638],
    [1.343808831, 3.368360954],
    [3.582294042, 4.679179110],
    [2.280362439, 2.866990263],
    [7.423436942, 4.696522875],
    [5.745051997, 3.533989803],
    [9.172168622, 2.511101045],
    [7.792783481, 3.424088941],
    [7.939820817, 0.791637231],
]
# Class label of each sample: the first five rows are class 0, the last five class 1.
raw_data_y = [0] * 5 + [1] * 5

# NumPy views of the training features (10 x 2) and labels (length 10).
X_train = np.asarray(raw_data_X)
y_train = np.asarray(raw_data_y)

# The point whose class is predicted further down in the script.
a = np.asarray([8.093607318, 3.365731514])

# Scatter-plot the training samples, one colour per class.
# `y_train == label` is a boolean array with one entry per sample; using it as
# the row index keeps only the rows of that class, while the column index
# (0 or 1) picks the x or y coordinate of those rows.
for label, colour in ((0, 'r'), (1, 'g')):
    mask = y_train == label
    plt.scatter(X_train[mask, 0], X_train[mask, 1], color=colour)

# Draw the query point in blue, then display the figure.
plt.scatter(a[0], a[1], color='b')
plt.show()



# Euclidean distance from the query point `a` to every training sample.
# `X_train - a` broadcasts `a` across the rows of X_train, so all distances
# are computed in one vectorised expression instead of a Python-level loop.
distances = np.sqrt(np.sum((X_train - a) ** 2, axis=1))

# Indices of the training samples ordered from nearest to farthest.
# A stable sort keeps equidistant samples in their original order, so the
# neighbour ranking is deterministic.
nearest = np.argsort(distances, kind="stable")
k = 6

# Labels of the k nearest samples, nearest first.
attr_y = list(y_train[nearest[:k]])  # for this data: [1, 1, 1, 1, 1, 0]

# Count how many of the k neighbours carry each label.
votes = Counter(attr_y)  # for this data: label 1 -> 5 votes, label 0 -> 1 vote

# most_common(1) returns a one-element list holding the (label, count) pair
# with the highest count, e.g. [(1, 5)]; [0] selects that pair and [0][0] the
# label itself. Labels with equal counts are returned in first-encountered
# order, so a tie goes to the label of the nearer neighbour.
predict = votes.most_common(1)[0][0]

print(predict)  # 1



输出结果:


image.png
上一篇下一篇

猜你喜欢

热点阅读