k-近邻算法学习

2016-12-30  本文已影响0人  dechuan
from numpy import *
import operator
def createDataSet():
    """Build the small toy training set used by the k-NN example.

    Returns:
        A ``(group, labels)`` tuple. ``group`` is a 4x2 array holding one
        two-feature sample per row; ``labels`` is a list with the class
        label of each row, in the same order.
    """
    samples = [
        [1.0, 1.1],
        [1.0, 1.0],
        [0, 0],
        [0, 0.1],
    ]
    # The first two samples belong to class 'A', the last two to class 'B'.
    classes = ['A'] * 2 + ['B'] * 2
    return array(samples), classes
def classify0(inX,dataSet,labels,k):
    """Classify ``inX`` by majority vote among its ``k`` nearest neighbours.

    Distances are Euclidean, computed between ``inX`` and every row of
    ``dataSet``.

    Args:
        inX: The point to classify; a sequence with one value per feature.
        dataSet: 2-D array of training samples, one sample per row.
        labels: Class label of each row of ``dataSet``, in row order.
        k: Number of nearest neighbours that take part in the vote.

    Returns:
        The label that occurs most often among the ``k`` nearest samples.
        On a tie, the label that was encountered first (i.e. the one owned
        by the nearer neighbour) wins, because ``sorted`` is stable.

    Raises:
        IndexError: If ``k`` is smaller than 1 or larger than the number of
            rows in ``dataSet``.
    """
    dataSetSize = dataSet.shape[0]
    # Repeat inX once per training row so the subtraction is row-by-row.
    diffMat = tile(inX, (dataSetSize, 1)) - dataSet
    sqDiffMat = diffMat ** 2
    # axis=1 sums across each row (axis=0 would sum down each column).
    sqDistances = sqDiffMat.sum(axis=1)
    distances = sqDistances ** 0.5
    # Diagnostic output kept from the tutorial: distance to every sample.
    print(distances)
    # argsort sorts ascending and yields the row indices, not the values.
    sortedDistIndicies = distances.argsort()
    print(sortedDistIndicies)
    classCount = {}
    for i in range(k):
        # Tally one vote for the label of the i-th nearest sample.
        voteIlabel = labels[sortedDistIndicies[i]]
        classCount[voteIlabel] = classCount.get(voteIlabel, 0) + 1
    # Order the (label, votes) pairs by vote count, most frequent first.
    sortedClassCount = sorted(
        classCount.items(), key=operator.itemgetter(1), reverse=True
    )
    return sortedClassCount[0][0]
# Demo: build the toy data set, then classify the point (0, 0) using its
# three nearest neighbours.
dataset, labels = createDataSet()
result = classify0([0, 0], dataset, labels, 3)

函数知识:http://www.cnblogs.com/100thMountain/p/4719503.html

上一篇 | 下一篇

猜你喜欢

热点阅读