您的位置:首页 > 编程语言 > Python开发

Python实现的KNN分类器

2016-04-24 15:25 267 查看

knn.py

# -*- coding: UTF-8 -*-
'''
Created on 2016-4-24

@author: taiji1985
'''
import numpy as np
import operator
import matplotlib.pyplot as plt

def createDataSet():
    """Return a tiny two-class toy dataset for demos and tests.

    Returns:
        dict: ``'X'`` holds a (4, 2) array of sample points and ``'ylabel'``
        the matching length-4 array of class labels: 0 for the two points
        near (1, 1) and 1 for the two points near (0, 0).
    """
    X = np.array([[1.0, 1.1], [1.0, 1.0], [0, 0], [0, 0.1]])
    ylabel = np.array([0, 0, 1, 1])
    return {'X': X, 'ylabel': ylabel}

def createDataset2(x, y):
    """Wrap existing samples and labels in the dataset dict format.

    Args:
        x: Sample matrix, stored under the ``'X'`` key as-is (no copy).
        y: Labels for the samples, stored under ``'ylabel'`` as-is.

    Returns:
        dict: ``{'X': x, 'ylabel': y}``.
    """
    return {'X': x, 'ylabel': y}

def plotData(ds, type='o'):
    """Plot the samples of a two-class dataset, one face colour per class.

    Only samples labelled 0 (red) and 1 (green) are drawn; any other label
    value is ignored. Drawing goes through ``plt.plot``; this function does
    not call ``plt.show()``.

    Args:
        ds: Dataset dict. ``ds['X']`` is indexed as a 2-D array whose
            columns 0 and 1 are used as the plot coordinates;
            ``ds['ylabel']`` is compared element-wise against 0 and 1.
        type: Matplotlib format string handed to ``plt.plot`` (e.g. ``'o'``
            or ``'*'``). The name shadows the builtin but is kept because
            it is part of the existing signature.
    """
    X = ds['X']
    y = ds['ylabel']
    face_colors = ['r', 'g']
    # The position in face_colors doubles as the class label (0 -> 'r', 1 -> 'g').
    for label, color in enumerate(face_colors):
        members = X[y == label]
        plt.plot(members[:, 0], members[:, 1], type,
                 markerfacecolor=color, markersize=14)

def classify(x, dataSet, k):
    """Classify several samples with k-nearest-neighbours.

    Args:
        x: Array of samples; ``x[i]`` is the i-th sample and ``x.shape[0]``
            is the number of samples.
        dataSet: Training dataset dict, passed unchanged to ``_classify``.
        k: Number of neighbours that vote, passed unchanged to ``_classify``.

    Returns:
        numpy.ndarray: One predicted label per sample. The array dtype is
        ``uint8``, so predicted labels must fit in the range 0..255.
    """
    n = x.shape[0]
    ret = np.zeros(n, 'uint8')
    for i in range(n):
        ret[i] = _classify(x[i], dataSet, k)
    return ret

def _classify(x, dataSet, k):
    """Classify one sample by majority vote among its k nearest neighbours.

    Args:
        x: Feature vector of the sample to classify.
        dataSet: Dict with ``'X'`` (2-D array, one training sample per row)
            and ``'ylabel'`` (one class label per row of ``X``).
        k: Number of nearest neighbours that vote. If ``k`` exceeds the
            number of training samples, all of them vote.

    Returns:
        The label that received the most votes. On a tie the smallest
        label wins.

    Raises:
        ValueError: If ``k`` is smaller than 1 or the dataset has no samples.
    """
    X = dataSet['X']
    ylabel = np.ravel(np.asarray(dataSet['ylabel']))
    n = X.shape[0]
    if n == 0:
        raise ValueError("dataSet contains no samples")
    if k < 1:
        raise ValueError("k must be at least 1")

    # Map every label to its position in the sorted unique-label list so that
    # arbitrary label values (not only 0..cn-1) can index the vote table.
    cls_label, label_idx = np.unique(ylabel, return_inverse=True)
    label_idx = np.ravel(label_idx)

    # Squared Euclidean distance to every training sample. The square root is
    # skipped because it does not change the ordering.
    diff = X - x  # broadcasting replaces the explicit np.tile
    dist_sq = (diff * diff).sum(axis=1)
    nearest = dist_sq.argsort()[:k]

    # bincount uses a platform-size integer, so counts cannot wrap at 255 the
    # way a uint8 counter would.
    vote = np.bincount(label_idx[nearest], minlength=len(cls_label))
    return cls_label[np.argmax(vote)]


测试

'''
Created on 2016-4-24

@author: taiji1985
'''
import numpy as np
import matplotlib.pyplot as plt
from ml import knn
# Build the toy training set and draw it with circle markers.
d = knn.createDataSet()

plt.figure()
knn.plotData(d, 'o')

plt.axis([-1, 2, -1, 2])

# Classify 100 random query points (standard normal shifted by +1 on both
# axes) using k=2 neighbours.
x = np.random.randn(100, 2) + 1
r = knn.classify(x, d, 2)

# Wrap the queries and their predicted labels as a dataset and draw them
# with star markers on the same figure.
d2 = knn.createDataset2(x, r)
# Single-argument print(...) produces the same output on Python 2 and 3.
print(d['ylabel'].shape)
print(d2['ylabel'].shape)
knn.plotData(d2, '*')

plt.show()


参考

[1] 《机器学习实战》
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签:  python 机器学习 knn