您的位置:首页 > 编程语言 > Python开发

Python实现KNN算法

2015-08-28 20:22 661 查看
导入的包

import csv
import random
import math
import operator


读取数据:

def loadDataSet(filename, split, trainingSet=None, testSet=None):
    """Load a CSV data set and randomly split it into training and test rows.

    Every column except the last is converted to ``float``; the last column
    is kept unchanged as the class label. Each row is appended to
    ``trainingSet`` when ``random.random() < split`` and to ``testSet``
    otherwise. Blank lines in the file are skipped.

    Args:
        filename: Path of the CSV file to read.
        split: Threshold compared against ``random.random()``; roughly the
            fraction of rows that end up in the training set.
        trainingSet: Optional list that receives the training rows in place.
        testSet: Optional list that receives the test rows in place.

    Returns:
        The ``(trainingSet, testSet)`` pair. These are the caller's own
        lists when they were supplied, otherwise freshly created ones.

    Raises:
        ValueError: If a feature column cannot be converted to ``float``.
    """
    # Fresh lists per call: a mutable default ([]) would be shared between
    # calls and silently accumulate rows from earlier invocations.
    if trainingSet is None:
        trainingSet = []
    if testSet is None:
        testSet = []

    # Text mode: on Python 3 csv.reader rejects the bytes produced by 'rb'.
    with open(filename, 'r') as csvfile:
        for row in csv.reader(csvfile):
            # Skip blank lines (e.g. the trailing one in iris.data) instead
            # of unconditionally dropping the last row of the file.
            if not row:
                continue
            row[:-1] = [float(value) for value in row[:-1]]
            if random.random() < split:
                trainingSet.append(row)
            else:
                testSet.append(row)
    return trainingSet, testSet


计算欧氏距离:

def euclideanDistance(vec1, vec2, length):
    """Return the Euclidean distance over the first ``length`` components.

    Args:
        vec1: First indexable vector of numbers.
        vec2: Second indexable vector of numbers.
        length: Number of leading components to compare. Components at
            index ``length`` and beyond (e.g. a trailing class label) are
            ignored.

    Returns:
        The distance as a ``float``; ``0.0`` when ``length`` is 0.
    """
    squaredSum = sum((vec1[i] - vec2[i]) ** 2 for i in range(length))
    return math.sqrt(squaredSum)


选取距离最近的k个训练样本:

def getNeighbors(trainingSet, testInstance, k):
    """Return the ``k`` training instances closest to ``testInstance``.

    Distances are computed with ``euclideanDistance`` over every component
    except the last one, which is treated as the class label.

    Args:
        trainingSet: Sequence of training instances (features + label).
        testInstance: The instance to find neighbours for.
        k: Maximum number of neighbours to return.

    Returns:
        A list of at most ``k`` training instances ordered from nearest to
        farthest. Fewer are returned when the training set is smaller
        than ``k``.
    """
    length = len(testInstance) - 1  # last component is the label
    distances = [
        (trainInstance, euclideanDistance(testInstance, trainInstance, length))
        for trainInstance in trainingSet
    ]
    distances.sort(key=operator.itemgetter(1))
    # Slicing (instead of indexing range(k)) avoids an IndexError when
    # k exceeds the number of training instances.
    return [instance for instance, _ in distances[:max(k, 0)]]


在最近的k个中选取出现最多的那个类别标签:

def getResponse(neighbors):
    """Return the class label that occurs most often among ``neighbors``.

    Args:
        neighbors: Sequence of instances whose last element is the label.

    Returns:
        The label with the highest vote count. Ties are resolved by the
        iteration order of the internal vote dictionary.

    Raises:
        IndexError: If ``neighbors`` is empty.
    """
    classVotes = {}
    for neighbor in neighbors:
        label = neighbor[-1]
        classVotes[label] = classVotes.get(label, 0) + 1
    # Sort by descending vote count: an ascending sort would put the
    # LEAST common label first and return the wrong prediction.
    sortedVotes = sorted(classVotes.items(),
                         key=operator.itemgetter(1),
                         reverse=True)
    return sortedVotes[0][0]


计算准确率:

def getAccuracy(testSet, predictions):
    """Return the percentage of test instances that were predicted correctly.

    Labels are compared through their ``str`` form. Every misprediction is
    printed as ``real: <actual> pre: <predicted>``.

    Args:
        testSet: Sequence of instances whose last element is the true label.
        predictions: Predicted labels, aligned by index with ``testSet``.

    Returns:
        Accuracy as a float in the range 0.0-100.0; ``0.0`` for an empty
        test set.

    Raises:
        IndexError: If ``predictions`` is shorter than ``testSet``.
    """
    # An empty test set would otherwise divide by zero below.
    if not testSet:
        return 0.0
    correct = 0
    for index, instance in enumerate(testSet):
        actual = instance[-1]
        predicted = predictions[index]
        if str(actual) == str(predicted):
            correct += 1
        else:
            # Single-argument print() runs on both Python 2 and Python 3.
            print('real: %s pre: %s' % (actual, predicted))
    return (correct / float(len(testSet))) * 100.0


主函数:

def main():
    """Run the KNN demo on ``iris.data`` and print the resulting accuracy.

    Loads the data set with a 0.67 split threshold, classifies every test
    instance from its k=3 nearest training neighbours, and prints the set
    sizes followed by the accuracy percentage.
    """
    # Single-argument print() calls run on both Python 2 and Python 3.
    print('read data')
    trainingSet = []
    testSet = []
    split = 0.67
    loadDataSet('iris.data', split, trainingSet, testSet)
    print('train set:' + repr(len(trainingSet)))
    print('test set:' + repr(len(testSet)))
    print('predictions')
    k = 3
    predictions = []
    for testInstance in testSet:
        neighbors = getNeighbors(trainingSet, testInstance, k)
        predictions.append(getResponse(neighbors))
    accuracy = getAccuracy(testSet, predictions)

    print('Accuracy: ' + repr(accuracy) + '%')


main()


参考链接

数据集
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: