您的位置:首页 > 编程语言 > Python开发

机器学习实战_kNN算法python3.6实现与理解

2017-12-04 20:46 1031 查看

机器学习实战_kNN算法python3.6实现与理解

标签(空格分隔): kNN算法

from numpy import *
import operator
from os import listdir

def createDataSet():
    """Create the small sample data set and its labels.

    Returns:
        A tuple ``(group, labels)``: ``group`` is a 4x2 NumPy array holding
        one sample point per row, and ``labels`` is the list of class labels
        ('A' or 'B') for the corresponding rows.
    """
    group = array([[1.0, 1.1], [1.0, 1.0], [0, 0], [0, 0.1]])
    labels = ['A', 'A', 'B', 'B']
    return group, labels

def classify0(inX, dataSet, labels, k):
    """Classify ``inX`` with the k-nearest-neighbours algorithm.

    The Euclidean distance from ``inX`` to every row of ``dataSet`` is
    computed, and the ``k`` closest rows vote with their labels.

    Args:
        inX: Feature vector to classify; it must broadcast against one row
            of ``dataSet``.
        dataSet: 2-D NumPy array with one training sample per row.
        labels: Sequence of class labels; ``labels[i]`` belongs to
            ``dataSet[i]``.
        k: Number of nearest neighbours that take part in the vote.

    Returns:
        The label that received the most votes.

    Raises:
        ValueError: If ``k`` is not between 1 and the number of rows in
            ``dataSet``.
    """
    dataSetSize = dataSet.shape[0]
    if not 1 <= k <= dataSetSize:
        raise ValueError(
            "k must be between 1 and the number of samples (%d), got %r"
            % (dataSetSize, k))

    # Broadcasting subtracts inX from every row; no explicit tiling needed.
    diffMat = asarray(inX) - dataSet
    distances = (diffMat ** 2).sum(axis=1) ** 0.5
    sortedDistIndicies = distances.argsort()

    # Tally the labels of the k closest samples.
    classCount = {}
    for neighbourIndex in sortedDistIndicies[:k]:
        voteIlabel = labels[neighbourIndex]
        classCount[voteIlabel] = classCount.get(voteIlabel, 0) + 1

    # Python 3: dict.items() replaces the Python 2 iteritems().
    sortedClassCount = sorted(classCount.items(),
                              key=operator.itemgetter(1), reverse=True)
    return sortedClassCount[0][0]

def file2matrix(filename):
    """Parse a tab-separated text file into a NumPy matrix and a label list.

    On every non-blank line the first three tab-separated columns are read
    as numeric features and the last column as an integer class label.

    Args:
        filename: Path of the text file to read.

    Returns:
        A tuple ``(returnMat, classLabelVector)``: ``returnMat`` is an
        ``(n, 3)`` float array with one row per parsed line, and
        ``classLabelVector`` is the list of the ``n`` integer labels.
    """
    # Read the file once and close it promptly; blank lines are skipped so a
    # trailing newline at the end of the file does not break parsing.
    with open(filename) as fr:
        rows = [line.strip().split('\t') for line in fr if line.strip()]

    returnMat = zeros((len(rows), 3))  # feature matrix to return
    classLabelVector = []  # labels to return
    for index, listFromLine in enumerate(rows):
        returnMat[index, :] = [float(value) for value in listFromLine[0:3]]
        classLabelVector.append(int(listFromLine[-1]))
    return returnMat, classLabelVector

# Load the dating data set and keep the parsed NumPy matrix and label list
# at module level.
datingDataMat, datingLables = file2matrix('datingTestSet2.txt')
# print(datingDataMat)
# print(datingLables)

# Optional scatter plot of the data; marker size and colour are driven by the
# class label. The section is disabled as one unit so that ``ax`` and ``plt``
# are never referenced without the setup that defines them (doing so raises
# NameError). Uncomment all of it to plot; matplotlib must be installed.
# import matplotlib
# import matplotlib.pyplot as plt
# fig = plt.figure()
# ax = fig.add_subplot(111)
# Plain variant without per-class markers:
# ax.scatter(datingDataMat[:, 1], datingDataMat[:, 2])
# Variant with size and colour taken from the labels:
# ax.scatter(datingDataMat[:, 0], datingDataMat[:, 1],
#            15.0 * array(datingLables), 15.0 * array(datingLables))
# plt.show()

def autoNorm(dataSet):
    """Normalize every feature column of ``dataSet`` with min-max scaling.

    Each value becomes ``(value - column_min) / (column_max - column_min)``.

    Args:
        dataSet: 2-D NumPy array with one sample per row and one feature per
            column.

    Returns:
        A tuple ``(normDataSet, ranges, minVals)``: the normalized array, the
        per-column ``max - min`` and the per-column minimum.
    """
    minVals = dataSet.min(0)    # smallest value of every column
    maxVals = dataSet.max(0)    # largest value of every column
    ranges = maxVals - minVals  # value range (max - min) of every column
    # A constant column has range 0; divide it by 1 instead so it normalizes
    # to 0 rather than nan. The returned ``ranges`` is left untouched.
    safeRanges = where(ranges == 0, 1, ranges)
    # Element-wise arithmetic; broadcasting applies the per-column vectors to
    # every row (this is not a matrix division such as linalg.solve).
    normDataSet = (dataSet - minVals) / safeRanges
    return normDataSet, ranges, minVals

#print(autoNorm(datingDataMat))

def datingClassTest(hoRatio=0.50, filename='datingTestSet2.txt', k=3):
    """Test the kNN classifier on the dating-site data and print the results.

    The first ``hoRatio`` fraction of the normalized rows is held out as test
    vectors; each one is classified against the remaining rows. Every
    prediction is printed next to the real label, followed by the error rate
    and the number of errors.

    Args:
        hoRatio: Fraction of the rows held out for testing (default 0.50).
        filename: Data file passed to ``file2matrix``.
        k: Number of neighbours passed to ``classify0``.

    Raises:
        ValueError: If the split leaves no test rows or no training rows.
    """
    datingDataMat, datingLabels = file2matrix(filename)  # load data set from file
    normMat, ranges, minVals = autoNorm(datingDataMat)
    m = normMat.shape[0]
    numTestVecs = int(m * hoRatio)
    if not 0 < numTestVecs < m:
        # Without this guard an empty hold-out set ends in a division by zero.
        raise ValueError(
            "hoRatio=%r leaves %d test rows out of %d; need at least one "
            "test row and one training row" % (hoRatio, numTestVecs, m))

    errorCount = 0
    for i in range(numTestVecs):
        classifierResult = classify0(normMat[i, :], normMat[numTestVecs:m, :],
                                     datingLabels[numTestVecs:m], k)
        print(
            "分类器划分的类型: %d, 实际的类型是: %d" % (classifierResult, datingLabels[i]))
        if classifierResult != datingLabels[i]:
            errorCount += 1
    print(
        "错误率: %f" % (errorCount / float(numTestVecs)))
    # NOTE(review): "总是" below is probably a typo for "总数" (total count);
    # left unchanged because it is runtime output.
    print("错误的总是:%d" % (errorCount))

def classifyPerson():
    """Prompt for one person's three features and print the predicted liking.

    The three values are read from standard input, normalized with the minimum
    and range of the data in 'datingTestSet2.txt', and classified with
    ``classify0`` using k=3. The predicted integer label selects the text that
    is printed.
    """
    # Index 0 is the text for label 1, hence the "- 1" when printing below.
    resultList = ['一点都不喜欢', '喜欢一点', '很喜欢']
    percentTats = float(input("打电子游戏的时间:?"))
    ffMiles = float(input("坐飞机的时间:?"))
    iceCream = float(input("吃多少冰淇淋:?"))
    datingDataMat, datingLables = file2matrix('datingTestSet2.txt')
    normMat, ranges, minVals = autoNorm(datingDataMat)
    # NOTE(review): assumes the data file's columns are ordered as
    # (ffMiles, percentTats, iceCream) - confirm against the data set.
    inArr = array([ffMiles, percentTats, iceCream])
    classifyResult = classify0((inArr - minVals) / ranges, normMat, datingLables, 3)
    print("你的喜欢程度:", resultList[classifyResult - 1])

# Run the interactive predictor: reads three feature values from standard
# input and prints the predicted liking level.
classifyPerson()


数据集下载链接:https://pan.baidu.com/s/1eSvpjLS (提取密码:e1ni)
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息