您的位置:首页 > 编程语言 > Python开发

python实战:sklearn的KNN算法实现手写数据的分类

2018-03-06 12:18 776 查看
from numpy import *
import pandas as  pd
import os
from sklearn.neighbors import KNeighborsClassifier as knn

def img_to_vector(filename):
    """Read a 32x32 text bitmap and flatten it into a 1x1024 row vector.

    The file is expected to hold 32 lines whose first 32 characters are
    single digits. Character ``j`` of line ``i`` is stored at column
    ``i * 32 + j`` of the result.

    Args:
        filename: Path of the text file to read.

    Returns:
        A numpy array of shape (1, 1024) holding the digits row by row.

    Raises:
        IndexError: If one of the first 32 lines has fewer than 32 characters.
        ValueError: If one of the consumed characters is not a digit.
    """
    side = 32
    returnVector = zeros((1, side * side))
    # The context manager closes the file even when parsing fails.
    with open(filename) as fr:
        for i, now_line in enumerate(fr):
            if i >= side:
                # Only the first 32 rows belong to the bitmap; trailing blank
                # lines must not be parsed or written past the buffer.
                break
            for j in range(side):
                returnVector[0, i * side + j] = int(now_line[j])
    return returnVector

def _digit_files(directory):
    """Return the entry names in *directory*, skipping hidden ones such as .DS_Store."""
    return [name for name in os.listdir(directory) if not name.startswith('.')]


def _label_of(file_name):
    """Return the integer label encoded before the first '_' of the file's base name."""
    return int(file_name.split('.')[0].split('_')[0])


def classifyHandWriting(k,
                        training_dir='/Users/me/PycharmProjects/untitled4/trainingDigits',
                        test_dir='/Users/me/PycharmProjects/untitled4/testDigits'):
    """Train a KNN classifier on digit bitmaps and report its test error rate.

    Every file in ``training_dir`` is converted with ``img_to_vector`` and
    labelled with the integer found before the first '_' of its name
    (presumably names look like ``3_17.txt``). The fitted model then predicts
    each file in ``test_dir``; each prediction, the error count and the error
    rate are printed.

    Args:
        k: Number of neighbours passed to the classifier as ``n_neighbors``.
        training_dir: Directory holding the training bitmaps.
        test_dir: Directory holding the test bitmaps.

    Raises:
        ValueError: If a visible file name does not start with an integer label.
        ZeroDivisionError: If ``test_dir`` contains no visible files.
    """
    training_files = _digit_files(training_dir)
    hwLabels = [_label_of(name) for name in training_files]
    trainingMat = zeros((len(training_files), 1024))
    for i, name in enumerate(training_files):
        trainingMat[i, :] = img_to_vector(os.path.join(training_dir, name))

    # Build the KNN model, then feed it the training set and its labels.
    model = knn(n_neighbors=k, algorithm='auto')
    model.fit(trainingMat, hwLabels)

    # Read the test set.
    test_files = _digit_files(test_dir)
    num_of_testingFiles = len(test_files)

    # Number of misclassified samples. It becomes a float on the first miss,
    # which keeps the printed report identical to the historical output.
    count_error = 0

    for name in test_files:
        test_real_result = _label_of(name)
        inX = img_to_vector(os.path.join(test_dir, name))
        # Predict the label of this single test sample.
        predict_result = int(model.predict(inX)[0])

        print("测的结果是%s,而实际是%s" % (predict_result, test_real_result))
        if predict_result != test_real_result:
            count_error = count_error + 1.0

    print("总数是:%s,测试错误数量是%s" % (num_of_testingFiles, count_error))
    print("错误率是%s" % ((count_error * 1.0) / num_of_testingFiles))

# Run the experiment with the 9 nearest neighbours.
classifyHandWriting(k=9)

阅读更多
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: