机器学习实战(二)
2015-11-09 19:56
232 查看
# -*- coding: utf-8 -*-
"""
k-nearest-neighbour (kNN) helpers: a toy data set, the classifier itself,
loaders for tab-separated records and 32x32 text digit images, and min-max
feature normalisation.

Created on Fri Nov 06 16:51:05 2015

@author: hzh
"""
from numpy import *
from os import listdir
import operator

import numpy as np

# NOTE: ``from numpy import *`` shadows several builtins (``sum``, ``min``,
# ``max``, ``any``, ``all``, ...). Code in this file therefore uses array
# methods or explicit ``np.`` calls instead of those bare names.


def createDataSet():
    """Return a tiny labelled data set for trying out the classifier.

    Returns:
        tuple: ``(group, labels)`` where ``group`` is a 4x2 array of points
        and ``labels`` is a list with one class label per row of ``group``.
    """
    group = array([[1.0, 1.1], [1.0, 1.0], [0, 0], [0, 0.1]])
    labels = ['A', 'A', 'B', 'B']
    return group, labels


# k-nearest-neighbour algorithm
def classify0(inX, dataSet, labels, k):
    """Classify ``inX`` by majority vote among its ``k`` nearest samples.

    Args:
        inX: Feature vector to classify (sequence or array with as many
            features as ``dataSet`` has columns).
        dataSet: 2-D array of training samples, one sample per row.
        labels: Sequence with the class label of each row of ``dataSet``.
        k: Number of nearest neighbours that take part in the vote.

    Returns:
        The label with the most votes among the ``k`` nearest samples
        (Euclidean distance).

    Raises:
        IndexError: If ``k`` is smaller than 1 or larger than the number of
            training samples.
    """
    # Broadcasting subtracts every training row from inX; no tile() needed.
    diffMat = np.asarray(inX) - dataSet
    distances = (diffMat ** 2).sum(axis=1) ** 0.5
    sortedDistIndicies = distances.argsort()

    classCount = {}
    for i in range(k):
        voteIlabel = labels[sortedDistIndicies[i]]
        classCount[voteIlabel] = classCount.get(voteIlabel, 0) + 1

    # items() exists on both Python 2 and 3 (iteritems() is Python 2 only).
    sortedClassCount = sorted(classCount.items(),
                              key=operator.itemgetter(1),
                              reverse=True)
    return sortedClassCount[0][0]


# Parser that converts text records into a NumPy matrix
def file2matrix(filename):
    """Load a tab-separated record file into a feature matrix and labels.

    Each non-blank line must hold at least three numeric feature columns;
    the last column is an integer class label.

    Args:
        filename: Path of the text file to read.

    Returns:
        tuple: ``(returnMat, classLabelVector)`` where ``returnMat`` is an
        ``(n, 3)`` float array built from the first three columns and
        ``classLabelVector`` is a list of ``n`` integer labels.
    """
    # The context manager closes the file even if parsing fails.
    with open(filename) as fr:
        # Blank lines carry no record; skipping them keeps the matrix size
        # equal to the number of real rows.
        records = [line.strip().split('\t') for line in fr if line.strip()]

    returnMat = zeros((len(records), 3))
    classLabelVector = []
    for index, listFromLine in enumerate(records):
        returnMat[index, :] = [float(value) for value in listFromLine[0:3]]
        classLabelVector.append(int(listFromLine[-1]))
    return returnMat, classLabelVector


# Normalise feature values
def autoNorm(dataSet):
    """Rescale every column of ``dataSet`` linearly into the range [0, 1].

    Args:
        dataSet: 2-D array (or nested sequence) with one sample per row.

    Returns:
        tuple: ``(normDataSet, ranges, minVals)`` where ``normDataSet`` is
        the float array ``(dataSet - minVals) / ranges`` and ``ranges`` /
        ``minVals`` are the per-column ``max - min`` and ``min``. A column
        whose values are all equal (range 0) is mapped to zeros instead of
        producing NaN.
    """
    data = np.asarray(dataSet)
    minVals = data.min(0)
    maxVals = data.max(0)
    ranges = maxVals - minVals
    # Divide constant columns by 1 so they become 0 rather than NaN/inf;
    # the returned ``ranges`` is left untouched for callers.
    divisor = np.where(ranges == 0, 1, ranges)
    # Cast to float first so integer input is not floor-divided on Python 2.
    normDataSet = (data - minVals).astype(float) / divisor
    return normDataSet, ranges, minVals


# Create a scatter plot with Matplotlib
def _demoScatterPlot():
    """Show a scatter plot of 100 random points with random size and colour."""
    # Imported here so the helpers above can be imported on machines
    # without matplotlib or without a display.
    import matplotlib.pyplot as plt

    x = np.random.rand(100)
    x *= 100
    c = np.random.rand(100, 2)
    fig = plt.figure()
    ax = fig.add_subplot(111)
    # The third and fourth positional arguments are marker size and colour.
    ax.scatter(c[:, 0], c[:, 1], 5 * array(x), 5 * array(x))
    plt.show()


def img2vector(filename, size=32):
    """Read a ``size`` x ``size`` text image of digits into a row vector.

    Args:
        filename: Path of a text file whose first ``size`` lines each start
            with ``size`` digit characters.
        size: Width and height of the image in characters (default 32).

    Returns:
        A ``(1, size * size)`` float array holding the image row by row.
    """
    returnVect = zeros((1, size * size))
    with open(filename) as fr:
        for i in range(size):
            lineStr = fr.readline()
            for j in range(size):
                returnVect[0, size * i + j] = int(lineStr[j])
    return returnVect


if __name__ == '__main__':
    # Only plot when the file is run as a script, not when it is imported.
    _demoScatterPlot()
def _digitLabel(fileNameStr):
    """Return the integer class encoded in a name like ``"7_42.txt"``."""
    # The text before the first '.' is "<digit>_<sample index>".
    return int(fileNameStr.split('.')[0].split('_')[0])


# Test code for the handwritten-digit recognition system
def handwritingClassTest(trainingDir='trainingDigits', testDir='testDigits',
                         k=3):
    """Classify every test digit with kNN and print the error statistics.

    Every file in ``trainingDir`` is loaded with ``img2vector`` into one
    training matrix; every file in ``testDir`` is then classified with
    ``classify0`` and compared with the label taken from its file name.

    Args:
        trainingDir: Directory with the training images; file names must
            start with ``<digit>_``.
        testDir: Directory with the test images, named the same way.
        k: Number of neighbours passed to ``classify0``.

    Prints one line per test file, then the total number of errors and the
    error rate (reported as 0 when ``testDir`` is empty).
    """
    trainingFileList = listdir(trainingDir)
    m = len(trainingFileList)
    trainingMat = zeros((m, 1024))
    hwLabels = []
    for i, fileNameStr in enumerate(trainingFileList):
        hwLabels.append(_digitLabel(fileNameStr))
        trainingMat[i, :] = img2vector('%s/%s' % (trainingDir, fileNameStr))

    testFileList = listdir(testDir)
    mTest = len(testFileList)
    errorCount = 0
    for fileNameStr in testFileList:
        classNumStr = _digitLabel(fileNameStr)
        vectorUnderTest = img2vector('%s/%s' % (testDir, fileNameStr))
        classifierResult = classify0(vectorUnderTest, trainingMat,
                                     hwLabels, k)
        # Single-argument print(...) is valid on both Python 2 and 3.
        print("the classifier came back with : %d, the real answer is : %d "
              % (classifierResult, classNumStr))
        if classifierResult != classNumStr:
            errorCount += 1

    print('the total number of errors is : %d' % errorCount)
    # Avoid ZeroDivisionError when the test directory has no files.
    errorRate = errorCount / float(mTest) if mTest else 0.0
    print('the total error rate is :%f' % errorRate)


###############################################################################
if __name__ == '__main__':
    # Quick experiments: column minima of random data, then normalisation of
    # a tiny matrix, then the full digit-recognition test.
    d = random.rand(5, 2)
    print(d.min(0))
    print(d)
    xx = [[1, 2], [2, 1]]
    xx = array(xx)
    print(xx)
    print(autoNorm(xx)[0])
    handwritingClassTest()
相关文章推荐
- Linux下system和exec函数族的区别
- css:outline
- 分辩零钱-20151108
- Win10 UWP xaml 延迟加载元素
- LightOj 1422
- [问题记录]js将事件写在函数之前解决IE8的兼容性问题
- 使用Python设置环境变量
- Win10 UWP xaml 延迟加载元素
- 递归实现 参数字符串中的字符反向排列
- jq+js 实现星星打分功能!
- jq+js 实现星星打分功能!
- 从头认识java-7.5 怎样通过继承扩展接口?
- OC数组冒泡排序
- wait_queue_head_t 使用
- 线段树2-Mayor's posters-POJ 2528
- Mongoose数据库学习总结
- 《leetCode》:Multiply Strings
- 小记2015-11-9
- NOJ 5538 c语言
- 零长度数组解析