您的位置:首页 > 其它

CIFAR-10 数据预处理与 Nearest Neighbor 算法

2016-11-05 22:05 246 查看
*由于5万训练集以及1万测试集较大,运行程序时间较长

因此我们抽取了1/10的训练集

以及1/10的测试集

方便初学者理解*

更改后的代码

#coding=utf-8
#author='HL'
'''
利用pickle模块存储对象
dump类似于write
load类似于read
完成对象的上传以及读取
'''

import cPickle as pickle
import numpy as np

class NearestNeighbor(object):
    """Nearest-neighbour classifier based on the L1 (Manhattan) distance.

    ``train`` only memorises the data. ``predict`` compares each query row
    with every stored row and copies the label of the closest one.
    """

    def __init__(self):
        pass

    def train(self, x, y):
        """Remember the training rows ``x`` and their labels ``y``.

        x -- 2-D array, one sample per row.
        y -- sequence of labels; ``y[k]`` is the label of ``x[k]``.
        """
        self.x_Tr = x
        self.y_Tr = y

    def predict(self, x):
        """Return the predicted label for every row of ``x``.

        x -- 2-D array with as many columns as the training data.

        Returns a 1-D array with ``x.shape[0]`` entries whose dtype is the
        dtype of the training labels.
        """
        labels = np.asarray(self.y_Tr)
        # Use the labels' own dtype; passing type(self.y_Tr) (i.e. ``list``)
        # as a dtype silently produced an object array.
        y_Pre = np.zeros(x.shape[0], dtype=labels.dtype)

        # Unsigned integers (e.g. uint8 image bytes) wrap around on
        # subtraction, which corrupts the distances.  Promote once to a
        # signed type wide enough for the differences; floats stay floats.
        work_dtype = np.result_type(
            np.asarray(self.x_Tr).dtype, x.dtype, np.int16)
        train_rows = np.asarray(self.x_Tr, dtype=work_dtype)
        queries = np.asarray(x, dtype=work_dtype)

        for i in range(queries.shape[0]):
            # L1 distance from query i to every training row.
            distance = np.sum(np.abs(train_rows - queries[i]), axis=1)
            min_index = np.argmin(distance)
            y_Pre[i] = labels[min_index]
        # NOTE(review): the listing prints the predictions here; whether it
        # was meant to run once or per iteration should be confirmed.
        print(y_Pre)
        return y_Pre

def un_P(path):
    """Load and return the object pickled in the file at ``path``.

    path -- path of a pickle file; it is opened in binary mode.

    Returns whatever object ``pickle.load`` reconstructs from the file.
    """
    # SECURITY: unpickling can execute arbitrary code, so only call this on
    # files from a trusted source.
    # ``with`` closes the handle even when unpickling raises, and ``open``
    # replaces the Python-2-only ``file`` builtin.
    with open(path, 'rb') as f:
        return pickle.load(f)

# Collect a 1-in-10 subsample of the five training batches.
data_Train = []
labels_Train = []
path = "A:/python_test/cifar-10-batches-py/"
for i in range(1, 6):
    now_Path = path + 'data_batch_' + str(i)
    dict1 = un_P(now_Path)
    # Keep samples 0, 10, 20, ... together with their labels.
    data_Train.extend(dict1['data'][::10])
    labels_Train.extend(dict1['labels'][::10])

# Load the test batch and keep only its first 1000 samples and labels.
data_Test, labels_Test = [], []
dict1 = un_P(path + 'test_batch')
data_Test.extend(dict1['data'][:1000])
labels_Test.extend(dict1['labels'][:1000])

# Convert the collected rows and labels to arrays.
data_Tr = np.asarray(data_Train)
data_Te = np.asarray(data_Test)
labels_Tr = np.asarray(labels_Train)
labels_Te = np.asarray(labels_Test)
# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(data_Tr.shape)
print(data_Te.shape)
print(labels_Tr.shape)
print(labels_Te.shape)

zz = NearestNeighbor()
# Train on the label array built above rather than the raw Python list.
zz.train(data_Tr, labels_Tr)
out = zz.predict(data_Te)
# Fraction of test samples whose predicted label matches the true label.
print(np.mean(out == labels_Te))


程序运行结果展示



原始代码

#coding=utf-8
#author='HL'
'''
利用pickle模块存储对象
dump类似于write
load类似于read
完成对象的上传以及读取
'''

import cPickle as pickle
import numpy as np

class NearestNeighbor(object):
    """Minimal nearest-neighbour classifier (written as a practice exercise).

    The algorithm compares a query with every stored training row, finds
    the row at the smallest L1 distance and returns that row's label.
    """

    def __init__(self):
        pass

    def train(self, x, y):
        """Store the training rows ``x`` and the matching labels ``y``.

        x -- 2-D array, one sample per row.
        y -- sequence of labels aligned with the rows of ``x``.
        """
        self.x_Tr = x
        self.y_Tr = y

    def predict(self, x):
        """Predict a label for each row of ``x``.

        x -- 2-D array with the same number of columns as the training rows.

        Returns a 1-D array of ``x.shape[0]`` labels, using the dtype of the
        training labels.
        """
        labels = np.asarray(self.y_Tr)
        # type(self.y_Tr) is a Python class, not a dtype; as a dtype it gave
        # an object array.  Use the real label dtype instead.
        y_Pre = np.zeros(x.shape[0], dtype=labels.dtype)

        # Subtracting unsigned integers (e.g. uint8 pixel values) wraps
        # around instead of going negative, so promote both operands to a
        # signed/wider type once, before the loop.
        work_dtype = np.result_type(
            np.asarray(self.x_Tr).dtype, x.dtype, np.int16)
        train_rows = np.asarray(self.x_Tr, dtype=work_dtype)
        queries = np.asarray(x, dtype=work_dtype)

        for i in range(queries.shape[0]):
            # Sum of absolute differences against every training row.
            distance = np.sum(np.abs(train_rows - queries[i]), axis=1)
            min_index = np.argmin(distance)
            y_Pre[i] = labels[min_index]
        return y_Pre

def un_P(path):
    """Read one pickled object from the file at ``path`` and return it.

    path -- path of the pickle file, opened in binary mode.

    Returns the object produced by ``pickle.load``.
    """
    # SECURITY: pickle data can run arbitrary code while loading; use this
    # only on trusted files.
    # The context manager guarantees the file is closed even if loading
    # fails; ``open`` is used because ``file`` exists only on Python 2.
    with open(path, 'rb') as f:
        return pickle.load(f)

# Gather every sample and label from the five training batches.
data_Train = []
labels_Train = []
path = "A:/python_test/cifar-10-batches-py/"
for i in range(1, 6):
    now_Path = path + 'data_batch_' + str(i)
    dict1 = un_P(now_Path)
    data_Train.extend(dict1['data'])
    labels_Train.extend(dict1['labels'])

# Read the whole test batch into plain Python lists.
data_Test = []
labels_Test = []
dict1 = un_P(path + 'test_batch')
data_Test += list(dict1['data'])
labels_Test += list(dict1['labels'])

# Convert the collected rows and labels to arrays.
data_Tr = np.asarray(data_Train)
data_Te = np.asarray(data_Test)
labels_Tr = np.asarray(labels_Train)
labels_Te = np.asarray(labels_Test)

# Show each array followed by its shape.  Single-argument print(...)
# behaves the same on Python 2 and Python 3.
print(data_Tr)
print(data_Tr.shape)
print(data_Te)
print(data_Te.shape)
print(labels_Tr)
print(labels_Tr.shape)
print(labels_Te)
print(labels_Te.shape)

zz = NearestNeighbor()
# Train on the label array built above rather than the raw Python list.
zz.train(data_Tr, labels_Tr)
out = zz.predict(data_Te)
# Fraction of test samples whose predicted label matches the true label.
print(np.mean(out == labels_Te))
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息