您的位置：首页 > 理论基础 > 计算机网络

基于卷积神经网络的数据预处理以及模型参数保存

2015-06-23 21:37 806 查看

由于卷积神经网络主要用于图像的分类识别，所以本文中的数据预处理是对图像数据进行操作。之前的卷积神经网络是对手写数字数据做识别，所有的数字都被看成28×28像素的灰度图输入到网络中进行训练。所以，为了能直接使用网上deep learning教程中的卷积神经网络代码，这里也将所有的数据统一处理成28×28像素的灰度图。将图片作为输入，实际上是将图片对应的二维矩阵输入到卷积神经网络模型中进行训练。

1.基于opencv的将图像转换成二维数组的代码：

import cv
import cPickle
import os, sys
from stat import *

#import Image
import pylab
from PIL import Image

import numpy

PicPathNameList = []
PicWidthList = []
PicHeightList = []

def WalkTree(top, callback):
    """Recursively visit the directory tree rooted at ``top``.

    ``callback`` is called with the full path of every regular file found.
    Sub-directories are descended into; any other kind of entry is reported
    with a "Skipping ..." message and otherwise ignored.
    """
    for entry in os.listdir(top):
        pathname = os.path.join(top, entry)
        mode = os.stat(pathname).st_mode

        if S_ISDIR(mode):
            # Directory: recurse into it.
            WalkTree(pathname, callback)
            continue
        if S_ISREG(mode):
            # Regular file: hand it to the caller.
            callback(pathname)
            continue
        # Neither a directory nor a regular file.
        print('Skipping %s' % pathname)

def GetPicInfo(file):
    """Record the path and pixel size of ``file`` if PIL can open it.

    On success the path, width and height are appended to the module-level
    lists ``PicPathNameList``, ``PicWidthList`` and ``PicHeightList``, so the
    three lists stay index-aligned.  A file for which ``Image.open`` raises
    ``IOError`` is silently ignored.
    """
    # The lists are only mutated in place, never rebound, so no ``global``
    # declaration is needed.
    try:
        picture = Image.open(file)
    except IOError:
        return
    else:
        width, height = picture.size
        PicPathNameList.append(file)
        PicWidthList.append(width)
        PicHeightList.append(height)

def JReduce(image, m, n):
    """Resize ``image`` to 28x28 by averaging rectangular blocks of pixels.

    Args:
        image: legacy OpenCV (``cv``) image, indexed as ``image[row, col]``.
            Three channel values are read per pixel, so a 3-channel image
            is expected.
        m: vertical scale factor (output height / source height).
        n: horizontal scale factor (output width / source width).

    Returns:
        A new 28x28 image with the same depth and channel count as
        ``image``.
    """
    H = 28
    W = 28
    src_w, src_h = cv.GetSize(image)  # GetSize returns (width, height)
    iJReduce = cv.CreateImage((W, H), image.depth, image.nChannels)
    for i in range(H):
        # Source rows [x1, x2) contribute to output row i.  The window is
        # clamped to at least one row and to the image bounds: when the
        # source is smaller than 28 pixels the unclamped window is empty
        # and the average below would divide by zero.
        x1 = min(int(i / m), src_h - 1)
        x2 = min(max(int((i + 1) / m), x1 + 1), src_h)
        for j in range(W):
            # Source columns [y1, y2) contribute to output column j.
            y1 = min(int(j / n), src_w - 1)
            y2 = min(max(int((j + 1) / n), y1 + 1), src_w)

            totals = [0, 0, 0]
            for row in range(x1, x2):
                for col in range(y1, y2):
                    pixel = image[row, col]
                    totals[0] += pixel[0]
                    totals[1] += pixel[1]
                    totals[2] += pixel[2]

            num = (x2 - x1) * (y2 - y1)
            iJReduce[i, j] = (totals[0] / num, totals[1] / num, totals[2] / num)
    return iJReduce

if __name__ == '__main__':
    # Collect the path and size of every picture below the folder "test2".
    WalkTree('test2', GetPicInfo)
    print("PicPathNameList Begin")
    print(PicPathNameList)
    print(PicWidthList)
    print(PicHeightList)
    print("PicPathNameList End")

    n_pics = len(PicPathNameList)
    print(n_pics)

    xx = []
    for path, width, height in zip(PicPathNameList, PicWidthList, PicHeightList):
        image = cv.LoadImage(path, 1)  # flag 1: load as a 3-channel colour image
        iJReduce2 = JReduce(image, 28. / height, 28. / width)

        # Destination for the grey-scale conversion: same size as iJReduce2,
        # depth 8, one channel (3 channels would be colour, 1 is grey-scale).
        img2 = cv.CreateImage(cv.GetSize(iJReduce2), 8, 1)
        cv.CvtColor(iJReduce2, img2, cv.CV_BGR2GRAY)

        # NOTE: this overwrites the original picture file with its 28x28
        # grey-scale version, which is then re-read through PIL.
        cv.SaveImage(path, img2)
        cv.WaitKey(0)
        img = Image.open(path)

        # Scale the pixel values by 1/256.
        xx.append(numpy.asarray(img, dtype='float64') / 256.)

    # xx holds one 28x28 array per picture; zz stores each picture as one row
    # of 28*28 = 784 values.  The row count follows the number of pictures
    # found instead of being fixed to 3.
    xx = numpy.array(xx)
    zz = xx.reshape(n_pics, 28 * 28)

    print("++++++++++++++++++++++++++++++++++++++++++++++++++")
    print(numpy.size(zz))

    # The label of each picture is simply its index in the list.
    yy = list(range(n_pics))
    print(yy)

    # Close the files deterministically so the pickles are fully flushed.
    with open("testset1y.pkl", "w") as label_file:
        cPickle.dump(yy, label_file)
    with open("testset1x.pkl", "w") as data_file:
        cPickle.dump(zz, data_file)
    # To produce the data set read by the prediction script, write to
    # "testset2y.pkl" and "testset2x.pkl" instead.

上面xx为一个三维数组，数组大小为3*28*28，即在test2中只有3幅图片。而手写数字数据库mnist中每个数字图片是以一维数组的形式存放的，即一行表示一个数字样本，共有28*28=784个数。所以这里需要将三维数组xx变成二维数组zz，即变成3*784的形式。最后利用cPickle库将数组zz和标签yy分别存放在文件testset1x.pkl和testset1y.pkl中。

2.卷积神经网络

# Load the flattened images and their labels from the pickle files.
# NOTE: unpickling can execute arbitrary code; only load files you created.
with open("testset1x.pkl", "r") as data_file:
    xx1 = cPickle.load(data_file)
with open("testset1y.pkl", "r") as label_file:
    yy1 = cPickle.load(label_file)


def shared_dataset(data_x, data_y, borrow=True):
    """Wrap a data set in Theano shared variables.

    Args:
        data_x: the samples; converted to an array of dtype
            ``theano.config.floatX``.
        data_y: the labels; also stored as ``floatX``.
        borrow: forwarded to ``theano.shared``.

    Returns:
        ``(shared_x, labels)`` where ``labels`` is the shared label variable
        cast to ``int64``.
    """
    shared_x = theano.shared(
        numpy.asarray(data_x, dtype=theano.config.floatX),
        borrow=borrow)
    shared_y = theano.shared(
        numpy.asarray(data_y, dtype=theano.config.floatX),
        borrow=borrow)
    # The labels are stored as floats but handed out as integers.
    return shared_x, T.cast(shared_y, 'int64')


# The same data is used as training, validation and test set.
train_set_x, train_set_y = shared_dataset(xx1, yy1)
valid_set_x, valid_set_y = shared_dataset(xx1, yy1)
test_set_x, test_set_y = shared_dataset(xx1, yy1)

theano.shared函数的作用是将变量定义为共享变量（如上面的shared_x和shared_y），在程序中任意位置对该变量的修改都会改变它的值。如果我们只使用下面的代码：

# Counter-example: the unpickled objects are used directly instead of being
# wrapped in Theano shared variables.  According to the article, this makes
# the program fail once they are used as arguments of a Theano function.
with open("testset1x.pkl", "r") as data_file:
    xx1 = cPickle.load(data_file)
with open("testset1y.pkl", "r") as label_file:
    yy1 = cPickle.load(label_file)
train_set_x = xx1
train_set_y = yy1

程序会报错，因为这里的train_set_x和train_set_y将会作为theano函数的参数被用到，而theano规定：只要涉及到theano函数，其中的变量、参数均要求定义成theano变量的形式。而且在程序中，我们不能通过直接print一个theano变量来输出它的值，必须以函数的形式来输出变量的值。

下面是训练过程中的一段代码：每当验证误差取得新的最优值时，就在测试集上评估模型，并把训练好的参数保存到params.pkl中。

# Fragment of the training loop: runs whenever the validation loss reaches a
# new best value.
if this_validation_loss < best_validation_loss:
    # Extend the patience when the improvement beats the threshold.
    if this_validation_loss < best_validation_loss * improvement_threshold:
        patience = max(patience, iter * patience_increase)
    best_validation_loss = this_validation_loss
    best_iter = iter

    test_losses = [test_model(i) for i in xrange(n_test_batches)]
    #print test_losses

    # Save all trained parameters to params.pkl; the file is closed
    # explicitly so the pickle is completely written.
    with open("params.pkl", "w") as param_file:
        cPickle.dump(params, param_file)

    # NOTE(review): only the first element of the first batch's result is
    # used here - confirm this matches what test_model returns.
    test_score = numpy.mean(test_losses[0][0])
    # The format arguments must form one tuple; the opening parenthesis of
    # that tuple was missing.
    print(('     epoch %i, minibatch %i/%i, test erro of '
           'best model %f %%') %
          (epoch, minibatch_index + 1, n_train_batches,
           test_score * 100.))

3.用作预测分类的卷积神经网络模型

import os
import sys
import time

import numpy

import theano
import theano.tensor as T
from theano.tensor.signal import downsample
from theano.tensor.nnet import conv
from stat import *

import cPickle

def LeNetConvPoolLayer(W, b, x, filter_shape, image_shape, poolsize=(2, 2)):
    """Build the symbolic output of one convolution + max-pooling layer.

    Args:
        W: filter weights, passed to ``conv.conv2d`` as ``filters``.
        b: bias vector; it is broadcast along every axis except the second
            one of the pooled output.
        x: symbolic input of the convolution.
        filter_shape: 4-tuple describing ``W``; the callers in this file
            pass (number of filters, input feature maps, height, width).
        image_shape: 4-tuple describing ``x``; the callers in this file
            pass (batch size, input feature maps, height, width).
        poolsize: down-sampling factor handed to ``max_pool_2d``.

    Returns:
        ``tanh(max_pool(conv(x, W)) + b)`` as a symbolic expression.

    Raises:
        AssertionError: if ``image_shape[1] != filter_shape[1]``.
    """
    # The input must have as many feature maps as the filters expect.
    assert image_shape[1] == filter_shape[1]

    conv_out = conv.conv2d(
        input=x,
        filters=W,
        filter_shape=filter_shape,
        image_shape=image_shape
    )

    pooled_out = downsample.max_pool_2d(
        input=conv_out,
        ds=poolsize,
        ignore_border=True
    )

    # Reshape the bias to (1, n, 1, 1) so it is added to every position of
    # the matching feature map.
    return T.tanh(pooled_out + b.dimshuffle('x', 0, 'x', 'x'))

def evaluate_lenet5(nkerns=(20, 50), batch_size=3):
    """Classify the images of "testset2x.pkl" with previously saved weights.

    The parameters are read from "params.pkl", the images and labels from
    "testset2x.pkl" and "testset2y.pkl".  The network (two convolution +
    pooling layers, one tanh hidden layer, one softmax layer) is rebuilt
    from those parameters and the predicted labels of the first batch are
    printed.

    NOTE: unpickling can execute arbitrary code; only load files you
    created yourself.

    Args:
        nkerns: number of filters of the first and second convolution layer.
        batch_size: number of images per batch.

    Returns:
        None; the predictions are printed.
    """

    def HiddenLayer(x, W, b, activation=T.tanh):
        # Fully connected layer: activation(x . W + b).
        lin_output = T.dot(x, W) + b
        return lin_output if activation is None else activation(lin_output)

    def LogisticRegression(x, y, W, b):
        # Softmax classifier; returns (mean error rate, predicted labels).
        p_y_given_x = T.nnet.softmax(T.dot(x, W) + b)
        y_pred = T.argmax(p_y_given_x, axis=1)
        return (T.mean(T.neq(y_pred, y)), y_pred)

    def shared_dataset(data_x, data_y, borrow=True):
        # Store the data in shared variables; the labels are kept as floatX
        # and handed out cast to int64.
        shared_x = theano.shared(
            numpy.asarray(data_x, dtype=theano.config.floatX),
            borrow=borrow)
        shared_y = theano.shared(
            numpy.asarray(data_y, dtype=theano.config.floatX),
            borrow=borrow)
        return shared_x, T.cast(shared_y, 'int64')

    # Read the inputs, closing every file as soon as it has been loaded.
    with open("params.pkl", "r") as param_file:
        params = cPickle.load(param_file)
    with open("testset2x.pkl", "r") as data_file:
        xx1 = cPickle.load(data_file)
    with open("testset2y.pkl", "r") as label_file:
        yy1 = cPickle.load(label_file)

    test_set_x, test_set_y = shared_dataset(xx1, yy1)

    # Only a single batch is evaluated.
    n_test_batches = 1

    index = T.lscalar()  # index to a [mini]batch

    # T.matrix follows theano.config.floatX, the dtype the data set is
    # stored in, so the givens substitution below is type-compatible.
    x = T.matrix('x')    # the data is presented as rasterized images
    y = T.lvector('y')   # the labels are presented as a 1D integer vector

    print('... start to predict....')

    # Each row of x is one flattened 28x28 single-channel image.
    layer0_input = x.reshape((batch_size, 1, 28, 28))

    # The parameter list is indexed from the output layer backwards:
    # [0], [1] softmax layer, [2], [3] hidden layer, [4], [5] second
    # convolution layer, [6], [7] first convolution layer.
    layer0_output = LeNetConvPoolLayer(
        params[6],
        params[7],
        x=layer0_input,
        image_shape=(batch_size, 1, 28, 28),
        filter_shape=(nkerns[0], 1, 5, 5),
        poolsize=(2, 2)
    )

    # 5x5 filters on 28x28 images give 24x24 maps, pooled 2x2 down to 12x12.
    layer1_out = LeNetConvPoolLayer(
        params[4],
        params[5],
        x=layer0_output,
        image_shape=(batch_size, nkerns[0], 12, 12),
        filter_shape=(nkerns[1], nkerns[0], 5, 5),
        poolsize=(2, 2)
    )

    # Flatten every sample to one row for the fully connected layer.
    layer2_input = layer1_out.flatten(2)

    layer2_output = HiddenLayer(
        layer2_input,
        params[2],
        params[3],
        activation=T.tanh
    )

    # The error is expressed in terms of the symbolic batch labels ``y`` so
    # that the givens substitution below selects the labels of the batch.
    layer3_erros, layer3_out = LogisticRegression(
        layer2_output,
        y,
        params[0],
        params[1]
    )

    test_model = theano.function(
        [index],
        [layer3_erros, layer3_out],
        givens={
            x: test_set_x[index * batch_size: (index + 1) * batch_size],
            y: test_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )
    test_losses = [test_model(i) for i in xrange(n_test_batches)]

    # Each result is [error rate, predicted labels]; print the predictions
    # of the first batch.
    print(test_losses[0][1])

# Run the prediction with the default settings when executed as a script.
if __name__ == '__main__':
    evaluate_lenet5()

def experiment(state, channel):
    """Run the prediction through a ``(state, channel)`` style entry point.

    ``evaluate_lenet5`` in this file accepts only ``nkerns`` and
    ``batch_size``.  Forwarding ``state.learning_rate`` and
    ``dataset=state.dataset`` therefore fails with a TypeError, so the
    prediction is run with its defaults and both arguments are unused.

    NOTE(review): this looks like a leftover from the training script -
    confirm whether anything still calls it.
    """
    evaluate_lenet5()

内容来自用户分享和网络整理，不保证内容的准确性，如有侵权内容，可联系管理员处理

标签：

相关文章推荐

新的分享

章节导航