您的位置：首页 > 其它
Theano(3) 通用RNN

2016-03-22 21:59 260 查看
#coding=utf-8
import os
from collections import OrderedDict

import numpy as np
import theano
from theano import tensor as T

class RNN(object):
    '''Single-layer recurrent network (Elman style) compiled with Theano.

    The constructor builds the symbolic graph and compiles three callables:

    * ``classify(x)`` -- forward pass, returns the network output
    * ``train(x, y)`` -- one plain SGD step, returns the cost before the step
    * ``loss(x, y)``  -- cost only, no parameter update

    ``x`` is a 3-D tensor laid out as (batch, time, ni); it is transposed to
    time-major internally because ``theano.scan`` iterates over the first axis.
    '''

    def __init__(self, ni, nh, nc, lr=0.05, batch_size=64, singleout=True,
                 hidden_activation=T.nnet.sigmoid,
                 output_activation=T.nnet.softmax, cost_function='nll'):
        '''
        ni  :: dimension of the input layer
        nh  :: dimension of the hidden layer
        nc  :: dimension of the output layer (number of classes)
        lr  :: SGD learning rate
        batch_size :: accepted for interface compatibility; not used, the
                      batch dimension is taken from ``x`` at run time
        singleout  :: True  -> output only the last time step, y is a matrix
                      False -> output every time step, y is a 3-D tensor
        hidden_activation :: e.g. T.nnet.sigmoid or T.tanh
        output_activation :: e.g. T.nnet.softmax
        cost_function :: 'mse' (mean squared error), 'cxe' (mean of
                         T.nnet.binary_crossentropy); any other value,
                         including the default 'nll', selects the explicit
                         -mean(y*log(o) + (1-y)*log(1-o)) expression
        '''
        floatX = theano.config.floatX

        # parameters of the model
        self.ni = ni
        self.nh = nh
        self.nc = nc

        def init_weight(mx, nx):
            # Uniform values scaled into [-0.2, 0.2), wrapped as a shared variable.
            values = 0.2 * np.random.uniform(-1.0, 1.0, (mx, nx))
            return theano.shared(values.astype(floatX))

        self.Wx = init_weight(self.ni, self.nh)  # input -> hidden weight (ni*nh)
        self.Win = self.Wx  # alias kept: the weights were originally stored under this name
        self.Wh = init_weight(self.nh, self.nh)  # hidden -> hidden weight (nh*nh)
        self.Wo = init_weight(self.nh, self.nc)  # hidden -> output weight (nh*nc)
        self.bh = theano.shared(np.zeros(nh, dtype=floatX))  # hidden bias (nh)
        self.b = theano.shared(np.zeros(nc, dtype=floatX))  # output bias (nc)
        self.h0 = theano.shared(np.zeros(nh, dtype=floatX))  # initial hidden state (nh)

        # bundle (order of params and names must stay in sync for save())
        self.params = [self.Wx, self.Wh, self.Wo, self.bh, self.b, self.h0]
        self.names = ['Wx', 'Wh', 'Wo', 'bh', 'b', 'h0']
        self.activation = output_activation
        self.hactivation = hidden_activation

        x = T.tensor3()
        # target: one row per sample (single output) or one per sample and step
        if singleout:
            y = T.matrix()
        else:
            y = T.tensor3()

        # one step of the recurrence
        def recurrence(x_t, h_tm1):
            # h_t = f(x_t*Wx + h_{t-1}*Wh + bh)
            h_t = self.hactivation(
                T.dot(x_t, self.Wx) + T.dot(h_tm1, self.Wh) + self.bh)
            # s_t = g(h_t*Wo + b)
            s_t = self.activation(T.dot(h_t, self.Wo) + self.b)
            return [h_t, s_t]

        # scan requires the initial state to have the same number of
        # dimensions as the step output, so the learned h0 vector is
        # broadcast to one row per sample in the batch.
        h0_batch = T.alloc(self.h0, x.shape[0], self.nh)

        (_hidden, s), _scan_updates = theano.scan(
            fn=recurrence,
            sequences=x.dimshuffle(1, 0, 2),  # (batch, time, ni) -> (time, batch, ni)
            outputs_info=[h0_batch, None])

        if singleout:
            self.output = s[-1]
        else:
            self.output = s.dimshuffle(1, 0, 2)  # back to (batch, time, nc)
        self.ouput = self.output  # alias kept for the original (misspelled) attribute

        # cost function
        if cost_function == 'mse':
            cost = T.mean((self.output - y) ** 2)
        elif cost_function == 'cxe':
            cost = T.mean(T.nnet.binary_crossentropy(self.output, y))
        else:
            cost = -T.mean(y * T.log(self.output)
                           + (1. - y) * T.log(1. - self.output))

        # learning rate, stored as a shared scalar in floatX so the update
        # expressions keep the parameters' dtype
        lr = theano.shared(np.asarray(lr, dtype=floatX))

        # gradients and plain SGD updates
        gradients = T.grad(cost, self.params)
        updates = OrderedDict(
            (p, p - lr * g) for p, g in zip(self.params, gradients))

        # theano functions
        self.classify = theano.function(inputs=[x], outputs=self.output)
        # training step
        self.train = theano.function(
            inputs=[x, y], outputs=cost, updates=updates)
        # loss
        self.loss = theano.function(inputs=[x, y], outputs=cost)

    def save(self, folder):
        '''Write every parameter to ``<folder>/<name>.npy`` using numpy.save.'''
        for param, name in zip(self.params, self.names):
            np.save(os.path.join(folder, name + '.npy'), param.get_value())
内容来自用户分享和网络整理，不保证内容的准确性，如有侵权内容，可联系管理员处理
标签：
相关文章推荐
新的分享
章节导航