Theano(3) 通用RNN
2016-03-22 21:59
260 查看
# coding=utf-8
import os

import numpy as np
import theano
from theano import tensor as T
from collections import OrderedDict


class RNN(object):
    """Simple recurrent network (one hidden layer) built with Theano.

    The constructor builds the symbolic graph and compiles three callables:

    * ``classify(x)``  -- forward pass, returns the network output.
    * ``train(x, y)``  -- one gradient-descent step, returns the cost.
    * ``loss(x, y)``   -- cost only, parameters are left untouched.

    ``x`` is a 3-D tensor that is iterated along its second axis
    (``x.dimshuffle(1, 0, 2)`` is fed to ``theano.scan``), i.e. it is
    presumably laid out as (batch, time, ni) -- confirm against callers.
    """

    def __init__(self, ni, nh, nc, lr=0.05, batch_size=64, singleout=True,
                 hidden_activation=T.nnet.sigmoid,
                 output_activation=T.nnet.softmax, cost_function='nll'):
        """Build the model graph and compile the Theano functions.

        ni                :: dimension of the input layer
        nh                :: dimension of the hidden layer
        nc                :: dimension of the output layer (number of classes)
        lr                :: learning rate of the plain gradient-descent update
        batch_size        :: accepted for interface compatibility; it is not
                             used when building the graph
        singleout         :: True  -> only the last time step is the output
                                      and targets ``y`` are a matrix
                             False -> every time step is an output and
                                      targets ``y`` are a 3-D tensor
        hidden_activation :: e.g. T.nnet.sigmoid or T.tanh
        output_activation :: e.g. T.nnet.softmax
        cost_function     :: 'mse', 'cxe' or anything else for 'nll'
        """
        float_x = theano.config.floatX

        # parameters of the model
        self.ni = ni
        self.nh = nh
        self.nc = nc

        def init_weight(mx, nx):
            # Uniform values in [-0.2, 0.2); the shared variable must be
            # returned, otherwise every weight would be None.
            values = 0.2 * np.random.uniform(-1.0, 1.0, (mx, nx))
            return theano.shared(values.astype(float_x))

        def init_zeros(size):
            return theano.shared(np.zeros(size, dtype=float_x))

        self.Wx = init_weight(self.ni, self.nh)  # input  -> hidden (ni x nh)
        self.Wh = init_weight(self.nh, self.nh)  # hidden -> hidden (nh x nh)
        self.Wo = init_weight(self.nh, self.nc)  # hidden -> output (nh x nc)
        self.bh = init_zeros(nh)                 # hidden bias (nh)
        self.b = init_zeros(nc)                  # output bias (nc)
        self.h0 = init_zeros(nh)                 # initial hidden state (nh)

        # bundle
        self.params = [self.Wx, self.Wh, self.Wo, self.bh, self.b, self.h0]
        self.names = ['Wx', 'Wh', 'Wo', 'bh', 'b', 'h0']

        self.activation = output_activation
        self.hactivation = hidden_activation

        x = T.tensor3()
        # single output per sequence vs. one output per time step
        if singleout:
            y = T.matrix()
        else:
            y = T.tensor3()

        # one step of the recurrence
        def recurrence(x_t, h_tm1):
            # hidden state: f(x_t . Wx + h_{t-1} . Wh + bh)
            h_t = self.hactivation(
                T.dot(x_t, self.Wx) + T.dot(h_tm1, self.Wh) + self.bh)
            # output at time t, using the configured output activation
            s_t = self.activation(T.dot(h_t, self.Wo) + self.b)
            return [h_t, s_t]

        # Each step yields a (rows of x, nh) hidden matrix, so the learned
        # initial state vector is broadcast to that shape; scan requires the
        # initial state to have the same number of dimensions as the step
        # output. Symbolic shapes keep both axes non-broadcastable.
        h_init = T.alloc(self.h0, x.shape[0], self.h0.shape[0])

        [h, s], _ = theano.scan(fn=recurrence,
                                sequences=x.dimshuffle(1, 0, 2),
                                outputs_info=[h_init, None])

        if singleout:
            self.output = s[-1]
        else:
            # undo the axis swap applied to the scan input
            self.output = s.dimshuffle(1, 0, 2)

        # cost function
        if cost_function == 'mse':
            cost = T.mean((self.output - y) ** 2)
        elif cost_function == 'cxe':
            cost = T.mean(T.nnet.binary_crossentropy(self.output, y))
        else:
            # NOTE(review): this expression is the element-wise binary
            # cross-entropy, i.e. the same quantity as 'cxe'. Kept exactly as
            # written in the original; confirm whether a categorical negative
            # log-likelihood was intended.
            cost = -T.mean(y * T.log(self.output)
                           + (1. - y) * T.log(1. - self.output))

        # learning rate as a shared scalar in floatX
        lr = theano.shared(np.asarray(lr, dtype=float_x))

        # gradients and plain gradient-descent updates
        gradients = T.grad(cost, self.params)
        updates = OrderedDict(
            (p, p - lr * g) for p, g in zip(self.params, gradients))

        # theano functions
        self.classify = theano.function(inputs=[x], outputs=self.output)
        # training step
        self.train = theano.function(
            inputs=[x, y], outputs=cost, updates=updates)
        # loss only
        self.loss = theano.function(inputs=[x, y], outputs=cost)

    def save(self, folder):
        """Write every parameter to ``<folder>/<name>.npy``.

        The folder is created when it does not exist yet.
        """
        if not os.path.isdir(folder):
            os.makedirs(folder)
        for param, name in zip(self.params, self.names):
            np.save(os.path.join(folder, name + '.npy'), param.get_value())
相关文章推荐
- android学习必备java基础知识——内部类
- iOS开发调试技巧
- 什么才算是真正的编程能力?
- 泛型(一)
- zoj 3430 Detect the Virus
- word count
- CAS实现SSO单点登录原理
- 基于webrtc的媒体库测试代码以及接口介绍
- 接口与抽象类的总结
- 【codevs1477】【BZOJ2733】永无乡,Splay+启发式合并
- 【Linux多进程通信】共享内存
- 【poj 1426】Find The Multiple 题意&题解&代码(C++)
- blas和lapack的安装
- mybatis, maven, spring-mvc 与 spring 的整合
- [leetcode 24]Swap Nodes in Pairs-----成对翻转链表中的节点
- 软件第四次作业
- 图的深度优先遍历
- matlab 使用的一点观察
- spring 注入学习
- jstl 写的分页