TensorFlow: Deep Multi-Layer Perceptron with Xavier Initializer
2016-06-30 19:08
375 查看
Xavier initializer
# Xavier initializer reference:
# https://github.com/google/prettytensor/blob/a69f13998258165d6682a47a931108d974bab05e/prettytensor/layers.py
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from tensorflow.examples.tutorials.mnist import input_data

# Network architecture: 784 inputs -> 4 hidden layers of 256 units -> 10 outputs.
hidden_layer_size = [256] * 4
input_layer_size = 784
output_layer_size = 10

mnist = input_data.read_data_sets('data/', one_hot=True)
train_img = mnist.train.images
train_lbl = mnist.train.labels
test_img = mnist.test.images
test_lbl = mnist.test.labels


def xavier_init(n_inputs, n_outputs, uniform=True):
    """Return a TensorFlow initializer scaled by fan-in plus fan-out.

    Args:
        n_inputs: number of input units of the layer.
        n_outputs: number of output units of the layer.
        uniform: when true, return a uniform initializer over [-r, r] with
            r = sqrt(6 / (n_inputs + n_outputs)); otherwise return a
            truncated-normal initializer with
            stddev = sqrt(3 / (n_inputs + n_outputs)).

    Returns:
        A `tf.random_uniform_initializer` or
        `tf.truncated_normal_initializer` instance.
    """
    # The scale is a plain constant, so compute it with NumPy instead of
    # adding a tf.sqrt op to the graph.
    if uniform:
        init_range = float(np.sqrt(6.0 / (n_inputs + n_outputs)))
        return tf.random_uniform_initializer(-init_range, init_range)
    # Alternative scale tried by the author: sqrt(1.0 / (2 * n_inputs)).
    stddev = float(np.sqrt(3.0 / (n_inputs + n_outputs)))
    return tf.truncated_normal_initializer(stddev=stddev)


def initial_weights(ils, hls, ols):
    """Create the weight and bias variables for every layer of the MLP.

    Args:
        ils: size of the input layer.
        hls: sequence of hidden layer sizes, in order.
        ols: size of the output layer.

    Returns:
        A `(weights, bias)` pair of dicts keyed by layer index
        0..len(hls). `weights[i]` is a `[fan_in, fan_out]` variable and
        `bias[i]` is a `[fan_out]` variable.
    """
    sizes = [ils] + list(hls) + [ols]
    weights, bias = {}, {}
    for i, (fan_in, fan_out) in enumerate(zip(sizes[:-1], sizes[1:])):
        print('%s %s' % (fan_in, fan_out))
        # Alternative labelled "Glorot et al. 2010" by the author:
        #   stddev = np.sqrt(1.0 / (1 * fan_in))
        # The author labelled the value below "He et al. 2015 for relu".
        # NOTE(review): He initialisation is usually stated as
        # sqrt(2 / fan_in), whereas this is sqrt(1 / (2 * fan_in)). The value
        # is left unchanged; confirm the intended scale.
        stddev = np.sqrt(1.0 / (2 * fan_in))
        weights[i] = tf.Variable(
            tf.random_normal([fan_in, fan_out], stddev=stddev))
        # Alternative using xavier_init:
        #   weights[i] = tf.get_variable(
        #       name=str(i), shape=[fan_in, fan_out], dtype=tf.float32,
        #       initializer=xavier_init(fan_in, fan_out, uniform=False))
        bias[i] = tf.Variable(tf.random_normal([fan_out]))
    return weights, bias


def mlp(_x, _w, _b, _keep_prob):
    """Build the forward pass of the multi-layer perceptron.

    Every layer except the last is `dropout(relu(x * W + b))`; the last
    layer is the plain affine map `x * W + b`, so the result is unnormalised
    logits. This also holds when there is only a single layer.

    Args:
        _x: input tensor fed to the first layer.
        _w: dict of weight variables keyed by layer index 0..n-1.
        _b: dict of bias variables keyed by layer index 0..n-1.
        _keep_prob: keep probability passed to `tf.nn.dropout`.

    Returns:
        The output tensor of the last layer.
    """
    last = len(_w) - 1
    activation = _x
    for i in range(last):
        hidden = tf.nn.relu(tf.add(tf.matmul(activation, _w[i]), _b[i]))
        activation = tf.nn.dropout(hidden, _keep_prob)
    return tf.add(tf.matmul(activation, _w[last]), _b[last])


weights, bias = initial_weights(
    input_layer_size, hidden_layer_size, output_layer_size)
x = tf.placeholder(tf.float32, [None, input_layer_size], name='input')
y = tf.placeholder(tf.float32, [None, output_layer_size], name='output')
dropout_keep_prob = tf.placeholder(tf.float32)

score = mlp(x, weights, bias, dropout_keep_prob)
prob = tf.nn.softmax(score)
# Keyword arguments keep this call valid on TensorFlow releases that reject
# positional arguments here.
loss = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(logits=score, labels=y))

lr = 0.001
optimizer = tf.train.AdamOptimizer(lr).minimize(loss)
# optimizer = tf.train.GradientDescentOptimizer(lr).minimize(loss)
pred = tf.equal(tf.argmax(prob, 1), tf.argmax(y, 1))
acc = tf.reduce_mean(tf.cast(pred, tf.float32))
init = tf.initialize_all_variables()

epoch = 100
batch_size = 200
snapshot = 5  # print progress every `snapshot` epochs

# A single session, closed automatically when the block exits.
with tf.Session() as sess:
    sess.run(init)
    loss_cache = []
    acc_cache = []
    # Floor division keeps the batch count an integer on Python 2 and 3.
    num_batch = mnist.train.num_examples // batch_size
    for ep in range(epoch):
        avg_loss, avg_acc = 0, 0
        for nb in range(num_batch):
            batch_x, batch_y = mnist.train.next_batch(batch_size)
            _, batch_acc, batch_loss = sess.run(
                [optimizer, acc, loss],
                feed_dict={x: batch_x, y: batch_y, dropout_keep_prob: 0.7})
            avg_loss += batch_loss / num_batch
            avg_acc += batch_acc / num_batch
        loss_cache.append(avg_loss)
        acc_cache.append(avg_acc)
        if ep % snapshot == 0:
            print('Epoch: %d, loss: %.4f, acc: %.4f'
                  % (ep, avg_loss, acc_cache[-1]))
    # Dropout is disabled (keep probability 1.0) for evaluation.
    test_acc = acc.eval(
        {x: test_img, y: test_lbl, dropout_keep_prob: 1.0})
    print('test accuracy: %s' % test_acc)

plt.figure(1)
plt.plot(range(len(loss_cache)), loss_cache, 'b-', label='loss')
plt.legend(loc='upper right')
plt.figure(2)
plt.plot(range(len(acc_cache)), acc_cache, 'o-', label='acc')
plt.legend(loc='lower right')
plt.show()

# Output recorded by the author:
# Epoch: 0, loss: 0.7072, acc: 0.7610
# Epoch: 5, loss: 0.1153, acc: 0.9661
# Epoch: 10, loss: 0.0751, acc: 0.9776
# Epoch: 15, loss: 0.0595, acc: 0.9819
# Epoch: 20, loss: 0.0473, acc: 0.9849
# Epoch: 25, loss: 0.0410, acc: 0.9876
# Epoch: 30, loss: 0.0394, acc: 0.9881
# Epoch: 35, loss: 0.0342, acc: 0.9896
# Epoch: 40, loss: 0.0303, acc: 0.9904
# Epoch: 45, loss: 0.0284, acc: 0.9910
# Epoch: 50, loss: 0.0258, acc: 0.9916
# Epoch: 55, loss: 0.0292, acc: 0.9911
# Epoch: 60, loss: 0.0238, acc: 0.9928
# Epoch: 65, loss: 0.0259, acc: 0.9924
# Epoch: 70, loss: 0.0223, acc: 0.9931
# Epoch: 75, loss: 0.0212, acc: 0.9937
# Epoch: 80, loss: 0.0225, acc: 0.9937
# Epoch: 85, loss: 0.0200, acc: 0.9939
# Epoch: 90, loss: 0.0208, acc: 0.9940
# Epoch: 95, loss: 0.0206, acc: 0.9936
# test accuracy: 0.9823
相关文章推荐
- 视频播放的基本原理
- 【杭电oj】2081 - 手机短号(水)
- 推荐 IIS7.0下ThinkPHP提示“缓存文件写入失败!” 需要设置user的权限即可 Home/Runtime/Cache/
- 深入研究Java类加载机制、new以及 Class.getResource和ClassLoader.getResource区别
- 发布一个基于 Reactor 模式的 C++ 网络库
- linux 安装supervise服务
- 轮滑加小圆点的适配器
- android 百度地图系列之结合方向传感器的地图定位
- 项目实践:对候选人得票的统计程序
- 网络编程(陈硕)
- 项目实践:学生成绩管理
- nodejs启动webserver服务
- linux下ipv6配置及ipv6编程的connect问题
- Delphi FMX Grid列头样式设定
- Problem E: Satellite Photographs
- IIS10搭建FTP服务
- 快速排序
- css属性列表 和 属性值含义
- Java String中的indexof 和 substring 用法
- 怎样解决使用feof()函数时出现的问题?