TensorLayer Learning Log 12_chapter4_4.6.1
2018-08-19 19:12
Section 4.6.1. Here I first ran it using the batch-norm version:
[code]
network, cost, _ = model_batch_norm(x, y_, reuse=False, is_train=True)
_, cost_test, acc = model_batch_norm(x, y_, reuse=True, is_train=False)
[/code]
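The two calls share one set of weights: the first creates all variables under the "model" scope, and the second, with reuse=True, builds a parallel inference graph over those same variables, so evaluation always sees the weights currently being trained. A minimal sketch of this TF1 pattern (the build() function here is my own stand-in, not from the book):

[code]
import tensorflow as tf

# Hypothetical stand-in for model_batch_norm: same scope, same reuse pattern.
def build(x, reuse):
    with tf.variable_scope("model", reuse=reuse):
        # created on the first call, looked up again when reuse=True
        w = tf.get_variable("w", shape=[3, 2])
        return tf.matmul(x, w)

x = tf.placeholder(tf.float32, [None, 3])
y_train = build(x, reuse=False)  # creates model/w
y_test = build(x, reuse=True)    # new ops over the SAME model/w
[/code]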
The full code is as follows:
[code]
import time

import numpy as np
import tensorflow as tf
import tensorlayer as tl
from tensorlayer.layers import *

sess = tf.InteractiveSession()

X_train, y_train, X_test, y_test = tl.files.load_cifar10_dataset(
    shape=(-1, 32, 32, 3), plotable=False)


def model(x, y_, reuse):
    W_init = tf.truncated_normal_initializer(stddev=5e-2)
    W_init2 = tf.truncated_normal_initializer(stddev=0.04)
    b_init2 = tf.constant_initializer(value=0.1)
    with tf.variable_scope("model", reuse=reuse):
        tl.layers.set_name_reuse(reuse)  # not in the GitHub example
        net = InputLayer(x, name='input')
        net = Conv2d(net, n_filter=64, filter_size=(5, 5), strides=(1, 1),
                     act=tf.nn.relu, padding='SAME', W_init=W_init, name='cnn1')
        # net = Conv2dLayer(net, act=tf.nn.relu, shape=[5, 5, 3, 64],
        #                   strides=[1, 1, 1, 1], padding='SAME',
        #                   W_init=W_init, name='cnn1')
        # output: (batch_size, 24, 24, 64)
        net = MaxPool2d(net, filter_size=(3, 3), strides=(2, 2),
                        padding='SAME', name='pool1')
        # net = PoolLayer(net, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1],
        #                 padding='SAME', pool=tf.nn.max_pool, name='pool1')
        # output: (batch_size, 12, 12, 64)
        net = LocalResponseNormLayer(net, depth_radius=4, bias=1.0,
                                     alpha=0.001 / 9.0, beta=0.75, name='norm1')
        # net.outputs = tf.nn.lrn(net.outputs, 4, bias=1.0,
        #                         alpha=0.001 / 9.0, beta=0.75, name='norm1')
        net = Conv2d(net, n_filter=64, filter_size=(5, 5), strides=(1, 1),
                     act=tf.nn.relu, padding='SAME', W_init=W_init, name='cnn2')
        # output: (batch_size, 12, 12, 64)
        net = LocalResponseNormLayer(net, depth_radius=4, bias=1.0,
                                     alpha=0.001 / 9.0, beta=0.75, name='norm2')
        net = MaxPool2d(net, filter_size=(3, 3), strides=(2, 2),
                        padding='SAME', name='pool2')
        # output: (batch_size, 6, 6, 64)
        net = FlattenLayer(net, name='flatten')  # output: (batch_size, 2304)
        net = DenseLayer(net, n_units=384, act=tf.nn.relu,
                         W_init=W_init2, b_init=b_init2, name='d1relu')  # (batch_size, 384)
        net = DenseLayer(net, n_units=192, act=tf.nn.relu,
                         W_init=W_init2, b_init=b_init2, name='d2relu')  # (batch_size, 192)
        net = DenseLayer(net, n_units=10, act=tf.identity,
                         W_init=tf.truncated_normal_initializer(stddev=1 / 192.0),
                         name='output')  # (batch_size, 10)
        y = net.outputs

        ce = tl.cost.cross_entropy(y, y_, name='cost')
        # L2 for the MLP; without this, the accuracy will be reduced by 15%.
        L2 = 0
        for p in tl.layers.get_variables_with_name('relu/W', True, True):
            L2 += tf.contrib.layers.l2_regularizer(0.004)(p)
        cost = ce + L2

        correct_prediction = tf.equal(tf.argmax(y, 1), y_)
        acc = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
        return net, cost, acc


def model_batch_norm(x, y_, reuse, is_train):
    W_init = tf.truncated_normal_initializer(stddev=5e-2)
    W_init2 = tf.truncated_normal_initializer(stddev=0.04)
    b_init2 = tf.constant_initializer(value=0.1)
    with tf.variable_scope("model", reuse=reuse):
        tl.layers.set_name_reuse(reuse)  # not in the GitHub example
        net = InputLayer(x, name='input')
        net = Conv2d(net, 64, (5, 5), (1, 1), padding='SAME',
                     W_init=W_init, b_init=None, name='cnn1')
        # output: (batch_size, 24, 24, 64)
        net = BatchNormLayer(net, is_train, act=tf.nn.relu, name='batch1')
        net = MaxPool2d(net, (3, 3), (2, 2), padding='SAME', name='pool1')
        # output: (batch_size, 12, 12, 64)
        net = Conv2d(net, 64, (5, 5), (1, 1), padding='SAME',
                     W_init=W_init, b_init=None, name='cnn2')
        # output: (batch_size, 12, 12, 64)
        net = BatchNormLayer(net, is_train, act=tf.nn.relu, name='batch2')
        net = MaxPool2d(net, (3, 3), (2, 2), padding='SAME', name='pool2')
        # output: (batch_size, 6, 6, 64)
        net = FlattenLayer(net, name='flatten')  # output: (batch_size, 2304)
        net = DenseLayer(net, n_units=384, act=tf.nn.relu,
                         W_init=W_init2, b_init=b_init2, name='d1relu')  # (batch_size, 384)
        net = DenseLayer(net, n_units=192, act=tf.nn.relu,
                         W_init=W_init2, b_init=b_init2, name='d2relu')  # (batch_size, 192)
        net = DenseLayer(net, n_units=10, act=tf.identity,
                         W_init=tf.truncated_normal_initializer(stddev=1 / 192.0),
                         name='output')  # (batch_size, 10)
        y = net.outputs

        ce = tl.cost.cross_entropy(y, y_, name='cost')
        L2 = 0
        for p in tl.layers.get_variables_with_name('relu/W', train_only=True, printable=True):
            L2 += tf.contrib.layers.l2_regularizer(0.004)(p)
        cost = ce + L2

        correct_prediction = tf.equal(tf.argmax(y, 1), y_)
        acc = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
        return net, cost, acc


def distort_fn(x, is_train=False):
    x = tl.prepro.crop(x, 24, 24, is_random=is_train)
    if is_train:
        x = tl.prepro.flip_axis(x, axis=1, is_random=True)
        x = tl.prepro.brightness(x, gamma=0.1, gain=1, is_random=True)
    x = (x - np.mean(x)) / max(np.std(x), 1e-5)  # avoid dividing by 0
    return x


x = tf.placeholder(tf.float32, shape=[None, 24, 24, 3], name='x')
y_ = tf.placeholder(tf.int64, shape=[None, ], name='y_')

# The first call builds the training graph and creates the variables; the
# second reuses those variables for evaluation (is_train=False switches the
# batch-norm layers to their moving statistics).
network, cost, _ = model_batch_norm(x, y_, reuse=False, is_train=True)
_, cost_test, acc = model_batch_norm(x, y_, reuse=True, is_train=False)

print('~~~~~~~~~~~~Training~~~~~~~~~~~~~~')
# n_epoch = 50000
n_epoch = 5
learning_rate = 0.0001
print_freq = 1
batch_size = 128

train_params = network.all_params
train_op = tf.train.AdamOptimizer(learning_rate, beta1=0.9, beta2=0.999,
                                  epsilon=1e-08, use_locking=False
                                  ).minimize(cost, var_list=train_params)

tl.layers.initialize_global_variables(sess)

print('~~~~~~~Parameters~~~~~~~~~~')
network.print_params()
network.print_layers()
print(' learning_rate: %f' % learning_rate)
print(' batch_size: %d' % batch_size)

for epoch in range(n_epoch):
    start_time = time.time()
    for X_train_a, y_train_a in tl.iterate.minibatches(X_train, y_train, batch_size, shuffle=True):
        X_train_a = tl.prepro.threading_data(X_train_a, fn=distort_fn, is_train=True)
        sess.run(train_op, feed_dict={x: X_train_a, y_: y_train_a})

    if epoch + 1 == 1 or (epoch + 1) % print_freq == 0:
        print("Epoch %d of %d took %fs" % (epoch + 1, n_epoch, time.time() - start_time))
        test_loss, test_acc, n_batch = 0, 0, 0
        for X_test_a, y_test_a in tl.iterate.minibatches(X_test, y_test, batch_size, shuffle=True):
            X_test_a = tl.prepro.threading_data(X_test_a, fn=distort_fn, is_train=False)
            err, ac = sess.run([cost_test, acc], feed_dict={x: X_test_a, y_: y_test_a})
            test_loss += err
            test_acc += ac
            n_batch += 1
        print(" test loss: %f" % (test_loss / n_batch))
        print(" test acc: %f" % (test_acc / n_batch))
[/code]
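One detail worth noting in distort_fn: the final step is plain per-image standardization, and the max(np.std(x), 1e-5) guard keeps a near-constant crop from producing huge values. A quick numpy sanity check of what it does (the random image here is just a stand-in for one 24x24 crop):

[code]
import numpy as np

img = np.random.rand(24, 24, 3).astype(np.float32)  # stand-in for one crop
out = (img - np.mean(img)) / max(np.std(img), 1e-5)
print(np.mean(out), np.std(out))  # roughly 0.0 and 1.0
[/code]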
The results are below. Each epoch took a whole ten minutes, which is more than my poor old computer can handle, so I only tried n_epoch = 5~~
[code]
[TL] Load or Download cifar10 > data\cifar10
[TL] InputLayer model/input: (?, 24, 24, 3)
[TL] Conv2dLayer model/cnn1: shape:(5, 5, 3, 64) strides:(1, 1, 1, 1) pad:SAME act:identity
[TL] BatchNormLayer model/batch1: decay:1.000000 epsilon:0.000010 act:relu is_train:False
[TL] PoolLayer model/pool1: ksize:[1, 3, 3, 1] strides:[1, 2, 2, 1] padding:SAME pool:max_pool
[TL] Conv2dLayer model/cnn2: shape:(5, 5, 64, 64) strides:(1, 1, 1, 1) pad:SAME act:identity
[TL] BatchNormLayer model/batch2: decay:1.000000 epsilon:0.000010 act:relu is_train:False
[TL] PoolLayer model/pool2: ksize:[1, 3, 3, 1] strides:[1, 2, 2, 1] padding:SAME pool:max_pool
[TL] FlattenLayer model/flatten: 2304
[TL] DenseLayer model/d1relu: 384 relu
[TL] DenseLayer model/d2relu: 192 relu
[TL] DenseLayer model/output: 10 identity
[TL] [*] geting variables with relu/W
[TL] got 0: model/d1relu/W:0 (2304, 384)
[TL] got 1: model/d2relu/W:0 (384, 192)
[TL] InputLayer model/input: (?, 24, 24, 3)
[TL] Conv2dLayer model/cnn1: shape:(5, 5, 3, 64) strides:(1, 1, 1, 1) pad:SAME act:identity
[TL] BatchNormLayer model/batch1: decay:0.000000 epsilon:0.000010 act:relu is_train:False
[TL] PoolLayer model/pool1: ksize:[1, 3, 3, 1] strides:[1, 2, 2, 1] padding:SAME pool:max_pool
[TL] Conv2dLayer model/cnn2: shape:(5, 5, 64, 64) strides:(1, 1, 1, 1) pad:SAME act:identity
[TL] BatchNormLayer model/batch2: decay:0.000000 epsilon:0.000010 act:relu is_train:False
[TL] PoolLayer model/pool2: ksize:[1, 3, 3, 1] strides:[1, 2, 2, 1] padding:SAME pool:max_pool
[TL] FlattenLayer model/flatten: 2304
[TL] DenseLayer model/d1relu: 384 relu
[TL] DenseLayer model/d2relu: 192 relu
[TL] DenseLayer model/output: 10 identity
[TL] [*] geting variables with relu/W
[TL] got 0: model/d1relu/W:0 (2304, 384)
[TL] got 1: model/d2relu/W:0 (384, 192)
~~~~~~~~~~~~Training~~~~~~~~~~~~~~
~~~~~~~Parameters~~~~~~~~~~
[TL] param 0: model/cnn1/W_conv2d:0 (5, 5, 3, 64) float32_ref (mean: -6.707941793138161e-05, median: -0.0002945567830465734, std: 0.04467574506998062)
[TL] param 1: model/batch1/beta:0 (64,) float32_ref (mean: 0.0 , median: 0.0 , std: 0.0 )
[TL] param 2: model/batch1/gamma:0 (64,) float32_ref (mean: 1.0000028610229492, median: 1.0004527568817139, std: 0.0017540337285026908)
[TL] param 3: model/batch1/moving_mean:0 (64,) float32_ref (mean: 0.0 , median: 0.0 , std: 0.0 )
[TL] param 4: model/batch1/moving_variance:0 (64,) float32_ref (mean: 1.0 , median: 1.0 , std: 0.0 )
[TL] param 5: model/cnn2/W_conv2d:0 (5, 5, 64, 64) float32_ref (mean: 0.00024428602773696184, median: 0.00021454220404848456, std: 0.04403800144791603)
[TL] param 6: model/batch2/beta:0 (64,) float32_ref (mean: 0.0 , median: 0.0 , std: 0.0 )
[TL] param 7: model/batch2/gamma:0 (64,) float32_ref (mean: 0.9998328685760498, median: 0.9997196197509766, std: 0.0016550050349906087)
[TL] param 8: model/batch2/moving_mean:0 (64,) float32_ref (mean: 0.0 , median: 0.0 , std: 0.0 )
[TL] param 9: model/batch2/moving_variance:0 (64,) float32_ref (mean: 1.0 , median: 1.0 , std: 0.0 )
[TL] param 10: model/d1relu/W:0 (2304, 384) float32_ref (mean: 1.2988122932711121e-07, median: 3.339437898830511e-05, std: 0.03518938273191452)
[TL] param 11: model/d1relu/b:0 (384,) float32_ref (mean: 0.10000001639127731, median: 0.10000000149011612, std: 1.4901161193847656e-08)
[TL] param 12: model/d2relu/W:0 (384, 192) float32_ref (mean: -1.2792263987648766e-05, median: -8.625022019259632e-05, std: 0.03525445982813835)
[TL] param 13: model/d2relu/b:0 (192,) float32_ref (mean: 0.10000001639127731, median: 0.10000000149011612, std: 1.4901161193847656e-08)
[TL] param 14: model/output/W:0 (192, 10) float32_ref (mean: -0.0001271871296921745, median: -4.541782254818827e-05, std: 0.004512488376349211)
[TL] param 15: model/output/b:0 (10,) float32_ref (mean: 0.0 , median: 0.0 , std: 0.0 )
[TL] num of params: 1068682
[TL] layer 0: model/cnn1/Identity:0 (?, 24, 24, 64) float32
[TL] layer 1: model/batch1/Relu:0 (?, 24, 24, 64) float32
[TL] layer 2: model/pool1:0 (?, 12, 12, 64) float32
[TL] layer 3: model/cnn2/Identity:0 (?, 12, 12, 64) float32
[TL] layer 4: model/batch2/Relu:0 (?, 12, 12, 64) float32
[TL] layer 5: model/pool2:0 (?, 6, 6, 64) float32
[TL] layer 6: model/flatten:0 (?, 2304) float32
[TL] layer 7: model/d1relu/Relu:0 (?, 384) float32
[TL] layer 8: model/d2relu/Relu:0 (?, 192) float32
[TL] layer 9: model/output/Identity:0 (?, 10) float32
 learning_rate: 0.000100
 batch_size: 128
Epoch 1 of 5 took 636.231431s
 test loss: 2.591613
 test acc: 0.405048
Epoch 2 of 5 took 621.926292s
 test loss: 2.159147
 test acc: 0.461038
Epoch 3 of 5 took 621.099491s
 test loss: 1.901962
 test acc: 0.497396
Epoch 4 of 5 took 621.817092s
 test loss: 1.750391
 test acc: 0.533554
Epoch 5 of 5 took 622.831094s
 test loss: 1.602891
 test acc: 0.561198
[Finished in 3363.0s]
[/code]
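The moving_mean and moving_variance parameters printed above are exactly what the reused inference graph (is_train=False) normalizes with, in place of per-batch statistics. As a rough numpy sketch of the standard batch-norm inference formula (my own illustration, not TensorLayer's code; eps=1e-5 matches the epsilon:0.000010 in the log):

[code]
import numpy as np

def bn_inference(x, gamma, beta, moving_mean, moving_var, eps=1e-5):
    # per-channel normalization with frozen (moving) statistics
    return gamma * (x - moving_mean) / np.sqrt(moving_var + eps) + beta
[/code]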
Then I tried the plain model() version:
[code]
# network, cost, _ = model_batch_norm(x, y_, reuse=False, is_train=True)
network, cost, _ = model(x, y_, reuse=False)
# _, cost_test, acc = model_batch_norm(x, y_, reuse=True, is_train=False)
_, cost_test, acc = model(x, y_, reuse=True)
[/code]
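model() swaps the batch-norm layers for local response normalization (norm1/norm2). As a rough numpy sketch of what tf.nn.lrn computes across channels at a single spatial position, using the depth_radius/bias/alpha/beta values from the code (my own illustration, not the library implementation):

[code]
import numpy as np

def lrn_channels(a, depth_radius=4, bias=1.0, alpha=0.001 / 9.0, beta=0.75):
    """a: 1-D float array of activations across channels at one pixel."""
    out = np.empty_like(a)
    n = a.shape[0]
    for c in range(n):
        # each channel is divided by a penalty over its neighboring channels
        lo, hi = max(0, c - depth_radius), min(n, c + depth_radius + 1)
        out[c] = a[c] / (bias + alpha * np.sum(a[lo:hi] ** 2)) ** beta
    return out
[/code]

The log from this run is below.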
[code]
[TL] Load or Download cifar10 > data\cifar10
[TL] InputLayer model/input: (?, 24, 24, 3)
[TL] Conv2dLayer model/cnn1: shape:(5, 5, 3, 64) strides:(1, 1, 1, 1) pad:SAME act:relu
[TL] PoolLayer model/pool1: ksize:[1, 3, 3, 1] strides:[1, 2, 2, 1] padding:SAME pool:max_pool
[TL] LocalResponseNormLayer model/norm1: depth_radius: 4, bias: 1.000000, alpha: 0.000111, beta: 0.750000
[TL] Conv2dLayer model/cnn2: shape:(5, 5, 64, 64) strides:(1, 1, 1, 1) pad:SAME act:relu
[TL] LocalResponseNormLayer model/norm2: depth_radius: 4, bias: 1.000000, alpha: 0.000111, beta: 0.750000
[TL] PoolLayer model/pool2: ksize:[1, 3, 3, 1] strides:[1, 2, 2, 1] padding:SAME pool:max_pool
[TL] FlattenLayer model/flatten: 2304
[TL] DenseLayer model/d1relu: 384 relu
[TL] DenseLayer model/d2relu: 192 relu
[TL] DenseLayer model/output: 10 identity
[TL] [*] geting variables with relu/W
[TL] got 0: model/d1relu/W:0 (2304, 384)
[TL] got 1: model/d2relu/W:0 (384, 192)
[TL] InputLayer model/input: (?, 24, 24, 3)
[TL] Conv2dLayer model/cnn1: shape:(5, 5, 3, 64) strides:(1, 1, 1, 1) pad:SAME act:relu
[TL] PoolLayer model/pool1: ksize:[1, 3, 3, 1] strides:[1, 2, 2, 1] padding:SAME pool:max_pool
[TL] LocalResponseNormLayer model/norm1: depth_radius: 4, bias: 1.000000, alpha: 0.000111, beta: 0.750000
[TL] Conv2dLayer model/cnn2: shape:(5, 5, 64, 64) strides:(1, 1, 1, 1) pad:SAME act:relu
[TL] LocalResponseNormLayer model/norm2: depth_radius: 4, bias: 1.000000, alpha: 0.000111, beta: 0.750000
[TL] PoolLayer model/pool2: ksize:[1, 3, 3, 1] strides:[1, 2, 2, 1] padding:SAME pool:max_pool
[TL] FlattenLayer model/flatten: 2304
[TL] DenseLayer model/d1relu: 384 relu
[TL] DenseLayer model/d2relu: 192 relu
[TL] DenseLayer model/output: 10 identity
[TL] [*] geting variables with relu/W
[TL] got 0: model/d1relu/W:0 (2304, 384)
[TL] got 1: model/d2relu/W:0 (384, 192)
~~~~~~~~~~~~Training~~~~~~~~~~~~~~
~~~~~~~Parameters~~~~~~~~~~
[TL] param 0: model/cnn1/W_conv2d:0 (5, 5, 3, 64) float32_ref (mean: -0.0007399556925520301, median: -0.0007501489599235356, std: 0.04392556473612785)
[TL] param 1: model/cnn1/b_conv2d:0 (64,) float32_ref (mean: 0.0 , median: 0.0 , std: 0.0 )
[TL] param 2: model/cnn2/W_conv2d:0 (5, 5, 64, 64) float32_ref (mean: -0.00014188421482685953, median: -6.345209840219468e-05, std: 0.04388545826077461)
[TL] param 3: model/cnn2/b_conv2d:0 (64,) float32_ref (mean: 0.0 , median: 0.0 , std: 0.0 )
[TL] param 4: model/d1relu/W:0 (2304, 384) float32_ref (mean: -7.075626484720487e-08, median: 2.1870484488317743e-05, std: 0.03522096574306488)
[TL] param 5: model/d1relu/b:0 (384,) float32_ref (mean: 0.10000001639127731, median: 0.10000000149011612, std: 1.4901161193847656e-08)
[TL] param 6: model/d2relu/W:0 (384, 192) float32_ref (mean: 5.2912779210601e-05, median: 7.745348557364196e-05, std: 0.035123955458402634)
[TL] param 7: model/d2relu/b:0 (192,) float32_ref (mean: 0.10000001639127731, median: 0.10000000149011612, std: 1.4901161193847656e-08)
[TL] param 8: model/output/W:0 (192, 10) float32_ref (mean: -7.402043411275372e-05, median: -2.809507896017749e-05, std: 0.004525790922343731)
[TL] param 9: model/output/b:0 (10,) float32_ref (mean: 0.0 , median: 0.0 , std: 0.0 )
 learning_rate: 0.000100
[TL] num of params: 1068298
[TL] layer 0: model/cnn1/Relu:0 (?, 24, 24, 64) float32
[TL] layer 1: model/pool1:0 (?, 12, 12, 64) float32
[TL] layer 2: model/norm1/LRN:0 (?, 12, 12, 64) float32
[TL] layer 3: model/cnn2/Relu:0 (?, 12, 12, 64) float32
[TL] layer 4: model/norm2/LRN:0 (?, 12, 12, 64) float32
[TL] layer 5: model/pool2:0 (?, 6, 6, 64) float32
[TL] layer 6: model/flatten:0 (?, 2304) float32
[TL] layer 7: model/d1relu/Relu:0 (?, 384) float32
[TL] layer 8: model/d2relu/Relu:0 (?, 192) float32
[TL] layer 9: model/output/Identity:0 (?, 10) float32
 batch_size: 128
Epoch 1 of 5 took 743.840507s
 test loss: 2.578664
 test acc: 0.398538
[Cancelled]
[/code]
This run was even slower, 700-plus seconds per epoch~~ Judging from the first epoch, it also starts out lower than model_batch_norm (test acc 0.3985 vs 0.4050). All in all, CNN image work really does need a powerful GPU~~