Horovod + TensorFlow GPU Environment Setup and Data-Parallel Model Training
2019-04-22 21:08
All rights reserved. Please contact the author for permission before reposting.
1. Environment Setup
References:
- https://www.geek-share.com/detail/2758173524.html
- https://www.cnblogs.com/Jay-CFD/p/6114852.html
- https://blog.csdn.net/qq_25792799/article/details/80500292
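The guides above cover the GPU stack (CUDA, cuDNN, NCCL, Open MPI, and tensorflow-gpu); Horovod itself is then typically installed on top of that, e.g. with `HOROVOD_GPU_ALLREDUCE=NCCL pip install horovod` for Horovod versions of this era (check the Horovod README for the exact flags for your version). Once everything is installed, a minimal sanity-check script, a sketch of my own rather than part of the original post, confirms that Horovod initializes and that each process gets its own rank:
[code]# sanity_check.py -- hypothetical helper script, not from the original post
import horovod.tensorflow as hvd

hvd.init()  # initialize Horovod's communication context (MPI/NCCL)
# Every process prints its own rank; launched with 2 processes you
# should see ranks 0 and 1, each with a distinct local rank.
print("rank %d of %d, local rank %d" % (hvd.rank(), hvd.size(), hvd.local_rank()))[/code]
Launch it with `horovodrun -np 2 python sanity_check.py` (or the equivalent `mpirun` invocation); if both ranks print, the environment is ready for the training scripts below.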
2. TensorFlow implementation on the MNIST dataset (single-GPU version):
The following code has been tested and works.
[code]import tensorflow as tf
import numpy as np
import tensorflow.contrib.slim as slim  # missing in the original; conv_net needs it
from tensorflow.examples.tutorials.mnist import input_data

mnist = input_data.read_data_sets("/tmp/mnist/", one_hot=True)

num_gpus = 2
num_steps = 200
learning_rate = 0.001
batch_size = 1024
display_step = 10
num_input = 784
num_classes = 10


def conv_net(x, is_training):
    # "updates_collections": None is very important; without it the batch-norm
    # statistics never update and test accuracy stays near 0.10
    batch_norm_params = {"is_training": is_training, "decay": 0.9,
                         "updates_collections": None}
    with slim.arg_scope([slim.conv2d, slim.fully_connected],
                        activation_fn=tf.nn.relu,
                        weights_initializer=tf.truncated_normal_initializer(mean=0.0, stddev=0.01),
                        weights_regularizer=slim.l2_regularizer(0.0005),
                        normalizer_fn=slim.batch_norm,
                        normalizer_params=batch_norm_params):
        # AUTO_REUSE lets the training and test graphs share the same weights
        with tf.variable_scope("ConvNet", reuse=tf.AUTO_REUSE):
            x = tf.reshape(x, [-1, 28, 28, 1])
            net = slim.conv2d(x, 6, [5, 5], scope="conv_1")
            net = slim.max_pool2d(net, [2, 2], scope="pool_1")
            net = slim.conv2d(net, 12, [5, 5], scope="conv_2")
            net = slim.max_pool2d(net, [2, 2], scope="pool_2")
            net = slim.flatten(net, scope="flatten")
            net = slim.fully_connected(net, 100, scope="fc")
            net = slim.dropout(net, is_training=is_training)
            net = slim.fully_connected(net, num_classes, scope="prob",
                                       activation_fn=None, normalizer_fn=None)
            return net


def train_single():
    X = tf.placeholder(tf.float32, [None, num_input])
    Y = tf.placeholder(tf.float32, [None, num_classes])
    logits = conv_net(X, True)
    loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=Y, logits=logits))
    opt = tf.train.AdamOptimizer(learning_rate)
    train_op = opt.minimize(loss)

    # Separate inference graph (is_training=False) that reuses the training weights
    logits_test = conv_net(X, False)
    correct_prediction = tf.equal(tf.argmax(logits_test, 1), tf.argmax(Y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for step in range(1, num_steps + 1):
            batch_x, batch_y = mnist.train.next_batch(batch_size)
            sess.run(train_op, feed_dict={X: batch_x, Y: batch_y})
            if step % display_step == 0 or step == 1:
                loss_value, acc = sess.run([loss, accuracy],
                                           feed_dict={X: batch_x, Y: batch_y})
                print("Step:" + str(step) + ":" + str(loss_value) + " " + str(acc))
        print("Done")
        print("Testing Accuracy:",
              np.mean([sess.run(accuracy,
                                feed_dict={X: mnist.test.images[i:i + batch_size],
                                           Y: mnist.test.labels[i:i + batch_size]})
                       for i in range(0, len(mnist.test.images), batch_size)]))


if __name__ == "__main__":
    train_single()
# Code adapted from: https://blog.csdn.net/minstyrain/article/details/80986397[/code]
3. TensorFlow implementation on the MNIST dataset (multi-GPU Horovod version):
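Compared with the single-GPU baseline, the Horovod port needs only four changes, all visible in the code below: call `hvd.init()` at startup; pin each process to one GPU with `config.gpu_options.visible_device_list = str(hvd.local_rank())`; wrap the optimizer in `hvd.DistributedOptimizer`, which averages gradients across workers via allreduce; and add `hvd.BroadcastGlobalVariablesHook(0)` so every worker starts from rank 0's initial weights.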
[code]import time

import numpy as np
import tensorflow as tf
import tensorflow.contrib.slim as slim
import horovod.tensorflow as hvd
from tensorflow.examples.tutorials.mnist import input_data

mnist = input_data.read_data_sets("../../dataset/mnist/", one_hot=True)

num_gpus = 2
num_steps = 20000
learning_rate = 0.001
batch_size = 1024
display_step = 10
num_input = 784
num_classes = 10


def conv_net(x, is_training):
    # Same network as in the single-GPU version above.
    # "updates_collections": None is very important; without it accuracy stays near 0.10
    batch_norm_params = {"is_training": is_training, "decay": 0.9,
                         "updates_collections": None}
    with slim.arg_scope([slim.conv2d, slim.fully_connected],
                        activation_fn=tf.nn.relu,
                        weights_initializer=tf.truncated_normal_initializer(mean=0.0, stddev=0.01),
                        weights_regularizer=slim.l2_regularizer(0.0005),
                        normalizer_fn=slim.batch_norm,
                        normalizer_params=batch_norm_params):
        with tf.variable_scope("ConvNet", reuse=tf.AUTO_REUSE):
            x = tf.reshape(x, [-1, 28, 28, 1])
            net = slim.conv2d(x, 6, [5, 5], scope="conv_1")
            net = slim.max_pool2d(net, [2, 2], scope="pool_1")
            net = slim.conv2d(net, 12, [5, 5], scope="conv_2")
            net = slim.max_pool2d(net, [2, 2], scope="pool_2")
            net = slim.flatten(net, scope="flatten")
            net = slim.fully_connected(net, 100, scope="fc")
            net = slim.dropout(net, is_training=is_training)
            net = slim.fully_connected(net, num_classes, scope="prob",
                                       activation_fn=None, normalizer_fn=None)
            return net


# Wall-clock times recorded in the original post:
# 499.36237692832947
# 326.96640610694885
def train_multi_gpu():
    # Horovod: initialize the library.
    hvd.init()
    # Horovod: pin each process to a single GPU.
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.gpu_options.visible_device_list = str(hvd.local_rank())

    X = tf.placeholder(tf.float32, [None, num_input])
    Y = tf.placeholder(tf.float32, [None, num_classes])
    logits = conv_net(X, True)
    loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=Y, logits=logits))
    opt = tf.train.AdamOptimizer(learning_rate)
    # Horovod: wrap the optimizer so gradients are averaged across all workers.
    opt = hvd.DistributedOptimizer(opt)
    hooks = [
        # Horovod: BroadcastGlobalVariablesHook broadcasts initial variable states
        # from rank 0 to all other processes. This is necessary to ensure consistent
        # initialization of all workers when training is started with random weights
        # or restored from a checkpoint.
        hvd.BroadcastGlobalVariablesHook(0),
        # Horovod: adjust number of steps based on number of GPUs.
        # tf.train.StopAtStepHook(last_step=200 // hvd.size()),
    ]
    train_op = opt.minimize(loss)

    logits_test = conv_net(X, False)
    correct_prediction = tf.equal(tf.argmax(logits_test, 1), tf.argmax(Y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    sttime = time.time()
    with tf.train.MonitoredTrainingSession(config=config, hooks=hooks) as mon_sess:
        # Perform synchronous training.
        for step in range(1, num_steps + 1):
            batch_x, batch_y = mnist.train.next_batch(batch_size)
            # Each worker trains on its own batch; DistributedOptimizer
            # performs the allreduce inside this run call.
            mon_sess.run(train_op, feed_dict={X: batch_x, Y: batch_y})
            if step % display_step == 0 or step == 1:
                loss_value, acc = mon_sess.run([loss, accuracy],
                                               feed_dict={X: batch_x, Y: batch_y})
                print("Step:" + str(step) + ":" + str(loss_value) + " " + str(acc))
        # Only rank 0 reports the final test accuracy.
        if hvd.rank() == 0:
            print("Done")
            print("Testing Accuracy:",
                  np.mean([mon_sess.run(accuracy,
                                        feed_dict={X: mnist.test.images[i:i + batch_size],
                                                   Y: mnist.test.labels[i:i + batch_size]})
                           for i in range(0, len(mnist.test.images), batch_size)]))
    endtime = time.time()
    print(endtime - sttime)


if __name__ == "__main__":
    train_multi_gpu()[/code]
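Unlike the single-GPU script, this one must be started with one process per GPU through Horovod's launcher, for example `horovodrun -np 2 python mnist_hvd.py` on a two-GPU machine (`mnist_hvd.py` stands for whatever filename you saved the script as), or the equivalent `mpirun -np 2` command. Keep in mind that each worker draws its own batches, so with synchronous data parallelism every step effectively processes `hvd.size() * batch_size` examples.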