A DCGAN-Based Neural Network for Anime Avatar Generation
2017-09-06 21:08
I. Preface
1. What is DCGAN?
2. A TensorFlow implementation of DCGAN
3. What is transposed convolution?
4. A TensorFlow implementation of transposed convolution
5. Batch Normalization explained
This article assumes the reader is already familiar with the basic principles and implementation of GANs and CNNs; if not, the following articles may help:
A GAN-based neural network for generating MNIST images
A CNN-based neural network for CAPTCHA recognition
II. Implementation
1. Preparing the training data
(1) Data source: Baidu Cloud drive, extraction code: g5qa
(2) Create a data generator
import os
from glob import glob

import numpy as np
import scipy.misc


class Avatar:

    def __init__(self):
        self.data_name = 'faces'
        self.source_shape = (96, 96, 3)
        self.resize_shape = (48, 48, 3)
        self.crop = True
        self.img_shape = self.source_shape if not self.crop else self.resize_shape
        self.img_list = self._get_img_list()
        self.batch_size = 64
        self.batch_shape = (self.batch_size, ) + self.img_shape
        self.chunk_size = len(self.img_list) // self.batch_size

    def _get_img_list(self):
        # Collect the paths of all jpg files in the data directory
        path = os.path.join(os.getcwd(), self.data_name, '*.jpg')
        return glob(path)

    def _get_img(self, name):
        assert name in self.img_list
        img = scipy.misc.imread(name).astype(np.float32)
        assert img.shape == self.source_shape
        return self._resize(img) if self.crop else img

    def _resize(self, img):
        # Center-crop the source image, resize it, and scale pixel values to [-1, 1]
        h, w = img.shape[:2]
        resize_h, resize_w = self.resize_shape[:2]
        crop_h, crop_w = self.source_shape[:2]
        j = int(round((h - crop_h) / 2.))
        i = int(round((w - crop_w) / 2.))
        cropped_image = scipy.misc.imresize(img[j:j + crop_h, i:i + crop_w], [resize_h, resize_w])
        return np.array(cropped_image) / 127.5 - 1.

    @staticmethod
    def save_img(image, path):
        scipy.misc.imsave(path, image)
        return True

    def batches(self):
        # Yield one batch of images at a time
        start = 0
        end = self.batch_size
        for _ in range(self.chunk_size):
            name_list = self.img_list[start:end]
            imgs = [self._get_img(name) for name in name_list]
            batches = np.zeros(self.batch_shape)
            batches[::] = imgs
            yield batches
            start += self.batch_size
            end += self.batch_size

This reads the local image files and wraps them in a Python generator, which serves as the data source for the model later on.
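For reference, here is a minimal usage sketch of the data generator above (it assumes the jpg files sit in a faces/ directory under the current working directory, as the class expects):

# Minimal usage sketch: iterate over one epoch of batches
avatar = Avatar()
print(avatar.chunk_size)  # number of batches per epoch
for batch in avatar.batches():
    # each batch has shape (64, 48, 48, 3), with pixel values scaled to [-1, 1]
    print(batch.shape, batch.min(), batch.max())
    break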
2. Model parameters
def __init__(self):
    self.avatar = Avatar()
    # Shape of a real image: (height, width, depth)
    self.img_shape = self.avatar.img_shape
    # Shape of one batch of images: (batch, height, width, depth)
    self.batch_shape = self.avatar.batch_shape
    # Number of images in a batch
    self.batch_size = self.avatar.batch_size
    # Number of batches per epoch
    self.chunk_size = self.avatar.chunk_size
    # Length of the noise vector
    self.noise_img_size = 100
    # Number of output channels of the transposed convolutions (generator)
    self.gf_size = 64
    # Number of output channels of the convolutions (discriminator)
    self.df_size = 64
    # Number of training epochs
    self.epoch_size = 50
    # Learning rate
    self.learning_rate = 0.0002
    # Exponential decay rate for the Adam optimizer
    self.beta1 = 0.5
    # Number of sample images to generate
    self.sample_size = 64
3. Defining the inputs

# Real images
real_imgs = tf.placeholder(tf.float32, self.batch_shape, name='real_images')
# Noise input
noise_imgs = tf.placeholder(tf.float32, [None, self.noise_img_size], name='noise_images')

We feed random noise into the generator and let it turn that noise into images.
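A quick sanity check of the placeholder shapes under the defaults above (crop=True, batch_size=64, noise_img_size=100):

# With the default configuration the placeholders have these static shapes
print(real_imgs.get_shape())   # (64, 48, 48, 3)
print(noise_imgs.get_shape())  # (?, 100)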
4. Generator
def generator(self, noise_imgs, train=True):
    with tf.variable_scope('generator'):
        # Height and width for each layer
        s_h, s_w, _ = self.img_shape
        s_h2, s_w2 = self.conv_out_size_same(s_h, 2), self.conv_out_size_same(s_w, 2)
        s_h4, s_w4 = self.conv_out_size_same(s_h2, 2), self.conv_out_size_same(s_w2, 2)
        s_h8, s_w8 = self.conv_out_size_same(s_h4, 2), self.conv_out_size_same(s_w4, 2)
        s_h16, s_w16 = self.conv_out_size_same(s_h8, 2), self.conv_out_size_same(s_w8, 2)

        # layer 0
        # Linear projection of the input noise
        z, h0_w, h0_b = self.linear(noise_imgs, self.gf_size*8*s_h16*s_w16)
        # Reshape into the input format of the first layer
        h0 = tf.reshape(z, [-1, s_h16, s_w16, self.gf_size * 8])
        # Batch-normalize to speed up convergence
        h0 = self.batch_normalizer(h0, train=train, name='g_bn0')
        # Activation
        h0 = tf.nn.relu(h0)

        # layer 1
        # Upsample with a transposed convolution
        h1, h1_w, h1_b = self.deconv2d(h0, [self.batch_size, s_h8, s_w8, self.gf_size*4], name='g_h1')
        h1 = self.batch_normalizer(h1, train=train, name='g_bn1')
        h1 = tf.nn.relu(h1)

        # layer 2
        h2, h2_w, h2_b = self.deconv2d(h1, [self.batch_size, s_h4, s_w4, self.gf_size*2], name='g_h2')
        h2 = self.batch_normalizer(h2, train=train, name='g_bn2')
        h2 = tf.nn.relu(h2)

        # layer 3
        h3, h3_w, h3_b = self.deconv2d(h2, [self.batch_size, s_h2, s_w2, self.gf_size*1], name='g_h3')
        h3 = self.batch_normalizer(h3, train=train, name='g_bn3')
        h3 = tf.nn.relu(h3)

        # layer 4
        h4, h4_w, h4_b = self.deconv2d(h3, self.batch_shape, name='g_h4')

        return tf.nn.tanh(h4)

The DCGAN generator is a convolutional network: it uses transposed convolutions for upsampling, drops pooling layers entirely, and relies on batch normalization to speed up convergence.
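The generator calls several helpers (conv_out_size_same, linear, deconv2d, batch_normalizer) that are not shown in this post. The sketch below gives plausible TF 1.x implementations in the spirit of the standard DCGAN reference code; the kernel sizes, strides, and initializer values are assumptions, and in the post these are methods on the model class rather than standalone functions:

import math
import tensorflow as tf

def conv_out_size_same(size, stride):
    # Output size of a stride-`stride` op with 'SAME' padding
    return int(math.ceil(float(size) / float(stride)))

def linear(inputs, output_size, stddev=0.02, name='linear'):
    # Fully connected layer; returns the output plus its weight and bias variables
    in_size = inputs.get_shape().as_list()[1]
    with tf.variable_scope(name):
        w = tf.get_variable('w', [in_size, output_size],
                            initializer=tf.random_normal_initializer(stddev=stddev))
        b = tf.get_variable('b', [output_size], initializer=tf.constant_initializer(0.0))
        return tf.matmul(inputs, w) + b, w, b

def deconv2d(inputs, output_shape, k_h=5, k_w=5, s_h=2, s_w=2, stddev=0.02, name='deconv2d'):
    # Transposed convolution (upsampling); assumed 5x5 kernel with stride 2
    with tf.variable_scope(name):
        w = tf.get_variable('w', [k_h, k_w, output_shape[-1], int(inputs.get_shape()[-1])],
                            initializer=tf.random_normal_initializer(stddev=stddev))
        deconv = tf.nn.conv2d_transpose(inputs, w, output_shape=output_shape,
                                        strides=[1, s_h, s_w, 1])
        b = tf.get_variable('b', [output_shape[-1]], initializer=tf.constant_initializer(0.0))
        return tf.nn.bias_add(deconv, b), w, b

def batch_normalizer(inputs, train=True, epsilon=1e-5, momentum=0.9, name='batch_norm'):
    # Batch normalization; decay/epsilon are the usual DCGAN defaults
    return tf.contrib.layers.batch_norm(inputs, decay=momentum, epsilon=epsilon,
                                        scale=True, is_training=train, scope=name)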
5. Discriminator
def discriminator(self, real_imgs, reuse=False):
    with tf.variable_scope("discriminator", reuse=reuse):
        # layer 0
        # Convolution
        h0 = self.conv2d(real_imgs, self.df_size, name='d_h0_conv')
        # Activation
        h0 = self.lrelu(h0)

        # layer 1
        h1 = self.conv2d(h0, self.df_size*2, name='d_h1_conv')
        h1 = self.batch_normalizer(h1, name='d_bn1')
        h1 = self.lrelu(h1)

        # layer 2
        h2 = self.conv2d(h1, self.df_size*4, name='d_h2_conv')
        h2 = self.batch_normalizer(h2, name='d_bn2')
        h2 = self.lrelu(h2)

        # layer 3
        h3 = self.conv2d(h2, self.df_size*8, name='d_h3_conv')
        h3 = self.batch_normalizer(h3, name='d_bn3')
        h3 = self.lrelu(h3)

        # layer 4
        h4, _, _ = self.linear(tf.reshape(h3, [self.batch_size, -1]), 1, name='d_h4_lin')

        return tf.nn.sigmoid(h4), h4

The DCGAN discriminator is also a convolutional network; strided convolutions extract the features used to classify an image as real or fake.
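Similarly, conv2d and lrelu are not shown in the post. A minimal sketch (kernel size, stride, and the leak factor are assumed values):

def conv2d(inputs, output_dim, k_h=5, k_w=5, s_h=2, s_w=2, stddev=0.02, name='conv2d'):
    # Strided convolution: downsampling is done by the stride instead of pooling
    with tf.variable_scope(name):
        w = tf.get_variable('w', [k_h, k_w, int(inputs.get_shape()[-1]), output_dim],
                            initializer=tf.truncated_normal_initializer(stddev=stddev))
        conv = tf.nn.conv2d(inputs, w, strides=[1, s_h, s_w, 1], padding='SAME')
        b = tf.get_variable('b', [output_dim], initializer=tf.constant_initializer(0.0))
        return tf.nn.bias_add(conv, b)

def lrelu(x, leak=0.2):
    # Leaky ReLU, the activation DCGAN recommends for the discriminator
    return tf.maximum(x, leak * x)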
6. Loss and optimization
@staticmethod
def loss_graph(real_logits, fake_logits):
    # Generator loss
    # The generator wants the discriminator to label its images as 1
    gen_loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=fake_logits, labels=tf.ones_like(fake_logits)))
    # Discriminator loss on generated images
    # The discriminator wants to label them as 0
    fake_loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=fake_logits, labels=tf.zeros_like(fake_logits)))
    # Discriminator loss on real images
    # The discriminator wants to label them as 1
    real_loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=real_logits, labels=tf.ones_like(real_logits)))
    # Total discriminator loss
    dis_loss = tf.add(fake_loss, real_loss)
    return gen_loss, fake_loss, real_loss, dis_loss

@staticmethod
def optimizer_graph(gen_loss, dis_loss, learning_rate, beta1):
    # All trainable variables
    train_vars = tf.trainable_variables()
    # Generator variables
    gen_vars = [var for var in train_vars if var.name.startswith('generator')]
    # Discriminator variables
    dis_vars = [var for var in train_vars if var.name.startswith('discriminator')]
    # The generator and discriminator are two separate networks and are optimized separately
    gen_optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate, beta1=beta1).minimize(gen_loss, var_list=gen_vars)
    dis_optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate, beta1=beta1).minimize(dis_loss, var_list=dis_vars)
    return gen_optimizer, dis_optimizer
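loss_graph expects real_logits and fake_logits, which come from wiring the two networks together; the post does not show that step. A hedged sketch of how it might look, using the methods defined above (gan_graph is a hypothetical name):

def gan_graph(self, real_imgs, noise_imgs):
    # Sketch: the generator turns noise into fake images, and the discriminator
    # scores the real and fake batches with shared weights (reuse=True)
    fake_imgs = self.generator(noise_imgs)
    real_outputs, real_logits = self.discriminator(real_imgs)
    fake_outputs, fake_logits = self.discriminator(fake_imgs, reuse=True)
    return fake_imgs, real_logits, fake_logits

# Before entering the training loop below, something like:
# gen_loss, fake_loss, real_loss, dis_loss = self.loss_graph(real_logits, fake_logits)
# gen_optimizer, dis_optimizer = self.optimizer_graph(gen_loss, dis_loss, self.learning_rate, self.beta1)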
7. Training

# Start training
saver = tf.train.Saver()
step = 0
# Limit the fraction of GPU memory this process may use.
# TensorFlow grabs all GPU memory by default, which can cause startup errors
# when other programs already occupy much of it.
gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.8)
with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
    sess.run(tf.global_variables_initializer())
    for epoch in range(self.epoch_size):
        batches = self.avatar.batches()
        for batch_imgs in batches:
            # Noise input for the generator
            noises = np.random.uniform(-1, 1, size=(self.batch_size, self.noise_img_size)).astype(np.float32)
            # Optimization: one discriminator update followed by two generator updates per step
            _ = sess.run(dis_optimizer, feed_dict={real_imgs: batch_imgs, noise_imgs: noises})
            _ = sess.run(gen_optimizer, feed_dict={noise_imgs: noises})
            _ = sess.run(gen_optimizer, feed_dict={noise_imgs: noises})
            step += 1
            print(datetime.now().strftime('%c'), epoch, step)
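The post defines sample_size and Avatar.save_img but does not show the sampling step. A hedged sketch of saving generated images at the end of training (still inside the session; fake_imgs refers to the generator output tensor from the wiring sketch above, and the samples/ directory and checkpoint path are assumptions):

# Hedged sketch: draw one batch of noise and save the generated images
sample_noise = np.random.uniform(-1, 1, size=(self.sample_size, self.noise_img_size)).astype(np.float32)
samples = sess.run(fake_imgs, feed_dict={noise_imgs: sample_noise})
for idx, img in enumerate(samples):
    # tanh output lies in [-1, 1]; map it back to [0, 1] before saving
    Avatar.save_img((img + 1.) / 2., 'samples/%02d.jpg' % idx)
# Optionally persist the trained weights (checkpoint path is an assumption)
# saver.save(sess, './checkpoints/avatar.ckpt')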
8. Results

Running 50 epochs took about 5 hours; my GPU is fairly ordinary, so I stopped training there. As you can see, the model already produces decent results at this point.
III. Miscellaneous
The full code is available on my GitHub: https://github.com/lpty/tensorflow_tutorial