带你玩转手势识别 百度飞桨深度学习7日—Day02
2020-04-21 19:05
821 查看
百度深度学习7日—Day02 手势识别
深度学习三部曲
手势识别基于环境 paddlepaddle 1.7.0(因为我们要用到动态图)
建立模型
- 选择什么样的网络结构?
- 选择多少层数,每层选择多少神经元
其实神经网络并不一定越深越好:网络越深,越容易出现过拟合、梯度无法传播到深层等问题,因此往往需要更复杂的网络结构(例如resnet和densenet等)来缓解这些问题
我们可以选择简单的神经网络,例如lenet这样经典基础的网络
class LeNet(fluid.dygraph.Layer):
    """LeNet-style CNN: three conv+pool stages followed by two linear layers.

    The network emits 10 raw scores (logits) per sample; no softmax is
    applied inside the model.

    Args:
        training: when true, dropout after ``fc1`` uses ratio 0.5;
            otherwise the ratio is 0.0.
    """

    def __init__(self, training= True):
        # The original called super(DenseNet, self), but no DenseNet is
        # defined in this snippet, so construction raised NameError.
        super(LeNet, self).__init__()
        self.conv1 = Conv2D(num_channels=3, num_filters=32, filter_size=3, act='relu')
        self.pool1 = Pool2D(pool_size=2, pool_stride=2)
        self.conv2 = Conv2D(num_channels=32, num_filters=32, filter_size=3, act='relu')
        self.pool2 = Pool2D(pool_size=2, pool_stride=2)
        self.conv3 = Conv2D(num_channels=32, num_filters=64, filter_size=3, act='relu')
        self.pool3 = Pool2D(pool_size=2, pool_stride=2)
        # 6400 = 64 channels * 10 * 10, matching the pool3 shape noted below.
        self.fc1 = Linear(input_dim=6400, output_dim=4096, act='relu')
        self.drop_ratiol = 0.5 if training else 0.0
        self.fc2 = Linear(input_dim=4096, output_dim=10)

    def forward(self, inputs):
        """Return the logits for ``inputs``.

        The shape comments are the author's annotations, written as
        (batch, channels, height, width) for a batch of 32.
        """
        conv1 = self.conv1(inputs)  # 32 32 98 98
        pool1 = self.pool1(conv1)   # 32 32 49 49
        conv2 = self.conv2(pool1)   # 32 32 47 47
        pool2 = self.pool2(conv2)   # 32 32 23 23
        conv3 = self.conv3(pool2)   # 32 64 21 21
        pool3 = self.pool3(conv3)   # 32 64 10 10
        # Flatten everything except the batch dimension before the linear layers.
        rs_1 = fluid.layers.reshape(pool3, [pool3.shape[0], -1])
        fc1 = self.fc1(rs_1)
        drop1 = fluid.layers.dropout(fc1, self.drop_ratiol)
        y = self.fc2(drop1)
        return y
这里我们使用paddlepaddle实现卷积神经网络
损失函数
选择常用损失函数,平方误差,交叉熵等损失函数
这里我们选择paddlepaddle中实现的交叉熵
fluid.layers.softmax_with_cross_entropy
这里提供一下paddlepaddle动态图的官方文档
https://www.paddlepaddle.org.cn/documentation/docs/zh/api_cn/dygraph_cn.html
参数学习
- 梯度下降
- 反向传播算法
优化算法主要有GD,SGD,Momentum,RMSProp和Adam算法
我们选择 fluid.optimizer.Momentum:它基于梯度的指数加权移动平均来更新参数,就像拥有动量一样,没法说停就停【手动狗头】
我们使用paddlepaddle1.7.0的动态图进行训练
数据读取预处理部分
import os
import time
import random
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
import paddle
import paddle.fluid as fluid
import paddle.fluid.layers as layers
from multiprocessing import cpu_count
from paddle.fluid.dygraph import Pool2D,Conv2D
from paddle.fluid.dygraph import Linear

# Build the image lists: one "<image path>\t<folder name>" line per image.
# The folder name is used as the label and is later parsed with int().
data_path = '/home/aistudio/data/data23668/Dataset'
character_folders = os.listdir(data_path)

# Open both list files once in write mode. This truncates any previous list,
# replacing the original "remove, then re-open in append mode per folder".
with open('./train_data.list', 'w') as f_train, open('./test_data.list', 'w') as f_test:
    for character_folder in character_folders:
        if character_folder == '.DS_Store':
            continue
        character_imgs = os.listdir(os.path.join(data_path, character_folder))
        count = 0
        for img in character_imgs:
            if img == '.DS_Store':
                continue
            entry = os.path.join(data_path, character_folder, img) + '\t' + character_folder + '\n'
            # Within each folder, every 10th image (count 0, 10, 20, ...) goes
            # to the test list; the rest go to the training list.
            if count % 10 == 0:
                f_test.write(entry)
            else:
                f_train.write(entry)
            count += 1
print('列表已生成')


# Readers for the training and test sets.
def data_mapper(sample):
    """Load one image and convert it to a normalized CHW float32 array.

    Args:
        sample: ``(image_path, label)`` pair.

    Returns:
        ``(array, label)`` where ``array`` has shape (3, 100, 100) and values
        scaled by 1/255; ``label`` is passed through unchanged.
    """
    img_path, label = sample
    # Close the file handle promptly; convert() returns a loaded copy.
    with Image.open(img_path) as raw:
        # Force 3 channels so transpose((2, 0, 1)) always sees an HWC array.
        # Image.LANCZOS is the same filter as the removed Image.ANTIALIAS alias.
        img = raw.convert('RGB').resize((100, 100), Image.LANCZOS)
    img = np.array(img).astype('float32')
    img = img.transpose((2, 0, 1))  # HWC -> CHW
    img = img / 255.0
    return img, label


def data_reader(data_list_path):
    """Create a reader that yields ``(image_array, int_label)`` samples.

    Args:
        data_list_path: path of a list file with "<image path>\\t<label>" lines.

    Returns:
        The reader produced by ``paddle.reader.xmap_readers``, which applies
        ``data_mapper`` using ``cpu_count()`` workers and a buffer size of 512.
    """
    def reader():
        with open(data_list_path, 'r') as f:
            for line in f:
                if not line.strip():
                    # Skip blank lines instead of failing on the unpack below.
                    continue
                img, label = line.rstrip('\n').split('\t')
                yield img, int(label)
    return paddle.reader.xmap_readers(data_mapper, reader, cpu_count(), 512)


# Training data provider: shuffled with a 256-sample buffer, batches of 32.
train_reader = paddle.batch(reader=paddle.reader.shuffle(reader=data_reader('./train_data.list'), buf_size=256), batch_size=32)
# Test data provider: batches of 32, no shuffling.
test_reader = paddle.batch(reader=data_reader('./test_data.list'), batch_size=32)
模型定义部分参考上面建立模型处
class LeNet(fluid.dygraph.Layer):
    """LeNet-style CNN: three conv+pool stages followed by two linear layers.

    The network emits 10 raw scores (logits) per sample; no softmax is
    applied inside the model.

    Args:
        training: when true, dropout after ``fc1`` uses ratio 0.5;
            otherwise the ratio is 0.0.
    """

    def __init__(self, training= True):
        # The original called super(DenseNet, self), but no DenseNet is
        # defined in this snippet, so construction raised NameError.
        super(LeNet, self).__init__()
        self.conv1 = Conv2D(num_channels=3, num_filters=32, filter_size=3, act='relu')
        self.pool1 = Pool2D(pool_size=2, pool_stride=2)
        self.conv2 = Conv2D(num_channels=32, num_filters=32, filter_size=3, act='relu')
        self.pool2 = Pool2D(pool_size=2, pool_stride=2)
        self.conv3 = Conv2D(num_channels=32, num_filters=64, filter_size=3, act='relu')
        self.pool3 = Pool2D(pool_size=2, pool_stride=2)
        # 6400 = 64 channels * 10 * 10, matching the pool3 shape noted below.
        self.fc1 = Linear(input_dim=6400, output_dim=4096, act='relu')
        self.drop_ratiol = 0.5 if training else 0.0
        self.fc2 = Linear(input_dim=4096, output_dim=10)

    def forward(self, inputs):
        """Return the logits for ``inputs``.

        The shape comments are the author's annotations, written as
        (batch, channels, height, width) for a batch of 32.
        """
        conv1 = self.conv1(inputs)  # 32 32 98 98
        pool1 = self.pool1(conv1)   # 32 32 49 49
        conv2 = self.conv2(pool1)   # 32 32 47 47
        pool2 = self.pool2(conv2)   # 32 32 23 23
        conv3 = self.conv3(pool2)   # 32 64 21 21
        pool3 = self.pool3(conv3)   # 32 64 10 10
        # Flatten everything except the batch dimension before the linear layers.
        rs_1 = fluid.layers.reshape(pool3, [pool3.shape[0], -1])
        fc1 = self.fc1(rs_1)
        drop1 = fluid.layers.dropout(fc1, self.drop_ratiol)
        y = self.fc2(drop1)
        return y
动态图进行训练
# Train LeNet in dygraph mode and save its parameters under the 'LeNet' prefix.
with fluid.dygraph.guard():
    model = LeNet(True)  # build the network with the training dropout ratio
    model.train()        # switch the layer to training mode

    # Momentum optimizer (an SGD optimizer was used in an earlier revision).
    opt = fluid.optimizer.Momentum(
        learning_rate=0.001,
        momentum=0.9,
        parameter_list=model.parameters(),
    )

    # Author's note: raised from 20 to 150; about 60 epochs already reaches
    # 90%+, and around 100 is suggested.
    epochs_num = 150

    for pass_num in range(epochs_num):
        for batch_id, data in enumerate(train_reader()):
            # Each batch item is (image, label); stack them into dense arrays.
            images = np.array(
                [x[0].reshape(3, 100, 100) for x in data],
                np.float32,
            )
            labels = np.array([x[1] for x in data]).astype('int64')
            labels = labels[:, np.newaxis]  # add a trailing axis: (N,) -> (N, 1)

            image = fluid.dygraph.to_variable(images)
            label = fluid.dygraph.to_variable(labels)

            # Forward pass: the model returns logits; softmax is folded into the loss.
            logits = model(image)
            loss = fluid.layers.softmax_with_cross_entropy(logits, label)
            avg_loss = fluid.layers.mean(loss)

            # Accuracy is computed on the softmax probabilities.
            pred = fluid.layers.softmax(logits)
            acc = fluid.layers.accuracy(pred, label)

            # Log every 50th batch, skipping batch 0.
            if batch_id != 0 and batch_id % 50 == 0:
                print("train_pass:{},batch_id:{},train_loss:{},train_acc:{}".format(pass_num,batch_id,avg_loss.numpy(),acc.numpy()))

            avg_loss.backward()
            opt.minimize(avg_loss)
            model.clear_gradients()

    # Persist the trained parameters.
    fluid.save_dygraph(model.state_dict(), 'LeNet')
模型校验
# Evaluate the saved model on the test reader and print the mean batch accuracy.
with fluid.dygraph.guard():
    model_dict, _ = fluid.load_dygraph('LeNet')
    # NOTE(review): the model is built with training=True (dropout ratio 0.5)
    # even though it is only evaluated here — confirm this is intended.
    model = LeNet(True)
    model.load_dict(model_dict)  # restore the trained parameters
    model.eval()                 # evaluation mode

    accs = []
    for batch_id, data in enumerate(test_reader()):
        # Each batch item is (image, label); stack them into dense arrays.
        images = np.array(
            [x[0].reshape(3, 100, 100) for x in data],
            np.float32,
        )
        labels = np.array([x[1] for x in data]).astype('int64')
        labels = labels[:, np.newaxis]  # add a trailing axis: (N,) -> (N, 1)

        image = fluid.dygraph.to_variable(images)
        label = fluid.dygraph.to_variable(labels)

        predict = model(image)
        acc = fluid.layers.accuracy(predict, label)
        accs.append(acc.numpy()[0])

    # Unweighted mean over batches.
    avg_acc = np.mean(accs)
    print(avg_acc)
笔者在这里训练了一下,正确率达到了88%,如果在模型部分再加一层卷积,笔者训练能够达到95% 大家可以自行修改
读取预测图像,进行预测
def load_image(path):
    """Load an image file as a normalized CHW float32 array.

    Args:
        path: path of the image file to read.

    Returns:
        Array of shape (3, 100, 100) with values scaled by 1/255.
        The shape is also printed.
    """
    # Close the file handle promptly; convert() returns a loaded copy.
    with Image.open(path) as raw:
        # Force 3 channels so transpose((2, 0, 1)) always sees an HWC array.
        # Image.LANCZOS is the same filter as the removed Image.ANTIALIAS alias.
        img = raw.convert('RGB').resize((100, 100), Image.LANCZOS)
    img = np.array(img).astype('float32')
    img = img.transpose((2, 0, 1))  # HWC -> CHW
    img = img / 255.0
    print(img.shape)
    return img


# Run inference on a single image in dygraph mode.
with fluid.dygraph.guard():
    infer_path = '手势.JPG'
    model = LeNet(False)  # build the network with dropout ratio 0.0
    model_dict, _ = fluid.load_dygraph('LeNet')
    model.load_dict(model_dict)  # restore the trained parameters
    model.eval()                 # evaluation mode
    infer_img = load_image(infer_path)
    infer_img = np.array(infer_img).astype('float32')
    infer_img = infer_img[np.newaxis, :, :, :]  # add the batch axis
    infer_img = fluid.dygraph.to_variable(infer_img)
    result = model(infer_img)
    # `display` is not defined in this snippet; presumably the notebook
    # (IPython) builtin — confirm before running as a plain script.
    display(Image.open(infer_path))
    # Index of the highest score is the predicted label.
    print(np.argmax(result.numpy()))
我们可以看见模型成功预测出了标签为5的手势。虽然正确率仅有88%左右,但作为我们的案例无疑是成功的,大家可以自行修改超参数和网络结构。
数据集笔者刚来,还不知道怎么上传,没看见有这个按键
给大家贴个课程链接
- 点赞 5
- 收藏
- 分享
- 文章举报
相关文章推荐
- 车牌识别 百度飞桨深度学习7日—Day03
- 口罩分类 百度飞桨深度学习7日—Day04
- 百度深度学习的图像识别进展
- 百度DMLC分布式深度机器学习开源项目(简称“深盟”)上线了如xgboost(速度快效果好的Boosting模型)、CXXNET(极致的C++深度学习库)、Minerva(高效灵活的并行深度学习引擎)以及Parameter Server(一小时训练600T数据)等产品,在语音识别、OCR识别、人脸识别以及计算效率提升上发布了多个成熟产品。
- 百度DMLC分布式深度机器学习开源项目(简称“深盟”)上线了如xgboost(速度快效果好的Boosting模型)、CXXNET(极致的C++深度学习库)、Minerva(高效灵活的并行深度学习引擎)以及Parameter Server(一小时训练600T数据)等产品,在语音识别、OCR识别、人脸识别以及计算效率提升上发布了多个成熟产品。
- 【深度学习基础】数字手势识别实验:2.MLP
- 《21个项目玩转深度学习基于tensorflow》——识别mnist
- 英特尔计算引擎、阿里大规模图形神经网络平台、百度飞桨平台、索尼音乐生成AI套件......重量级深度学习工业产品亮相NeurIPS 2019行业展览会!
- 百度魅族深度学习大赛初赛冠军作品(图像识别.源码)
- 【深度学习基础】数字手势识别实验:1.任务描述
- 深度学习花朵的识别(花朵+CNN+TensorFlow+多张识别)代码
- 图像识别中的深度学习
- 农耕不再靠“天意”,中科赛诺用百度飞桨打造智能“地脸识别”
- 使用深度学习TensorFlow框架进行图片识别
- 学习笔记(05):深度学习之图像识别 核心技术与案例实战-图像分割模型
- 深度学习在图像识别中的研究进展与展望
- 飞桨paddlepaddle深度学习七日学习心得
- 【目标识别】深度学习进行目标识别的资源列表(截至2016-4)
- 百度的"深度学习"之路
- 深度学习 目标识别 资源