
Hands-on Gesture Recognition - Baidu PaddlePaddle Deep Learning 7-Day Course, Day 02


Baidu Deep Learning 7-Day Course, Day 02: Gesture Recognition

  • We train with the PaddlePaddle 1.7.0 dynamic graph (dygraph) API
  • The three steps of deep learning

    The gesture recognition task is built on paddlepaddle 1.7.0 (because we need the dynamic graph).

    Building the model

    1. What network architecture should we use?
    2. How many layers, and how many neurons in each layer?

    Deeper is not necessarily better: the deeper a network, the more complex it becomes, and architectures such as ResNet and DenseNet were designed precisely to ease problems like overfitting and gradients failing to propagate through very deep stacks.
    For this task we can pick a simple, classic network such as LeNet.

    class LeNet(fluid.dygraph.Layer):
        def __init__(self, training=True):
            super(LeNet, self).__init__()
            self.conv1 = Conv2D(num_channels=3, num_filters=32, filter_size=3, act='relu')
            self.pool1 = Pool2D(pool_size=2, pool_stride=2)

            self.conv2 = Conv2D(num_channels=32, num_filters=32, filter_size=3, act='relu')
            self.pool2 = Pool2D(pool_size=2, pool_stride=2)

            self.conv3 = Conv2D(num_channels=32, num_filters=64, filter_size=3, act='relu')
            self.pool3 = Pool2D(pool_size=2, pool_stride=2)

            self.fc1 = Linear(input_dim=6400, output_dim=4096, act='relu')  # 64 * 10 * 10 = 6400
            self.drop_ratio = 0.5 if training else 0.0  # dropout only during training
            self.fc2 = Linear(input_dim=4096, output_dim=10)  # 10 gesture classes

        def forward(self, inputs):
            conv1 = self.conv1(inputs)   # [32, 32, 98, 98]  (N, C, H, W)
            pool1 = self.pool1(conv1)    # [32, 32, 49, 49]

            conv2 = self.conv2(pool1)    # [32, 32, 47, 47]
            pool2 = self.pool2(conv2)    # [32, 32, 23, 23]

            conv3 = self.conv3(pool2)    # [32, 64, 21, 21]
            pool3 = self.pool3(conv3)    # [32, 64, 10, 10]

            rs_1 = fluid.layers.reshape(pool3, [pool3.shape[0], -1])  # flatten to [N, 6400]
            fc1 = self.fc1(rs_1)
            drop1 = fluid.layers.dropout(fc1, self.drop_ratio)
            y = self.fc2(drop1)

            return y

    Here we implement the convolutional network with PaddlePaddle.

    Loss function

    Choose a commonly used loss, such as squared error or cross entropy.
    Here we use the cross-entropy implementation provided by PaddlePaddle:
    fluid.layers.softmax_with_cross_entropy

    The official documentation for PaddlePaddle's dynamic graph API is here:
    https://www.paddlepaddle.org.cn/documentation/docs/zh/api_cn/dygraph_cn.html
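
    As a quick illustration (my own minimal sketch on random dummy data, not part of the course code), the call takes raw logits of shape [N, num_classes] and int64 labels of shape [N, 1]:

    import numpy as np
    import paddle.fluid as fluid

    with fluid.dygraph.guard():
        # dummy logits for a batch of 4 samples and 10 classes
        logits = fluid.dygraph.to_variable(np.random.rand(4, 10).astype('float32'))
        # hard labels must be int64 with shape [N, 1]
        labels = fluid.dygraph.to_variable(np.random.randint(0, 10, (4, 1)).astype('int64'))
        loss = fluid.layers.softmax_with_cross_entropy(logits, labels)  # per-sample loss, shape [4, 1]
        print(fluid.layers.mean(loss).numpy())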

    Learning the parameters

    1. Gradient descent
    2. Back-propagation

    The main optimization algorithms are GD, SGD, Momentum, RMSProp and Adam.

    We choose fluid.optimizer.Momentum, which keeps an exponentially weighted moving average of the gradients; it behaves as if it carried momentum, so it cannot simply stop on the spot [doge].
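
    To make that "moving average" intuition concrete, here is a minimal NumPy sketch of the classic momentum update rule (my own illustration of the general algorithm, not Paddle's internal implementation):

    import numpy as np

    def momentum_step(param, grad, velocity, lr=0.001, mu=0.9):
        # velocity is an exponentially weighted moving average of past gradients
        velocity = mu * velocity - lr * grad
        # the parameter keeps drifting in the accumulated direction ("momentum")
        return param + velocity, velocity

    # toy usage: the same gradient applied three times keeps accelerating the update
    p, v = np.array([1.0]), np.zeros(1)
    for g in [np.array([0.5])] * 3:
        p, v = momentum_step(p, g, v)
        print(p, v)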

    We now train with the PaddlePaddle 1.7.0 dynamic graph.

    Data loading and preprocessing

    import os
    import time
    import random
    import numpy as np
    from PIL import Image
    import matplotlib.pyplot as plt
    import paddle
    import paddle.fluid as fluid
    import paddle.fluid.layers as layers
    from multiprocessing import cpu_count
    from paddle.fluid.dygraph import Pool2D, Conv2D
    from paddle.fluid.dygraph import Linear

    # Generate the image lists (one line per sample: "<path>\t<label>")
    data_path = '/home/aistudio/data/data23668/Dataset'
    character_folders = os.listdir(data_path)
    # print(character_folders)
    if os.path.exists('./train_data.list'):
        os.remove('./train_data.list')
    if os.path.exists('./test_data.list'):
        os.remove('./test_data.list')

    for character_folder in character_folders:
        with open('./train_data.list', 'a') as f_train:
            with open('./test_data.list', 'a') as f_test:
                if character_folder == '.DS_Store':
                    continue
                character_imgs = os.listdir(os.path.join(data_path, character_folder))
                count = 0
                for img in character_imgs:
                    if img == '.DS_Store':
                        continue
                    # every 10th image goes to the test list, the rest to the train list
                    if count % 10 == 0:
                        f_test.write(os.path.join(data_path, character_folder, img) + '\t' + character_folder + '\n')
                    else:
                        f_train.write(os.path.join(data_path, character_folder, img) + '\t' + character_folder + '\n')
                    count += 1
    print('Image lists generated')

    # Define the readers for the training and test sets
    def data_mapper(sample):
        img, label = sample
        img = Image.open(img)
        img = img.resize((100, 100), Image.ANTIALIAS)
        img = np.array(img).astype('float32')
        img = img.transpose((2, 0, 1))  # HWC -> CHW
        img = img / 255.0
        return img, label

    def data_reader(data_list_path):
        def reader():
            with open(data_list_path, 'r') as f:
                lines = f.readlines()
                for line in lines:
                    img, label = line.split('\t')
                    yield img, int(label)
        return paddle.reader.xmap_readers(data_mapper, reader, cpu_count(), 512)

    # Data provider for training
    train_reader = paddle.batch(reader=paddle.reader.shuffle(reader=data_reader('./train_data.list'), buf_size=256), batch_size=32)
    # Data provider for testing
    test_reader = paddle.batch(reader=data_reader('./test_data.list'), batch_size=32)
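
    As an optional sanity check (my own addition, not in the course notebook), you can pull one batch from the reader and confirm the shapes before training:

    batch = next(train_reader())
    print(len(batch))          # batch size, e.g. 32
    print(batch[0][0].shape)   # (3, 100, 100) float32 image
    print(batch[0][1])         # integer class label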

    The model definition is the same as in the "Building the model" section above.


    Training with the dynamic graph

    with fluid.dygraph.guard():

        model = LeNet(True)   # instantiate the model (changed)
        model.train()         # training mode
        # opt = fluid.optimizer.SGDOptimizer(learning_rate=0.01, parameter_list=model.parameters())  # alternative: plain SGD
        opt = fluid.optimizer.Momentum(learning_rate=0.001, momentum=0.9, parameter_list=model.parameters())  # changed

        epochs_num = 150  # number of epochs (changed from 20 to 150)
        # about 60 epochs already reaches 90%+ accuracy; around 100 is recommended

        for pass_num in range(epochs_num):

            for batch_id, data in enumerate(train_reader()):

                images = np.array([x[0].reshape(3, 100, 100) for x in data], np.float32)
                labels = np.array([x[1] for x in data]).astype('int64')
                labels = labels[:, np.newaxis]
                # print(images.shape)
                image = fluid.dygraph.to_variable(images)
                label = fluid.dygraph.to_variable(labels)

                logits = model(image)                 # forward pass (changed)
                pred = fluid.layers.softmax(logits)   # probabilities, used for the accuracy metric
                # print(pred)
                # loss = fluid.layers.cross_entropy(pred, label)
                loss = fluid.layers.softmax_with_cross_entropy(logits, label)  # changed
                avg_loss = fluid.layers.mean(loss)    # scalar loss

                acc = fluid.layers.accuracy(pred, label)  # batch accuracy (changed)

                if batch_id != 0 and batch_id % 50 == 0:
                    print("train_pass:{},batch_id:{},train_loss:{},train_acc:{}".format(
                        pass_num, batch_id, avg_loss.numpy(), acc.numpy()))

                avg_loss.backward()
                opt.minimize(avg_loss)
                model.clear_gradients()

        fluid.save_dygraph(model.state_dict(), 'LeNet')  # save the model parameters (changed)

    Model validation

    with fluid.dygraph.guard():
        accs = []
        model_dict, _ = fluid.load_dygraph('LeNet')  # changed
        model = LeNet(False)         # drop_ratio = 0 for evaluation (same setting as the inference code below)
        model.load_dict(model_dict)  # load the trained parameters
        model.eval()                 # evaluation mode
        for batch_id, data in enumerate(test_reader()):  # test set
            images = np.array([x[0].reshape(3, 100, 100) for x in data], np.float32)
            labels = np.array([x[1] for x in data]).astype('int64')
            labels = labels[:, np.newaxis]

            image = fluid.dygraph.to_variable(images)
            label = fluid.dygraph.to_variable(labels)

            predict = model(image)
            acc = fluid.layers.accuracy(predict, label)
            accs.append(acc.numpy()[0])
        avg_acc = np.mean(accs)
        print(avg_acc)

    I trained this setup and the accuracy reached about 88%; adding one more convolutional layer to the model got me to about 95%. Feel free to modify it yourself, for example as sketched below.
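
    Here is one hypothetical way to add a fourth conv/pool block (my own sketch of "one more convolutional layer", not necessarily the author's exact 95% model). With 100x100 inputs the feature map after the fourth pooling layer is 64 x 4 x 4, so fc1's input_dim has to shrink from 6400 to 1024:

    import paddle.fluid as fluid
    from paddle.fluid.dygraph import Conv2D, Pool2D, Linear

    class LeNetDeeper(fluid.dygraph.Layer):
        def __init__(self, training=True):
            super(LeNetDeeper, self).__init__()
            self.conv1 = Conv2D(num_channels=3, num_filters=32, filter_size=3, act='relu')
            self.pool1 = Pool2D(pool_size=2, pool_stride=2)
            self.conv2 = Conv2D(num_channels=32, num_filters=32, filter_size=3, act='relu')
            self.pool2 = Pool2D(pool_size=2, pool_stride=2)
            self.conv3 = Conv2D(num_channels=32, num_filters=64, filter_size=3, act='relu')
            self.pool3 = Pool2D(pool_size=2, pool_stride=2)
            self.conv4 = Conv2D(num_channels=64, num_filters=64, filter_size=3, act='relu')  # extra block
            self.pool4 = Pool2D(pool_size=2, pool_stride=2)
            self.fc1 = Linear(input_dim=64 * 4 * 4, output_dim=4096, act='relu')  # 1024 instead of 6400
            self.drop_ratio = 0.5 if training else 0.0
            self.fc2 = Linear(input_dim=4096, output_dim=10)

        def forward(self, inputs):
            x = self.pool1(self.conv1(inputs))  # [N, 32, 49, 49]
            x = self.pool2(self.conv2(x))       # [N, 32, 23, 23]
            x = self.pool3(self.conv3(x))       # [N, 64, 10, 10]
            x = self.pool4(self.conv4(x))       # [N, 64, 4, 4]
            x = fluid.layers.reshape(x, [x.shape[0], -1])
            x = fluid.layers.dropout(self.fc1(x), self.drop_ratio)
            return self.fc2(x)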

    Load an image and run prediction

    def load_image(path):
        img = Image.open(path)
        img = img.resize((100, 100), Image.ANTIALIAS)
        img = np.array(img).astype('float32')
        img = img.transpose((2, 0, 1))  # HWC -> CHW
        img = img / 255.0
        print(img.shape)
        return img

    # Build the inference pass with the dynamic graph
    with fluid.dygraph.guard():
        infer_path = '手势.JPG'
        model = LeNet(False)                         # instantiate the model without dropout
        model_dict, _ = fluid.load_dygraph('LeNet')  # changed
        model.load_dict(model_dict)                  # load the trained parameters
        model.eval()                                 # evaluation mode
        infer_img = load_image(infer_path)
        infer_img = np.array(infer_img).astype('float32')
        infer_img = infer_img[np.newaxis, :, :, :]   # add the batch dimension
        infer_img = fluid.dygraph.to_variable(infer_img)
        result = model(infer_img)
        display(Image.open('手势.JPG'))              # display() is available in the AI Studio / Jupyter notebook
        print(np.argmax(result.numpy()))             # predicted class label


    We can see it correctly predicts the gesture for label 5. Although the accuracy is only around 88%, as a course example it clearly works; feel free to tweak the hyperparameters and the network yourself.
    As for the dataset, I'm new here and haven't figured out how to upload it yet (I don't see a button for that).
    I'll also paste the course link for you:
