DeepLearning.ai code笔记4:卷积神经网络
2018-04-03 17:25
525 查看
1、卷积基本结构
Stride 步长: 滤波器在原始图片上每次水平或垂直移动的距离。
卷积大小的计算:滤波器大小为 f,步长为 s,填充为 p,输入图片为 n×n,则卷积得到的特征图大小为(其中 ⌊·⌋ 表示向下取整):
$$\left\lfloor \frac{n+2p-f}{s} + 1 \right\rfloor \times \left\lfloor \frac{n+2p-f}{s} + 1 \right\rfloor$$
主要步骤:
Convolution functions, including:
Zero Padding 零填充
Convolve window 卷积窗口
Convolution forward
Convolution backward (optional)
Pooling functions, including:
Pooling forward
Create mask
Distribute value
Pooling backward (optional)
2、ResNet
3、YOLO
If you were to run your session in a for loop over all your images. Here’s what you would get:
YOLO is a state-of-the-art object detection model that is fast and accurate
It runs an input image through a CNN which outputs a 19x19x5x85 dimensional volume.
The encoding can be seen as a grid where each of the 19x19 cells contains information about 5 boxes.
You filter through all the boxes using non-max suppression. Specifically:
Score thresholding on the probability of detecting a class to keep only accurate (high probability) boxes
Intersection over Union (IoU) thresholding to eliminate overlapping boxes
使用非最大抑制(non-max suppression)来过滤所有框。具体来说:先对检测到某个类别的概率进行分数阈值筛选,只保留准确(高概率)的框;再通过交并比(IoU)阈值筛选来消除重叠的框。
Stride 步长: 滤波器在原始图片上每次水平或垂直移动的距离。
卷积大小的计算:滤波器大小为 f,步长为 s,填充为 p,输入图片为 n×n,则卷积得到的特征图大小为(其中 ⌊·⌋ 表示向下取整):
$$\left\lfloor \frac{n+2p-f}{s} + 1 \right\rfloor \times \left\lfloor \frac{n+2p-f}{s} + 1 \right\rfloor$$
主要步骤:
Convolution functions, including:
Zero Padding 零填充
Convolve window 卷积窗口
Convolution forward
Convolution backward (optional)
Pooling functions, including:
Pooling forward
Create mask
Distribute value
Pooling backward (optional)
2、ResNet
3、YOLO
If you were to run your session in a for loop over all your images. Here’s what you would get:
YOLO is a state-of-the-art object detection model that is fast and accurate
It runs an input image through a CNN which outputs a 19x19x5x85 dimensional volume.
The encoding can be seen as a grid where each of the 19x19 cells contains information about 5 boxes.
You filter through all the boxes using non-max suppression. Specifically:
Score thresholding on the probability of detecting a class to keep only accurate (high probability) boxes
Intersection over Union (IoU) thresholding to eliminate overlapping boxes
使用非最大抑制(non-max suppression)来过滤所有框。具体来说:先对检测到某个类别的概率进行分数阈值筛选,只保留准确(高概率)的框;再通过交并比(IoU)阈值筛选来消除重叠的框。
"""
@Author : Peng
@Time : 2018/3/23
info : Using YOLO with 80 classes, 5 anchor boxes and a 19x19 feature map.
"""
import os

import keras
import keras.backend as K
import matplotlib.pyplot as plt
import numpy as np
import scipy.misc
import tensorflow as tf

from LearningDL.task4_3 import yolo_utils
from LearningDL.task4_3.yad2k.models import keras_darknet19, keras_yolo

K.set_image_data_format('channels_last')


def yolo_filters_boxes(box_confidence, boxes, box_class_probs, threshold=0.6):
    """Keep only the boxes whose best class score reaches ``threshold``.

    :param box_confidence: shape [19, 19, 5, 1]; confidence of each of the
        5 anchor boxes in every one of the 19x19 cells
    :param boxes: shape [19, 19, 5, 4]; the 4 coordinates of each anchor box
    :param box_class_probs: shape [19, 19, 5, 80]; per-box probability of
        each of the 80 classes
    :param threshold: minimum class score (confidence * class probability)
        a box needs in order to be kept; this is a score threshold, not an
        IoU threshold
    :return: (scores, boxes, classes) of the selected boxes, with shapes
        (?,), (?, 4) and (?,) where ? is the number of boxes kept
    """
    # Per-class score of every box, shape [19, 19, 5, 80].
    box_scores = box_confidence * box_class_probs

    # Index of the best class and the value of its score, both [19, 19, 5].
    box_classes = K.argmax(box_scores, axis=-1)
    box_class_scores = K.max(box_scores, axis=-1)

    # Boolean mask over the 19x19x5 boxes; boolean_mask flattens the
    # selected entries into a list of unknown length.
    filtering_mask = box_class_scores >= threshold

    scores = tf.boolean_mask(box_class_scores, filtering_mask)
    boxes = tf.boolean_mask(boxes, filtering_mask)
    classes = tf.boolean_mask(box_classes, filtering_mask)
    return scores, boxes, classes


def iou(box1, box2):
    """Return the intersection over union of two corner-format boxes.

    :param box1: (x1, y1, x2, y2), top-left and bottom-right corners
    :param box2: (x1, y1, x2, y2), top-left and bottom-right corners
    :return: intersection area divided by union area; 0.0 when the boxes
        do not overlap or when both boxes have zero area
    """
    # Corners of the intersection rectangle: the inner-most edges.
    x1 = max(box1[0], box2[0])
    y1 = max(box1[1], box2[1])
    x2 = min(box1[2], box2[2])
    y2 = min(box1[3], box2[3])

    # Clamp at zero: for disjoint boxes x2 - x1 or y2 - y1 is negative and
    # taking the absolute value would report a bogus positive intersection.
    inter_area = max(x2 - x1, 0) * max(y2 - y1, 0)

    box1_area = np.abs(box1[2] - box1[0]) * np.abs(box1[3] - box1[1])
    box2_area = np.abs(box2[2] - box2[0]) * np.abs(box2[3] - box2[1])
    union_area = box1_area + box2_area - inter_area

    # Two degenerate (zero-area) boxes would otherwise divide by zero.
    if union_area == 0:
        return 0.0
    return inter_area / union_area


def yolo_non_max_suppression(scores, boxes, classes, max_boxes=10, iou_thsehold=0.5):
    """Apply non-max suppression and keep the best boxes.

    :param scores: shape (None,); class score of each box
    :param boxes: shape (None, 4); box coordinates
    :param classes: shape (None,); class index of each box
    :param max_boxes: int, maximum number of boxes to keep
    :param iou_thsehold: float, IoU threshold passed to
        ``tf.image.non_max_suppression`` (the misspelled name is kept so
        existing keyword callers keep working)
    :return: (scores, boxes, classes) gathered at the indices selected by
        non-max suppression
    """
    # The output size is fed to the NMS op as an initialised tensor.
    max_boxes_tensor = K.variable(max_boxes, dtype=tf.int32)
    K.get_session().run(tf.variables_initializer([max_boxes_tensor]))

    # Indices of the boxes that survive non-max suppression.
    nms_indices = tf.image.non_max_suppression(boxes, scores, max_boxes_tensor, iou_thsehold)

    # Pick the surviving entries out of each tensor.
    scores = K.gather(scores, nms_indices)
    boxes = K.gather(boxes, nms_indices)
    classes = K.gather(classes, nms_indices)
    return scores, boxes, classes


def yolo_eval(yolo_outputs, image_shape=(720., 1280.), max_boxes=10, score_threshold=0.6, iou_threshold=0.5):
    """Turn raw YOLO outputs into the final scores, boxes and classes.

    Non-max suppression is not applied by the pretrained model used here,
    so its output is still unfiltered and may contain overlapping anchor
    boxes; this function applies score filtering followed by NMS.

    :param yolo_outputs: 4-tuple (box_confidence, box_xy, box_wh,
        box_class_probs)
    :param image_shape: shape handed to ``yolo_utils.scale_boxes`` to
        rescale the boxes for drawing
    :param max_boxes: maximum number of boxes kept by NMS
    :param score_threshold: boxes whose best class score is below this
        value are dropped
    :param iou_threshold: IoU threshold used by NMS
    :return: (scores, boxes, classes)
    """
    # Unpack the YOLO outputs.
    box_confidence, box_xy, box_wh, box_class_probs = yolo_outputs

    # Convert (centre, width/height) boxes to corner coordinates.
    boxes = keras_yolo.yolo_boxes_to_corners(box_xy, box_wh)

    # Score filtering defined above.
    scores, boxes, classes = yolo_filters_boxes(box_confidence, boxes, box_class_probs, score_threshold)

    # Rescale the boxes so they can be drawn on the image.
    boxes = yolo_utils.scale_boxes(boxes, image_shape)

    scores, boxes, classes = yolo_non_max_suppression(scores, boxes, classes, max_boxes, iou_threshold)
    return scores, boxes, classes


def unit_test():
    """Ad-hoc checks for the functions above; only the yolo_eval check is active."""
    # --- yolo_filters_boxes: score-threshold filtering ---
    # with tf.Session() as session:
    #     box_confidence = tf.random_normal([19, 19, 5, 1], mean=1, stddev=4, seed=1)
    #     boxes = tf.random_normal([19, 19, 5, 4], mean=1, stddev=4, seed=1)
    #     box_class_probs = tf.random_normal([19, 19, 5, 80], mean=1, stddev=4, seed=1)
    #     scores, boxes, classes = yolo_filters_boxes(box_confidence, boxes, box_class_probs, threshold=0.5)
    #     print("scores[2] = " + str(scores[2].eval()))
    #     print("boxes[2] = " + str(boxes[2].eval()))
    #     print("classes[2] = " + str(classes[2].eval()))
    #     print("scores.shape = " + str(scores.shape))
    #     print("boxes.shape = " + str(boxes.shape))
    #     print("classes.shape = " + str(classes.shape))

    # --- iou ---
    # box1 = (2, 1, 4, 3)
    # box2 = (1, 2, 3, 4)
    # print('iou value is {}'.format(iou(box1, box2)))

    # --- yolo_non_max_suppression ---
    # with tf.Session() as session:
    #     scores = tf.random_normal([54, ], mean=1, stddev=4, seed=1)
    #     boxes = tf.random_normal([54, 4], mean=1, stddev=4, seed=1)
    #     classes = tf.random_normal([54, ], mean=1, stddev=4, seed=1)
    #     scores, boxes, classes = yolo_non_max_suppression(scores, boxes, classes)
    #     print("scores[2] = " + str(scores[2].eval()))
    #     print("boxes[2] = " + str(boxes[2].eval()))
    #     print("classes[2] = " + str(classes[2].eval()))
    #     print("scores.shape = " + str(scores.eval().shape))
    #     print("boxes.shape = " + str(boxes.eval().shape))
    #     print("classes.shape = " + str(classes.eval().shape))

    # --- yolo_eval: full filtering pipeline on random inputs ---
    with tf.Session():
        yolo_outputs = (
            tf.random_normal([19, 19, 5, 1], mean=1, stddev=4, seed=1),
            tf.random_normal([19, 19, 5, 2], mean=1, stddev=4, seed=1),
            tf.random_normal([19, 19, 5, 2], mean=1, stddev=4, seed=1),
            tf.random_normal([19, 19, 5, 80], mean=1, stddev=4, seed=1),
        )
        scores, boxes, classes = yolo_eval(yolo_outputs)
        print("scores[2] = " + str(scores[2].eval()))
        print("boxes[2] = " + str(boxes[2].eval()))
        print("classes[2] = " + str(classes[2].eval()))
        print("scores.shape = " + str(scores.eval().shape))
        print("boxes.shape = " + str(boxes.eval().shape))
        print("classes.shape = " + str(classes.eval().shape))


def predict(sess, image_file, scores, boxes, classes, yolo_model, class_name):
    """Run detection on one image, save the annotated result and show it.

    :param sess: session used to evaluate the graph
    :param image_file: file name of the image inside the "images/" folder;
        the annotated copy is written under "out/" with the same name
    :param scores: scores tensor returned by ``yolo_eval``
    :param boxes: boxes tensor returned by ``yolo_eval``
    :param classes: classes tensor returned by ``yolo_eval``
    :param yolo_model: loaded Keras model whose input is fed the image data
    :param class_name: class names used for colours and box labels
    :return: (out_scores, out_boxes, out_classes) evaluated for this image
    """
    image, image_data = yolo_utils.preprocess_image("images/" + image_file, model_image_size=(608, 608))

    # learning_phase 0 runs the Keras model in test mode.
    out_scores, out_boxes, out_classes = sess.run(
        [scores, boxes, classes],
        feed_dict={yolo_model.input: image_data, K.learning_phase(): 0})
    print("Found {} boxes for {}".format(len(out_boxes), image_file))

    colors = yolo_utils.generate_colors(class_name)
    yolo_utils.draw_boxes(image, out_scores, out_boxes, out_classes, class_name, colors)

    # Saving fails if the output folder is missing, so create it on demand.
    os.makedirs("out", exist_ok=True)
    image.save(os.path.join("out", image_file), quality=90)

    # Display the results in the notebook
    output_image = scipy.misc.imread(os.path.join("out", image_file))
    plt.imshow(output_image)
    plt.show()
    return out_scores, out_boxes, out_classes


if __name__ == '__main__':
    # unit_test()

    # Run YOLO on a test image.
    sess = K.get_session()
    class_name = yolo_utils.read_classes("model_data/coco_classes.txt")
    anchors = yolo_utils.read_anchors("model_data/yolo_anchors.txt")
    image_shape = (720., 1280.)

    # Load the pretrained yolo_v2 model.
    yolo_model = keras.models.load_model("model_data/yolo.h5")
    yolo_model.summary()

    # Convert the model output into YOLO box tensors.
    yolo_outputs = keras_yolo.yolo_head(yolo_model.output, anchors, len(class_name))

    # Filtered prediction tensors.
    scores, boxes, classes = yolo_eval(yolo_outputs, image_shape)
    out_scores, out_boxes, out_classes = predict(sess, "test.jpg", scores, boxes, classes, yolo_model, class_name)
相关文章推荐
- Deep Learning论文笔记之(四)CNN卷积神经网络推导和实现
- Convolutional Neural Networks卷积神经网络
- 《ImageNet Classification with Deep Convolutional Neural Networks》ImageNet与深卷积神经网络分类
- Deep Learning 系列(3):CNN(卷积神经网络)
- lecture5-对象识别与卷积神经网络
- Convolutional Neural Networks卷积神经网络
- 卷积神经网络的网络结构——以LeNet-5为例
- CNN卷积神经网络
- 卷积神经网络
- 卷积神经网络CNN
- Spark MLlib Deep Learning Convolution Neural Network (深度学习-卷积神经网络)3.1
- DeepLearning tutorial(4)CNN卷积神经网络原理简介+代码详解
- 卷积神经网络
- Theano入门——卷积神经网络
- 卷积神经网络全连接层转换为卷积层获得heatmap
- 单层卷积神经网络的实现
- 卷积神经网络 资料
- 深度学习:卷积神经网络(convolution neural network)
- Deep Learning论文笔记之(四)CNN卷积神经网络推导和实现
- 深度学习(DL)与卷积神经网络(CNN)学习笔记随笔-04-基于Python的LeNet之MLP