您的位置:首页 > 理论基础 > 计算机网络

DeepLearning.ai code笔记4:卷积神经网络

2018-04-03 17:25 525 查看
1、卷积基本结构

Stride 步长: 滤波器在原始图片上每次水平或垂直移动的距离。

卷积大小的计算:设滤波器大小为 $f$,步长为 $s$,填充为 $p$,输入图片大小为 $n \times n$,则卷积得到的特征图大小如下,其中 $\lfloor \cdot \rfloor$ 表示向下取整:

$$\left\lfloor \frac{n+2p-f}{s} + 1 \right\rfloor \times \left\lfloor \frac{n+2p-f}{s} + 1 \right\rfloor$$

主要步骤:

Convolution functions, including:

Zero Padding 零填充

Convolve window 卷积窗口

Convolution forward

Convolution backward (optional)

Pooling functions, including:

Pooling forward

Create mask

Distribute value

Pooling backward (optional)

2、ResNet



3、YOLO

If you were to run your session in a for loop over all your images, here's what you would get:

YOLO is a state-of-the-art object detection model that is fast and accurate

It runs an input image through a CNN which outputs a 19x19x5x85 dimensional volume.

The encoding can be seen as a grid where each of the 19x19 cells contains information about 5 boxes.

You filter through all the boxes using non-max suppression. Specifically:

Score thresholding on the probability of detecting a class to keep only accurate (high probability) boxes

Intersection over Union (IoU) thresholding to eliminate overlapping boxes

使用非最大值抑制来过滤所有框。具体来说:先对检测到某个类别的概率进行分数阈值筛选,只保留准确(高概率)的框;再通过交并比(IoU)阈值筛选来消除重叠的框。

"""
@Author : Peng
@Time : 2018/3/23
info :  YOLO的使用, 80个类,5个anchor boxes,Feature Map(19*19)
"""
import numpy as np
import matplotlib.pyplot as plt
import keras
import tensorflow as tf
import os
import scipy.misc

from LearningDL.task4_3.yad2k.models import keras_darknet19, keras_yolo
from LearningDL.task4_3 import yolo_utils

import keras.backend as K

# Tell the Keras backend to use the 'channels_last' image data format.
K.set_image_data_format('channels_last')

def yolo_filters_boxes(box_confidence, boxes, box_class_probs, threshold=0.6):
    """
    Keep only the candidate boxes whose best class score reaches a threshold.

    The shapes below are the ones stated in the original notes for a 19x19
    grid with 5 anchor boxes and 80 classes; the code itself only relies on
    the last axis of the scores being the class axis.

    :param box_confidence: per-box confidence tensor, noted as [19, 19, 5, 1]
    :param boxes: per-box coordinate tensor, noted as [19, 19, 5, 4]
    :param box_class_probs: per-box class probability tensor, noted as [19, 19, 5, 80]
    :param threshold: minimum best-class score a box needs in order to be kept
    :return: (scores, boxes, classes) of the surviving boxes; the number of
             survivors is only known when the graph is run
    """
    # Score of every class for every box (confidence times class probability).
    class_scores = box_confidence * box_class_probs

    # Index of the best class per box, and the score that class achieved.
    best_class = K.argmax(class_scores, axis=-1)
    best_score = K.max(class_scores, axis=-1)

    # Boolean mask over the boxes: True where the best score is high enough.
    keep = (best_score >= threshold)

    # Apply the same mask to scores, coordinates and class ids, in that order.
    scores, boxes, classes = (
        tf.boolean_mask(tensor, keep)
        for tensor in (best_score, boxes, best_class)
    )

    return scores, boxes, classes

def iou(box1, box2):
    """
    Compute the intersection over union (IoU) of two axis-aligned boxes.

    :param box1: corner coordinates (x1, y1, x2, y2) of the first box
    :param box2: corner coordinates (x1, y1, x2, y2) of the second box
    :return: intersection area divided by union area; 0.0 when the boxes do
             not overlap or when the union has zero area
    """
    # Corners of the intersection rectangle: squeeze both boxes "inwards".
    x1 = max(box1[0], box2[0])
    y1 = max(box1[1], box2[1])
    x2 = min(box1[2], box2[2])
    y2 = min(box1[3], box2[3])

    # Clamp each extent at zero. Taking the absolute value instead would turn
    # the gap between two disjoint boxes into a bogus positive intersection.
    inter_area = max(x2 - x1, 0) * max(y2 - y1, 0)

    box1_area = abs(box1[2] - box1[0]) * abs(box1[3] - box1[1])
    box2_area = abs(box2[2] - box2[0]) * abs(box2[3] - box2[1])
    union_area = box1_area + box2_area - inter_area

    # Two degenerate (zero-area) boxes have no meaningful overlap ratio.
    if union_area == 0:
        return 0.0

    return inter_area / union_area

def yolo_non_max_suppression(scores, boxes, classes, max_boxes=10, iou_thsehold=0.5):
    """
    Apply non-max suppression and keep only the selected boxes.

    :param scores: tensor of box scores, noted as shape (None,)
    :param boxes: tensor of box coordinates, noted as shape (None, 4)
    :param classes: tensor of predicted class ids, noted as shape (None,)
    :param max_boxes: int, maximum number of boxes to keep
    :param iou_thsehold: float, IoU threshold handed to the suppression op
                         (the parameter name keeps its original spelling so
                         keyword callers continue to work)
    :return: (scores, boxes, classes) gathered at the selected indices
    """
    # max_boxes is passed as a plain int; no graph variable or session run is
    # needed just to hand this limit to the op.
    nms_indices = tf.image.non_max_suppression(boxes, scores, max_boxes, iou_thsehold)

    # Pick the surviving entries out of each tensor by index.
    scores = K.gather(scores, nms_indices)
    boxes = K.gather(boxes, nms_indices)
    classes = K.gather(classes, nms_indices)

    return scores, boxes, classes

def yolo_eval(yolo_outputs, image_shape=(720., 1280.), max_boxes=10, score_threshold=0.6, iou_threshold=0.5):
    """
    Turn the raw YOLO outputs into the final scores, boxes and classes.

    The steps are: convert boxes to corner form, filter them by score, scale
    them with yolo_utils.scale_boxes, then run non-max suppression.

    :param yolo_outputs: 4-tuple (box_confidence, box_xy, box_wh, box_class_probs)
    :param image_shape: image shape passed to yolo_utils.scale_boxes
    :param max_boxes: maximum number of boxes kept by non-max suppression
    :param score_threshold: minimum best-class score for a box to survive filtering
    :param iou_threshold: IoU threshold used by non-max suppression
    :return: (scores, boxes, classes) after filtering and suppression
    """
    box_confidence, box_xy, box_wh, box_class_probs = yolo_outputs

    # (centre, width/height) representation -> corner representation.
    corner_boxes = keras_yolo.yolo_boxes_to_corners(box_xy, box_wh)

    # Step 1: drop every box whose best class score is below the threshold.
    kept_scores, kept_boxes, kept_classes = yolo_filters_boxes(
        box_confidence, corner_boxes, box_class_probs, score_threshold
    )

    # Step 2: rescale the surviving boxes for drawing on the image.
    scaled_boxes = yolo_utils.scale_boxes(kept_boxes, image_shape)

    # Step 3: non-max suppression over what is left.
    return yolo_non_max_suppression(
        kept_scores, scaled_boxes, kept_classes, max_boxes, iou_threshold
    )

def unit_test():
    """Manual smoke tests; only the yolo_eval check is currently enabled."""
    # --- yolo_filters_boxes: filtering candidate boxes by threshold ---
    # with tf.Session() as session:
    #     box_confidence = tf.random_normal([19, 19, 5, 1], mean=1, stddev=4, seed=1)
    #     boxes = tf.random_normal([19, 19, 5, 4], mean=1, stddev=4, seed=1)
    #     box_class_probs = tf.random_normal([19, 19, 5, 80], mean=1, stddev=4, seed=1)
    #     scores, boxes, classes = yolo_filters_boxes(box_confidence, boxes, box_class_probs, threshold=0.5)
    #     print("scores[2] = " + str(scores[2].eval()))
    #     print("boxes[2] = " + str(boxes[2].eval()))
    #     print("classes[2] = " + str(classes[2].eval()))
    #     print("scores.shape = " + str(scores.shape))
    #     print("boxes.shape = " + str(boxes.shape))
    #     print("classes.shape = " + str(classes.shape))

    # --- iou ---
    # box1 = (2, 1, 4, 3)
    # box2 = (1, 2, 3, 4)
    # print('iou value is {}'.format(iou(box1, box2)))

    # --- yolo_non_max_suppression ---
    # with tf.Session() as session:
    #     scores = tf.random_normal([54, ], mean=1, stddev=4, seed=1)
    #     boxes = tf.random_normal([54, 4], mean=1, stddev=4, seed=1)
    #     classes = tf.random_normal([54, ], mean=1, stddev=4, seed=1)
    #     scores, boxes, classes = yolo_non_max_suppression(scores, boxes, classes)
    #     print("scores[2] = " + str(scores[2].eval()))
    #     print("boxes[2] = " + str(boxes[2].eval()))
    #     print("classes[2] = " + str(classes[2].eval()))
    #     print("scores.shape = " + str(scores.eval().shape))
    #     print("boxes.shape = " + str(boxes.eval().shape))
    #     print("classes.shape = " + str(classes.eval().shape))

    # --- yolo_eval: end-to-end box filtering on random tensors ---
    with tf.Session() as session:
        # Four random tensors standing in for (confidence, xy, wh, class probs);
        # they differ only in the size of the last axis.
        yolo_outputs = tuple(
            tf.random_normal([19, 19, 5, depth], mean=1, stddev=4, seed=1)
            for depth in (1, 2, 2, 80)
        )
        scores, boxes, classes = yolo_eval(yolo_outputs)

        # One sample entry from each result tensor.
        third_score = scores[2].eval()
        third_box = boxes[2].eval()
        third_class = classes[2].eval()
        print("scores[2] = " + str(third_score))
        print("boxes[2] = " + str(third_box))
        print("classes[2] = " + str(third_class))

        # Concrete shapes of the evaluated results.
        scores_shape = scores.eval().shape
        boxes_shape = boxes.eval().shape
        classes_shape = classes.eval().shape
        print("scores.shape = " + str(scores_shape))
        print("boxes.shape = " + str(boxes_shape))
        print("classes.shape = " + str(classes_shape))

def predict(sess, image_file, scores, boxes, classes, yolo_model, class_name):
    """
    Run the YOLO graph on one image, draw the detections, then save and show the result.

    The image is read from the "images" directory and the annotated copy is
    written to the "out" directory under the same file name.

    :param sess: session used to evaluate the graph
    :param image_file: file name of the image inside the "images" directory
    :param scores: scores tensor to evaluate (as returned by yolo_eval)
    :param boxes: boxes tensor to evaluate (as returned by yolo_eval)
    :param classes: classes tensor to evaluate (as returned by yolo_eval)
    :param yolo_model: loaded model whose input placeholder is fed the image
    :param class_name: class names used for colouring and labelling the boxes
    :return: (out_scores, out_boxes, out_classes) as evaluated by the session
    """
    image, image_data = yolo_utils.preprocess_image("images/" + image_file, model_image_size=(608, 608))

    # learning_phase 0 is fed so the model runs in inference mode.
    out_scores, out_boxes, out_classes = sess.run(
        [scores, boxes, classes],
        feed_dict={yolo_model.input: image_data, K.learning_phase(): 0},
    )

    print("Found {} boxes for {}".format(len(out_boxes), image_file))
    colors = yolo_utils.generate_colors(class_name)
    yolo_utils.draw_boxes(image, out_scores, out_boxes, out_classes, class_name, colors)

    # Make sure the output directory exists; saving fails otherwise.
    os.makedirs("out", exist_ok=True)
    output_path = os.path.join("out", image_file)
    image.save(output_path, quality=90)

    # Display the saved result. plt.imread is used because scipy.misc.imread
    # is no longer available in current SciPy releases.
    output_image = plt.imread(output_path)
    plt.imshow(output_image)
    plt.show()

    return out_scores, out_boxes, out_classes

if __name__ == '__main__':
    # unit_test()

    # Run the YOLO pipeline on a single test image.
    sess = K.get_session()
    image_shape = (720., 1280.)

    # Class names and anchor boxes that go with the pre-trained model.
    class_name = yolo_utils.read_classes("model_data/coco_classes.txt")
    anchors = yolo_utils.read_anchors("model_data/yolo_anchors.txt")

    # Load the pre-trained YOLO v2 model and print its layers.
    yolo_model = keras.models.load_model("model_data/yolo.h5")
    yolo_model.summary()

    # Convert the raw model output into the tuple that yolo_eval unpacks.
    yolo_outputs = keras_yolo.yolo_head(yolo_model.output, anchors, len(class_name))

    # Build the filtered prediction tensors.
    scores, boxes, classes = yolo_eval(yolo_outputs, image_shape=image_shape)

    out_scores, out_boxes, out_classes = predict(
        sess=sess,
        image_file="test.jpg",
        scores=scores,
        boxes=boxes,
        classes=classes,
        yolo_model=yolo_model,
        class_name=class_name,
    )
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签:  Deeplearning.ai CNN YOLO