您的位置：首页 > 编程语言

Tensorflow2.0：Yolo v3代码详解（三）

2020-03-06 12:27 1136 查看

这次主要是针对yolov3执行测试过程的代码解析：第一部分是主体test文件代码解析，第二部分是针对utils文件代码解析，第三部分是针对config文件代码解析。

第一部分针对test文件代码解析

yolov3文件代码可以在Tensorflow2.0：Yolo v3代码详解（一）找到

import cv2
import os
import shutil
import numpy as np
import tensorflow as tf
import core.utils as utils
from core.config import cfg
from core.yolov3 import YOLOv3, decode

INPUT_SIZE = 416  # side length of the square image fed to the model

# Read the class-name file once; the class count is derived from the same table.
CLASSES = utils.read_class_names(cfg.YOLO.CLASSES)  # {class index: class name}
NUM_CLASS = len(CLASSES)

# Output directories for the per-image prediction / ground-truth text files.
predicted_dir_path = '../mAP/predicted'
ground_truth_dir_path = '../mAP/ground-truth'

# Start each run from empty output directories.
for output_dir in (predicted_dir_path, ground_truth_dir_path, cfg.TEST.DECTECTED_IMAGE_PATH):
    if output_dir is None:
        # The evaluation loop treats a None detection path as "do not save images".
        continue
    if os.path.exists(output_dir):
        shutil.rmtree(output_dir)
    # makedirs also creates missing parents (e.g. '../mAP'); os.mkdir would raise.
    os.makedirs(output_dir)

# Model input: one RGB image of INPUT_SIZE x INPUT_SIZE.
input_layer = tf.keras.layers.Input([INPUT_SIZE, INPUT_SIZE, 3])
# Model outputs: every YOLOv3 feature map passed through decode(), in order.
feature_maps = YOLOv3(input_layer)
bbox_tensors = [decode(fm, i) for i, fm in enumerate(feature_maps)]
# Assemble the Keras model and restore the trained weights.
model = tf.keras.Model(input_layer, bbox_tensors)
model.load_weights("./yolov3")
# 进行测试
# Run the detector on every annotated test image. For image number `num`, the
# ground-truth boxes go to <ground_truth_dir_path>/<num>.txt and the predicted
# boxes to <predicted_dir_path>/<num>.txt; optionally the image with the
# predictions drawn on it is saved as well.
with open(cfg.TEST.ANNOT_PATH, 'r') as annotation_file:
    for num, line in enumerate(annotation_file):
        # Step 1: parse the annotation line. The first whitespace-separated
        # token is the image path; every following token is a comma-separated
        # list of integers whose first four values are the box corners and
        # whose fifth value is the class index.
        annotation = line.strip().split()
        if not annotation:
            # Blank line (e.g. trailing newline at end of file): nothing to evaluate.
            continue
        image_path = annotation[0]
        image_name = image_path.split('/')[-1]

        # cv2.imread returns None instead of raising when the file cannot be read.
        image = cv2.imread(image_path)
        if image is None:
            raise OSError('failed to read image: %s' % image_path)
        # OpenCV loads images as BGR; the rest of the pipeline works on RGB.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        bbox_data_gt = np.array([list(map(int, box.split(','))) for box in annotation[1:]])
        if len(bbox_data_gt) == 0:
            bboxes_gt = []
            classes_gt = []
        else:
            bboxes_gt, classes_gt = bbox_data_gt[:, :4], bbox_data_gt[:, 4]

        # Write one "<class_name> xmin ymin xmax ymax" line per ground-truth box.
        ground_truth_path = os.path.join(ground_truth_dir_path, str(num) + '.txt')
        print('=> ground truth of %s:' % image_name)
        with open(ground_truth_path, 'w') as f:
            for box_gt, class_gt in zip(bboxes_gt, classes_gt):
                class_name = CLASSES[class_gt]
                xmin, ymin, xmax, ymax = list(map(str, box_gt))
                bbox_mess = ' '.join([class_name, xmin, ymin, xmax, ymax]) + '\n'
                f.write(bbox_mess)
                print('\t' + str(bbox_mess).strip())

        print('=> predict result of %s:' % image_name)
        predict_result_path = os.path.join(predicted_dir_path, str(num) + '.txt')
        image_size = image.shape[:2]  # (height, width) of the original image

        # Step 2: preprocess a copy of the image (the original is kept for
        # drawing) and add a leading batch axis.
        image_data = utils.image_preporcess(np.copy(image), [INPUT_SIZE, INPUT_SIZE])
        image_data = image_data[np.newaxis, ...].astype(np.float32)

        # Step 3: run the model, flatten each output to 2-D (keeping the last
        # axis) and stack all outputs into one (num_predictions, K) tensor.
        pred_bbox = model.predict(image_data)
        pred_bbox = [tf.reshape(x, (-1, tf.shape(x)[-1])) for x in pred_bbox]
        pred_bbox = tf.concat(pred_bbox, axis=0)

        # Step 4: map predictions back to original-image coordinates and drop
        # those that are invalid or score at or below the threshold.
        bboxes = utils.postprocess_boxes(pred_bbox, image_size, INPUT_SIZE, cfg.TEST.SCORE_THRESHOLD)

        # Step 5: remove redundant overlapping boxes per class.
        bboxes = utils.nms(bboxes, cfg.TEST.IOU_THRESHOLD, method='nms')

        # Step 6: optionally save the image with the predicted boxes drawn on it.
        if cfg.TEST.DECTECTED_IMAGE_PATH is not None:
            image = utils.draw_bbox(image, bboxes)
            # `image` is RGB here, but cv2.imwrite expects BGR channel order;
            # convert back so the saved file has correct colours.
            cv2.imwrite(os.path.join(cfg.TEST.DECTECTED_IMAGE_PATH, image_name),
                        cv2.cvtColor(image, cv2.COLOR_RGB2BGR))

        # Write one "<class_name> score xmin ymin xmax ymax" line per prediction.
        with open(predict_result_path, 'w') as f:
            for bbox in bboxes:
                coor = np.array(bbox[:4], dtype=np.int32)
                score = '%.4f' % bbox[4]
                class_name = CLASSES[int(bbox[5])]
                xmin, ymin, xmax, ymax = list(map(str, coor))
                bbox_mess = ' '.join([class_name, score, xmin, ymin, xmax, ymax]) + '\n'
                f.write(bbox_mess)
                print('\t' + str(bbox_mess).strip())

第二部分针对utils文件代码解析

import cv2
import random
import colorsys
import numpy as np
from core.config import cfg

def load_weights(model, weights_file):
    """Copy Darknet-format weights from a binary file into a Keras model.

    The file is read sequentially: a header of five int32 values, then, for
    each of the 75 convolution layers in order, either the batch-norm
    parameters or the convolution bias, followed by the convolution kernel.

    Args:
        model: object exposing ``get_layer(name)``; layers are looked up as
            ``conv2d``, ``conv2d_1`` ... ``conv2d_74`` and
            ``batch_normalization``, ``batch_normalization_1`` ...
        weights_file: path of the binary weights file.

    Raises:
        AssertionError: if unread bytes remain after all layers were loaded.
    """
    # Convolutions that carry a bias and have no batch-norm layer
    # (presumably the three YOLO output convolutions -- confirm against the model).
    biased_conv_ids = (58, 66, 74)

    # The context manager closes the file even if a layer lookup or reshape fails.
    with open(weights_file, 'rb') as wf:
        # Header: major, minor, revision, seen and one more int32; values are unused.
        np.fromfile(wf, dtype=np.int32, count=5)

        bn_index = 0
        for i in range(75):
            conv_layer_name = 'conv2d_%d' % i if i > 0 else 'conv2d'
            conv_layer = model.get_layer(conv_layer_name)
            filters = conv_layer.filters
            k_size = conv_layer.kernel_size[0]
            in_dim = conv_layer.input_shape[-1]
            has_batch_norm = i not in biased_conv_ids

            if has_batch_norm:
                bn_layer_name = ('batch_normalization_%d' % bn_index
                                 if bn_index > 0 else 'batch_normalization')
                # darknet weights: [beta, gamma, mean, variance]
                bn_weights = np.fromfile(wf, dtype=np.float32, count=4 * filters)
                # tf weights: [gamma, beta, mean, variance]
                bn_weights = bn_weights.reshape((4, filters))[[1, 0, 2, 3]]
                bn_layer = model.get_layer(bn_layer_name)
                bn_index += 1
            else:
                conv_bias = np.fromfile(wf, dtype=np.float32, count=filters)

            # darknet shape (out_dim, in_dim, height, width)
            conv_shape = (filters, in_dim, k_size, k_size)
            # np.prod replaces np.product, which was removed in NumPy 2.0.
            conv_weights = np.fromfile(wf, dtype=np.float32, count=np.prod(conv_shape))
            # tf shape (height, width, in_dim, out_dim)
            conv_weights = conv_weights.reshape(conv_shape).transpose([2, 3, 1, 0])

            if has_batch_norm:
                conv_layer.set_weights([conv_weights])
                bn_layer.set_weights(bn_weights)
            else:
                conv_layer.set_weights([conv_weights, conv_bias])

        # Every byte must have been consumed, otherwise file and model disagree.
        assert len(wf.read()) == 0, 'failed to read all data'

def read_class_names(class_file_name):
    """Return a dict mapping line index to class name.

    The file is expected to hold one class name per line; only the trailing
    newline character is removed from each line.
    """
    with open(class_file_name, 'r') as handle:
        return {index: raw_line.strip('\n') for index, raw_line in enumerate(handle)}

def get_anchors(anchors_path):
    """Load anchors from the first line of a file.

    The line must hold 18 comma-separated numbers; they are returned as a
    float32 array of shape (3, 3, 2).
    """
    with open(anchors_path) as handle:
        first_line = handle.readline()
    values = np.array(first_line.split(','), dtype=np.float32)
    return values.reshape(3, 3, 2)

def image_preporcess(image, target_size, gt_boxes=None):
    """Letterbox an image to ``target_size`` and scale pixel values by 1/255.

    The image is resized with its aspect ratio preserved, centred on a canvas
    filled with the value 128, and the canvas is then divided by 255.

    Args:
        image: array whose shape unpacks as (height, width, channels).
        target_size: (target_height, target_width).
        gt_boxes: optional box array indexed as [:, 0..3] = xmin, ymin, xmax,
            ymax; when given, it is rescaled and shifted IN PLACE to match the
            letterboxed image.

    Returns:
        The padded image, or ``(padded_image, gt_boxes)`` when boxes were given.
    """
    target_h, target_w = target_size
    src_h, src_w, _ = image.shape

    # Uniform scale so the whole image fits inside the target rectangle.
    scale = min(target_w / src_w, target_h / src_h)
    new_w, new_h = int(scale * src_w), int(scale * src_h)
    resized = cv2.resize(image, (new_w, new_h))

    # Offsets that centre the resized image on the canvas.
    pad_x, pad_y = (target_w - new_w) // 2, (target_h - new_h) // 2
    canvas = np.full(shape=[target_h, target_w, 3], fill_value=128.0)
    canvas[pad_y:new_h + pad_y, pad_x:new_w + pad_x, :] = resized
    canvas = canvas / 255.

    if gt_boxes is None:
        return canvas

    # Apply the same scale and offset to the box x and y coordinates.
    gt_boxes[:, [0, 2]] = gt_boxes[:, [0, 2]] * scale + pad_x
    gt_boxes[:, [1, 3]] = gt_boxes[:, [1, 3]] * scale + pad_y
    return canvas, gt_boxes

def draw_bbox(image, bboxes, classes=read_class_names(cfg.YOLO.CLASSES), show_label=True):
    """Draw bounding boxes (and optionally labels) on ``image`` in place.

    Args:
        image: array whose shape unpacks as (height, width, channels); it is
            drawn on directly.
        bboxes: iterable of [x_min, y_min, x_max, y_max, probability, cls_id].
        classes: mapping from class index to class name. NOTE: the default is
            evaluated once, when this function is defined.
        show_label: when true, draw a filled label box with "<name>: <score>"
            above each bounding box.

    Returns:
        The same ``image`` object with the boxes drawn on it.
    """
    num_classes = len(classes)
    image_h, image_w, _ = image.shape

    # One colour per class, evenly spaced in hue and converted to 0-255 ints.
    hsv_tuples = [(1.0 * x / num_classes, 1., 1.) for x in range(num_classes)]
    colors = [tuple(int(channel * 255) for channel in colorsys.hsv_to_rgb(*hsv))
              for hsv in hsv_tuples]
    # A private generator seeded with 0 yields the same fixed permutation as
    # seeding the module-level generator, without clobbering the caller's
    # global random state.
    random.Random(0).shuffle(colors)

    # These depend only on the image, so compute them once.
    font_scale = 0.5
    bbox_thick = int(0.6 * (image_h + image_w) / 600)

    for bbox in bboxes:
        coor = np.array(bbox[:4], dtype=np.int32)
        score = bbox[4]
        class_ind = int(bbox[5])
        bbox_color = colors[class_ind]
        # Top-left and bottom-right corners of the box.
        c1, c2 = (coor[0], coor[1]), (coor[2], coor[3])
        cv2.rectangle(image, c1, c2, bbox_color, bbox_thick)

        if show_label:
            bbox_mess = '%s: %.2f' % (classes[class_ind], score)
            # Size of the rendered text, used to size the label background.
            t_size = cv2.getTextSize(bbox_mess, 0, font_scale, thickness=bbox_thick // 2)[0]
            # Filled rectangle (thickness -1) above the box as label background.
            cv2.rectangle(image, c1, (c1[0] + t_size[0], c1[1] - t_size[1] - 3), bbox_color, -1)
            cv2.putText(image, bbox_mess, (c1[0], c1[1] - 2), cv2.FONT_HERSHEY_SIMPLEX,
                        font_scale, (0, 0, 0), bbox_thick // 2, lineType=cv2.LINE_AA)
    return image

def bboxes_iou(boxes1, boxes2):
    """Compute the IoU between boxes given as (xmin, ymin, xmax, ymax).

    Args:
        boxes1, boxes2: array-likes whose last axis holds the four corner
            coordinates; leading axes follow NumPy broadcasting.

    Returns:
        IoU values with the broadcast leading shape. Results are floored at
        float32 machine epsilon, so non-overlapping boxes yield eps rather than
        0. Pairs whose union area is zero (two degenerate boxes) also yield eps
        instead of NaN.
    """
    boxes1 = np.array(boxes1)
    boxes2 = np.array(boxes2)

    boxes1_area = (boxes1[..., 2] - boxes1[..., 0]) * (boxes1[..., 3] - boxes1[..., 1])
    boxes2_area = (boxes2[..., 2] - boxes2[..., 0]) * (boxes2[..., 3] - boxes2[..., 1])

    # Corners of the intersection rectangle.
    left_up = np.maximum(boxes1[..., :2], boxes2[..., :2])
    right_down = np.minimum(boxes1[..., 2:], boxes2[..., 2:])

    # Negative extents mean the boxes do not overlap along that axis.
    inter_section = np.maximum(right_down - left_up, 0.0)
    inter_area = inter_section[..., 0] * inter_section[..., 1]
    union_area = boxes1_area + boxes2_area - inter_area

    # A zero union would give 0/0 = NaN; silence the warning for that case and
    # substitute 0 so the epsilon floor below applies.
    with np.errstate(divide='ignore', invalid='ignore'):
        ious = 1.0 * inter_area / union_area
    ious = np.where(union_area != 0, ious, 0.0)

    return np.maximum(ious, np.finfo(np.float32).eps)

def nms(bboxes, iou_threshold, sigma=0.3, method='nms'):
    """Suppress redundant boxes class by class.

    For each class, the highest-scoring remaining box is kept, the scores of
    the other boxes of that class are re-weighted by their IoU with it, and
    boxes whose score is no longer positive are discarded. This repeats until
    no boxes of the class remain.

    :param bboxes: array of rows (xmin, ymin, xmax, ymax, score, class)
    :param iou_threshold: with method 'nms', boxes whose IoU with the kept box
        exceeds this value get weight 0
    :param sigma: with method 'soft-nms', scores are multiplied by
        exp(-iou**2 / sigma)
    :param method: 'nms' or 'soft-nms'
    :return: list of kept box rows

    Note: soft-nms, https://arxiv.org/pdf/1704.04503.pdf
          https://github.com/bharatsingh430/soft-nms
    """
    kept = []
    # Distinct class ids present among the boxes.
    for cls in list(set(bboxes[:, 5])):
        # Boolean indexing copies, so score updates below never touch `bboxes`.
        candidates = bboxes[bboxes[:, 5] == cls]

        while len(candidates) > 0:
            # Keep the best-scoring candidate and remove it from the pool.
            top = np.argmax(candidates[:, 4])
            winner = candidates[top]
            kept.append(winner)
            candidates = np.concatenate([candidates[:top], candidates[top + 1:]])

            overlap = bboxes_iou(winner[np.newaxis, :4], candidates[:, :4])

            # Per-candidate score multiplier; defaults to "keep unchanged".
            weight = np.ones((len(overlap),), dtype=np.float32)
            assert method in ['nms', 'soft-nms']
            if method == 'nms':
                weight[overlap > iou_threshold] = 0.0
            if method == 'soft-nms':
                weight = np.exp(-(1.0 * overlap ** 2 / sigma))

            candidates[:, 4] = candidates[:, 4] * weight
            # Only candidates with a positive score stay in the pool.
            candidates = candidates[candidates[:, 4] > 0.]

    return kept

def postprocess_boxes(pred_bbox, org_img_shape, input_size, score_threshold):
    """Convert raw predictions into filtered boxes in original-image coordinates.

    Args:
        pred_bbox: 2-D array-like with rows
            (x_center, y_center, width, height, confidence, class probabilities...)
            expressed in the letterboxed network-input coordinate system.
        org_img_shape: (height, width) of the original image.
        input_size: side length of the square network input.
        score_threshold: boxes are kept only if
            confidence * max class probability is strictly greater than this.

    Returns:
        Array of shape (num_kept, 6) with rows
        (xmin, ymin, xmax, ymax, score, class index).
    """
    valid_scale = [0, np.inf]
    pred_bbox = np.array(pred_bbox)
    centers_sizes = pred_bbox[:, 0:4]
    objectness = pred_bbox[:, 4]
    class_probs = pred_bbox[:, 5:]

    # (1) (x, y, w, h) -> (xmin, ymin, xmax, ymax)
    centers = centers_sizes[:, :2]
    half_sizes = centers_sizes[:, 2:] * 0.5
    corners = np.concatenate([centers - half_sizes, centers + half_sizes], axis=-1)

    # (2) Undo the letterbox: remove the padding offset, then the resize scale.
    org_h, org_w = org_img_shape
    resize_ratio = min(input_size / org_w, input_size / org_h)
    pad_w = (input_size - resize_ratio * org_w) / 2
    pad_h = (input_size - resize_ratio * org_h) / 2
    corners[:, 0::2] = 1.0 * (corners[:, 0::2] - pad_w) / resize_ratio
    corners[:, 1::2] = 1.0 * (corners[:, 1::2] - pad_h) / resize_ratio

    # (3) Clip to the image: the top-left corner is bounded below by (0, 0) and
    # the bottom-right corner above by (org_w - 1, org_h - 1). Boxes that end
    # up inverted lie entirely outside the image and are zeroed.
    top_left = np.maximum(corners[:, :2], [0, 0])
    bottom_right = np.minimum(corners[:, 2:], [org_w - 1, org_h - 1])
    corners = np.concatenate([top_left, bottom_right], axis=-1)
    inverted = np.logical_or(corners[:, 0] > corners[:, 2], corners[:, 1] > corners[:, 3])
    corners[inverted] = 0

    # (4) Box scale = sqrt(area); zero-area boxes (including those zeroed
    # above) fall outside the open interval and are rejected.
    box_scale = np.sqrt(np.multiply.reduce(corners[:, 2:4] - corners[:, 0:2], axis=-1))
    scale_ok = np.logical_and(valid_scale[0] < box_scale, box_scale < valid_scale[1])

    # (5) Score = confidence * probability of the most likely class.
    best_class = np.argmax(class_probs, axis=-1)
    best_score = objectness * class_probs[np.arange(len(corners)), best_class]
    keep = np.logical_and(scale_ok, best_score > score_threshold)

    kept_corners = corners[keep]
    kept_scores = best_score[keep]
    kept_classes = best_class[keep]
    return np.concatenate(
        [kept_corners, kept_scores[:, np.newaxis], kept_classes[:, np.newaxis]], axis=-1)

第三部分针对config文件的解析

# 创建一个字典
from easydict import EasyDict as edict
# Whole configuration declared as one nested literal; EasyDict converts the
# nested dicts into attribute-accessible dictionaries.
__C = edict({
    # YOLO options
    'YOLO': {
        # File listing the class names
        'CLASSES': "./data/classes/coco.names",
        'ANCHORS': "./data/anchors/basline_anchors.txt",
        'STRIDES': [8, 16, 32],
        'ANCHOR_PER_SCALE': 3,
        'IOU_LOSS_THRESH': 0.5,
    },
    # Train options
    'TRAIN': {
        'ANNOT_PATH': "./data/dataset/yymnist_train.txt",
        'BATCH_SIZE': 4,
        # Multi-scale alternative:
        # [320, 352, 384, 416, 448, 480, 512, 544, 576, 608]
        'INPUT_SIZE': [416],
        'DATA_AUG': True,
        'LR_INIT': 1e-3,
        'LR_END': 1e-6,
        'WARMUP_EPOCHS': 2,
        'EPOCHS': 30,
    },
    # TEST options
    'TEST': {
        'ANNOT_PATH': "./data/dataset/yymnist_test.txt",
        'BATCH_SIZE': 2,
        'INPUT_SIZE': 544,
        'DATA_AUG': False,
        'DECTECTED_IMAGE_PATH': "./data/detection/",
        'SCORE_THRESHOLD': 0.3,
        'IOU_THRESHOLD': 0.45,
    },
})

# Consumers can get config by: from config import cfg
cfg = __C

print(cfg)

点赞
收藏
分享
文章举报

DocPark 发布了30 篇原创文章 · 获赞 2 · 访问量 791 私信关注

内容来自用户分享和网络整理，不保证内容的准确性，如有侵权内容，可联系管理员处理

标签：

相关文章推荐

新的分享

章节导航

Tensorflow2.0：Yolo v3代码详解（三）

第一部分 针对test文件代码解析

第二部分 针对utils文件代码解析

第三部分 针对config文件的解析

第一部分针对test文件代码解析

第二部分针对utils文件代码解析

第三部分针对config文件的解析