您的位置:首页 > 编程语言

Tensorflow2.0:Yolo v3代码详解(三)

2020-03-06 12:27 1136 查看

这次主要是针对yolov3执行测试过程的代码解析:第一部分是主体test文件代码解析,第二部分是针对utils文件代码解析,第三部分是针对config文件代码解析。

第一部分 针对test文件代码解析

yolov3文件代码可以在Tensorflow2.0:Yolo v3代码详解(一)找到

import cv2
import os
import shutil
import numpy as np
import tensorflow as tf
import core.utils as utils
from core.config import cfg
from core.yolov3 import YOLOv3, decode

INPUT_SIZE = 416  # side length of the square image fed to the model
# Read the class-name file once; the class count is derived from it.
CLASSES = utils.read_class_names(cfg.YOLO.CLASSES)  # class id -> class name
NUM_CLASS = len(CLASSES)  # number of classes

# Directories that receive the per-image text files written by the test loop.
predicted_dir_path = '../mAP/predicted'
ground_truth_dir_path = '../mAP/ground-truth'


def _reset_dir(path):
    """Remove ``path`` if it exists, then recreate it empty.

    ``os.makedirs`` is used so that a missing parent directory (for example
    ``../mAP``) is created instead of raising ``FileNotFoundError``.
    """
    if os.path.exists(path):
        shutil.rmtree(path)
    os.makedirs(path)


# Start from empty output directories so results of earlier runs do not mix in.
_reset_dir(predicted_dir_path)
_reset_dir(ground_truth_dir_path)
# The test loop treats a ``None`` detection path as "do not save images",
# so the directory is only prepared when a path is configured.
if cfg.TEST.DECTECTED_IMAGE_PATH is not None:
    _reset_dir(cfg.TEST.DECTECTED_IMAGE_PATH)

# Model input.
input_layer = tf.keras.layers.Input([INPUT_SIZE, INPUT_SIZE, 3])
# Model outputs: every feature map returned by YOLOv3 is passed through decode.
feature_maps = YOLOv3(input_layer)
bbox_tensors = [decode(fm, i) for i, fm in enumerate(feature_maps)]
# Build the model.
model = tf.keras.Model(input_layer, bbox_tensors)
# Load the model parameters.
model.load_weights("./yolov3")
# 进行测试
# Run the test: each annotation line describes one image as
# "<image path> xmin,ymin,xmax,ymax,class_id ...".
with open(cfg.TEST.ANNOT_PATH, 'r') as annotation_file:
    for num, line in enumerate(annotation_file):  # num is the line index
        # Step 1: read the image and its ground-truth boxes, and write the
        # ground truth to a text file.
        annotation = line.strip().split()  # whitespace-separated fields
        if not annotation:
            # Blank line (e.g. at the end of the file): nothing to evaluate.
            continue
        image_path = annotation[0]
        image_name = os.path.basename(image_path)

        # cv2.imread yields BGR data and returns None when the file cannot
        # be read; fail with a clear message instead of inside cvtColor.
        image = cv2.imread(image_path)
        if image is None:
            raise FileNotFoundError('cannot read image: %s' % image_path)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        # Ground-truth boxes: one "xmin,ymin,xmax,ymax,class_id" per field.
        bbox_data_gt = np.array([list(map(int, box.split(','))) for box in annotation[1:]])
        if len(bbox_data_gt) == 0:
            bboxes_gt = []
            classes_gt = []
        else:
            bboxes_gt, classes_gt = bbox_data_gt[:, :4], bbox_data_gt[:, 4]

        ground_truth_path = os.path.join(ground_truth_dir_path, str(num) + '.txt')
        print('=> ground truth of %s:' % image_name)
        # One line per box: "<class name> xmin ymin xmax ymax".
        with open(ground_truth_path, 'w') as f:
            for box_gt, class_gt in zip(bboxes_gt, classes_gt):
                class_name = CLASSES[class_gt]
                xmin, ymin, xmax, ymax = list(map(str, box_gt))
                bbox_mess = ' '.join([class_name, xmin, ymin, xmax, ymax]) + '\n'
                f.write(bbox_mess)
                print('\t' + str(bbox_mess).strip())

        print('=> predict result of %s:' % image_name)
        predict_result_path = os.path.join(predicted_dir_path, str(num) + '.txt')
        image_size = image.shape[:2]

        # Step 2: preprocess (resize with kept aspect ratio, pad, divide by 255).
        # A copy is passed so the original image is left untouched.
        image_data = utils.image_preporcess(np.copy(image), [INPUT_SIZE, INPUT_SIZE])
        image_data = image_data[np.newaxis, ...].astype(np.float32)

        # Step 3: run the model and flatten every output to 2-D, then stack
        # them so each row is one predicted box.
        pred_bbox = model.predict(image_data)
        pred_bbox = [tf.reshape(x, (-1, tf.shape(x)[-1])) for x in pred_bbox]
        pred_bbox = tf.concat(pred_bbox, axis=0)

        # Step 4: map boxes back to the original image and drop boxes that
        # are invalid or score below the threshold.
        # Result rows: xmin, ymin, xmax, ymax, score, class id.
        bboxes = utils.postprocess_boxes(pred_bbox, image_size, INPUT_SIZE, cfg.TEST.SCORE_THRESHOLD)

        # Step 5: remove redundant boxes with non-maximum suppression.
        bboxes = utils.nms(bboxes, cfg.TEST.IOU_THRESHOLD, method='nms')

        # Step 6: optionally save the image with the predicted boxes drawn.
        if cfg.TEST.DECTECTED_IMAGE_PATH is not None:
            image = utils.draw_bbox(image, bboxes)
            # The image was converted to RGB above, but cv2.imwrite expects
            # BGR; convert back so the saved colours are not swapped.
            detected_image_path = os.path.join(cfg.TEST.DECTECTED_IMAGE_PATH, image_name)
            cv2.imwrite(detected_image_path, cv2.cvtColor(image, cv2.COLOR_RGB2BGR))

        # One line per prediction: "<class name> score xmin ymin xmax ymax".
        with open(predict_result_path, 'w') as f:
            for bbox in bboxes:
                coor = np.array(bbox[:4], dtype=np.int32)
                score = '%.4f' % bbox[4]
                class_name = CLASSES[int(bbox[5])]
                xmin, ymin, xmax, ymax = list(map(str, coor))
                bbox_mess = ' '.join([class_name, score, xmin, ymin, xmax, ymax]) + '\n'
                f.write(bbox_mess)
                print('\t' + str(bbox_mess).strip())

第二部分 针对utils文件代码解析

import cv2
import random
import colorsys
import numpy as np
from core.config import cfg

def load_weights(model, weights_file):
    """Copy the contents of a binary weights file into the layers of ``model``.

    The file is read sequentially: a header of five int32 values, then, for
    each of the 75 convolution layers in order, either the batch-normalisation
    parameters or a bias vector, followed by the convolution kernel.

    Args:
        model: object providing ``get_layer(name)``. Convolution layers are
            looked up as ``conv2d``, ``conv2d_1`` ... ``conv2d_74`` and
            batch-normalisation layers as ``batch_normalization``,
            ``batch_normalization_1`` ...
        weights_file: path of the binary weights file.

    Raises:
        ValueError: if the file ends before the header or a layer is complete.
        AssertionError: if unread data remains after the last layer.
    """
    # Convolutions at these indices have a bias and no batch normalisation.
    biased_conv_indices = (58, 66, 74)

    # The context manager closes the file even when a lookup or reshape fails.
    with open(weights_file, 'rb') as wf:
        # The header values are not used, only skipped.
        header = np.fromfile(wf, dtype=np.int32, count=5)
        if header.size != 5:
            raise ValueError('weights file is too short to contain the header')

        bn_index = 0
        for conv_index in range(75):
            conv_layer_name = 'conv2d_%d' % conv_index if conv_index > 0 else 'conv2d'
            conv_layer = model.get_layer(conv_layer_name)
            filters = conv_layer.filters
            k_size = conv_layer.kernel_size[0]
            in_dim = conv_layer.input_shape[-1]

            has_batch_norm = conv_index not in biased_conv_indices
            if has_batch_norm:
                bn_layer_name = ('batch_normalization_%d' % bn_index
                                 if bn_index > 0 else 'batch_normalization')
                bn_layer = model.get_layer(bn_layer_name)
                bn_index += 1
                # darknet weights: [beta, gamma, mean, variance]
                bn_weights = np.fromfile(wf, dtype=np.float32, count=4 * filters)
                # tf weights: [gamma, beta, mean, variance]
                bn_weights = bn_weights.reshape((4, filters))[[1, 0, 2, 3]]
            else:
                conv_bias = np.fromfile(wf, dtype=np.float32, count=filters)

            # darknet shape (out_dim, in_dim, height, width)
            conv_shape = (filters, in_dim, k_size, k_size)
            # np.prod replaces np.product, which NumPy 2.0 removed.
            conv_weights = np.fromfile(wf, dtype=np.float32, count=int(np.prod(conv_shape)))
            # tf shape (height, width, in_dim, out_dim)
            conv_weights = conv_weights.reshape(conv_shape).transpose([2, 3, 1, 0])

            if has_batch_norm:
                conv_layer.set_weights([conv_weights])
                bn_layer.set_weights(bn_weights)
            else:
                conv_layer.set_weights([conv_weights, conv_bias])

        assert len(wf.read()) == 0, 'failed to read all data'

# Read a class-name file and return a dict of class names keyed by line index.
def read_class_names(class_file_name):
    """Return ``{line index: class name}`` for the lines of ``class_file_name``.

    Only newline characters are stripped from each line; any other
    whitespace is kept as part of the name.
    """
    with open(class_file_name, 'r') as handle:
        return {index: line.strip('\n') for index, line in enumerate(handle)}

def get_anchors(anchors_path):
    """Load anchors from the first line of ``anchors_path``.

    The line is split on commas, converted to float32 and returned as an
    array of shape (3, 3, 2); later lines of the file are ignored.
    """
    with open(anchors_path) as handle:
        first_line = handle.readline()
    values = np.array(first_line.split(','), dtype=np.float32)
    return np.reshape(values, (3, 3, 2))

# Image preprocessing: resize with the aspect ratio kept, pad to the target
# size, then divide by 255. Ground-truth boxes, when given, are moved into
# the coordinates of the padded image.
def image_preporcess(image, target_size, gt_boxes=None):
    """Letterbox ``image`` into ``target_size`` and divide the pixels by 255.

    Args:
        image: array whose shape unpacks as (height, width, channels).
        target_size: (height, width) of the output image.
        gt_boxes: optional array whose columns 0 and 2 are x coordinates and
            columns 1 and 3 are y coordinates. It is modified in place.

    Returns:
        The padded image, or ``(padded image, gt_boxes)`` when ``gt_boxes``
        is given.
    """
    target_h, target_w = target_size
    src_h, src_w, _ = image.shape

    # One common factor for both axes keeps the aspect ratio.
    scale = min(target_w / src_w, target_h / src_h)
    new_w = int(scale * src_w)
    new_h = int(scale * src_h)
    resized = cv2.resize(image, (new_w, new_h))

    # Centre the resized image on a canvas filled with the value 128.
    pad_left = (target_w - new_w) // 2
    pad_top = (target_h - new_h) // 2
    canvas = np.full(shape=[target_h, target_w, 3], fill_value=128.0)
    canvas[pad_top:new_h + pad_top, pad_left:new_w + pad_left, :] = resized
    canvas = canvas / 255.

    if gt_boxes is None:
        return canvas

    # Apply the same scale and offset to the box coordinates.
    gt_boxes[:, [0, 2]] = gt_boxes[:, [0, 2]] * scale + pad_left
    gt_boxes[:, [1, 3]] = gt_boxes[:, [1, 3]] * scale + pad_top
    return canvas, gt_boxes

# Draw the predicted boxes (and optionally their labels) on an image.
def draw_bbox(image, bboxes, classes=None, show_label=True):
    """Draw every box of ``bboxes`` on ``image`` and return the image.

    Args:
        image: array whose shape unpacks as (height, width, channels); the
            drawing calls receive this array directly.
        bboxes: iterable of
            [x_min, y_min, x_max, y_max, probability, cls_id] entries.
        classes: mapping from class id to class name. When omitted, the names
            are loaded from ``cfg.YOLO.CLASSES`` at call time, so importing
            this module does no file I/O and no dict is shared between calls.
        show_label: when true, a filled label box with
            "<class name>: <score>" is drawn at the top-left corner.

    Returns:
        The ``image`` that was passed in.
    """
    if classes is None:
        classes = read_class_names(cfg.YOLO.CLASSES)

    num_classes = len(classes)
    image_h, image_w, _ = image.shape

    # One colour per class, evenly spaced in hue.
    colors = []
    for index in range(num_classes):
        red, green, blue = colorsys.hsv_to_rgb(1.0 * index / num_classes, 1., 1.)
        colors.append((int(red * 255), int(green * 255), int(blue * 255)))

    # A private generator seeded with 0 gives the same fixed shuffle as
    # seeding the module-level generator, without resetting the global
    # random state of the caller.
    random.Random(0).shuffle(colors)

    font_scale = 0.5
    # Line thickness depends only on the image size.
    bbox_thick = int(0.6 * (image_h + image_w) / 600)

    for bbox in bboxes:
        coor = np.array(bbox[:4], dtype=np.int32)
        score = bbox[4]
        class_ind = int(bbox[5])
        bbox_color = colors[class_ind]
        # Top-left and bottom-right corners as plain Python ints.
        c1 = (int(coor[0]), int(coor[1]))
        c2 = (int(coor[2]), int(coor[3]))
        cv2.rectangle(image, c1, c2, bbox_color, bbox_thick)

        if show_label:
            bbox_mess = '%s: %.2f' % (classes[class_ind], score)
            # Width and height of the rendered label text.
            t_size = cv2.getTextSize(bbox_mess, 0, font_scale, thickness=bbox_thick // 2)[0]
            # Filled background box for the label.
            cv2.rectangle(image, c1, (c1[0] + t_size[0], c1[1] - t_size[1] - 3), bbox_color, -1)
            # Label text on top of the background box.
            cv2.putText(image, bbox_mess, (c1[0], c1[1] - 2), cv2.FONT_HERSHEY_SIMPLEX,
                        font_scale, (0, 0, 0), bbox_thick // 2, lineType=cv2.LINE_AA)
    return image

# Compute the IoU (intersection over union) of boxes.
def bboxes_iou(boxes1, boxes2):
    """Return the IoU of ``boxes1`` and ``boxes2``.

    Both arguments hold (xmin, ymin, xmax, ymax) on their last axis and are
    broadcast against each other. The result is floored at the float32
    machine epsilon. Pairs whose union area is exactly zero (for example two
    zero-sized boxes) yield that floor value instead of NaN.
    """
    boxes1 = np.array(boxes1)
    boxes2 = np.array(boxes2)

    boxes1_area = (boxes1[..., 2] - boxes1[..., 0]) * (boxes1[..., 3] - boxes1[..., 1])
    boxes2_area = (boxes2[..., 2] - boxes2[..., 0]) * (boxes2[..., 3] - boxes2[..., 1])

    # Corners of the intersection rectangle.
    left_up = np.maximum(boxes1[..., :2], boxes2[..., :2])
    right_down = np.minimum(boxes1[..., 2:], boxes2[..., 2:])

    # Negative extents mean the boxes do not overlap on that axis.
    inter_section = np.maximum(right_down - left_up, 0.0)
    inter_area = inter_section[..., 0] * inter_section[..., 1]
    union_area = boxes1_area + boxes2_area - inter_area

    # Divide only where the union is non-zero; a zero union would give 0/0.
    has_union = union_area != 0
    safe_union = np.where(has_union, union_area, 1.0)
    ious = np.where(has_union, inter_area / safe_union, 0.0)

    return np.maximum(ious, np.finfo(np.float32).eps)

# Remove redundant predicted boxes with non-maximum suppression (NMS).
def nms(bboxes, iou_threshold, sigma=0.3, method='nms'):
    """
    Apply per-class non-maximum suppression and return the kept boxes.

    :param bboxes: array with rows (xmin, ymin, xmax, ymax, score, class)
    :param iou_threshold: with method 'nms', boxes whose IoU with the current
        best box exceeds this value are discarded
    :param sigma: with method 'soft-nms', scores are multiplied by
        exp(-iou ** 2 / sigma)
    :param method: 'nms' or 'soft-nms'
    :return: list of kept rows

    Note: soft-nms, https://arxiv.org/pdf/1704.04503.pdf
    https://github.com/bharatsingh430/soft-nms
    """
    kept = []
    # Distinct class ids present among the boxes.
    for cls in list(set(bboxes[:, 5])):
        # Boolean-mask indexing copies the rows, so the input is not modified.
        candidates = bboxes[bboxes[:, 5] == cls]

        # Repeatedly keep the highest-scoring box and re-weight the others
        # by their overlap with it until no candidate is left.
        while len(candidates) > 0:
            top = np.argmax(candidates[:, 4])
            winner = candidates[top]
            kept.append(winner)
            candidates = np.delete(candidates, top, axis=0)

            overlap = bboxes_iou(winner[np.newaxis, :4], candidates[:, :4])

            # Per-candidate multiplier applied to the score.
            decay = np.ones((len(overlap),), dtype=np.float32)
            assert method in ['nms', 'soft-nms']
            if method == 'nms':
                decay[overlap > iou_threshold] = 0.0
            elif method == 'soft-nms':
                decay = np.exp(-(1.0 * overlap ** 2 / sigma))

            candidates[:, 4] = candidates[:, 4] * decay
            # Candidates whose score dropped to zero are removed.
            candidates = candidates[candidates[:, 4] > 0.]

    return kept

# Turn raw predictions into boxes on the original image.
# Output rows are [xmin, ymin, xmax, ymax, score, class]; shape is [-1, 6].
def postprocess_boxes(pred_bbox, org_img_shape, input_size, score_threshold):
    """Map predictions to the original image and filter them.

    Args:
        pred_bbox: 2-D array-like with rows
            (x, y, w, h, confidence, class probabilities...).
        org_img_shape: (height, width) of the original image.
        input_size: side length of the square model input.
        score_threshold: boxes must score strictly above this value.

    Returns:
        Array with rows (xmin, ymin, xmax, ymax, score, class index).
    """
    valid_scale = [0, np.inf]
    pred_bbox = np.array(pred_bbox)
    centers = pred_bbox[:, 0:2]
    sizes = pred_bbox[:, 2:4]
    pred_conf = pred_bbox[:, 4]
    pred_prob = pred_bbox[:, 5:]

    # (1) (x, y, w, h) --> (xmin, ymin, xmax, ymax)
    half_sizes = sizes * 0.5
    pred_coor = np.concatenate([centers - half_sizes, centers + half_sizes], axis=-1)

    # (2) Undo the letterbox: subtract the padding, then divide by the
    # resize ratio to get coordinates on the original image.
    org_h, org_w = org_img_shape
    resize_ratio = min(input_size / org_w, input_size / org_h)
    dw = (input_size - resize_ratio * org_w) / 2
    dh = (input_size - resize_ratio * org_h) / 2
    pred_coor[:, 0::2] = (pred_coor[:, 0::2] - dw) / resize_ratio
    pred_coor[:, 1::2] = (pred_coor[:, 1::2] - dh) / resize_ratio

    # (3) Clip boxes to the image; boxes that end up with min > max are
    # zeroed out.
    top_left = np.maximum(pred_coor[:, :2], [0, 0])
    bottom_right = np.minimum(pred_coor[:, 2:], [org_w - 1, org_h - 1])
    pred_coor = np.concatenate([top_left, bottom_right], axis=-1)
    invalid_mask = (pred_coor[:, 0] > pred_coor[:, 2]) | (pred_coor[:, 1] > pred_coor[:, 3])
    pred_coor[invalid_mask] = 0

    # (4) Keep boxes whose scale (square root of the area) lies strictly
    # inside valid_scale; zero-area boxes are rejected here.
    extents = pred_coor[:, 2:4] - pred_coor[:, 0:2]
    bboxes_scale = np.sqrt(extents[:, 0] * extents[:, 1])
    scale_mask = (valid_scale[0] < bboxes_scale) & (bboxes_scale < valid_scale[1])

    # (5) score = confidence * highest class probability
    classes = np.argmax(pred_prob, axis=-1)
    scores = pred_conf * np.max(pred_prob, axis=-1)
    score_mask = scores > score_threshold

    mask = scale_mask & score_mask
    coors, scores, classes = pred_coor[mask], scores[mask], classes[mask]

    # Join coordinates, score and class index into one array.
    return np.concatenate([coors, scores[:, np.newaxis], classes[:, np.newaxis]], axis=-1)

第三部分 针对config文件的解析

# 创建一个字典
from easydict import EasyDict as edict
# Root configuration object.
__C = edict()
# Consumers can get config by: from config import cfg
cfg = __C

# YOLO options
__C.YOLO = edict(
    # Set the class name
    CLASSES="./data/classes/coco.names",
    ANCHORS="./data/anchors/basline_anchors.txt",
    STRIDES=[8, 16, 32],
    ANCHOR_PER_SCALE=3,
    IOU_LOSS_THRESH=0.5,
)

# Train options
__C.TRAIN = edict(
    ANNOT_PATH="./data/dataset/yymnist_train.txt",
    BATCH_SIZE=4,
    # Alternative: [320, 352, 384, 416, 448, 480, 512, 544, 576, 608]
    INPUT_SIZE=[416],
    DATA_AUG=True,
    LR_INIT=1e-3,
    LR_END=1e-6,
    WARMUP_EPOCHS=2,
    EPOCHS=30,
)

# TEST options
__C.TEST = edict(
    ANNOT_PATH="./data/dataset/yymnist_test.txt",
    BATCH_SIZE=2,
    INPUT_SIZE=544,
    DATA_AUG=False,
    DECTECTED_IMAGE_PATH="./data/detection/",
    SCORE_THRESHOLD=0.3,
    IOU_THRESHOLD=0.45,
)

print(cfg)
  • 点赞
  • 收藏
  • 分享
  • 文章举报
DocPark 发布了30 篇原创文章 · 获赞 2 · 访问量 791 私信 关注
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: