Tensorflow2.0:Yolo v3代码详解(三)
2020-03-06 12:27
1136 查看
这次主要是针对yolov3执行测试过程的代码解析:第一部分是主体test文件代码解析,第二部分是针对utils文件代码解析,第三部分是针对config文件代码解析。
第一部分 针对test文件代码解析
yolov3文件代码可以在Tensorflow2.0:Yolo v3代码详解(一)找到
import cv2
import os
import shutil
import numpy as np
import tensorflow as tf
import core.utils as utils
from core.config import cfg
from core.yolov3 import YOLOv3, decode

# Evaluation script: for every line of the test annotation file it writes the
# ground-truth boxes and the predicted boxes to per-image text files (the
# '../mAP' layout below) and optionally saves the image with boxes drawn.

INPUT_SIZE = 416                                     # side length of the square network input
CLASSES = utils.read_class_names(cfg.YOLO.CLASSES)   # {class id: class name}
NUM_CLASS = len(CLASSES)                             # number of classes

# Output directories.
predicted_dir_path = '../mAP/predicted'
ground_truth_dir_path = '../mAP/ground-truth'
detected_image_dir = cfg.TEST.DECTECTED_IMAGE_PATH   # may be None: then no images are saved

# Start from empty output directories. The detection-image directory is only
# touched when it is configured, matching the `is not None` check further down.
output_dirs = [predicted_dir_path, ground_truth_dir_path]
if detected_image_dir is not None:
    output_dirs.append(detected_image_dir)
for output_dir in output_dirs:
    if os.path.exists(output_dir):
        shutil.rmtree(output_dir)
    # makedirs also creates a missing parent such as '../mAP'.
    os.makedirs(output_dir)

# Build the model: network input -> three feature maps -> decoded box tensors.
input_layer = tf.keras.layers.Input([INPUT_SIZE, INPUT_SIZE, 3])
feature_maps = YOLOv3(input_layer)
bbox_tensors = [decode(fm, i) for i, fm in enumerate(feature_maps)]
model = tf.keras.Model(input_layer, bbox_tensors)

# Load the trained parameters.
model.load_weights("./yolov3")

with open(cfg.TEST.ANNOT_PATH, 'r') as annotation_file:
    # `num` is the line index; it names the per-image output files.
    for num, line in enumerate(annotation_file):
        # Annotation format: "<image path> x1,y1,x2,y2,cls x1,y1,x2,y2,cls ..."
        annotation = line.strip().split()
        if not annotation:
            # Blank line (e.g. a trailing newline at the end of the file).
            continue

        # Step 1: read the image and its ground-truth boxes.
        image_path = annotation[0]
        image_name = os.path.basename(image_path)
        image = cv2.imread(image_path)   # BGR, values 0..255; None when unreadable
        if image is None:
            raise FileNotFoundError('cannot read image: %s' % image_path)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        bbox_data_gt = np.array([list(map(int, box.split(','))) for box in annotation[1:]])
        if len(bbox_data_gt) == 0:
            bboxes_gt = []
            classes_gt = []
        else:
            bboxes_gt, classes_gt = bbox_data_gt[:, :4], bbox_data_gt[:, 4]

        ground_truth_path = os.path.join(ground_truth_dir_path, str(num) + '.txt')
        print('=> ground truth of %s:' % image_name)
        # One line per box: "<class name> xmin ymin xmax ymax".
        with open(ground_truth_path, 'w') as f:
            for box_gt, class_id in zip(bboxes_gt, classes_gt):
                class_name = CLASSES[int(class_id)]
                xmin, ymin, xmax, ymax = list(map(str, box_gt))
                bbox_mess = ' '.join([class_name, xmin, ymin, xmax, ymax]) + '\n'
                f.write(bbox_mess)
                print('\t' + str(bbox_mess).strip())

        print('=> predict result of %s:' % image_name)
        predict_result_path = os.path.join(predicted_dir_path, str(num) + '.txt')
        image_size = image.shape[:2]   # (height, width) of the original image

        # Step 2: preprocess (resize keeping aspect ratio, pad, scale to [0, 1]).
        # A copy is passed so the original image stays available for drawing.
        image_data = utils.image_preporcess(np.copy(image), [INPUT_SIZE, INPUT_SIZE])
        image_data = image_data[np.newaxis, ...].astype(np.float32)

        # Step 3: run the model and flatten the per-scale outputs into one
        # 2-D array with one candidate box per row.
        pred_bbox = model.predict(image_data)
        pred_bbox = [tf.reshape(x, (-1, tf.shape(x)[-1])) for x in pred_bbox]
        pred_bbox = tf.concat(pred_bbox, axis=0)

        # Step 4: map boxes back to the original image and drop low-score ones.
        # Result rows: (xmin, ymin, xmax, ymax, score, class).
        bboxes = utils.postprocess_boxes(pred_bbox, image_size, INPUT_SIZE, cfg.TEST.SCORE_THRESHOLD)

        # Step 5: non-maximum suppression, per class.
        bboxes = utils.nms(bboxes, cfg.TEST.IOU_THRESHOLD, method='nms')

        # Step 6: optionally save the image with the predicted boxes drawn.
        if detected_image_dir is not None:
            image = utils.draw_bbox(image, bboxes)
            # `image` is RGB here while cv2.imwrite expects BGR; convert back so
            # the saved file does not have red and blue swapped.
            cv2.imwrite(os.path.join(detected_image_dir, image_name),
                        cv2.cvtColor(image, cv2.COLOR_RGB2BGR))

        # One line per box: "<class name> <score> xmin ymin xmax ymax".
        with open(predict_result_path, 'w') as f:
            for bbox in bboxes:
                coor = np.array(bbox[:4], dtype=np.int32)
                score = '%.4f' % bbox[4]
                class_name = CLASSES[int(bbox[5])]
                xmin, ymin, xmax, ymax = list(map(str, coor))
                bbox_mess = ' '.join([class_name, score, xmin, ymin, xmax, ymax]) + '\n'
                f.write(bbox_mess)
                print('\t' + str(bbox_mess).strip())
第二部分 针对utils文件代码解析
import cv2
import random
import colorsys
import numpy as np
from core.config import cfg


def load_weights(model, weights_file):
    """Copy Darknet weights from a binary file into a Keras YOLOv3 model.

    The file is read sequentially for convolution layers 0..74, looked up by
    the names 'conv2d', 'conv2d_1', ... Layers 58, 66 and 74 carry a bias and
    no batch normalisation; every other layer is followed by a batch-norm
    layer ('batch_normalization', 'batch_normalization_1', ...).

    :param model: model exposing `get_layer(name)` for the layer names above
    :param weights_file: path of the Darknet weights file
    :raises ValueError: if data is left in the file after all layers are read
    """
    output_layers = (58, 66, 74)

    # The context manager closes the file even when a read or reshape fails.
    with open(weights_file, 'rb') as wf:
        # Skip the five int32 header values (major, minor, revision, seen, _).
        np.fromfile(wf, dtype=np.int32, count=5)

        j = 0  # index of the next batch-normalisation layer
        for i in range(75):
            conv_layer_name = 'conv2d_%d' % i if i > 0 else 'conv2d'
            conv_layer = model.get_layer(conv_layer_name)
            filters = conv_layer.filters
            k_size = conv_layer.kernel_size[0]
            in_dim = conv_layer.input_shape[-1]

            has_batch_norm = i not in output_layers
            if has_batch_norm:
                bn_layer_name = 'batch_normalization_%d' % j if j > 0 else 'batch_normalization'
                # darknet order: [beta, gamma, mean, variance]
                bn_weights = np.fromfile(wf, dtype=np.float32, count=4 * filters)
                # tf order: [gamma, beta, mean, variance]
                bn_weights = bn_weights.reshape((4, filters))[[1, 0, 2, 3]]
                bn_layer = model.get_layer(bn_layer_name)
                j += 1
            else:
                conv_bias = np.fromfile(wf, dtype=np.float32, count=filters)

            # darknet shape: (out_dim, in_dim, height, width)
            conv_shape = (filters, in_dim, k_size, k_size)
            # np.prod replaces np.product, which was removed in NumPy 2.0.
            conv_weights = np.fromfile(wf, dtype=np.float32, count=int(np.prod(conv_shape)))
            # tf shape: (height, width, in_dim, out_dim)
            conv_weights = conv_weights.reshape(conv_shape).transpose([2, 3, 1, 0])

            if has_batch_norm:
                conv_layer.set_weights([conv_weights])
                bn_layer.set_weights(bn_weights)
            else:
                conv_layer.set_weights([conv_weights, conv_bias])

        # Explicit raise instead of assert so the check survives `python -O`.
        if len(wf.read()) != 0:
            raise ValueError('failed to read all data')


def read_class_names(class_file_name):
    """Load class names from a text file with one name per line.

    :param class_file_name: path of the class-name file
    :return: dict mapping the 0-based line index to the name on that line
    """
    with open(class_file_name, 'r') as data:
        return {class_id: name.strip('\n') for class_id, name in enumerate(data)}


def get_anchors(anchors_path):
    """Load anchors from the first line of a comma-separated text file.

    :param anchors_path: path of the anchors file
    :return: float32 array reshaped to (3, 3, 2)
    """
    with open(anchors_path) as f:
        anchors = f.readline()
    anchors = np.array(anchors.split(','), dtype=np.float32)
    return anchors.reshape(3, 3, 2)


# Image preprocessing: aspect-preserving resize, padding, scaling to [0, 1];
# ground-truth boxes, when given, are transformed the same way.
def image_preporcess(image, target_size, gt_boxes=None):
    """Resize an image keeping its aspect ratio, pad it, and scale it to [0, 1].

    :param image: array of shape (h, w, 3)
    :param target_size: (target_height, target_width)
    :param gt_boxes: optional array whose columns 0/2 are x coordinates and
        columns 1/3 are y coordinates; it is modified IN PLACE
    :return: the padded image, or (padded image, gt_boxes) when boxes are given
    """
    ih, iw = target_size
    h, w, _ = image.shape

    # Largest scale at which the whole image still fits inside the target.
    scale = min(iw / w, ih / h)
    nw, nh = int(scale * w), int(scale * h)
    image_resized = cv2.resize(image, (nw, nh))

    # Centre the resized image on a canvas filled with 128.
    image_paded = np.full(shape=[ih, iw, 3], fill_value=128.0)
    dw, dh = (iw - nw) // 2, (ih - nh) // 2
    image_paded[dh:nh + dh, dw:nw + dw, :] = image_resized
    image_paded = image_paded / 255.

    if gt_boxes is None:
        return image_paded

    # Apply the same scale and offset to the box coordinates.
    # NOTE(review): assigning into an integer array would truncate the scaled
    # values -- confirm callers pass the dtype they expect.
    gt_boxes[:, [0, 2]] = gt_boxes[:, [0, 2]] * scale + dw
    gt_boxes[:, [1, 3]] = gt_boxes[:, [1, 3]] * scale + dh
    return image_paded, gt_boxes


# Cache for the default class-name table used by draw_bbox, keyed by file path.
_DEFAULT_CLASS_NAMES = {}


def _default_class_names():
    """Return the class names from cfg.YOLO.CLASSES, reading the file once per path."""
    path = cfg.YOLO.CLASSES
    if path not in _DEFAULT_CLASS_NAMES:
        _DEFAULT_CLASS_NAMES[path] = read_class_names(path)
    return _DEFAULT_CLASS_NAMES[path]


def _class_palette(num_classes):
    """Return one colour tuple per class: evenly spaced hues, shuffled with seed 0."""
    hsv_tuples = [(1.0 * x / num_classes, 1., 1.) for x in range(num_classes)]
    colors = [colorsys.hsv_to_rgb(*hsv) for hsv in hsv_tuples]
    colors = [(int(c[0] * 255), int(c[1] * 255), int(c[2] * 255)) for c in colors]
    # A private generator seeded with 0 gives the same order as
    # random.seed(0); random.shuffle(colors) without reseeding the global RNG.
    random.Random(0).shuffle(colors)
    return colors


def draw_bbox(image, bboxes, classes=None, show_label=True):
    """Draw boxes (and optionally "<class>: <score>" labels) on an image.

    :param image: array of shape (h, w, 3); it is drawn on in place
    :param bboxes: iterable of [x_min, y_min, x_max, y_max, probability, cls_id]
    :param classes: mapping from class id to class name; when None the names
        are loaded from cfg.YOLO.CLASSES on first use
    :param show_label: whether to draw the text label above each box
    :return: the same image object
    """
    if classes is None:
        # Resolved lazily so importing this module does not read the class file.
        classes = _default_class_names()

    num_classes = len(classes)
    image_h, image_w, _ = image.shape
    colors = _class_palette(num_classes)

    fontScale = 0.5
    # Line thickness grows with the image size.
    bbox_thick = int(0.6 * (image_h + image_w) / 600)

    for bbox in bboxes:
        coor = np.array(bbox[:4], dtype=np.int32)
        score = bbox[4]
        class_ind = int(bbox[5])
        bbox_color = colors[class_ind]

        # Top-left and bottom-right corners as plain Python ints; some OpenCV
        # builds are reported to reject NumPy scalar types for point arguments.
        c1 = (int(coor[0]), int(coor[1]))
        c2 = (int(coor[2]), int(coor[3]))
        cv2.rectangle(image, c1, c2, bbox_color, bbox_thick)

        if show_label:
            bbox_mess = '%s: %.2f' % (classes[class_ind], score)
            # Text width/height, used to size the filled label background.
            t_size = cv2.getTextSize(bbox_mess, 0, fontScale, thickness=bbox_thick // 2)[0]
            # Thickness -1 draws a filled rectangle.
            cv2.rectangle(image, c1, (c1[0] + t_size[0], c1[1] - t_size[1] - 3), bbox_color, -1)
            cv2.putText(image, bbox_mess, (c1[0], c1[1] - 2), cv2.FONT_HERSHEY_SIMPLEX,
                        fontScale, (0, 0, 0), bbox_thick // 2, lineType=cv2.LINE_AA)

    return image


def bboxes_iou(boxes1, boxes2):
    """Compute the IoU of boxes given as (..., [xmin, ymin, xmax, ymax]).

    The two inputs are broadcast against each other. The result is clamped
    from below to float32 machine epsilon, so non-overlapping boxes yield that
    epsilon instead of 0.
    NOTE(review): a pair whose union area is 0 divides by zero here.

    :return: array of IoU values with the broadcast leading shape
    """
    boxes1 = np.array(boxes1)
    boxes2 = np.array(boxes2)

    boxes1_area = (boxes1[..., 2] - boxes1[..., 0]) * (boxes1[..., 3] - boxes1[..., 1])
    boxes2_area = (boxes2[..., 2] - boxes2[..., 0]) * (boxes2[..., 3] - boxes2[..., 1])

    # Corners of the intersection rectangle; negative extents mean no overlap.
    left_up = np.maximum(boxes1[..., :2], boxes2[..., :2])
    right_down = np.minimum(boxes1[..., 2:], boxes2[..., 2:])
    inter_section = np.maximum(right_down - left_up, 0.0)

    inter_area = inter_section[..., 0] * inter_section[..., 1]
    union_area = boxes1_area + boxes2_area - inter_area
    ious = np.maximum(1.0 * inter_area / union_area, np.finfo(np.float32).eps)
    return ious


def nms(bboxes, iou_threshold, sigma=0.3, method='nms'):
    """Per-class non-maximum suppression.

    :param bboxes: array of rows (xmin, ymin, xmax, ymax, score, class)
    :param iou_threshold: for 'nms', boxes whose IoU with the kept box exceeds
        this value are discarded
    :param sigma: for 'soft-nms', scale of the Gaussian score decay
    :param method: 'nms' or 'soft-nms'
    :return: list of kept rows; classes are processed in ascending id order
        and, within a class, boxes appear in the order they were selected
    :raises ValueError: if `method` is not 'nms' or 'soft-nms'

    Note: soft-nms, https://arxiv.org/pdf/1704.04503.pdf
          https://github.com/bharatsingh430/soft-nms
    """
    # Validate once up front; an explicit raise is not stripped by `python -O`.
    if method not in ('nms', 'soft-nms'):
        raise ValueError("method must be 'nms' or 'soft-nms', got %r" % (method,))

    bboxes = np.asarray(bboxes)
    if bboxes.size == 0:
        return []

    best_bboxes = []
    for cls in np.unique(bboxes[:, 5]):
        # Boolean indexing copies, so the caller's array is never modified.
        cls_bboxes = bboxes[bboxes[:, 5] == cls]

        # Repeatedly keep the highest-scoring box, then suppress or decay the
        # remaining boxes of this class according to their IoU with it.
        while len(cls_bboxes) > 0:
            max_ind = np.argmax(cls_bboxes[:, 4])
            best_bbox = cls_bboxes[max_ind]
            best_bboxes.append(best_bbox)
            cls_bboxes = np.concatenate([cls_bboxes[:max_ind], cls_bboxes[max_ind + 1:]])

            iou = bboxes_iou(best_bbox[np.newaxis, :4], cls_bboxes[:, :4])

            if method == 'nms':
                # Hard suppression: weight 0 for boxes above the threshold.
                weight = np.ones((len(iou),), dtype=np.float32)
                weight[iou > iou_threshold] = 0.0
            else:
                # Soft suppression: Gaussian decay of the score.
                weight = np.exp(-(1.0 * iou ** 2 / sigma))

            cls_bboxes[:, 4] = cls_bboxes[:, 4] * weight
            cls_bboxes = cls_bboxes[cls_bboxes[:, 4] > 0.]

    return best_bboxes


def postprocess_boxes(pred_bbox, org_img_shape, input_size, score_threshold):
    """Convert raw predictions into boxes on the original image.

    :param pred_bbox: array-like of rows (x, y, w, h, conf, class probs...)
        expressed in network-input pixels
    :param org_img_shape: (height, width) of the original image
    :param input_size: side length of the square network input
    :param score_threshold: boxes whose score is not above this are dropped
    :return: array of rows (xmin, ymin, xmax, ymax, score, class), where
        score = conf * highest class probability
    """
    valid_scale = [0, np.inf]
    pred_bbox = np.array(pred_bbox)

    pred_xywh = pred_bbox[:, 0:4]
    pred_conf = pred_bbox[:, 4]
    pred_prob = pred_bbox[:, 5:]

    # (1) (x, y, w, h) --> (xmin, ymin, xmax, ymax)
    half_wh = pred_xywh[:, 2:] * 0.5
    pred_coor = np.concatenate([pred_xywh[:, :2] - half_wh,
                                pred_xywh[:, :2] + half_wh], axis=-1)

    # (2) Undo the resize-and-pad transform: subtract the padding offset,
    # then divide by the resize ratio.
    org_h, org_w = org_img_shape
    resize_ratio = min(input_size / org_w, input_size / org_h)
    dw = (input_size - resize_ratio * org_w) / 2
    dh = (input_size - resize_ratio * org_h) / 2
    pred_coor[:, 0::2] = 1.0 * (pred_coor[:, 0::2] - dw) / resize_ratio
    pred_coor[:, 1::2] = 1.0 * (pred_coor[:, 1::2] - dh) / resize_ratio

    # (3) Clip to the image; boxes that end up inverted (min > max) are
    # zeroed so that their area becomes 0 and step (4) removes them.
    pred_coor = np.concatenate([np.maximum(pred_coor[:, :2], [0, 0]),
                                np.minimum(pred_coor[:, 2:], [org_w - 1, org_h - 1])], axis=-1)
    invalid_mask = np.logical_or((pred_coor[:, 0] > pred_coor[:, 2]),
                                 (pred_coor[:, 1] > pred_coor[:, 3]))
    pred_coor[invalid_mask] = 0

    # (4) Keep boxes whose scale, sqrt(width * height), lies strictly inside
    # valid_scale; this discards zero-area boxes.
    bboxes_scale = np.sqrt(np.multiply.reduce(pred_coor[:, 2:4] - pred_coor[:, 0:2], axis=-1))
    scale_mask = np.logical_and((valid_scale[0] < bboxes_scale), (bboxes_scale < valid_scale[1]))

    # (5) Score each box with its most probable class and apply the threshold.
    classes = np.argmax(pred_prob, axis=-1)
    scores = pred_conf * pred_prob[np.arange(len(pred_coor)), classes]
    score_mask = scores > score_threshold

    mask = np.logical_and(scale_mask, score_mask)
    coors, scores, classes = pred_coor[mask], scores[mask], classes[mask]

    return np.concatenate([coors, scores[:, np.newaxis], classes[:, np.newaxis]], axis=-1)
第三部分 针对config文件的解析
from easydict import EasyDict as edict

# Whole configuration written as one nested literal. EasyDict converts the
# nested dicts on construction, so every level supports attribute access
# (cfg.YOLO.CLASSES, cfg.TEST.IOU_THRESHOLD, ...).
__C = edict({
    # YOLO options
    'YOLO': {
        # Path of the class-name file
        'CLASSES': "./data/classes/coco.names",
        'ANCHORS': "./data/anchors/basline_anchors.txt",
        'STRIDES': [8, 16, 32],
        'ANCHOR_PER_SCALE': 3,
        'IOU_LOSS_THRESH': 0.5,
    },
    # Train options
    'TRAIN': {
        'ANNOT_PATH': "./data/dataset/yymnist_train.txt",
        'BATCH_SIZE': 4,
        # Alternative kept from the original as a comment:
        # [320, 352, 384, 416, 448, 480, 512, 544, 576, 608]
        'INPUT_SIZE': [416],
        'DATA_AUG': True,
        'LR_INIT': 1e-3,
        'LR_END': 1e-6,
        'WARMUP_EPOCHS': 2,
        'EPOCHS': 30,
    },
    # TEST options
    'TEST': {
        'ANNOT_PATH': "./data/dataset/yymnist_test.txt",
        'BATCH_SIZE': 2,
        'INPUT_SIZE': 544,
        'DATA_AUG': False,
        'DECTECTED_IMAGE_PATH': "./data/detection/",
        'SCORE_THRESHOLD': 0.3,
        'IOU_THRESHOLD': 0.45,
    },
})

# Consumers can get config by: from config import cfg
cfg = __C

print(cfg)
- 点赞
- 收藏
- 分享
- 文章举报
相关文章推荐
- Tensorflow2.0:Yolo v3代码详解 (二)
- Tensorflow2.0:实现Yolo v3的五个问题及代码解析目录
- Yolo v3 - tensorflow代码解读
- Tensorflow2.0:Faster RCNN 代码详解(二)
- Tensorflow2.0:Faster RCNN 代码详解(一)
- yolov1代码详解
- TensorFlow中cnn-cifar10样例输入部分代码详解
- TensorFlow中cnn-cifar10样例部分代码详解
- TensorFlow手写字识别mnist入门篇(代码及详解)
- FCN—tensorflow版本代码超详解
- TensorFlow2.0,GPU代码测试
- 代码实例:如何使用 TensorFlow 2.0 Preview
- keras-tensorflow-yolo-v3 win10目标检测训练自己的数据集(一)
- Python开发tensorflow(多层神经网络)实现mnist手写识别代码详解
- yolo TensorFlow实战(二)yolo代码运行及解析
- 代码详解:用TensorFlow的Keras API建立多层神经网络
- Vue 2.0 侦听器 watch属性代码详解
- 《21个项目玩转深度学习--基于tensorflow的实践详解》代码实现和笔记(一)
- Tensorflow之Basic word2vec代码详解(上)
- tensorflow中的seq2seq的代码详解