Faster R-CNN代码学习(一)——datasets模块
2019-03-26 18:59
239 查看
Faster R-CNN代码学习(一)——datasets模块
源代码:https://github.com/smallcorgi/Faster-RCNN_TF
datasets模块在lib文件夹下,负责的是对数据集的操作,包含数据集对象的创建、载入过程,这一部分在训练自己的数据集时需要重点修改。
datasets模块主要包含3个py文件,分别为所有数据集类的父类imdb.py;根据数据集特有创建的以VOC为例,pascal_voc.py;用于迅速创建数据集对象的factory.py。
文章目录
下面依次进行介绍。
imdb.py
- imdb为所有数据集的父类,因此包含了所有数据集共有的属性。
class imdb(object):
    """Base class shared by all image databases (datasets).

    Holds the attributes every dataset has: a name, its classes, the image
    sample index, the roidb (regions-of-interest database) and a config dict.
    """

    # BUG FIX: the original was written as `class imdb(object, name):` and
    # the __init__ line was missing its colon — both are syntax errors.
    def __init__(self, name):
        self._name = name                 # dataset name, e.g. 'voc_2007_train'
        self._classes = []                # class names, filled by subclasses
        self._num_classes = []            # kept as transcribed (unused here)
        self._image_index = []            # identifiers of the image samples
        self._roidb = None                # lazily built roi database
        # Function pointer: subclasses point this at their own roi generator.
        self._roidb_handler = self.default_roidb
        self.config = {}                  # dataset-specific options
对于每一个数据集,其共有的属性都包含数据集名称name、数据集里有的类classes、数据集的图片样本image_index、数据集中的roi集合以及相关的设置config。
- 由于这些都是私有属性,需要通过 property 装饰器将其取出,因此下面代码的主要内容就是 get 这些属性。
@property
def name(self):
    """Dataset name string."""
    return self._name

@property
def classes(self):
    """List of class names for this dataset."""
    return self._classes

@property
def num_classes(self):
    """Number of classes, derived from the class list."""
    return len(self._classes)

@property
def image_index(self):
    """Identifiers of the image samples in this dataset."""
    return self._image_index

@property
def num_images(self):
    """Number of image samples."""
    return len(self.image_index)

@property
def roidb_handler(self):
    """Function used to build the roidb on first access."""
    return self._roidb_handler

@roidb_handler.setter
def roidb_handler(self, val):
    self._roidb_handler = val

@property
def roidb(self):
    # Return the cached roidb when it exists; otherwise build it lazily
    # through the handler function pointer and cache the result.
    if self._roidb is not None:
        return self._roidb
    self._roidb = self.roidb_handler()
    return self._roidb

@property
def cache_path(self):
    # Folder used to store cached roidb files for the dataset,
    # created on demand under cfg.DATA_DIR/cache.
    cache_path = osp.abspath(osp.join(cfg.DATA_DIR, 'cache'))
    if not osp.exists(cache_path):
        os.makedirs(cache_path)
    return cache_path
- 部分方法需要依靠具体的数据集及相应路径来制定,因此仅声明接口:
def default_roidb(self):
    """Roidb construction hook; concrete dataset subclasses must override."""
    raise NotImplementedError

def image_path_at(self):
    """Return the path of an image sample; subclasses must override."""
    raise NotImplementedError
- 数据集的共有方法:数据翻转扩增、recall指标评估、通过提供的Box_list创建roidb
# Before flipping we need every image's width; PIL reads just the header,
# so this is cheap even for large datasets.
def _get_width(self):
    """Return the pixel width of each image in the dataset."""
    return [PIL.Image.open(self.image_path_at(i)).size[0]
            for i in range(self.num_images)]

def append_flipped_images(self):
    """Double the dataset by appending a horizontally flipped copy of
    each roidb entry (only the x coordinates and `flipped` change)."""
    num_images = self.num_images
    widths = self._get_width()
    for i in range(num_images):
        # BUG FIX: the original used attribute access `self.roidb[i].boxes`,
        # but roidb entries are dicts — the key must be indexed.
        boxes = self.roidb[i]['boxes'].copy()
        oldx1 = boxes[:, 0].copy()
        oldx2 = boxes[:, 2].copy()
        # Mirror the x coordinates around the image width (0-based pixels).
        boxes[:, 0] = widths[i] - oldx2 - 1
        boxes[:, 2] = widths[i] - oldx1 - 1
        assert (boxes[:, 2] >= boxes[:, 0]).all()
        entry = {'boxes': boxes,
                 'gt_overlaps': self.roidb[i]['gt_overlaps'],
                 'gt_classes': self.roidb[i]['gt_classes'],
                 'flipped': True}
        self.roidb.append(entry)
    # Each original sample now also stands for its flipped twin.
    self._image_index = self._image_index * 2
recall指标评估是根据候选框来确定候选框的recall值
# NOTE(review): transcribed as `evaluate_racall`; renamed to match the
# upstream imdb API (`evaluate_recall`).
def evaluate_recall(self, candidate_boxes=None, thresholds=None,
                    area='all', limit=None):
    """Evaluate the recall of candidate boxes against the ground truth.

    `area` selects a gt-box size band; `candidate_boxes` defaults to the
    non-gt roidb boxes; `limit` caps the proposals used per image.
    Returns a dict with the average recall, per-threshold recalls, the
    thresholds and the sorted per-gt best overlaps.
    """
    # Map an area name to an index, then to its [min, max] pixel-area range;
    # the range filters which ground-truth boxes are evaluated.
    areas = {'all': 0, 'small': 1, 'medium': 2, 'large': 3,
             '96-128': 4, '128-256': 5, '256-512': 6, '512-inf': 7}
    area_ranges = [[0 ** 2, 1e5 ** 2],    # all
                   [0 ** 2, 32 ** 2],     # small
                   [32 ** 2, 96 ** 2],    # medium
                   [96 ** 2, 1e5 ** 2],   # large
                   [96 ** 2, 128 ** 2],   # 96-128
                   [128 ** 2, 256 ** 2],  # 128-256
                   [256 ** 2, 512 ** 2],  # 256-512
                   [512 ** 2, 1e5 ** 2],  # 512-inf
                   ]
    # BUG FIX: dict.has_key() was removed in Python 3; '.foramt' was a typo;
    # and the lookup was written area_ranges[areas[areas]] (wrong key).
    assert area in areas, 'unknown area range: {}'.format(area)
    area_range = area_ranges[areas[area]]
    gt_overlaps = np.zeros(0)
    num_pos = 0
    for i in range(self.num_images):
        # gt_overlaps is stored sparse with shape (num_objs, num_classes);
        # take each object's best class overlap.
        max_gt_overlaps = self.roidb[i]['gt_overlaps'].toarray().max(axis=1)
        # Ground-truth objects carry a positive class and an overlap of 1.
        gt_inds = np.where((self.roidb[i]['gt_classes'] > 0) &
                           (max_gt_overlaps == 1))[0]
        gt_boxes = self.roidb[i]['boxes'][gt_inds, :]
        gt_areas = self.roidb[i]['seg_areas'][gt_inds]
        # Keep only the ground truth inside the requested size band.
        valid_gt_inds = np.where((gt_areas >= area_range[0]) &
                                 (gt_areas <= area_range[1]))[0]
        gt_boxes = gt_boxes[valid_gt_inds, :]
        num_pos += len(valid_gt_inds)
        if candidate_boxes is None:
            # Default proposals: the roidb boxes that are not ground truth.
            non_gt_inds = np.where(self.roidb[i]['gt_classes'] == 0)[0]
            # BUG FIX: the key `boxes` was unquoted in the original.
            boxes = self.roidb[i]['boxes'][non_gt_inds, :]
        else:
            boxes = candidate_boxes[i]
        if boxes.shape[0] == 0:
            continue
        if limit is not None and boxes.shape[0] > limit:
            boxes = boxes[:limit, :]
        # BUG FIX: np.float was removed from modern NumPy; use builtin float.
        overlaps = bbox_overlaps(boxes.astype(float),
                                 gt_boxes.astype(float))
        _gt_overlaps = np.zeros((gt_boxes.shape[0]))
        # Greedily match each gt box to its best remaining proposal.
        for j in range(gt_boxes.shape[0]):
            # Best proposal index / IoU per gt box.
            argmax_overlaps = overlaps.argmax(axis=0)
            max_overlaps = overlaps.max(axis=0)
            # gt box with the globally best remaining IoU.
            gt_ind = max_overlaps.argmax()
            gt_ovr = max_overlaps.max()
            assert gt_ovr >= 0
            # Proposal matched to that gt box.
            box_ind = argmax_overlaps[gt_ind]
            _gt_overlaps[j] = overlaps[box_ind, gt_ind]
            # Mark both the proposal and the gt box as used.
            overlaps[box_ind, :] = -1
            overlaps[:, gt_ind] = -1
        gt_overlaps = np.hstack((gt_overlaps, _gt_overlaps))
    # Sort all images' best overlaps, then compute recall per threshold.
    gt_overlaps = np.sort(gt_overlaps)
    if thresholds is None:
        step = 0.05
        thresholds = np.arange(0.5, 0.95 + 1e-5, step)
    recalls = np.zeros_like(thresholds)
    for i, t in enumerate(thresholds):
        recalls[i] = (gt_overlaps >= t).sum() / float(num_pos)
    ar = recalls.mean()
    return {'ar': ar, 'recalls': recalls,
            'thresholds': thresholds, 'gt_overlaps': gt_overlaps}
从 box_list 中创建 roi 的方法用于创建一系列新的 roi,这些 roi 的 overlaps 需要根据它们与真实 gt_box 的 IoU 来计算。
def create_roidb_from_box_list(self, box_list, gt_roidb):
    """Build a roidb from a list of box arrays, one array per image.

    Each new roi gets an overlaps row computed from its IoU with the
    ground-truth boxes of the same image (when `gt_roidb` is given).
    """
    # BUG FIX: the original asserted len(box_list == self.num_images),
    # which compares element-wise instead of comparing lengths.
    assert len(box_list) == self.num_images, \
        'Number of boxes must match number of ground-truth images'
    roidb = []
    for i in range(self.num_images):
        boxes = box_list[i]
        num_boxes = boxes.shape[0]
        overlaps = np.zeros((num_boxes, self.num_classes))
        if gt_roidb is not None and gt_roidb[i]['boxes'].size > 0:
            gt_boxes = gt_roidb[i]['boxes']
            gt_classes = gt_roidb[i]['gt_classes']
            gt_overlaps = bbox_overlaps(boxes.astype(float),
                                        gt_boxes.astype(float))
            # For every box, find its best-matching gt box and the IoU.
            argmaxes = gt_overlaps.argmax(axis=1)
            maxes = gt_overlaps.max(axis=1)
            I = np.where(maxes > 0)[0]
            # Record each box's best IoU in the column of the class of its
            # best-matching gt box.
            overlaps[I, gt_classes[argmaxes[I]]] = maxes[I]
        roidb.append({
            'boxes': boxes,
            'gt_classes': np.zeros((num_boxes,), dtype=np.int32),
            'gt_overlaps': overlaps,
            'flipped': False,
            'seg_areas': np.zeros((num_boxes,), dtype=np.float32),
        })
    return roidb
最后是imdb的静态方法,用来合并roidb,只需要将roidb里的属性按照各自对应的维度stack即可,其中
gt_overlaps需要进行稀疏矩阵的压缩。
@staticmethod
def merge_roidbs(a, b):
    """Merge roidb `b` into roidb `a` entry by entry and return `a`.

    Each field is stacked along its natural axis; gt_overlaps stays a
    sparse matrix and is stacked with scipy.sparse.vstack.
    """
    assert len(a) == len(b)
    for i in range(len(a)):
        # BUG FIX: np.vstack takes a single sequence argument, not two
        # positional arrays.
        a[i]['boxes'] = np.vstack((a[i]['boxes'], b[i]['boxes']))
        a[i]['gt_classes'] = np.hstack((a[i]['gt_classes'],
                                        b[i]['gt_classes']))  # vector
        a[i]['gt_overlaps'] = scipy.sparse.vstack([a[i]['gt_overlaps'],
                                                   b[i]['gt_overlaps']])
        a[i]['seg_areas'] = np.hstack((a[i]['seg_areas'],
                                       b[i]['seg_areas']))
    return a
pascal_voc.py(以VOC数据集为例)
pascal_voc类从imdb类继承而来,因此具有imdb的所有属性及方法。需要根据数据集将imdb中的属性具体化,并添加数据集专有的一些特性例如:年份、路径、图片类型等。
- 从imdb继承来的初始化函数并添加专有属性
import os.path as ops


class pascal_voc(imdb):
    """PASCAL VOC dataset: binds the generic imdb to the VOC layout,
    adding the year, the devkit path and VOC-specific config options."""

    def __init__(self, image_set, year, devkit_path=None):
        # BUG FIX: the name was built without the separating underscore;
        # factory.py registers datasets as 'voc_{year}_{split}'.
        imdb.__init__(self, 'voc_' + year + '_' + image_set)  # parent init
        self._image_set = image_set
        self._year = year
        # =========== 1. dataset paths ===========
        # Without an explicit devkit_path, fall back to the default
        # Faster-RCNN/data/VOCdevkit<year> location.
        # BUG FIX: the condition referenced an undefined name `devkit`.
        self._devkit_path = devkit_path if devkit_path is not None \
            else self._get_default_path()
        self._data_path = ops.join(self._devkit_path, 'VOC' + self._year)
        # =========== 2. classes and samples ===========
        self._classes = ('__background__',  # always index 0
                         'aeroplane', 'bicycle', 'bird', 'boat', 'bottle',
                         'bus', 'car', 'cat', 'chair', 'cow', 'diningtable',
                         'dog', 'horse', 'motorbike', 'person',
                         'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor')
        # BUG FIX: dict{zip(...)} is invalid syntax; dict(zip(...)) builds
        # the class-name -> index mapping.
        self._class_to_index = dict(zip(self._classes,
                                        range(self.num_classes)))
        self._image_ext = '.jpg'  # image extension used by this dataset
        self._image_index = self._load_image_set_index()
        # =========== 3. roidb handler and config ===========
        self._roidb_handler = self.gt_roidb  # dataset-specific roi generator
        self._salt = str(uuid.uuid4())
        self._comp_id = 'comp4'
        # PASCAL specific config options
        self.config = {'cleanup': True,
                       'use_salt': True,
                       'use_diff': False,
                       'matlab_eval': False,
                       'rpn_file': None,
                       'min_size': 2}
        # Sanity-check that both paths exist.
        assert os.path.exists(self._devkit_path), \
            'VOCdevkit path does not exist: {}'.format(self._devkit_path)
        assert os.path.exists(self._data_path), \
            'Path does not exist: {}'.format(self._data_path)
- 上述专有的方法
默认路径生成,根据fast_rcnn.config对DATA_DIR的声明,生成VOC数据集的目录
def _get_default_path(self):
    """Return the default path where PASCAL VOC is expected to be
    installed: Faster-RCNN/data/VOCdevkit<year>."""
    return ops.join(cfg.DATA_DIR, 'VOCdevkit' + self._year)
根据数据集名称导入相应的样本
def _load_image_set_index(self): file_path = ops.join(self._data_path, 'ImageSets', 'Layout', self._image_set + '.txt') assert(ops.exits(file_path)), \ 'Path does not exist: {}'.format(image_set_file) with file_path open as f: image_index = [x.strip() for x in f.readlines()] return image_index
检查是否存在roi缓存文件,如果存在则导入,如果不存在则创建,并保存。
# NOTE(review): transcribed as `get_roidb`, but __init__ assigns
# self._roidb_handler = self.gt_roidb — renamed to match.
def gt_roidb(self):
    """Return the ground-truth roidb, loading it from the pickle cache
    when present, otherwise building and caching it."""
    cache_file = osp.join(self.cache_path, self.name + '_gt_roidb.pkl')
    if osp.exists(cache_file):
        # BUG FIX: pickle files must be opened in binary mode.
        with open(cache_file, 'rb') as fid:
            roidb = pickle.load(fid)  # uses the pickle module
        return roidb
    # Not cached: build one roidb dict per image, then save the list as a
    # .pkl file for the next run.
    # BUG FIX: the original called self._load_pascal_annotaitons(self, index)
    # — misspelled name and a spurious extra `self` argument.
    gt_roidb = [self._load_pascal_annotation(index)
                for index in self._image_index]
    with open(cache_file, 'wb') as fid:
        pickle.dump(gt_roidb, fid, pickle.HIGHEST_PROTOCOL)
    print('wrote gt_roidb to {}'.format(cache_file))
    return gt_roidb
具体的load_pascal_annotation函数
import xml.etree.ElementTree as ET # 用来处理xml文件,提取节点 def _load_pascal_annotation(self, index): # 根据index找到对应的xml文件 filename = osp.join(self._data_path, 'Annotation', index + '.xml') tree = ET.parse(file_path) objs = tree.findall('object') # 所有的object节点类 # 从中去掉标注为difficult的难训练对象 if not self.config['use_diff']: non_diff_objs = [obj for obj in objs if (int(obj.find(difficult).text == 0))] objs = non_diff_objs num_objs = len(objs) boxes = np.zeros((num_objs, 4), dtype=np.uint16) # 坐标 gt_classes = np.zeros((num_objs), dtype=np.int32) # 类 overlaps = np.zeros((num_objs, self.num_classes), dtype=np.float32) # IoU seg_areas = np.zeros((num_objs), dtype=np.float32) # 面积 for ix, obj in enumerate(objs): bbox = obj.find('bndbox') x1 = float(bbox.find('xmin').text) - 1 y1 = float(bbox.find('ymin').text) - 1 x2 = float(bbox.find('xmax').text) - 1 y2 = float(bbox.find('ymax').text) - 1 boxes[ix, :] = [x1, y1, x2, y2] cls = self._class_to_index[obj.find('name').text.lower().strip()] gt_classes[ix] = cls overlaps[ix, cls] = 1.0 seg_areas = (x2 - x1 + 1) * (y2 - y1 + 1) import scipy.sparse overlaps = scipy.sparse.csr_matrix(overlaps) # 进行矩阵压缩 return {'boxes' : boxes, 'gt_classes': gt_classes, 'gt_overlaps' : overlaps, 'flipped' : False, 'seg_areas' : seg_areas}
factory.py
通过这个文件来批量产生同类型的数据,例如同为VOC系列,年份不同或不同的train或test数据集。
# Register one dataset constructor per (year, split) pair, keyed by names
# such as 'voc_2007_trainval'.  The lambda defers construction until the
# dataset is actually requested.  `split=split, year=year` bind the
# *current* loop values as default arguments — without them, Python's
# late-binding closures would make every lambda see the final loop values.
for year in ['2007', '2012']:
    for split in ['train', 'val', 'trainval', 'test']:
        name = 'voc_{}_{}'.format(year, split)
        __sets[name] = (lambda split=split, year=year:
                        datasets.pascal_voc(split, year))
相关文章推荐
- Faster R-CNN代码学习(二)——utils模块
- faster rcnn 源码学习-------数据读入及RoIDataLayer相关模块解读 + Train的流程
- Keras版Faster-RCNN代码学习(loss,xml解析)3
- 学习Faster R-CNN代码roi_pooling(三)
- 学习Faster R-CNN代码demo(一)
- 学习Faster R-CNN代码roi_pooling(二)
- 深度学习 8. MatConvNet 相关函数解释说明,MatConvNet 代码理解(四)cnn_train.m 的注释
- 【面向代码】学习 Deep Learning(三)Convolution Neural Network(CNN)
- r-cnn学习(四):train_faster_rcnn_alt_opt.py源码学习
- faster rcnn python layer学习
- pycharm下运行官方的py-faster R-CNN的caffe代码
- R-CNN,SPP-NET, Fast-R-CNN,Faster-R-CNN, YOLO, SSD系列深度学习检测方法梳理
- python 常用代码学习笔记之commands模块
- R-CNN,SPP-NET, Fast-R-CNN,Faster-R-CNN, YOLO, SSD系列深度学习检测方法梳理
- 深度学习Matlab工具箱代码注释——cnntrain.m
- 【面向代码】学习 Deep Learning(三)Convolution Neural Network(CNN)
- 【面向代码】学习 Deep Learning(三)Convolution Neural Network(CNN)
- 【神经网络与深度学习】【计算机视觉】Faster R-CNN
- 【面向代码】学习 Deep Learning(三)Convolution Neural Network(CNN)