您的位置:首页 > 编程语言

Faster R-CNN代码学习(一)——datasets模块

2019-03-26 18:59 239 查看

Faster R-CNN代码学习(一)——datasets模块

源代码:https://github.com/smallcorgi/Faster-RCNN_TF

datasets模块在lib文件夹下,负责的是对数据集的操作,包含数据集对象的创建、载入过程,这一部分在训练自己的数据集时需要重点修改。

datasets模块主要包含3个py文件,分别为所有数据集类的父类imdb.py;根据数据集特有创建的以VOC为例,pascal_voc.py;用于迅速创建数据集对象的factory.py

文章目录


下面依次进行介绍。

imdb.py

  1. imdb为所有数据集的父类,因此包含了所有数据集共有的属性。
class imdb(object):
    """Base class for all datasets.

    Holds the attributes shared by every image database: name, class
    list, image index, the roidb and its config options.
    """

    def __init__(self, name):
        self._name = name
        self._classes = []
        self._num_classes = []   # unused here; `num_classes` is derived from `_classes`
        self._image_index = []
        self._roidb = None
        # Function pointer: each dataset redirects it to its own roi builder.
        self._roidb_handler = self.default_roidb
        self.config = {}

对于每一个数据集,其共有的属性都包含数据集名称name、数据集里有的类classes、数据集的图片样本image_index、数据集中的roi集合以及相关的设置config。

  2. 由于这些是私有属性,需要通过装饰器 `property` 将其取出,因此下面代码的主要内容就是 get 这些属性。
@property
def name(self):
    """Dataset name, e.g. 'voc_2007trainval'."""
    return self._name

@property
def classes(self):
    """Sequence of class names (background included)."""
    return self._classes

@property
def num_classes(self):
    """Number of classes, derived from `classes`."""
    return len(self._classes)

@property
def image_index(self):
    """List of image sample identifiers in this dataset."""
    return self._image_index

@property
def num_images(self):
    """Number of images, derived from `image_index`."""
    return len(self.image_index)

@property
def roidb_handler(self):
    """Function used to build the roidb (settable per proposal method)."""
    return self._roidb_handler

@roidb_handler.setter
def roidb_handler(self, val):
    self._roidb_handler = val

@property
def roidb(self):
    """Region-of-interest database, built lazily.

    If the roidb is already cached, return it directly; otherwise build
    it once through the function pointed to by `roidb_handler`.
    """
    if self._roidb is not None:
        return self._roidb
    self._roidb = self.roidb_handler()
    return self._roidb

# cache_path: directory used to store the cached roidb pickle files.
@property
def cache_path(self):
    """Return `<DATA_DIR>/cache`, creating the directory if needed."""
    cache_path = osp.abspath(osp.join(cfg.DATA_DIR, 'cache'))
    if not osp.exists(cache_path):
        os.makedirs(cache_path)
    return cache_path
  3. 部分方法需要依靠具体的数据集及相应路径来实现,因此这里仅声明接口:
def default_roidb(self):
    """Default roidb builder; concrete datasets must override this."""
    raise NotImplementedError

def image_path_at(self, i):
    """Return the image path of sample `i`; layout is dataset-specific.

    (Fix: the caller `_get_width` passes an index, so the stub must
    accept one.)
    """
    raise NotImplementedError
  4. 数据集的共有方法:数据翻转扩增、recall 指标评估、通过提供的 box_list 创建 roidb
# PIL is used to read each image's width before flipping (augmentation).
def _get_width(self):
    """Return a list with the pixel width of every image in the dataset."""
    return [PIL.Image.open(self.image_path_at(i)).size[0]
            for i in range(self.num_images)]

def append_flipped_images(self):
    """Double the dataset by horizontally flipping every roidb entry.

    Only the x coordinates and the `flipped` flag change; gt_overlaps
    and gt_classes are shared with the original entry.
    """
    num_images = self.num_images
    widths = self._get_width()
    for i in range(num_images):
        # Fix: roidb entries are dicts, so index with ['boxes'].
        boxes = self.roidb[i]['boxes'].copy()
        oldx1 = boxes[:, 0].copy()
        oldx2 = boxes[:, 2].copy()
        # Mirror x coordinates: new_x = width - old_x - 1 (0-based pixels).
        boxes[:, 0] = widths[i] - oldx2 - 1
        boxes[:, 2] = widths[i] - oldx1 - 1
        assert (boxes[:, 2] >= boxes[:, 0]).all()
        entry = {'boxes': boxes,
                 'gt_overlaps': self.roidb[i]['gt_overlaps'],
                 'gt_classes': self.roidb[i]['gt_classes'],
                 'flipped': True}
        self.roidb.append(entry)
    # Every image id now appears twice (original + flipped).
    self._image_index = self._image_index * 2

recall 指标评估:根据候选框与真实框(gt_box)之间的 IoU 来计算候选框的 recall 值。

def evaluate_recall(self, candidate_boxes=None, thresholds=None,
                    area='all', limit=None):
    """Compute proposal recall against the ground-truth boxes.

    Args:
        candidate_boxes: per-image proposal arrays; if None, the non-gt
            boxes already stored in the roidb are used.
        thresholds: IoU thresholds; defaults to 0.5:0.05:0.95.
        area: key selecting which gt-box sizes to evaluate.
        limit: optional cap on proposals per image.

    Returns:
        dict with average recall 'ar', per-threshold 'recalls',
        'thresholds' and the sorted 'gt_overlaps'.

    (Renamed from the transcribed `evaluate_racall`; the upstream API
    is `evaluate_recall`.)
    """
    # Map an area name to an index, then use area_ranges[index] to get
    # the [min, max] pixel-area window that filters the gt boxes.
    areas = {'all': 0, 'small': 1, 'medium': 2, 'large': 3,
             '96-128': 4, '128-256': 5, '256-512': 6, '512-inf': 7}
    area_ranges = [[0 ** 2, 1e5 ** 2],    # all
                   [0 ** 2, 32 ** 2],     # small
                   [32 ** 2, 96 ** 2],    # medium
                   [96 ** 2, 1e5 ** 2],   # large
                   [96 ** 2, 128 ** 2],   # 96-128
                   [128 ** 2, 256 ** 2],  # 128-256
                   [256 ** 2, 512 ** 2],  # 256-512
                   [512 ** 2, 1e5 ** 2],  # 512-inf
                   ]
    # Fixes: `dict.has_key` is Python 2 only; '.foramt' -> '.format';
    # the lookup key is `area`, not the dict itself.
    assert area in areas, 'unknown area range: {}'.format(area)
    area_range = area_ranges[areas[area]]

    gt_overlaps = np.zeros(0)
    num_pos = 0

    for i in range(self.num_images):
        # self.roidb[i]['gt_overlaps'] is sparse with shape (objs, num_classes).
        max_gt_overlaps = self.roidb[i]['gt_overlaps'].toarray().max(axis=1)
        # Ground truth = labelled (class > 0) with a perfect self-overlap.
        gt_inds = np.where((self.roidb[i]['gt_classes'] > 0) &
                           (max_gt_overlaps == 1))[0]
        gt_boxes = self.roidb[i]['boxes'][gt_inds, :]
        gt_areas = self.roidb[i]['seg_areas'][gt_inds]
        # Keep only gt boxes whose area falls inside the requested range.
        valid_gt_inds = np.where((gt_areas >= area_range[0]) &
                                 (gt_areas <= area_range[1]))[0]
        gt_boxes = gt_boxes[valid_gt_inds, :]
        num_pos += len(valid_gt_inds)

        if candidate_boxes is None:
            # Default proposals: stored boxes that are not ground truth.
            non_gt_inds = np.where(self.roidb[i]['gt_classes'] == 0)[0]
            # Fix: ['boxes'] must be a string key.
            boxes = self.roidb[i]['boxes'][non_gt_inds, :]
        else:
            boxes = candidate_boxes[i]
        if boxes.shape[0] == 0:
            continue
        if limit is not None and boxes.shape[0] > limit:
            boxes = boxes[:limit, :]

        # `np.float` is removed in modern NumPy; the builtin is equivalent.
        overlaps = bbox_overlaps(boxes.astype(float),
                                 gt_boxes.astype(float))
        _gt_overlaps = np.zeros((gt_boxes.shape[0]))
        # Greedy matching: each round, match the best remaining pair.
        for j in range(gt_boxes.shape[0]):
            # Best proposal (index, IoU) per gt box; recomputed each round
            # because matched rows/columns are invalidated below.
            argmax_overlaps = overlaps.argmax(axis=0)
            max_overlaps = overlaps.max(axis=0)
            # gt box with the globally largest IoU this round.
            gt_ind = max_overlaps.argmax()
            gt_ovr = max_overlaps.max()
            assert (gt_ovr >= 0)
            # Proposal matched to that gt box.
            box_ind = argmax_overlaps[gt_ind]
            _gt_overlaps[j] = overlaps[box_ind, gt_ind]
            # Mark the proposal box and the gt box as used.
            overlaps[box_ind, :] = -1
            overlaps[:, gt_ind] = -1
        gt_overlaps = np.hstack((gt_overlaps, _gt_overlaps))

    gt_overlaps = np.sort(gt_overlaps)  # sort IoUs over all images
    # Default thresholds: 0.5 to 0.95 in steps of 0.05.
    if thresholds is None:
        step = 0.05
        thresholds = np.arange(0.5, 0.95 + 1e-5, step)
    recalls = np.zeros_like(thresholds)

    for i, t in enumerate(thresholds):
        recalls[i] = (gt_overlaps >= t).sum() / float(num_pos)
    ar = recalls.mean()
    return {'ar': ar, 'recalls': recalls, 'thresholds': thresholds,
            'gt_overlaps': gt_overlaps}

从 box_list 中创建 roi 的方法用于生成一系列新的 roi,这些 roi 需要根据与真实 gt_box 的 IoU 来填充其 gt_overlaps。

def create_roidb_from_box_list(self, box_list, gt_roidb):
    """Build a roidb from externally supplied boxes (e.g. proposals).

    Each new box gets, as its overlap value, the IoU with its
    best-matching ground-truth box, stored in the column of that gt
    box's class.  `box_list` must contain one box array per image.
    """
    # Fix: the original asserted len(box_list == self.num_images), an
    # element-wise comparison; compare the lengths instead.
    assert len(box_list) == self.num_images, \
        'Number of boxes must match number of ground-truth images'
    roidb = []
    for i in range(self.num_images):
        boxes = box_list[i]
        num_boxes = boxes.shape[0]
        overlaps = np.zeros((num_boxes, self.num_classes))
        if gt_roidb is not None and gt_roidb[i]['boxes'].size > 0:
            gt_boxes = gt_roidb[i]['boxes']
            gt_classes = gt_roidb[i]['gt_classes']
            gt_overlaps = bbox_overlaps(boxes.astype(float),
                                        gt_boxes.astype(float))
            # For every box, the best-matching gt box and its IoU.
            argmaxes = gt_overlaps.argmax(axis=1)
            maxes = gt_overlaps.max(axis=1)
            I = np.where(maxes > 0)[0]
            # Write each box's best IoU into the column of the class of
            # the gt box it matched (fancy indexing, one cell per box).
            overlaps[I, gt_classes[argmaxes[I]]] = maxes[I]

        roidb.append({
            'boxes': boxes,
            'gt_classes': np.zeros((num_boxes,), dtype=np.int32),
            'gt_overlaps': overlaps,
            'flipped': False,
            'seg_areas': np.zeros((num_boxes,), dtype=np.float32),
        })
    return roidb

最后是 imdb 的静态方法,用来合并两个 roidb:只需将 roidb 里的各属性按照对应的维度 stack 即可,其中 `gt_overlaps` 需要以稀疏矩阵的形式拼接。

@staticmethod
def merge_roidbs(a, b):
    """Merge roidb `b` into roidb `a` image by image and return `a`.

    Dense arrays are stacked along their natural axis; the sparse
    gt_overlaps matrices are stacked with scipy.sparse.vstack.
    """
    assert len(a) == len(b)
    for i in range(len(a)):
        # Fix: np.vstack takes one sequence argument, not two arrays.
        a[i]['boxes'] = np.vstack((a[i]['boxes'], b[i]['boxes']))
        a[i]['gt_classes'] = np.hstack((a[i]['gt_classes'],
                                        b[i]['gt_classes']))  # vector
        a[i]['gt_overlaps'] = scipy.sparse.vstack([a[i]['gt_overlaps'],
                                                   b[i]['gt_overlaps']])
        a[i]['seg_areas'] = np.hstack((a[i]['seg_areas'],
                                       b[i]['seg_areas']))
    return a

pascal_voc.py(以VOC数据集为例)

pascal_voc类从imdb类继承而来,因此具有imdb的所有属性及方法。需要根据数据集将imdb中的属性具体化,并添加数据集专有的一些特性例如:年份、路径、图片类型等。

  1. 从imdb继承来的初始化函数并添加专有属性
import os.path as ops


class pascal_voc(imdb):
    """PASCAL VOC dataset.

    Inherits everything from imdb and fills in the VOC-specific
    attributes: year, paths, class list, image extension, etc.
    """

    def __init__(self, image_set, year, devkit_path=None):
        imdb.__init__(self, 'voc_' + year + image_set)  # parent __init__
        self._image_set = image_set
        self._year = year
        # =================== 1. dataset paths ===================
        # If no devkit_path is given, use the generated default path
        # Faster-RCNN/data/VOCdevkit<year>.
        # (Fixes: the original tested the undefined name `devkit` and
        # split the conditional expression across lines illegally.)
        self._devkit_path = (devkit_path if devkit_path is not None
                             else self._get_default_path())
        self._data_path = ops.join(self._devkit_path, 'VOC' + self._year)
        # =================== 2. classes and samples ===================
        self._classes = ('__background__',  # always index 0
                         'aeroplane', 'bicycle', 'bird', 'boat',
                         'bottle', 'bus', 'car', 'cat', 'chair',
                         'cow', 'diningtable', 'dog', 'horse',
                         'motorbike', 'person', 'pottedplant',
                         'sheep', 'sofa', 'train', 'tvmonitor')
        # (Fix: dict is built with a call, not braces.)
        self._class_to_index = dict(zip(self._classes,
                                        range(self.num_classes)))
        self._image_ext = '.jpg'  # image extension for this dataset
        self._image_index = self._load_image_set_index()
        # =================== 3. roidb handler and config ===================
        self._roidb_handler = self.gt_roidb  # dataset-specific roi builder
        self._salt = str(uuid.uuid4())
        self._comp_id = 'comp4'

        # PASCAL specific config options
        self.config = {'cleanup'     : True,
                       'use_salt'    : True,
                       'use_diff'    : False,
                       'matlab_eval' : False,
                       'rpn_file'    : None,
                       'min_size'    : 2}
        # Sanity-check that the dataset is actually installed.
        assert os.path.exists(self._devkit_path), \
            'VOCdevkit path does not exist: {}'.format(self._devkit_path)
        assert os.path.exists(self._data_path), \
            'Path does not exist: {}'.format(self._data_path)
  2. 上述专有的方法

默认路径生成,根据fast_rcnn.config对DATA_DIR的声明,生成VOC数据集的目录

def _get_default_path(self):
    """Return the default path where PASCAL VOC is expected to be
    installed: Faster-RCNN/data/VOCdevkit<year>.
    """
    return ops.join(cfg.DATA_DIR, 'VOCdevkit' + self._year)

根据数据集名称导入相应的样本

def _load_image_set_index(self):
file_path = ops.join(self._data_path, 'ImageSets', 'Layout', self._image_set + '.txt')
assert(ops.exits(file_path)), \
'Path does not exist: {}'.format(image_set_file)
with file_path open as f:
image_index = [x.strip() for x in f.readlines()]
return image_index

检查是否存在roi缓存文件,如果存在则导入,如果不存在则创建,并保存。

def gt_roidb(self):
    """Return the ground-truth roidb, using a pickle cache if present.

    (Renamed from the transcribed `get_roidb`: __init__ registers
    `self._roidb_handler = self.gt_roidb`, so this must match.)
    """
    cache_file = osp.join(self.cache_path, self.name + '_gt_roidb.pkl')
    if osp.exists(cache_file):
        # Fix: pickle files must be opened in binary mode.
        with open(cache_file, 'rb') as fid:
            roidb = pickle.load(fid)
        return roidb
    # No cache: parse the annotation of every image into a list of
    # dicts, then save it as a pkl file for the next run.
    # (Fix: call `self._load_pascal_annotation(index)` — the transcribed
    # name was misspelled and passed `self` twice.)
    gt_roidb = [self._load_pascal_annotation(index)
                for index in self._image_index]
    with open(cache_file, 'wb') as fid:
        pickle.dump(gt_roidb, fid, pickle.HIGHEST_PROTOCOL)
    print('wrote gt_roidb to {}'.format(cache_file))
    return gt_roidb

具体的load_pascal_annotation函数

import xml.etree.ElementTree as ET # 用来处理xml文件,提取节点
def _load_pascal_annotation(self, index):
# 根据index找到对应的xml文件
filename = osp.join(self._data_path, 'Annotation', index + '.xml')
tree = ET.parse(file_path)
objs = tree.findall('object') # 所有的object节点类
# 从中去掉标注为difficult的难训练对象
if not self.config['use_diff']:
non_diff_objs = [obj for obj in objs if (int(obj.find(difficult).text == 0))]
objs = non_diff_objs
num_objs = len(objs)
boxes = np.zeros((num_objs, 4), dtype=np.uint16) # 坐标
gt_classes = np.zeros((num_objs), dtype=np.int32) # 类
overlaps = np.zeros((num_objs, self.num_classes), dtype=np.float32) # IoU
seg_areas = np.zeros((num_objs), dtype=np.float32) # 面积
for ix, obj in enumerate(objs):
bbox = obj.find('bndbox')
x1 = float(bbox.find('xmin').text) - 1
y1 = float(bbox.find('ymin').text) - 1
x2 = float(bbox.find('xmax').text) - 1
y2 = float(bbox.find('ymax').text) - 1
boxes[ix, :] = [x1, y1, x2, y2]
cls = self._class_to_index[obj.find('name').text.lower().strip()]
gt_classes[ix] = cls
overlaps[ix, cls] = 1.0
seg_areas = (x2 - x1 + 1) * (y2 - y1 + 1)
import scipy.sparse
overlaps = scipy.sparse.csr_matrix(overlaps) # 进行矩阵压缩
return {'boxes' : boxes,
'gt_classes': gt_classes,
'gt_overlaps' : overlaps,
'flipped' : False,
'seg_areas' : seg_areas}

factory.py

通过这个文件来批量产生同类型的数据,例如同为VOC系列,年份不同或不同的train或test数据集。

# Register a constructor in __sets for every VOC year/split combination.
for year in ['2007', '2012']:
    for split in ['train', 'val', 'trainval', 'test']:
        name = 'voc_{}_{}'.format(year, split)
        # The lambda's default arguments capture the CURRENT loop values
        # of split/year; Python closures bind late, so without the
        # defaults every entry would build the same (last) dataset.
        __sets[name] = (lambda split=split, year=year:
                        datasets.pascal_voc(split, year))
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: