您的位置:首页 > 其它

Mask RCNN from Detectron

2018-03-22 18:05 441 查看
原文地址

Customize your own Mask R-CNN

We mainly discuss how to add your own data branch or control branch in order to modify Mask R-CNN:

mask rcnn api

add your own layer

api

Mask R-CNN is a framework that combines three tasks in a single network that is trained end to end.

def train_model():
    """Model training loop.

    Creates the model, runs SGD iterations from the starting iteration up to
    cfg.SOLVER.MAX_ITER, periodically writes weight snapshots, and finally
    writes the final weights file.

    Returns:
        The `checkpoints` mapping obtained from create_model(), extended with
        one entry per snapshot written (keyed by iteration number) and a
        'final' entry for the final weights file. If a 'final' entry already
        exists, it is returned unchanged and no training is performed.
    """
    logger = logging.getLogger(__name__)
    model, start_iter, checkpoints, output_dir = create_model()
    if 'final' in checkpoints:
        # The final model was found in the output directory, so nothing to do
        return checkpoints

    setup_model_for_training(model, output_dir)
    training_stats = TrainingStats(model)
    CHECKPOINT_PERIOD = int(cfg.TRAIN.SNAPSHOT_ITERS / cfg.NUM_GPUS)

    for cur_iter in range(start_iter, cfg.SOLVER.MAX_ITER):
        training_stats.IterTic()
        lr = model.UpdateWorkspaceLr(
            cur_iter, lr_policy.get_lr_at_iter(cur_iter)
        )
        workspace.RunNet(model.net.Proto().name)
        if cur_iter == start_iter:
            # Print the network once, on the first iteration that is run
            nu.print_net(model)
        training_stats.IterToc()
        training_stats.UpdateIterStats()
        training_stats.LogIterStats(cur_iter, lr)

        if (cur_iter + 1) % CHECKPOINT_PERIOD == 0 and cur_iter > start_iter:
            checkpoints[cur_iter] = os.path.join(
                output_dir, 'model_iter{}.pkl'.format(cur_iter)
            )
            nu.save_model_to_weights_file(checkpoints[cur_iter], model)

        if cur_iter == start_iter + training_stats.LOG_PERIOD:
            # Reset the iteration timer to remove outliers from the first few
            # SGD iterations
            training_stats.ResetIterTimer()

        if np.isnan(training_stats.iter_total_loss):
            logger.critical('Loss is NaN, exiting...')
            # Stop the data loader before exiting on the error
            model.roi_data_loader.shutdown()
            envu.exit_on_error()

    # Save the final model
    checkpoints['final'] = os.path.join(output_dir, 'model_final.pkl')
    nu.save_model_to_weights_file(checkpoints['final'], model)
    # Shutdown data loading threads
    model.roi_data_loader.shutdown()
    return checkpoints


... -> RoI ----\
                -> RoIFeatureXform -> mask head -> mask output -> loss
... -> Feature /

The mask head produces a feature representation of the RoI for the purpose of mask prediction. The mask output module converts the feature representation into real-valued (soft) masks.

def create(model_type_func, train=False, gpu_id=0):
    """Generic model creation function that dispatches to specific model
    building functions.

    By default, this function will generate a data parallel model configured to
    run on cfg.NUM_GPUS devices. However, you can restrict it to build a model
    targeted to a specific GPU by specifying gpu_id. This is used by
    optimizer.build_data_parallel_model() during test time.

    Args:
        model_type_func: name of the model building function; it is used both
            as the model name and as the key passed to get_func().
        train: whether the model is built for training. Also passed as
            init_params to the model helper.
        gpu_id: stored on the model as target_gpu_id.

    Returns:
        Whatever the resolved model building function returns when called
        with the freshly constructed DetectionModelHelper.
    """
    model = DetectionModelHelper(
        name=model_type_func,
        train=train,
        num_classes=cfg.MODEL.NUM_CLASSES,
        init_params=train
    )
    model.only_build_forward_pass = False
    model.target_gpu_id = gpu_id
    return get_func(model_type_func)(model)


# Copyright (c) 2017-present, Facebook, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##############################################################################

"""Defines DetectionModelHelper, the class that represents a Detectron model."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import numpy as np
import logging

from caffe2.python import cnn
from caffe2.python import core
from caffe2.python import workspace
from caffe2.python.modeling import initializers
from caffe2.python.modeling.parameter_info import ParameterTags

from core.config import cfg
from ops.collect_and_distribute_fpn_rpn_proposals \
import CollectAndDistributeFpnRpnProposalsOp
from ops.generate_proposal_labels import GenerateProposalLabelsOp
from ops.generate_proposals import GenerateProposalsOp
import roi_data.fast_rcnn
import utils.c2 as c2_utils

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

class DetectionModelHelper(cnn.CNNModelHelper):
    """Model helper that adds detection-specific ops and bookkeeping
    (losses, metrics, frozen parameters, learning-rate updates) on top of
    CNNModelHelper.
    """

    def __init__(self, **kwargs):
        """Consume the detection-specific kwargs ('train', 'num_classes') and
        forward everything else to CNNModelHelper.

        'num_classes' must be given and be > 0.
        """
        # Handle args specific to the DetectionModelHelper, others pass through
        # to CNNModelHelper
        self.train = kwargs.pop('train', False)
        self.num_classes = kwargs.pop('num_classes', -1)
        assert self.num_classes > 0, 'num_classes must be > 0'
        kwargs['order'] = 'NCHW'
        # Defensively set cudnn_exhaustive_search to False in case the default
        # changes in CNNModelHelper. The detection code uses variable size
        # inputs that might not play nicely with cudnn_exhaustive_search.
        kwargs['cudnn_exhaustive_search'] = False
        super(DetectionModelHelper, self).__init__(**kwargs)
        self.roi_data_loader = None
        self.losses = []
        self.metrics = []
        self.do_not_update_params = []  # Param on this list are not updated
        self.net.Proto().type = cfg.MODEL.EXECUTION_TYPE
        self.net.Proto().num_workers = cfg.NUM_GPUS * 4
        self.prev_use_cudnn = self.use_cudnn

    def TrainableParams(self, gpu_id=-1):
        """Get the blob names for all trainable parameters, possibly filtered by
        GPU id.

        A parameter is trainable when it has a gradient and is not listed in
        do_not_update_params. With gpu_id == -1 no GPU filtering is applied.
        """
        gpu_prefix = 'gpu_{}'.format(gpu_id)
        return [
            p for p in self.params
            if (
                p in self.param_to_grad and   # p has a gradient
                p not in self.do_not_update_params and  # not on the blacklist
                (gpu_id == -1 or  # filter for gpu assignment, if gpu_id set
                 str(p).startswith(gpu_prefix))
            )]

    def AffineChannel(self, blob_in, blob_out, dim, inplace=False):
        """Affine transformation to replace BN in networks where BN cannot be
        used (e.g., because the minibatch size is too small).

        The operations can be done in place to save memory.

        The scale and bias parameters are always named after blob_out
        ('<blob_out>_s' and '<blob_out>_b'), even when the op writes in place
        to blob_in.
        """
        blob_out = blob_out or self.net.NextName()
        param_prefix = blob_out

        scale = self.create_param(
            param_name=param_prefix + '_s',
            initializer=initializers.Initializer("ConstantFill", value=1.),
            tags=ParameterTags.WEIGHT,
            shape=[dim, ],
        )
        bias = self.create_param(
            param_name=param_prefix + '_b',
            initializer=initializers.Initializer("ConstantFill", value=0.),
            tags=ParameterTags.BIAS,
            shape=[dim, ],
        )
        target = blob_in if inplace else blob_out
        return self.net.AffineChannel([blob_in, scale, bias], target)

    def GenerateProposals(self, blobs_in, blobs_out, anchors, spatial_scale):
        """Op for generating RPN proposals.

        blobs_in:
          - 'rpn_cls_probs': 4D tensor of shape (N, A, H, W), where N is the
            number of minibatch images, A is the number of anchors per
            locations, and (H, W) is the spatial size of the prediction grid.
            Each value represents a "probability of object" rating in [0, 1].
          - 'rpn_bbox_pred': 4D tensor of shape (N, 4 * A, H, W) of predicted
            deltas for transformation anchor boxes into RPN proposals.
          - 'im_info': 2D tensor of shape (N, 3) where the three columns encode
            the input image's [height, width, scale]. Height and width are
            for the input to the network, not the original image; scale is the
            scale factor used to scale the original image to the network input
            size.

        blobs_out:
          - 'rpn_rois': 2D tensor of shape (R, 5), for R RPN proposals where the
            five columns encode [batch ind, x1, y1, x2, y2]. The boxes are
            w.r.t. the network input, which is a *scaled* version of the
            original image; these proposals must be scaled by 1 / scale (where
            scale comes from im_info; see above) to transform it back to the
            original input image coordinate system.
          - 'rpn_roi_probs': 1D tensor of objectness probability scores
            (extracted from rpn_cls_probs; see above).
        """
        name = 'GenerateProposalsOp:' + ','.join([str(b) for b in blobs_in])
        # spatial_scale passed to the Python op is only used in convert_pkl_to_pb
        self.net.Python(
            GenerateProposalsOp(anchors, spatial_scale, self.train).forward
        )(blobs_in, blobs_out, name=name, spatial_scale=spatial_scale)
        return blobs_out

    def GenerateProposalLabels(self, blobs_in):
        """Op for generating training labels for RPN proposals. This is used
        when training RPN jointly with Fast/Mask R-CNN (as in end-to-end
        Faster R-CNN training).

        blobs_in:
          - 'rpn_rois': 2D tensor of RPN proposals output by GenerateProposals
          - 'roidb': roidb entries that will be labeled
          - 'im_info': See GenerateProposals doc.

        blobs_out:
          - (variable set of blobs): returns whatever blobs are required for
            training the model. It does this by querying the data loader for
            the list of blobs that are needed.
        """
        name = 'GenerateProposalLabelsOp:' + ','.join(
            [str(b) for b in blobs_in]
        )

        # The list of blobs is not known before run-time because it depends on
        # the specific model being trained. Query the data loader to get the
        # list of output blob names.
        blobs_out = roi_data.fast_rcnn.get_fast_rcnn_blob_names(
            is_training=self.train
        )
        blobs_out = [core.ScopedBlobReference(b) for b in blobs_out]

        self.net.Python(GenerateProposalLabelsOp().forward)(
            blobs_in, blobs_out, name=name
        )
        return blobs_out

    def CollectAndDistributeFpnRpnProposals(self):
        """Merge RPN proposals generated at multiple FPN levels and then
        distribute those proposals to their appropriate FPN levels. An anchor
        at one FPN level may predict an RoI that will map to another level,
        hence the need to redistribute the proposals.

        This function assumes standard blob names for input and output blobs.

        Input blobs: [rpn_rois_fpn<min>, ..., rpn_rois_fpn<max>,
                      rpn_roi_probs_fpn<min>, ..., rpn_roi_probs_fpn<max>]
          - rpn_rois_fpn<i> are the RPN proposals for FPN level i; see rpn_rois
            documentation from GenerateProposals.
          - rpn_roi_probs_fpn<i> are the RPN objectness probabilities for FPN
            level i; see rpn_roi_probs documentation from GenerateProposals.

        If used during training, then the input blobs will also include:
          [roidb, im_info] (see GenerateProposalLabels).

        Output blobs: [rois_fpn<min>, ..., rois_fpn<max>, rois,
                       rois_idx_restore]
          - rois_fpn<i> are the RPN proposals for FPN level i
          - rois_idx_restore is a permutation on the concatenation of all
            rois_fpn<i>, i=min...max, such that when applied the RPN RoIs are
            restored to their original order in the input blobs.

        If used during training, then the output blobs will also include:
          [labels, bbox_targets, bbox_inside_weights, bbox_outside_weights].
        """
        k_max = cfg.FPN.RPN_MAX_LEVEL
        k_min = cfg.FPN.RPN_MIN_LEVEL

        # Prepare input blobs
        levels = range(k_min, k_max + 1)
        rois_names = ['rpn_rois_fpn' + str(lvl) for lvl in levels]
        score_names = ['rpn_roi_probs_fpn' + str(lvl) for lvl in levels]
        blobs_in = rois_names + score_names
        if self.train:
            blobs_in += ['roidb', 'im_info']
        blobs_in = [core.ScopedBlobReference(b) for b in blobs_in]
        name = 'CollectAndDistributeFpnRpnProposalsOp:' + ','.join(
            [str(b) for b in blobs_in]
        )

        # Prepare output blobs
        blobs_out = roi_data.fast_rcnn.get_fast_rcnn_blob_names(
            is_training=self.train
        )
        blobs_out = [core.ScopedBlobReference(b) for b in blobs_out]

        outputs = self.net.Python(
            CollectAndDistributeFpnRpnProposalsOp(self.train).forward
        )(blobs_in, blobs_out, name=name)

        return outputs

    def DropoutIfTraining(self, blob_in, dropout_rate):
        """Add dropout to blob_in if the model is in training mode and
        dropout_rate is > 0.

        When dropout is added it is applied in place (blob_in is also the
        output blob); otherwise blob_in is returned untouched.
        """
        blob_out = blob_in
        if self.train and dropout_rate > 0:
            blob_out = self.Dropout(
                blob_in, blob_in, ratio=dropout_rate, is_test=False
            )
        return blob_out

    def RoIFeatureTransform(
        self,
        blobs_in,
        blob_out,
        blob_rois='rois',
        method='RoIPoolF',
        resolution=7,
        spatial_scale=1. / 16.,
        sampling_ratio=0
    ):
        """Add the specified RoI pooling method. The sampling_ratio argument
        is supported for some, but not all, RoI transform methods.

        RoIFeatureTransform abstracts away:
          - Use of FPN or not
          - Specifics of the transform method

        When blobs_in is a list (FPN case), spatial_scale must be a sequence
        indexed in the same (reversed) order as blobs_in.
        """
        assert method in {'RoIPoolF', 'RoIAlign'}, \
            'Unknown pooling method: {}'.format(method)
        has_argmax = (method == 'RoIPoolF')
        if isinstance(blobs_in, list):
            # FPN case: add RoIFeatureTransform to each FPN level
            k_max = cfg.FPN.ROI_MAX_LEVEL  # coarsest level of pyramid
            k_min = cfg.FPN.ROI_MIN_LEVEL  # finest level of pyramid
            assert len(blobs_in) == k_max - k_min + 1
            bl_out_list = []
            for lvl in range(k_min, k_max + 1):
                bl_in = blobs_in[k_max - lvl]  # blobs_in is in reversed order
                sc = spatial_scale[k_max - lvl]  # in reversed order
                bl_rois = blob_rois + '_fpn' + str(lvl)
                bl_out = blob_out + '_fpn' + str(lvl)
                bl_out_list.append(bl_out)
                bl_argmax = ['_argmax_' + bl_out] if has_argmax else []
                self.net.__getattr__(method)(
                    [bl_in, bl_rois], [bl_out] + bl_argmax,
                    pooled_w=resolution,
                    pooled_h=resolution,
                    spatial_scale=sc,
                    sampling_ratio=sampling_ratio
                )
            # The pooled features from all levels are concatenated along the
            # batch dimension into a single 4D tensor.
            xform_shuffled, _ = self.net.Concat(
                bl_out_list, [blob_out + '_shuffled', '_concat_' + blob_out],
                axis=0
            )
            # Unshuffle to match rois from dataloader
            restore_bl = blob_rois + '_idx_restore_int32'
            xform_out = self.net.BatchPermutation(
                [xform_shuffled, restore_bl], blob_out
            )
        else:
            # Single feature level
            bl_argmax = ['_argmax_' + blob_out] if has_argmax else []
            # sampling_ratio is ignored for RoIPoolF
            xform_out = self.net.__getattr__(method)(
                [blobs_in, blob_rois], [blob_out] + bl_argmax,
                pooled_w=resolution,
                pooled_h=resolution,
                spatial_scale=spatial_scale,
                sampling_ratio=sampling_ratio
            )
        # Only return the first blob (the transformed features)
        return xform_out

    def ConvShared(
        self,
        blob_in,
        blob_out,
        dim_in,
        dim_out,
        kernel,
        weight=None,
        bias=None,
        **kwargs
    ):
        """Add conv op that shares weights and/or biases with another conv op.

        A truthy 'no_bias' entry in kwargs drops the bias input; the
        'no_bias' key itself is never forwarded to the Conv op. dim_in and
        dim_out are accepted but not used by this method.
        """
        # Consume 'no_bias' here so that it is not passed on to the op
        use_bias = not kwargs.pop('no_bias', False)

        if self.use_cudnn:
            kwargs['engine'] = 'CUDNN'
            kwargs['exhaustive_search'] = self.cudnn_exhaustive_search
            if self.ws_nbytes_limit:
                kwargs['ws_nbytes_limit'] = self.ws_nbytes_limit

        if use_bias:
            blobs_in = [blob_in, weight, bias]
        else:
            blobs_in = [blob_in, weight]

        return self.net.Conv(
            blobs_in, blob_out, kernel=kernel, order=self.order, **kwargs
        )

    def BilinearInterpolation(
        self, blob_in, blob_out, dim_in, dim_out, up_scale
    ):
        """Bilinear interpolation in space of scale.

        Takes input of NxKxHxW and outputs NxKx(sH)x(sW), where s:= up_scale

        Adapted from the CVPR'15 FCN code.
        See: https://github.com/shelhamer/fcn.berkeleyvision.org/blob/master/surgery.py

        The interpolation is implemented as a ConvTranspose whose weights are
        a fixed bilinear kernel; its weight and bias are added to
        do_not_update_params so they are not trained.
        """
        assert dim_in == dim_out
        assert up_scale % 2 == 0, 'Scale should be even'

        def upsample_filt(size):
            """Return a (size, size) 2D bilinear kernel."""
            factor = (size + 1) // 2
            if size % 2 == 1:
                center = factor - 1
            else:
                center = factor - 0.5
            og = np.ogrid[:size, :size]
            return ((1 - abs(og[0] - center) / factor) *
                    (1 - abs(og[1] - center) / factor))

        kernel_size = up_scale * 2
        bil_filt = upsample_filt(kernel_size)

        kernel = np.zeros(
            (dim_in, dim_out, kernel_size, kernel_size), dtype=np.float32
        )
        # Only the "diagonal" (channel i -> channel i) filters are non-zero
        kernel[range(dim_out), range(dim_in), :, :] = bil_filt

        blob = self.ConvTranspose(
            blob_in,
            blob_out,
            dim_in,
            dim_out,
            kernel_size,
            stride=int(up_scale),
            pad=int(up_scale / 2),
            weight_init=('GivenTensorFill', {'values': kernel}),
            bias_init=('ConstantFill', {'value': 0.})
        )
        # Freeze the weight and bias that ConvTranspose just created
        self.do_not_update_params.append(self.weights[-1])
        self.do_not_update_params.append(self.biases[-1])
        return blob

    def ConvAffine(  # args in the same order of Conv()
        self, blob_in, prefix, dim_in, dim_out, kernel, stride, pad,
        group=1, dilation=1,
        weight_init=None,
        bias_init=None,
        suffix='_bn',
        inplace=False
    ):
        """ConvAffine adds a Conv op followed by a AffineChannel op (which
        replaces BN during fine tuning).

        The Conv op is created without a bias (no_bias=1); the AffineChannel
        output blob is named prefix + suffix.
        """
        conv_blob = self.Conv(
            blob_in,
            prefix,
            dim_in,
            dim_out,
            kernel,
            stride=stride,
            pad=pad,
            group=group,
            dilation=dilation,
            weight_init=weight_init,
            bias_init=bias_init,
            no_bias=1
        )
        blob_out = self.AffineChannel(
            conv_blob, prefix + suffix, dim=dim_out, inplace=inplace
        )
        return blob_out

    def DisableCudnn(self):
        """Turn cuDNN use off, remembering the current setting so that it can
        be restored with RestorePreviousUseCudnn().
        """
        self.prev_use_cudnn = self.use_cudnn
        self.use_cudnn = False

    def RestorePreviousUseCudnn(self):
        """Swap use_cudnn with the previously remembered setting."""
        self.use_cudnn, self.prev_use_cudnn = (
            self.prev_use_cudnn, self.use_cudnn
        )

    def UpdateWorkspaceLr(self, cur_iter, new_lr):
        """Updates the model's current learning rate and the workspace (learning
        rate and update history/momentum blobs).

        Returns new_lr.
        """
        # The workspace is the one source of truth for the lr
        # The lr is always the same on all GPUs
        cur_lr = workspace.FetchBlob('gpu_0/lr')[0]
        # There are no type conversions between the lr in Python and the lr in
        # the GPU (both are float32), so exact comparison is ok
        if cur_lr != new_lr:
            ratio = _get_lr_change_ratio(cur_lr, new_lr)
            # Only log changes that are large enough to be interesting
            if ratio > cfg.SOLVER.LOG_LR_CHANGE_THRESHOLD:
                logger.info(
                    'Changing learning rate {:.6f} -> {:.6f} at iter {:d}'.
                    format(cur_lr, new_lr, cur_iter))
            self._SetNewLr(cur_lr, new_lr)
        return new_lr

    def _SetNewLr(self, cur_lr, new_lr):
        """Do the actual work of updating the model and workspace blobs.
        """
        for i in range(cfg.NUM_GPUS):
            with c2_utils.CudaScope(i):
                workspace.FeedBlob(
                    'gpu_{}/lr'.format(i), np.array([new_lr], dtype=np.float32))
        ratio = _get_lr_change_ratio(cur_lr, new_lr)
        # The cur_lr > 1e-7 guard keeps new_lr / cur_lr away from a division
        # by (almost) zero
        if cfg.SOLVER.SCALE_MOMENTUM and cur_lr > 1e-7 and \
                ratio > cfg.SOLVER.SCALE_MOMENTUM_THRESHOLD:
            self._CorrectMomentum(new_lr / cur_lr)

    def _CorrectMomentum(self, correction):
        """The MomentumSGDUpdate op implements the update V as

            V := mu * V + lr * grad,

        where mu is the momentum factor, lr is the learning rate, and grad is
        the stochastic gradient. Since V is not defined independently of the
        learning rate (as it should ideally be), when the learning rate is
        changed we should scale the update history V in order to make it
        compatible in scale with lr * grad.
        """
        logger.info(
            'Scaling update history by {:.6f} (new lr / old lr)'.
            format(correction))
        for i in range(cfg.NUM_GPUS):
            with c2_utils.CudaScope(i):
                for param in self.TrainableParams(gpu_id=i):
                    # Scale the '<param>_momentum' blob in place
                    op = core.CreateOperator(
                        'Scale', [param + '_momentum'], [param + '_momentum'],
                        scale=correction)
                    workspace.RunOperatorOnce(op)

    def GetLossScale(self):
        """Allow a way to configure the loss scale dynamically.

        This may be used in a distributed data parallel setting.
        """
        return 1.0 / cfg.NUM_GPUS

    def AddLosses(self, losses):
        """Register one loss or a list of losses by unscoped name.

        Duplicates are dropped; the resulting order of self.losses is not
        preserved because the merge goes through a set.
        """
        if not isinstance(losses, list):
            losses = [losses]
        # Conversion to str allows losses to include BlobReferences
        losses = [c2_utils.UnscopeName(str(loss)) for loss in losses]
        self.losses = list(set(self.losses + losses))

    def AddMetrics(self, metrics):
        """Register one metric or a list of metrics.

        Duplicates are dropped; the resulting order of self.metrics is not
        preserved because the merge goes through a set.
        """
        if not isinstance(metrics, list):
            metrics = [metrics]
        self.metrics = list(set(self.metrics + metrics))

def _get_lr_change_ratio(cur_lr, new_lr):
eps = 1e-10
ratio = np.max(
(new_lr / np.max((cur_lr, eps)), cur_lr / np.max((new_lr, eps)))
)
return ratio
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签:  Mask RCNN Detectron