您的位置：首页 > 其它

SSD：Single Shot MultiBox Detector 心得之神经网路（持续更新中。。。）

2016-07-18 09:58 489 查看

首先看下文章里的网路截图：

根据网络配置文件"caffe-ssd/models/VGGNet/VOC0712/SSD_300x300/train.prototxt"

name: "VGG_VOC0712_SSD_300x300_train"
layer {
name: "data"
type: "AnnotatedData"
top: "data"
top: "label"
include {
phase: TRAIN
}
transform_param {
mirror: true
mean_value: 104
mean_value: 117
mean_value: 123
resize_param {
prob: 1
resize_mode: WARP
height: 300
width: 300
interp_mode: LINEAR
interp_mode: AREA
interp_mode: NEAREST
interp_mode: CUBIC
interp_mode: LANCZOS4
}
emit_constraint {
emit_type: CENTER
}
}
data_param {
source: "examples/VOC0712/VOC0712_trainval_lmdb"
batch_size: 32
backend: LMDB
}
annotated_data_param {
batch_sampler {
max_sample: 1
max_trials: 1
}
batch_sampler {
sampler {
min_scale: 0.3
max_scale: 1.0
min_aspect_ratio: 0.5
max_aspect_ratio: 2.0
}
sample_constraint {
min_jaccard_overlap: 0.1
}
max_sample: 1
max_trials: 50
}
batch_sampler {
sampler {
min_scale: 0.3
max_scale: 1.0
min_aspect_ratio: 0.5
max_aspect_ratio: 2.0
}
sample_constraint {
min_jaccard_overlap: 0.3
}
max_sample: 1
max_trials: 50
}
batch_sampler {
sampler {
min_scale: 0.3
max_scale: 1.0
min_aspect_ratio: 0.5
max_aspect_ratio: 2.0
}
sample_constraint {
min_jaccard_overlap: 0.5
}
max_sample: 1
max_trials: 50
}
batch_sampler {
sampler {
min_scale: 0.3
max_scale: 1.0
min_aspect_ratio: 0.5
max_aspect_ratio: 2.0
}
sample_constraint {
min_jaccard_overlap: 0.7
}
max_sample: 1
max_trials: 50
}
batch_sampler {
sampler {
min_scale: 0.3
max_scale: 1.0
min_aspect_ratio: 0.5
max_aspect_ratio: 2.0
}
sample_constraint {
min_jaccard_overlap: 0.9
}
max_sample: 1
max_trials: 50
}
batch_sampler {
sampler {
min_scale: 0.3
max_scale: 1.0
min_aspect_ratio: 0.5
max_aspect_ratio: 2.0
}
sample_constraint {
max_jaccard_overlap: 1.0
}
max_sample: 1
max_trials: 50
}
label_map_file: "data/VOC0712/labelmap_voc.prototxt"
}
}
layer {
name: "conv1_1"
type: "Convolution"
bottom: "data"
top: "conv1_1"
param {
lr_mult: 0
decay_mult: 0
}
param {
lr_mult: 0
decay_mult: 0
}
convolution_param {
num_output: 64
pad: 1
kernel_size: 3
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "relu1_1"
type: "ReLU"
bottom: "conv1_1"
top: "conv1_1"
}
layer {
name: "conv1_2"
type: "Convolution"
bottom: "conv1_1"
top: "conv1_2"
param {
lr_mult: 0
decay_mult: 0
}
param {
lr_mult: 0
decay_mult: 0
}
convolution_param {
num_output: 64
pad: 1
kernel_size: 3
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "relu1_2"
type: "ReLU"
bottom: "conv1_2"
top: "conv1_2"
}
layer {
name: "pool1"
type: "Pooling"
bottom: "conv1_2"
top: "pool1"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layer {
name: "conv2_1"
type: "Convolution"
bottom: "pool1"
top: "conv2_1"
param {
lr_mult: 0
decay_mult: 0
}
param {
lr_mult: 0
decay_mult: 0
}
convolution_param {
num_output: 128
pad: 1
kernel_size: 3
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "relu2_1"
type: "ReLU"
bottom: "conv2_1"
top: "conv2_1"
}
layer {
name: "conv2_2"
type: "Convolution"
bottom: "conv2_1"
top: "conv2_2"
param {
lr_mult: 0
decay_mult: 0
}
param {
lr_mult: 0
decay_mult: 0
}
convolution_param {
num_output: 128
pad: 1
kernel_size: 3
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "relu2_2"
type: "ReLU"
bottom: "conv2_2"
top: "conv2_2"
}
layer {
name: "pool2"
type: "Pooling"
bottom: "conv2_2"
top: "pool2"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layer {
name: "conv3_1"
type: "Convolution"
bottom: "pool2"
top: "conv3_1"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 256
pad: 1
kernel_size: 3
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "relu3_1"
type: "ReLU"
bottom: "conv3_1"
top: "conv3_1"
}
layer {
name: "conv3_2"
type: "Convolution"
bottom: "conv3_1"
top: "conv3_2"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 256
pad: 1
kernel_size: 3
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "relu3_2"
type: "ReLU"
bottom: "conv3_2"
top: "conv3_2"
}
layer {
name: "conv3_3"
type: "Convolution"
bottom: "conv3_2"
top: "conv3_3"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 256
pad: 1
kernel_size: 3
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "relu3_3"
type: "ReLU"
bottom: "conv3_3"
top: "conv3_3"
}
layer {
name: "pool3"
type: "Pooling"
bottom: "conv3_3"
top: "pool3"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layer {
name: "conv4_1"
type: "Convolution"
bottom: "pool3"
top: "conv4_1"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 512
pad: 1
kernel_size: 3
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "relu4_1"
type: "ReLU"
bottom: "conv4_1"
top: "conv4_1"
}
layer {
name: "conv4_2"
type: "Convolution"
bottom: "conv4_1"
top: "conv4_2"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 512
pad: 1
kernel_size: 3
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "relu4_2"
type: "ReLU"
bottom: "conv4_2"
top: "conv4_2"
}
layer {
name: "conv4_3"
type: "Convolution"
bottom: "conv4_2"
top: "conv4_3"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 512
pad: 1
kernel_size: 3
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "relu4_3"
type: "ReLU"
bottom: "conv4_3"
top: "conv4_3"
}
layer {
name: "pool4"
type: "Pooling"
bottom: "conv4_3"
top: "pool4"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layer {
name: "conv5_1"
type: "Convolution"
bottom: "pool4"
top: "conv5_1"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 512
pad: 1
kernel_size: 3
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "relu5_1"
type: "ReLU"
bottom: "conv5_1"
top: "conv5_1"
}
layer {
name: "conv5_2"
type: "Convolution"
bottom: "conv5_1"
top: "conv5_2"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 512
pad: 1
kernel_size: 3
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "relu5_2"
type: "ReLU"
bottom: "conv5_2"
top: "conv5_2"
}
layer {
name: "conv5_3"
type: "Convolution"
bottom: "conv5_2"
top: "conv5_3"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 512
pad: 1
kernel_size: 3
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "relu5_3"
type: "ReLU"
bottom: "conv5_3"
top: "conv5_3"
}
layer {
name: "pool5"
type: "Pooling"
bottom: "conv5_3"
top: "pool5"
pooling_param {
pool: MAX
kernel_size: 3
stride: 1
pad: 1
}
}
layer {
name: "fc6"
type: "Convolution"
bottom: "pool5"
top: "fc6"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 1024
pad: 6
kernel_size: 3
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
dilation: 6
}
}
layer {
name: "relu6"
type: "ReLU"
bottom: "fc6"
top: "fc6"
}
layer {
name: "fc7"
type: "Convolution"
bottom: "fc6"
top: "fc7"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 1024
kernel_size: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "relu7"
type: "ReLU"
bottom: "fc7"
top: "fc7"
}
layer {
name: "conv6_1"
type: "Convolution"
bottom: "fc7"
top: "conv6_1"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 256
pad: 0
kernel_size: 1
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "conv6_1_relu"
type: "ReLU"
bottom: "conv6_1"
top: "conv6_1"
}
layer {
name: "conv6_2"
type: "Convolution"
bottom: "conv6_1"
top: "conv6_2"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 512
pad: 1
kernel_size: 3
stride: 2
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "conv6_2_relu"
type: "ReLU"
bottom: "conv6_2"
top: "conv6_2"
}
layer {
name: "conv7_1"
type: "Convolution"
bottom: "conv6_2"
top: "conv7_1"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 128
pad: 0
kernel_size: 1
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "conv7_1_relu"
type: "ReLU"
bottom: "conv7_1"
top: "conv7_1"
}
layer {
name: "conv7_2"
type: "Convolution"
bottom: "conv7_1"
top: "conv7_2"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 256
pad: 1
kernel_size: 3
stride: 2
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "conv7_2_relu"
type: "ReLU"
bottom: "conv7_2"
top: "conv7_2"
}
layer {
name: "conv8_1"
type: "Convolution"
bottom: "conv7_2"
top: "conv8_1"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 128
pad: 0
kernel_size: 1
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "conv8_1_relu"
type: "ReLU"
bottom: "conv8_1"
top: "conv8_1"
}
layer {
name: "conv8_2"
type: "Convolution"
bottom: "conv8_1"
top: "conv8_2"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 256
pad: 1
kernel_size: 3
stride: 2
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "conv8_2_relu"
type: "ReLU"
bottom: "conv8_2"
top: "conv8_2"
}
layer {
name: "pool6"
type: "Pooling"
bottom: "conv8_2"
top: "pool6"
pooling_param {
pool: AVE
global_pooling: true
}
}
layer {
name: "conv4_3_norm"
type: "Normalize"
bottom: "conv4_3"
top: "conv4_3_norm"
norm_param {
across_spatial: false
scale_filler {
type: "constant"
value: 20
}
channel_shared: false
}
}
layer {
name: "conv4_3_norm_mbox_loc"
type: "Convolution"
bottom: "conv4_3_norm"
top: "conv4_3_norm_mbox_loc"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 12
pad: 1
kernel_size: 3
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "conv4_3_norm_mbox_loc_perm"
type: "Permute"
bottom: "conv4_3_norm_mbox_loc"
top: "conv4_3_norm_mbox_loc_perm"
permute_param {
order: 0
order: 2
order: 3
order: 1
}
}
layer {
name: "conv4_3_norm_mbox_loc_flat"
type: "Flatten"
bottom: "conv4_3_norm_mbox_loc_perm"
top: "conv4_3_norm_mbox_loc_flat"
flatten_param {
axis: 1
}
}
layer {
name: "conv4_3_norm_mbox_conf"
type: "Convolution"
bottom: "conv4_3_norm"
top: "conv4_3_norm_mbox_conf"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 63
pad: 1
kernel_size: 3
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "conv4_3_norm_mbox_conf_perm"
type: "Permute"
bottom: "conv4_3_norm_mbox_conf"
top: "conv4_3_norm_mbox_conf_perm"
permute_param {
order: 0
order: 2
order: 3
order: 1
}
}
layer {
name: "conv4_3_norm_mbox_conf_flat"
type: "Flatten"
bottom: "conv4_3_norm_mbox_conf_perm"
top: "conv4_3_norm_mbox_conf_flat"
flatten_param {
axis: 1
}
}
layer {
name: "conv4_3_norm_mbox_priorbox"
type: "PriorBox"
bottom: "conv4_3_norm"
bottom: "data"
top: "conv4_3_norm_mbox_priorbox"
prior_box_param {
min_size: 30.0
aspect_ratio: 2
flip: true
clip: true
variance: 0.1
variance: 0.1
variance: 0.2
variance: 0.2
}
}
layer {
name: "fc7_mbox_loc"
type: "Convolution"
bottom: "fc7"
top: "fc7_mbox_loc"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 24
pad: 1
kernel_size: 3
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "fc7_mbox_loc_perm"
type: "Permute"
bottom: "fc7_mbox_loc"
top: "fc7_mbox_loc_perm"
permute_param {
order: 0
order: 2
order: 3
order: 1
}
}
layer {
name: "fc7_mbox_loc_flat"
type: "Flatten"
bottom: "fc7_mbox_loc_perm"
top: "fc7_mbox_loc_flat"
flatten_param {
axis: 1
}
}
layer {
name: "fc7_mbox_conf"
type: "Convolution"
bottom: "fc7"
top: "fc7_mbox_conf"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 126
pad: 1
kernel_size: 3
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "fc7_mbox_conf_perm"
type: "Permute"
bottom: "fc7_mbox_conf"
top: "fc7_mbox_conf_perm"
permute_param {
order: 0
order: 2
order: 3
order: 1
}
}
layer {
name: "fc7_mbox_conf_flat"
type: "Flatten"
bottom: "fc7_mbox_conf_perm"
top: "fc7_mbox_conf_flat"
flatten_param {
axis: 1
}
}
layer {
name: "fc7_mbox_priorbox"
type: "PriorBox"
bottom: "fc7"
bottom: "data"
top: "fc7_mbox_priorbox"
prior_box_param {
min_size: 60.0
max_size: 114.0
aspect_ratio: 2
aspect_ratio: 3
flip: true
clip: true
variance: 0.1
variance: 0.1
variance: 0.2
variance: 0.2
}
}
layer {
name: "conv6_2_mbox_loc"
type: "Convolution"
bottom: "conv6_2"
top: "conv6_2_mbox_loc"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 24
pad: 1
kernel_size: 3
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "conv6_2_mbox_loc_perm"
type: "Permute"
bottom: "conv6_2_mbox_loc"
top: "conv6_2_mbox_loc_perm"
permute_param {
order: 0
order: 2
order: 3
order: 1
}
}
layer {
name: "conv6_2_mbox_loc_flat"
type: "Flatten"
bottom: "conv6_2_mbox_loc_perm"
top: "conv6_2_mbox_loc_flat"
flatten_param {
axis: 1
}
}
layer {
name: "conv6_2_mbox_conf"
type: "Convolution"
bottom: "conv6_2"
top: "conv6_2_mbox_conf"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 126
pad: 1
kernel_size: 3
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "conv6_2_mbox_conf_perm"
type: "Permute"
bottom: "conv6_2_mbox_conf"
top: "conv6_2_mbox_conf_perm"
permute_param {
order: 0
order: 2
order: 3
order: 1
}
}
layer {
name: "conv6_2_mbox_conf_flat"
type: "Flatten"
bottom: "conv6_2_mbox_conf_perm"
top: "conv6_2_mbox_conf_flat"
flatten_param {
axis: 1
}
}
layer {
name: "conv6_2_mbox_priorbox"
type: "PriorBox"
bottom: "conv6_2"
bottom: "data"
top: "conv6_2_mbox_priorbox"
prior_box_param {
min_size: 114.0
max_size: 168.0
aspect_ratio: 2
aspect_ratio: 3
flip: true
clip: true
variance: 0.1
variance: 0.1
variance: 0.2
variance: 0.2
}
}
layer {
name: "conv7_2_mbox_loc"
type: "Convolution"
bottom: "conv7_2"
top: "conv7_2_mbox_loc"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 24
pad: 1
kernel_size: 3
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "conv7_2_mbox_loc_perm"
type: "Permute"
bottom: "conv7_2_mbox_loc"
top: "conv7_2_mbox_loc_perm"
permute_param {
order: 0
order: 2
order: 3
order: 1
}
}
layer {
name: "conv7_2_mbox_loc_flat"
type: "Flatten"
bottom: "conv7_2_mbox_loc_perm"
top: "conv7_2_mbox_loc_flat"
flatten_param {
axis: 1
}
}
layer {
name: "conv7_2_mbox_conf"
type: "Convolution"
bottom: "conv7_2"
top: "conv7_2_mbox_conf"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 126
pad: 1
kernel_size: 3
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "conv7_2_mbox_conf_perm"
type: "Permute"
bottom: "conv7_2_mbox_conf"
top: "conv7_2_mbox_conf_perm"
permute_param {
order: 0
order: 2
order: 3
order: 1
}
}
layer {
name: "conv7_2_mbox_conf_flat"
type: "Flatten"
bottom: "conv7_2_mbox_conf_perm"
top: "conv7_2_mbox_conf_flat"
flatten_param {
axis: 1
}
}
layer {
name: "conv7_2_mbox_priorbox"
type: "PriorBox"
bottom: "conv7_2"
bottom: "data"
top: "conv7_2_mbox_priorbox"
prior_box_param {
min_size: 168.0
max_size: 222.0
aspect_ratio: 2
aspect_ratio: 3
flip: true
clip: true
variance: 0.1
variance: 0.1
variance: 0.2
variance: 0.2
}
}
layer {
name: "conv8_2_mbox_loc"
type: "Convolution"
bottom: "conv8_2"
top: "conv8_2_mbox_loc"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 24
pad: 1
kernel_size: 3
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "conv8_2_mbox_loc_perm"
type: "Permute"
bottom: "conv8_2_mbox_loc"
top: "conv8_2_mbox_loc_perm"
permute_param {
order: 0
order: 2
order: 3
order: 1
}
}
layer {
name: "conv8_2_mbox_loc_flat"
type: "Flatten"
bottom: "conv8_2_mbox_loc_perm"
top: "conv8_2_mbox_loc_flat"
flatten_param {
axis: 1
}
}
layer {
name: "conv8_2_mbox_conf"
type: "Convolution"
bottom: "conv8_2"
top: "conv8_2_mbox_conf"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 126
pad: 1
kernel_size: 3
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "conv8_2_mbox_conf_perm"
type: "Permute"
bottom: "conv8_2_mbox_conf"
top: "conv8_2_mbox_conf_perm"
permute_param {
order: 0
order: 2
order: 3
order: 1
}
}
layer {
name: "conv8_2_mbox_conf_flat"
type: "Flatten"
bottom: "conv8_2_mbox_conf_perm"
top: "conv8_2_mbox_conf_flat"
flatten_param {
axis: 1
}
}
layer {
name: "conv8_2_mbox_priorbox"
type: "PriorBox"
bottom: "conv8_2"
bottom: "data"
top: "conv8_2_mbox_priorbox"
prior_box_param {
min_size: 222.0
max_size: 276.0
aspect_ratio: 2
aspect_ratio: 3
flip: true
clip: true
variance: 0.1
variance: 0.1
variance: 0.2
variance: 0.2
}
}
layer {
name: "pool6_mbox_loc"
type: "Convolution"
bottom: "pool6"
top: "pool6_mbox_loc"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 24
pad: 1
kernel_size: 3
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "pool6_mbox_loc_perm"
type: "Permute"
bottom: "pool6_mbox_loc"
top: "pool6_mbox_loc_perm"
permute_param {
order: 0
order: 2
order: 3
order: 1
}
}
layer {
name: "pool6_mbox_loc_flat"
type: "Flatten"
bottom: "pool6_mbox_loc_perm"
top: "pool6_mbox_loc_flat"
flatten_param {
axis: 1
}
}
layer {
name: "pool6_mbox_conf"
type: "Convolution"
bottom: "pool6"
top: "pool6_mbox_conf"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 126
pad: 1
kernel_size: 3
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "pool6_mbox_conf_perm"
type: "Permute"
bottom: "pool6_mbox_conf"
top: "pool6_mbox_conf_perm"
permute_param {
order: 0
order: 2
order: 3
order: 1
}
}
layer {
name: "pool6_mbox_conf_flat"
type: "Flatten"
bottom: "pool6_mbox_conf_perm"
top: "pool6_mbox_conf_flat"
flatten_param {
axis: 1
}
}
layer {
name: "pool6_mbox_priorbox"
type: "PriorBox"
bottom: "pool6"
bottom: "data"
top: "pool6_mbox_priorbox"
prior_box_param {
min_size: 276.0
max_size: 330.0
aspect_ratio: 2
aspect_ratio: 3
flip: true
clip: true
variance: 0.1
variance: 0.1
variance: 0.2
variance: 0.2
}
}
layer {
name: "mbox_loc"
type: "Concat"
bottom: "conv4_3_norm_mbox_loc_flat"
bottom: "fc7_mbox_loc_flat"
bottom: "conv6_2_mbox_loc_flat"
bottom: "conv7_2_mbox_loc_flat"
bottom: "conv8_2_mbox_loc_flat"
bottom: "pool6_mbox_loc_flat"
top: "mbox_loc"
concat_param {
axis: 1
}
}
layer {
name: "mbox_conf"
type: "Concat"
bottom: "conv4_3_norm_mbox_conf_flat"
bottom: "fc7_mbox_conf_flat"
bottom: "conv6_2_mbox_conf_flat"
bottom: "conv7_2_mbox_conf_flat"
bottom: "conv8_2_mbox_conf_flat"
bottom: "pool6_mbox_conf_flat"
top: "mbox_conf"
concat_param {
axis: 1
}
}
layer {
name: "mbox_priorbox"
type: "Concat"
bottom: "conv4_3_norm_mbox_priorbox"
bottom: "fc7_mbox_priorbox"
bottom: "conv6_2_mbox_priorbox"
bottom: "conv7_2_mbox_priorbox"
bottom: "conv8_2_mbox_priorbox"
bottom: "pool6_mbox_priorbox"
top: "mbox_priorbox"
concat_param {
axis: 2
}
}
layer {
name: "mbox_loss"
type: "MultiBoxLoss"
bottom: "mbox_loc"
bottom: "mbox_conf"
bottom: "mbox_priorbox"
bottom: "label"
top: "mbox_loss"
include {
phase: TRAIN
}
propagate_down: true
propagate_down: true
propagate_down: false
propagate_down: false
loss_param {
normalization: VALID
}
multibox_loss_param {
loc_loss_type: SMOOTH_L1
conf_loss_type: SOFTMAX
loc_weight: 1.0
num_classes: 21
share_location: true
match_type: PER_PREDICTION
overlap_threshold: 0.5
use_prior_for_matching: true
background_label_id: 0
use_difficult_gt: true
do_neg_mining: true
neg_pos_ratio: 3.0
neg_overlap: 0.5
code_type: CENTER_SIZE
}
}

网络的pooling5之前的结构跟VGG一样，

到了pooling5

<pre name="code" class="html">pooling_param {
pool: MAX
kernel_size: 3
stride: 1
pad: 1
}

这也就是为什么fc6的输入大小是19x19了

fc6

layer {
name: "fc6"
type: "Convolution"
bottom: "pool5"
top: "fc6"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 1024
pad: 6
kernel_size: 3
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
dilation: 6
}
}fc6由VGG的全连接替换为了卷积层，并且采用了dilation机制，下面介绍下Convlution layer里的dilation:

根据文章《MULTI-SCALE CONTEXT AGGREGATION BYDILATED CONVOLUTIONS》的介绍，dilation 主要是增加feature map 的 receptive field(感受野)。

通过最近的caffe 源码中的im2col中的代码可知：

void im2col_cpu(const Dtype* data_im, const int channels,
const int height, const int width, const int kernel_h, const int kernel_w,
const int pad_h, const int pad_w,
const int stride_h, const int stride_w,
const int dilation_h, const int dilation_w,
Dtype* data_col) {
const int output_h = (height + 2 * pad_h - (dilation_h * (kernel_h - 1) + 1)) / stride_h + 1; const int output_w = (width + 2 * pad_w - (dilation_w * (kernel_w - 1) + 1)) / stride_w + 1;
const int channel_size = height * width;
for (int channel = channels; channel--; data_im += channel_size) {
for (int kernel_row = 0; kernel_row < kernel_h; kernel_row++) {
for (int kernel_col = 0; kernel_col < kernel_w; kernel_col++) {
int input_row = -pad_h + kernel_row * dilation_h;
for (int output_rows = output_h; output_rows; output_rows--) {
if (!is_a_ge_zero_and_a_lt_b(input_row, height)) {
for (int output_cols = output_w; output_cols; output_cols--) {
*(data_col++) = 0;
}
} else {
int input_col = -pad_w + kernel_col * dilation_w;
for (int output_col = output_w; output_col; output_col--) {
if (is_a_ge_zero_and_a_lt_b(input_col, width)) {
*(data_col++) = data_im[input_row * width + input_col];
} else {
*(data_col++) = 0;
}
input_col += stride_w;
}
}
input_row += stride_h;
}
}
}
}
}带有dilation的卷积层的输出为：

const int output_h = (height + 2 * pad_h -
(dilation_h * (kernel_h - 1) + 1)) / stride_h + 1;
const int output_w = (width + 2 * pad_w -
(dilation_w * (kernel_w - 1) + 1)) / stride_w + 1;

因此fc6的输出也就是fc7的输入大小也为19x19.

内容来自用户分享和网络整理，不保证内容的准确性，如有侵权内容，可联系管理员处理

标签： MultiBox detector Single Shot ssd 目标检测 dilation

相关文章推荐

新的分享

章节导航

SSD：Single Shot MultiBox Detector 心得 之神经网路（持续更新中。。。）

SSD：Single Shot MultiBox Detector 心得之神经网路（持续更新中。。。）