Caffe 实战系列:proto 文件格式及含义解析——如何定义网络、如何设置网络参数(以 AlexNet 为例) 2016.3.30

# Name of the network.
name: "AlexNet"
# Data layer used while training.
layer {
  name: "data"
  type: "Data"
  top: "data"
  top: "label"
  include {
    phase: TRAIN  # this layer is only included in the training phase
  }
  # Pre-processing: random mirroring, a crop size of 227, and subtraction of
  # the mean file.
  transform_param {
    mirror: true
    crop_size: 227
    mean_file: "data/ilsvrc12/imagenet_mean.binaryproto"
  }
  # Where the data comes from.
  data_param {
    source: "examples/imagenet/ilsvrc12_train_lmdb"
    batch_size: 256
    backend: LMDB
  }
}
# Data layer used while testing.
layer {
  name: "data"
  type: "Data"
  top: "data"
  top: "label"
  include {
    phase: TEST  # this layer is only used in the test phase
  }
  # No mirroring at test time; cropping and mean subtraction are unchanged.
  transform_param {
    mirror: false
    crop_size: 227
    mean_file: "data/ilsvrc12/imagenet_mean.binaryproto"
  }
  data_param {
    source: "examples/imagenet/ilsvrc12_val_lmdb"
    batch_size: 50
    backend: LMDB
  }
}
# Convolution layer.
layer {
  name: "conv1"
  type: "Convolution"
  bottom: "data"
  top: "conv1"
  # Learning-related settings (learning-rate and weight-decay multipliers).
  # There are two param entries because the layer has two learnable blobs:
  # the first entry applies to the weights (filters), the second to the
  # biases. The number of entries is unrelated to `group`.
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  # Convolution settings: output count, kernel, stride and the initializers
  # for the weights and the biases.
  convolution_param {
    num_output: 96
    kernel_size: 11
    stride: 4
    # `group` is not set for conv1, so the default declared in
    # ConvolutionParameter (group = 1) applies.
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
# ReLU layer; top and bottom name the same blob ("conv1").
layer {
  name: "relu1"
  type: "ReLU"
  bottom: "conv1"
  top: "conv1"
}
# Normalization (LRN) layer.
layer {
  name: "norm1"
  type: "LRN"
  bottom: "conv1"
  top: "norm1"
  lrn_param {
    local_size: 5
    alpha: 0.0001
    beta: 0.75
  }
}
# Pooling layer.
layer {
  name: "pool1"
  type: "Pooling"
  bottom: "norm1"
  top: "pool1"
  pooling_param {
    pool: MAX
    kernel_size: 3
    stride: 2
  }
}
# Second convolution layer (grouped).
layer {
  name: "conv2"
  type: "Convolution"
  bottom: "pool1"
  top: "conv2"
  # First param entry: weights; second param entry: biases.
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 256
    pad: 2
    kernel_size: 5
    group: 2  # grouped convolution with group = 2
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 0.1
    }
  }
}
# ReLU layer; top and bottom name the same blob ("conv2").
layer {
  name: "relu2"
  type: "ReLU"
  bottom: "conv2"
  top: "conv2"
}
# Normalization (LRN) layer.
layer {
  name: "norm2"
  type: "LRN"
  bottom: "conv2"
  top: "norm2"
  lrn_param {
    local_size: 5
    alpha: 0.0001
    beta: 0.75
  }
}
# Pooling layer.
layer {
  name: "pool2"
  type: "Pooling"
  bottom: "norm2"
  top: "pool2"
  pooling_param {
    pool: MAX
    kernel_size: 3
    stride: 2
  }
}
# Third convolution layer (no `group` set).
layer {
  name: "conv3"
  type: "Convolution"
  bottom: "pool2"
  top: "conv3"
  # First param entry: weights; second param entry: biases.
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 384
    pad: 1
    kernel_size: 3
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
# ReLU layer; top and bottom name the same blob ("conv3").
layer {
  name: "relu3"
  type: "ReLU"
  bottom: "conv3"
  top: "conv3"
}
# Fourth convolution layer (grouped).
layer {
  name: "conv4"
  type: "Convolution"
  bottom: "conv3"
  top: "conv4"
  # First param entry: weights; second param entry: biases.
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 384
    pad: 1
    kernel_size: 3
    group: 2  # grouped convolution with group = 2
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 0.1
    }
  }
}
# ReLU layer; top and bottom name the same blob ("conv4").
layer {
  name: "relu4"
  type: "ReLU"
  bottom: "conv4"
  top: "conv4"
}
# Fifth convolution layer (grouped).
layer {
  name: "conv5"
  type: "Convolution"
  bottom: "conv4"
  top: "conv5"
  # First param entry: weights; second param entry: biases.
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 256
    pad: 1
    kernel_size: 3
    group: 2  # grouped convolution with group = 2
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 0.1
    }
  }
}
# ReLU layer; top and bottom name the same blob ("conv5").
layer {
  name: "relu5"
  type: "ReLU"
  bottom: "conv5"
  top: "conv5"
}
# Pooling layer.
layer {
  name: "pool5"
  type: "Pooling"
  bottom: "conv5"
  top: "pool5"
  pooling_param {
    pool: MAX
    kernel_size: 3
    stride: 2
  }
}
# First fully connected (InnerProduct) layer.
layer {
  name: "fc6"
  type: "InnerProduct"
  bottom: "pool5"
  top: "fc6"
  # First param entry: weights; second param entry: biases.
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  inner_product_param {
    num_output: 4096
    weight_filler {
      type: "gaussian"
      std: 0.005
    }
    bias_filler {
      type: "constant"
      value: 0.1
    }
  }
}
# ReLU layer; top and bottom name the same blob ("fc6").
layer {
  name: "relu6"
  type: "ReLU"
  bottom: "fc6"
  top: "fc6"
}
# Dropout layer; top and bottom name the same blob ("fc6").
layer {
  name: "drop6"
  type: "Dropout"
  bottom: "fc6"
  top: "fc6"
  dropout_param {
    dropout_ratio: 0.5
  }
}
# Second fully connected (InnerProduct) layer.
layer {
  name: "fc7"
  type: "InnerProduct"
  bottom: "fc6"
  top: "fc7"
  # First param entry: weights; second param entry: biases.
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  inner_product_param {
    num_output: 4096
    weight_filler {
      type: "gaussian"
      std: 0.005
    }
    bias_filler {
      type: "constant"
      value: 0.1
    }
  }
}
# ReLU layer; top and bottom name the same blob ("fc7").
layer {
  name: "relu7"
  type: "ReLU"
  bottom: "fc7"
  top: "fc7"
}
# Dropout layer; top and bottom name the same blob ("fc7").
layer {
  name: "drop7"
  type: "Dropout"
  bottom: "fc7"
  top: "fc7"
  dropout_param {
    dropout_ratio: 0.5
  }
}
# Final fully connected (InnerProduct) layer with 1000 outputs.
layer {
  name: "fc8"
  type: "InnerProduct"
  bottom: "fc7"
  top: "fc8"
  # First param entry: weights; second param entry: biases.
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  inner_product_param {
    num_output: 1000
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
# Accuracy layer: takes the "fc8" output and the "label" blob.
layer {
  name: "accuracy"
  type: "Accuracy"
  bottom: "fc8"
  bottom: "label"
  top: "accuracy"
  include {
    phase: TEST  # this layer is only included in the test phase
  }
}
# Loss layer: takes the "fc8" output and the "label" blob.
layer {
  name: "loss"
  type: "SoftmaxWithLoss"
  bottom: "fc8"
  bottom: "label"
  top: "loss"
}


// Message that stores parameters used to apply transformation
// to the data layer's data
message TransformationParameter {
  // For data pre-processing, we can do simple scaling and subtracting the
  // data mean, if provided. Note that the mean subtraction is always carried
  // out before scaling.
  // Scales the pixel values: pixel_value = scale * pixel_value.
  optional float scale = 1 [default = 1];
  // Specify if we want to randomly mirror data.
  optional bool mirror = 2 [default = false];
  // Specify if we would like to randomly crop an image.
  // This is the size of the random crop.
  optional uint32 crop_size = 3 [default = 0];
  // mean_file and mean_value cannot be specified at the same time
  // Path of the mean file.
  optional string mean_file = 4;
  // if specified can be repeated once (would subtract it from all the channels)
  // or can be repeated the same number of times as channels
  // (would subtract them from the corresponding channel)
  // Explicit mean values can be given instead of a mean file.
  repeated float mean_value = 5;
  // Force the decoded image to have 3 color channels.
  optional bool force_color = 6 [default = false];
  // Force the decoded image to have 1 color channels (grayscale).
  optional bool force_gray = 7 [default = false];
}

(2)数据输入层中数据源参数 data_param 的定义

// Parameters describing the data source of a data input layer
// (the `data_param` block of a layer definition).
message DataParameter {
  // Database type: LEVELDB or LMDB.
  // LEVELDB must be declared because it is the default of `backend` below.
  enum DB {
    LEVELDB = 0;
    LMDB = 1;
  }
  // Specify the data source.
  // Path of the database.
  optional string source = 1;
  // Specify the batch size.
  optional uint32 batch_size = 4;
  // The rand_skip variable is for the data layer to skip a few data points
  // to avoid all asynchronous sgd clients to start at the same point. The skip
  // point would be set as rand_skip * rand(0,1). Note that rand_skip should not
  // be larger than the number of keys in the database.
  // DEPRECATED. Each solver accesses a different subset of the database.
  // Per the original note: a random number in [0, rand_skip-1] is drawn and
  // that many records are skipped.
  optional uint32 rand_skip = 7 [default = 0];
  // Which database backend holds the data.
  optional DB backend = 8 [default = LEVELDB];
  // DEPRECATED. See TransformationParameter. For data pre-processing, we can do
  // simple scaling and subtracting the data mean, if provided. Note that the
  // mean subtraction is always carried out before scaling.
  optional float scale = 2 [default = 1];
  optional string mean_file = 3;
  // DEPRECATED. See TransformationParameter. Specify if we would like to randomly
  // crop an image.
  optional uint32 crop_size = 5 [default = 0];
  // DEPRECATED. See TransformationParameter. Specify if we want to randomly mirror
  // data.
  optional bool mirror = 6 [default = false];
  // Force the encoded image to have 3 color channels
  optional bool force_encoded_color = 9 [default = false];
  // Prefetch queue (Number of batches to prefetch to host memory, increase if
  // data access bandwidth varies).
  optional uint32 prefetch = 10 [default = 4];
}


// LayerParameter next available layer-specific ID: 139 (last added: tile_param)
// NOTE(review): this is an excerpt. The header above implies the full message
// has further layer-specific fields that are not shown here, and `Phase` is
// declared outside this excerpt.
message LayerParameter {
  optional string name = 1; // the layer name
  optional string type = 2; // the layer type
  repeated string bottom = 3; // the name of each bottom blob
  repeated string top = 4; // the name of each top blob

  // The train / test phase for computation.
  optional Phase phase = 10;

  // The amount of weight to assign each top blob in the objective.
  // Each layer assigns a default value, usually of either 0 or 1,
  // to each top blob.
  repeated float loss_weight = 5;

  // Specifies training parameters (multipliers on global learning constants,
  // and the name and other settings used for weight sharing).
  // This is the field written as `param { ... }` in a layer definition.
  repeated ParamSpec param = 6;
}

// Per-parameter training settings: an optional name for weight sharing plus
// multipliers on the global learning rate and weight decay.
message ParamSpec {
  // The names of the parameter blobs -- useful for sharing parameters among
  // layers, but never required otherwise. To share a parameter between two
  // layers, give it a (non-empty) name.
  optional string name = 1;

  // Whether to require shared weights to have the same shape, or just the same
  // count -- defaults to STRICT if unspecified.
  optional DimCheckMode share_mode = 2;
  enum DimCheckMode {
    // STRICT (default) requires that num, channels, height, width each match.
    STRICT = 0;
    // PERMISSIVE requires only the count (num*channels*height*width) to match.
    PERMISSIVE = 1;
  }

  // The multiplier on the global learning rate for this parameter.
  optional float lr_mult = 3 [default = 1.0];

  // The multiplier on the global weight decay for this parameter.
  optional float decay_mult = 4 [default = 1.0];
}


// Parameters of a convolution layer (the `convolution_param` block of a
// layer definition).
message ConvolutionParameter {
  optional uint32 num_output = 1; // The number of outputs for the layer
  optional bool bias_term = 2 [default = true]; // whether to have bias terms

  // Pad, kernel size, and stride are all given as a single value for equal
  // dimensions in all spatial dimensions, or once per spatial dimension.
  repeated uint32 pad = 3; // The padding size; defaults to 0
  repeated uint32 kernel_size = 4; // The kernel size
  repeated uint32 stride = 6; // The stride; defaults to 1

  // For 2D convolution only, the *_h and *_w versions may also be used to
  // specify both spatial dimensions, i.e. pad, kernel size and stride may
  // differ between height and width.
  optional uint32 pad_h = 9 [default = 0]; // The padding height (2D only)
  optional uint32 pad_w = 10 [default = 0]; // The padding width (2D only)
  optional uint32 kernel_h = 11; // The kernel height (2D only)
  optional uint32 kernel_w = 12; // The kernel width (2D only)
  optional uint32 stride_h = 13; // The stride height (2D only)
  optional uint32 stride_w = 14; // The stride width (2D only)

  // Convolution group setting.
  optional uint32 group = 5 [default = 1]; // The group size for group conv

  // Initializers for the weights and the biases.
  optional FillerParameter weight_filler = 7; // The filler for the weight
  optional FillerParameter bias_filler = 8; // The filler for the bias
  // Implementation used for the computation. DEFAULT must be declared because
  // it is the default of `engine` below.
  // NOTE(review): the values name implementations (CAFFE vs. CUDNN), not a
  // CPU/GPU switch -- confirm against the Caffe sources.
  enum Engine {
    DEFAULT = 0;
    CAFFE = 1;
    CUDNN = 2;
  }
  optional Engine engine = 15 [default = DEFAULT];

  // The axis to interpret as "channels" when performing convolution.
  // Preceding dimensions are treated as independent inputs;
  // succeeding dimensions are treated as "spatial".
  // With (N, C, H, W) inputs, and axis == 1 (the default), we perform
  // N independent 2D convolutions, sliding C-channel (or (C/g)-channels, for
  // groups g>1) filters across the spatial axes (H, W) of the input.
  // With (N, C, D, H, W) inputs, and axis == 1, we perform
  // N independent 3D convolutions, sliding (C/g)-channels
  // filters across the spatial axes (D, H, W) of the input.
  // In short: this is the index of the channel axis, not a channel count.
  optional int32 axis = 16 [default = 1];

  // Whether to force use of the general ND convolution, even if a specific
  // implementation for blobs of the appropriate number of spatial dimensions
  // is available. (Currently, there is only a 2D-specific convolution
  // implementation; for input blobs with num_axes != 2, this option is
  // ignored and the ND implementation will be used.)
  optional bool force_nd_im2col = 17 [default = false];
}



// Parameters of a weight/bias initializer ("filler").
message FillerParameter {
  // The filler type.
  optional string type = 1 [default = 'constant'];
  // Used by the constant filler.
  optional float value = 2 [default = 0]; // the value in constant filler
  // Used by the uniform filler.
  optional float min = 3 [default = 0]; // the min value in uniform filler
  optional float max = 4 [default = 1]; // the max value in uniform filler
  // Used by the Gaussian filler.
  optional float mean = 5 [default = 0]; // the mean value in Gaussian filler
  optional float std = 6 [default = 1]; // the std value in Gaussian filler
  // The expected number of non-zero output weights for a given input in
  // Gaussian filler -- the default -1 means don't perform sparsification.
  optional int32 sparse = 7 [default = -1];
  // Normalize the filler variance by fan_in, fan_out, or their average.
  // Applies to 'xavier' and 'msra' fillers.
  enum VarianceNorm {
    FAN_IN = 0;
    FAN_OUT = 1;
    // The "average" option described above.
    AVERAGE = 2;
  }
  optional VarianceNorm variance_norm = 8 [default = FAN_IN];
}


// Message that stores parameters used by LRNLayer
message LRNParameter {
  optional uint32 local_size = 1 [default = 5];
  optional float alpha = 2 [default = 1.];
  optional float beta = 3 [default = 0.75];
  // ACROSS_CHANNELS must be declared because it is the default of
  // `norm_region` below.
  enum NormRegion {
    ACROSS_CHANNELS = 0;
    WITHIN_CHANNEL = 1;
  }
  optional NormRegion norm_region = 4 [default = ACROSS_CHANNELS];
  optional float k = 5 [default = 1.];
}



// Parameters of an inner product (fully connected) layer, i.e. the
// `inner_product_param` block of a layer definition.
message InnerProductParameter {
  optional uint32 num_output = 1; // The number of outputs for the layer
  optional bool bias_term = 2 [default = true]; // whether to have bias terms
  optional FillerParameter weight_filler = 3; // The filler for the weight
  optional FillerParameter bias_filler = 4; // The filler for the bias

  // The first axis to be lumped into a single inner product computation;
  // all preceding axes are retained in the output.
  // May be negative to index from the end (e.g., -1 for the last axis).
  optional int32 axis = 5 [default = 1];
}



// Parameters of a pooling layer (the `pooling_param` block of a layer
// definition).
message PoolingParameter {
  // The available pooling methods.
  enum PoolMethod {
    MAX = 0;
    AVE = 1;
    STOCHASTIC = 2;
  }
  optional PoolMethod pool = 1 [default = MAX]; // The pooling method
  // Pad, kernel size, and stride are all given as a single value for equal
  // dimensions in height and width or as Y, X pairs.
  // `pad` gives equal (square) padding, while `pad_h` / `pad_w` allow a
  // rectangular one; the same holds for kernel_size and stride.
  optional uint32 pad = 4 [default = 0]; // The padding size (equal in Y, X)
  optional uint32 pad_h = 9 [default = 0]; // The padding height
  optional uint32 pad_w = 10 [default = 0]; // The padding width
  optional uint32 kernel_size = 2; // The kernel size (square)
  optional uint32 kernel_h = 5; // The kernel height
  optional uint32 kernel_w = 6; // The kernel width
  optional uint32 stride = 3 [default = 1]; // The stride (equal in Y, X)
  optional uint32 stride_h = 7; // The stride height
  optional uint32 stride_w = 8; // The stride width
  // DEFAULT must be declared because it is the default of `engine` below.
  enum Engine {
    DEFAULT = 0;
    CAFFE = 1;
    CUDNN = 2;
  }
  optional Engine engine = 11 [default = DEFAULT];
  // If global_pooling then it will pool over the size of the bottom by doing
  // kernel_h = bottom->height and kernel_w = bottom->width
  optional bool global_pooling = 12 [default = false];
}


// Parameters of a dropout layer (the `dropout_param` block of a layer
// definition).
message DropoutParameter {
  optional float dropout_ratio = 1 [default = 0.5]; // dropout ratio
}


