// Copyright 2014 BVLC and contributors.
#include <vector>

#include "caffe/blob.hpp"
#include "caffe/common.hpp"
#include "caffe/filler.hpp"
#include "caffe/layer.hpp"
#include "caffe/vision_layers.hpp"
#include "caffe/util/math_functions.hpp"

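// This file mainly consists of three methods: setup, forward, and backward.
//   setup    initializes the layer parameters, including the weights (w)
//            and the bias (b).
//   forward  implements forward propagation.
//   backward implements backward propagation.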

// M_ is the number of samples.
// K_ is the feature length of a single sample.
// N_ is the number of output neurons.
namespace caffe {

// Validates the blob counts, derives the layer dimensions, shapes the output
// blob, and (unless parameters already exist) allocates and fills the weight
// and bias blobs. Also prepares the all-ones bias multiplier.
//
// Dimensions set here:
//   M_ : number of samples, bottom[0]->num()
//   K_ : feature length of one sample, bottom[0]->count() / bottom[0]->num()
//        (count is num * channels * height * width)
//   N_ : number of output neurons, num_output from inner_product_param
//
// bottom: input blobs; exactly one is required.
// top:    output blobs; exactly one is required. (*top)[0] is reshaped to
//         (M_, N_, 1, 1).
//
// NOTE(review): the previous text of this function was missing its braces,
// the else-branch header, and several continuation lines. The lines marked
// "restored" below were reconstructed from the surrounding code and comments;
// confirm them against the project history.
template <typename Dtype>
void InnerProductLayer<Dtype>::SetUp(const vector<Blob<Dtype>*>& bottom,
      vector<Blob<Dtype>*>* top) {
  CHECK_EQ(bottom.size(), 1) << "IP Layer takes a single blob as input.";
  CHECK_EQ(top->size(), 1) << "IP Layer takes a single blob as output.";

  const int num_output = this->layer_param_.inner_product_param().num_output();
  bias_term_ = this->layer_param_.inner_product_param().bias_term();

  // Figure out the dimensions.
  M_ = bottom[0]->num();
  K_ = bottom[0]->count() / bottom[0]->num();
  N_ = num_output;
  // The output of the fully connected layer is (samples x outputs x 1 x 1).
  (*top)[0]->Reshape(bottom[0]->num(), num_output, 1, 1);

  // Check if we need to set up the weights.
  if (this->blobs_.size() > 0) {
    LOG(INFO) << "Skipping parameter initialization";
  } else {
    // Restored: blobs_ must hold the weight blob and, when the bias term is
    // enabled, the bias blob before either slot is reset below.
    if (bias_term_) {
      this->blobs_.resize(2);
    } else {
      this->blobs_.resize(1);
    }

    // Initialize the weight: a fresh N_ x K_ blob owned by blobs_[0].
    this->blobs_[0].reset(new Blob<Dtype>(1, 1, N_, K_));

    // Fill the weights according to the weight filler type configured for
    // this layer.
    // Restored: the filler argument and the Fill call.
    shared_ptr<Filler<Dtype> > weight_filler(GetFiller<Dtype>(
        this->layer_param_.inner_product_param().weight_filler()));
    weight_filler->Fill(this->blobs_[0].get());

    // If necessary, initialize and fill the bias term (N_ values).
    if (bias_term_) {
      this->blobs_[1].reset(new Blob<Dtype>(1, 1, 1, N_));
      // Restored: the filler argument and the Fill call.
      shared_ptr<Filler<Dtype> > bias_filler(GetFiller<Dtype>(
          this->layer_param_.inner_product_param().bias_filler()));
      bias_filler->Fill(this->blobs_[1].get());
    }
  }  // parameter initialization

  // Setting up the bias multiplier: a buffer of M_ ones. The forward and
  // backward passes use it to apply the bias to every sample and to sum the
  // bias gradient over samples.
  if (bias_term_) {
    bias_multiplier_.reset(new SyncedMemory(M_ * sizeof(Dtype)));
    // Restored: the right-hand side of this initializer, mirroring the
    // const cast applied to bias_multiplier_->cpu_data() elsewhere in the
    // file.
    Dtype* bias_multiplier_data =
        reinterpret_cast<Dtype*>(bias_multiplier_->mutable_cpu_data());
    for (int i = 0; i < M_; ++i) {
      bias_multiplier_data[i] = 1.;
    }
  }
}

// CPU forward pass: computes y = x * W^T, then adds the bias when enabled.
//   x (bottom_data): M_ x K_
//   W (weight):      stored in memory as N_ x K_, multiplied transposed
//   y (top_data):    M_ x N_
//   b (bias):        N_ values, used as a 1 x N_ matrix
//
// bottom: input blobs; bottom[0] supplies x.
// top:    output blobs; (*top)[0] receives y.
// Returns Dtype(0).
//
// NOTE(review): the previous text of this function had no braces around the
// function body or the bias branch; they are restored here so that `bias` is
// in scope for the gemm call that uses it.
template <typename Dtype>
Dtype InnerProductLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
    vector<Blob<Dtype>*>* top) {
  const Dtype* bottom_data = bottom[0]->cpu_data();
  Dtype* top_data = (*top)[0]->mutable_cpu_data();
  // The weight matrix is laid out as N_ x K_ in memory.
  const Dtype* weight = this->blobs_[0]->cpu_data();

  // y <- x * W^T
  // bottom_data: M_ x K_, weight: N_ x K_, top_data: M_ x N_
  caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasTrans, M_, N_, K_,
      (Dtype)1., bottom_data, weight, (Dtype)0., top_data);

  // y <- y + b
  if (bias_term_) {
    const Dtype* bias = this->blobs_[1]->cpu_data();

    // (M_ x 1 multiplier) * (1 x N_ bias) accumulated into top_data
    // (beta == 1), which adds the bias to the output of every sample.
    caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, M_, N_, 1,
        (Dtype)1., reinterpret_cast<const Dtype*>(bias_multiplier_->cpu_data()),
        bias, (Dtype)1., top_data);
  }
  return Dtype(0);
}

// CPU backward pass: computes the gradients with respect to the weights, the
// bias (when enabled), and the bottom data (when propagate_down is set).
// "data" buffers carry values; "diff" buffers carry gradients.
//   top_diff:    M_ x N_ (one row per sample)
//   bottom_data: M_ x K_
//   weight diff: N_ x K_
//
// top:            output blobs; top[0]'s diff is the incoming gradient.
// propagate_down: when true, the gradient for the bottom data is computed.
// bottom:         input blobs; (*bottom)[0] supplies x.
//
// NOTE(review): the previous text of this function was missing the final
// output argument of the gemv and second gemm calls and the closing braces.
// The arguments marked "restored" were reconstructed from the comments and
// the matrix dimensions passed to each call; confirm them against the
// project history.
template <typename Dtype>
void InnerProductLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const bool propagate_down,
    vector<Blob<Dtype>*>* bottom) {
  const Dtype* top_diff = top[0]->cpu_diff();
  const Dtype* bottom_data = (*bottom)[0]->cpu_data();

  // Gradient with respect to weight: top_diff^T * bottom_data.
  // top_diff: M_ x N_, bottom_data: M_ x K_, weight diff: N_ x K_
  caffe_cpu_gemm<Dtype>(CblasTrans, CblasNoTrans, N_, K_, M_, (Dtype)1.,
      top_diff, bottom_data, (Dtype)0., this->blobs_[0]->mutable_cpu_diff());

  if (bias_term_) {
    // Gradient with respect to bias: top_diff^T times the bias multiplier,
    // which sums top_diff over the M_ samples when the multiplier is all
    // ones.
    // Restored: the output argument (the bias blob's diff).
    caffe_cpu_gemv<Dtype>(CblasTrans, M_, N_, (Dtype)1., top_diff,
        reinterpret_cast<const Dtype*>(bias_multiplier_->cpu_data()), (Dtype)0.,
        this->blobs_[1]->mutable_cpu_diff());
  }

  if (propagate_down) {
    // Gradient with respect to bottom data: top_diff * weight.
    // top_diff: M_ x N_, weight: N_ x K_, result: M_ x K_
    // Restored: the output argument (the bottom blob's diff).
    caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, M_, K_, N_, (Dtype)1.,
        top_diff, this->blobs_[0]->cpu_data(), (Dtype)0.,
        (*bottom)[0]->mutable_cpu_diff());
  }
}


}  // namespace caffe
