
【Caffe】002 Source Code Analysis of caffe.cpp

2016-12-01 21:18
path-to-caffe/include/caffe/caffe.hpp

// caffe.hpp is the header file that you need to include in your code. It wraps
// all the internal caffe header files into one for simpler inclusion.

#ifndef CAFFE_CAFFE_HPP_
#define CAFFE_CAFFE_HPP_

#include "caffe/blob.hpp"
#include "caffe/common.hpp"
#include "caffe/filler.hpp"
#include "caffe/layer.hpp"
#include "caffe/layer_factory.hpp"
#include "caffe/net.hpp"
#include "caffe/parallel.hpp"
#include "caffe/proto/caffe.pb.h"
#include "caffe/solver.hpp"
#include "caffe/solver_factory.hpp"
#include "caffe/util/benchmark.hpp"
#include "caffe/util/io.hpp"
#include "caffe/util/upgrade_proto.hpp"

#endif  // CAFFE_CAFFE_HPP_
path-to-caffe/tools/caffe.cpp

#ifdef WITH_PYTHON_LAYER
#include "boost/python.hpp"
namespace bp = boost::python;
#endif

#include <gflags/gflags.h>
#include <glog/logging.h>

#include <cstring>
#include <map>
#include <string>
#include <vector>

#include "boost/algorithm/string.hpp"
#include "caffe/caffe.hpp"
#include "caffe/util/signal_handler.h"

using caffe::Blob;
using caffe::Caffe;
using caffe::Net;
using caffe::Layer;
using caffe::Solver;
using caffe::shared_ptr;
using caffe::string;
using caffe::Timer;
using caffe::vector;
using std::ostringstream;

// gflags definitions used for command-line argument parsing.
DEFINE_string(gpu, "",
    "Optional; run in GPU mode on given device IDs separated by ','."
    "Use '-gpu all' to run on all available GPUs. The effective training "
    "batch size is multiplied by the number of devices.");
DEFINE_string(solver, "",
    "The solver definition protocol buffer text file.");
DEFINE_string(model, "",
    "The model definition protocol buffer text file.");
DEFINE_string(phase, "",
    "Optional; network phase (TRAIN or TEST). Only used for 'time'.");
DEFINE_int32(level, 0,
    "Optional; network level.");
DEFINE_string(stage, "",
    "Optional; network stages (not to be confused with phase), "
    "separated by ','.");
DEFINE_string(snapshot, "",
    "Optional; the snapshot solver state to resume training.");
DEFINE_string(weights, "",
    "Optional; the pretrained weights to initialize finetuning, "
    "separated by ','. Cannot be set simultaneously with snapshot.");
DEFINE_int32(iterations, 50,
    "The number of iterations to run.");
DEFINE_string(sigint_effect, "stop",
    "Optional; action to take when a SIGINT signal is received: "
    "snapshot, stop or none.");
DEFINE_string(sighup_effect, "snapshot",
    "Optional; action to take when a SIGHUP signal is received: "
    "snapshot, stop or none.");

// Caffe command registration: a map from command name to handler function.
typedef int (*BrewFunction)();
typedef std::map<caffe::string, BrewFunction> BrewMap;
BrewMap g_brew_map;

#define RegisterBrewFunction(func) \
namespace { \
class __Registerer_##func { \
 public: /* NOLINT */ \
  __Registerer_##func() { \
    g_brew_map[#func] = &func; \
  } \
}; \
__Registerer_##func g_registerer_##func; \
}
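To make the mechanism concrete, RegisterBrewFunction(train) expands to roughly the following: a class whose constructor inserts &train into g_brew_map under the key "train" (#func stringizes the macro argument), plus one static instance of that class in an anonymous namespace. The static instance is constructed before main() runs, so the map is fully populated by the time GetBrewFunction() is called.

namespace {
class __Registerer_train {
 public:
  __Registerer_train() {
    g_brew_map["train"] = &train;  // #func stringized to "train"
  }
};
__Registerer_train g_registerer_train;
}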

static BrewFunction GetBrewFunction(const caffe::string& name) {
  if (g_brew_map.count(name)) {
    return g_brew_map[name];
  } else {
    LOG(ERROR) << "Available caffe actions:";
    for (BrewMap::iterator it = g_brew_map.begin();
         it != g_brew_map.end(); ++it) {
      LOG(ERROR) << "\t" << it->first;
    }
    LOG(FATAL) << "Unknown action: " << name;
    return NULL;  // not reachable, just to suppress old compiler warnings.
  }
}

// Parse the command-line flag -gpu:
// use the GPU devices with the given IDs, or all available GPUs.
static void get_gpus(vector<int>* gpus) {
  if (FLAGS_gpu == "all") {  // corresponds to "-gpu all"
    int count = 0;
#ifndef CPU_ONLY
    CUDA_CHECK(cudaGetDeviceCount(&count));  // query the number of available GPUs
#else
    NO_GPU;
#endif
    for (int i = 0; i < count; ++i) {
      gpus->push_back(i);
    }
  } else if (FLAGS_gpu.size()) {
    vector<string> strings;
    boost::split(strings, FLAGS_gpu, boost::is_any_of(","));  // multiple device IDs are separated by ','
    for (int i = 0; i < strings.size(); ++i) {
      gpus->push_back(boost::lexical_cast<int>(strings[i]));
    }
  } else {
    CHECK_EQ(gpus->size(), 0);
  }
}
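The split-and-cast idiom above is easy to verify in isolation. A minimal standalone sketch (not part of caffe.cpp; the flag value is hard-coded):

#include <iostream>
#include <string>
#include <vector>
#include "boost/algorithm/string.hpp"
#include "boost/lexical_cast.hpp"

int main() {
  const std::string flag = "0,2,3";  // stand-in for FLAGS_gpu
  std::vector<std::string> parts;
  boost::split(parts, flag, boost::is_any_of(","));
  std::vector<int> gpus;
  for (size_t i = 0; i < parts.size(); ++i) {
    gpus.push_back(boost::lexical_cast<int>(parts[i]));  // "0" -> 0, etc.
  }
  std::cout << "parsed " << gpus.size() << " device IDs" << std::endl;  // prints 3
  return 0;
}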

// Parse the command-line flag -phase:
// selects the TRAIN or TEST phase when benchmarking with the 'time' command.
// This function is only called from time(), with default_value = caffe::TRAIN.
caffe::Phase get_phase_from_flags(caffe::Phase default_value) {
  if (FLAGS_phase == "")
    return default_value;
  if (FLAGS_phase == "TRAIN")
    return caffe::TRAIN;
  if (FLAGS_phase == "TEST")
    return caffe::TEST;
  LOG(FATAL) << "phase must be \"TRAIN\" or \"TEST\"";
  return caffe::TRAIN;  // Avoid warning
}

// Parse the command-line flag -stage.
// Note: boost::split on an empty string yields a single empty token, so when
// -stage is unset the returned vector is {""}.
vector<string> get_stages_from_flags() {
  vector<string> stages;
  boost::split(stages, FLAGS_stage, boost::is_any_of(","));
  return stages;
}

// The caffe tool is invoked as
//     caffe <command> <args>
//
// To add a new command, define a function "int command()",
// then register it with RegisterBrewFunction(command).
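As an illustration (this command is hypothetical, not part of caffe.cpp), a new 'hello' command would look like:

int hello() {
  LOG(INFO) << "Hello from a custom command.";
  return 0;
}
RegisterBrewFunction(hello);

After registration, running "caffe hello" dispatches to hello() through GetBrewFunction().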

// Device diagnostics: print diagnostic information for the GPU device(s)
// given by the -gpu flag.
int device_query() {
  LOG(INFO) << "Querying GPUs " << FLAGS_gpu;
  vector<int> gpus;
  get_gpus(&gpus);
  for (int i = 0; i < gpus.size(); ++i) {
    caffe::Caffe::SetDevice(gpus[i]);  // calls cudaSetDevice()
    caffe::Caffe::DeviceQuery();       // calls cudaGetDeviceProperties()
  }
  return 0;
}
RegisterBrewFunction(device_query);
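With a CUDA build, a typical invocation is

    caffe device_query -gpu 0

and "caffe device_query -gpu all" prints the diagnostics of every visible device.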

// Handle the command-line flag -weights, passed in as model_list:
// load pretrained parameters for the train and test nets from the given
// caffemodel file(s).
void CopyLayers(caffe::Solver<float>* solver, const std::string& model_list) {
  std::vector<std::string> model_names;
  boost::split(model_names, model_list, boost::is_any_of(",") );
  for (int i = 0; i < model_names.size(); ++i) {
    LOG(INFO) << "Finetuning from " << model_names[i];
    solver->net()->CopyTrainedLayersFrom(model_names[i]);
    for (int j = 0; j < solver->test_nets().size(); ++j) {
      solver->test_nets()[j]->CopyTrainedLayersFrom(model_names[i]);
    }
  }
}
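Because model_list is split on ',', finetuning can start from several caffemodel files at once; if two files contain a layer with the same name, the one loaded later wins. For example (file names are placeholders):

    caffe train -solver solver.prototxt -weights part1.caffemodel,part2.caffemodel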

// Parse the command-line flags -sigint_effect and -sighup_effect:
// choose what the 'train' command does when it receives an interrupt or
// hangup signal.
caffe::SolverAction::Enum GetRequestedAction(
    const std::string& flag_value) {
  if (flag_value == "stop") {
    return caffe::SolverAction::STOP;  // stop training
  }
  if (flag_value == "snapshot") {
    return caffe::SolverAction::SNAPSHOT;  // take a snapshot and keep training
  }
  if (flag_value == "none") {
    return caffe::SolverAction::NONE;  // ignore the signal
  }
  LOG(FATAL) << "Invalid signal effect \""<< flag_value << "\" was specified";
}
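For example, adding -sigint_effect snapshot to a train invocation makes Ctrl-C take a snapshot and keep training instead of stopping; as the flag definitions above show, the defaults are "stop" for SIGINT and "snapshot" for SIGHUP.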

// Train or finetune a model.
int train() {
  CHECK_GT(FLAGS_solver.size(), 0) << "Need a solver definition to train.";  // -solver is required
  CHECK(!FLAGS_snapshot.size() || !FLAGS_weights.size())  // -snapshot and -weights cannot both be set
      << "Give a snapshot to resume training or weights to finetune "
      "but not both.";
  vector<string> stages = get_stages_from_flags();

  caffe::SolverParameter solver_param;
  caffe::ReadSolverParamsFromTextFileOrDie(FLAGS_solver, &solver_param);  // parse the solver parameters from the solver definition file

  solver_param.mutable_train_state()->set_level(FLAGS_level);
  for (int i = 0; i < stages.size(); i++) {
    solver_param.mutable_train_state()->add_stage(stages[i]);
  }

  // If no GPU was selected on the command line with -gpu, check whether the
  // solver definition asks for one: solver_mode selects GPU/CPU mode and
  // device_id selects the GPU device number.
  if (FLAGS_gpu.size() == 0
      && solver_param.solver_mode() == caffe::SolverParameter_SolverMode_GPU) {
    if (solver_param.has_device_id()) {
      FLAGS_gpu = "" +
          boost::lexical_cast<string>(solver_param.device_id());
    } else {  // if the solver asks for GPU mode without a device_id, default to device 0
      FLAGS_gpu = "" + boost::lexical_cast<string>(0);
    }
  }

  // Select the GPU devices and set the run mode.
  vector<int> gpus;
  get_gpus(&gpus);
  if (gpus.size() == 0) {
    LOG(INFO) << "Use CPU.";
    Caffe::set_mode(Caffe::CPU);
  } else {
    ostringstream s;
    for (int i = 0; i < gpus.size(); ++i) {
      s << (i ? ", " : "") << gpus[i];
    }
    LOG(INFO) << "Using GPUs " << s.str();
#ifndef CPU_ONLY
    cudaDeviceProp device_prop;
    for (int i = 0; i < gpus.size(); ++i) {
      cudaGetDeviceProperties(&device_prop, gpus[i]);
      LOG(INFO) << "GPU " << gpus[i] << ": " << device_prop.name;
    }
#endif
    solver_param.set_device_id(gpus[0]);
    Caffe::SetDevice(gpus[0]);
    Caffe::set_mode(Caffe::GPU);
    Caffe::set_solver_count(gpus.size());
  }

  // Choose what to do when training receives a hangup or interrupt signal.
  caffe::SignalHandler signal_handler(
        GetRequestedAction(FLAGS_sigint_effect),
        GetRequestedAction(FLAGS_sighup_effect));

  shared_ptr<caffe::Solver<float> >
      solver(caffe::SolverRegistry<float>::CreateSolver(solver_param));

  solver->SetActionFunction(signal_handler.GetActionFunction());

  // Handle the command-line flag -snapshot:
  // restore the training state from a previously saved snapshot.
  if (FLAGS_snapshot.size()) {
    LOG(INFO) << "Resuming from " << FLAGS_snapshot;
    solver->Restore(FLAGS_snapshot.c_str());
  } else if (FLAGS_weights.size()) {
    // Handle the command-line flag -weights:
    // load pretrained train/test net parameters from the given caffemodel file(s).
    CopyLayers(solver.get(), FLAGS_weights);
  }

  // Multi-GPU parallel training.
  if (gpus.size() > 1) {
    caffe::P2PSync<float> sync(solver, NULL, solver->param());
    sync.Run(gpus);
  } else {
    LOG(INFO) << "Starting Optimization";
    solver->Solve();
  }
  LOG(INFO) << "Optimization Done.";
  return 0;
}
RegisterBrewFunction(train);
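Typical train invocations, matching the branches above (file names are placeholders):

    caffe train -solver solver.prototxt
    caffe train -solver solver.prototxt -snapshot net_iter_1000.solverstate
    caffe train -solver solver.prototxt -weights pretrained.caffemodel
    caffe train -solver solver.prototxt -gpu 0,1

The first trains from scratch, the second resumes from a snapshot, the third finetunes from pretrained weights, and the last runs data-parallel training on two GPUs via P2PSync.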

// Test: score a trained model.
int test() {
  CHECK_GT(FLAGS_model.size(), 0) << "Need a model definition to score.";  // -model is required
  CHECK_GT(FLAGS_weights.size(), 0) << "Need model weights to score.";  // -weights is required
  vector<string> stages = get_stages_from_flags();

  // Select the GPU device and set the run mode.
  vector<int> gpus;
  get_gpus(&gpus);
  if (gpus.size() != 0) {
    LOG(INFO) << "Use GPU with device ID " << gpus[0];
#ifndef CPU_ONLY
    cudaDeviceProp device_prop;
    cudaGetDeviceProperties(&device_prop, gpus[0]);
    LOG(INFO) << "GPU device name: " << device_prop.name;
#endif
    Caffe::SetDevice(gpus[0]);
    Caffe::set_mode(Caffe::GPU);
  } else {
    LOG(INFO) << "Use CPU.";
    Caffe::set_mode(Caffe::CPU);
  }

  // Instantiate the network.
  Net<float> caffe_net(FLAGS_model, caffe::TEST, FLAGS_level, &stages);
  // Handle the command-line flag -weights:
  // load the trained test-net parameters from the given caffemodel file(s).
  caffe_net.CopyTrainedLayersFrom(FLAGS_weights);
  LOG(INFO) << "Running for " << FLAGS_iterations << " iterations.";  // -iterations sets the number of iterations

  vector<int> test_score_output_id;
  vector<float> test_score;
  float loss = 0;
  for (int i = 0; i < FLAGS_iterations; ++i) {
    float iter_loss;
    const vector<Blob<float>*>& result =
        caffe_net.Forward(&iter_loss);  // run a forward pass and get the loss
    loss += iter_loss;
    int idx = 0;
    for (int j = 0; j < result.size(); ++j) {
      const float* result_vec = result[j]->cpu_data();
      for (int k = 0; k < result[j]->count(); ++k, ++idx) {
        const float score = result_vec[k];
        if (i == 0) {
          test_score.push_back(score);
          test_score_output_id.push_back(j);
        } else {
          test_score[idx] += score;
        }
        const std::string& output_name = caffe_net.blob_names()[
            caffe_net.output_blob_indices()[j]];
        LOG(INFO) << "Batch " << i << ", " << output_name << " = " << score;
      }
    }
  }
  loss /= FLAGS_iterations;
  LOG(INFO) << "Loss: " << loss;
  for (int i = 0; i < test_score.size(); ++i) {
    const std::string& output_name = caffe_net.blob_names()[
        caffe_net.output_blob_indices()[test_score_output_id[i]]];
    const float loss_weight = caffe_net.blob_loss_weights()[
        caffe_net.output_blob_indices()[test_score_output_id[i]]];
    std::ostringstream loss_msg_stream;
    const float mean_score = test_score[i] / FLAGS_iterations;
    if (loss_weight) {
      loss_msg_stream << " (* " << loss_weight
                      << " = " << loss_weight * mean_score << " loss)";
    }
    LOG(INFO) << output_name << " = " << mean_score << loss_msg_stream.str();
  }

  return 0;
}
RegisterBrewFunction(test);
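A typical invocation, scoring a model over 100 batches on GPU 0 (file names are placeholders):

    caffe test -model train_val.prototxt -weights trained.caffemodel -iterations 100 -gpu 0

Note that test always builds the net in the TEST phase, and each reported output is the mean over the -iterations forward passes, as computed by the accumulation loop above.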

// Time: benchmark a model's execution time.
int time() {
  CHECK_GT(FLAGS_model.size(), 0) << "Need a model definition to time.";  // -model is required
  caffe::Phase phase = get_phase_from_flags(caffe::TRAIN);  // -phase defaults to TRAIN
  vector<string> stages = get_stages_from_flags();

  // Select the GPU device and set the run mode.
  vector<int> gpus;
  get_gpus(&gpus);
  if (gpus.size() != 0) {
    LOG(INFO) << "Use GPU with device ID " << gpus[0];
    Caffe::SetDevice(gpus[0]);
    Caffe::set_mode(Caffe::GPU);
  } else {
    LOG(INFO) << "Use CPU.";
    Caffe::set_mode(Caffe::CPU);
  }

  // Instantiate the network.
  Net<float> caffe_net(FLAGS_model, phase, FLAGS_level, &stages);

  // Run one complete forward-backward pass up front to trigger memory
  // allocation, so the later timed iterations are more stable.
  // For the speed benchmark, the network is assumed to take no input blobs.
  LOG(INFO) << "Performing Forward";
  float initial_loss;
  caffe_net.Forward(&initial_loss);
  LOG(INFO) << "Initial loss: " << initial_loss;
  LOG(INFO) << "Performing Backward";
  caffe_net.Backward();

  const vector<shared_ptr<Layer<float> > >& layers = caffe_net.layers();
  const vector<vector<Blob<float>*> >& bottom_vecs = caffe_net.bottom_vecs();
  const vector<vector<Blob<float>*> >& top_vecs = caffe_net.top_vecs();
  const vector<vector<bool> >& bottom_need_backward =
      caffe_net.bottom_need_backward();
  LOG(INFO) << "*** Benchmark begins ***";
  LOG(INFO) << "Testing for " << FLAGS_iterations << " iterations.";  // -iterations sets the number of iterations
  Timer total_timer;
  total_timer.Start();
  Timer forward_timer;
  Timer backward_timer;
  Timer timer;
  std::vector<double> forward_time_per_layer(layers.size(), 0.0);
  std::vector<double> backward_time_per_layer(layers.size(), 0.0);
  double forward_time = 0.0;
  double backward_time = 0.0;
  for (int j = 0; j < FLAGS_iterations; ++j) {
    Timer iter_timer;
    iter_timer.Start();
    forward_timer.Start();
    for (int i = 0; i < layers.size(); ++i) {  // layer-by-layer forward pass
      timer.Start();
      layers[i]->Forward(bottom_vecs[i], top_vecs[i]);
      forward_time_per_layer[i] += timer.MicroSeconds();
    }
    forward_time += forward_timer.MicroSeconds();

    backward_timer.Start();
    for (int i = layers.size() - 1; i >= 0; --i) {  // layer-by-layer backward pass
      timer.Start();
      layers[i]->Backward(top_vecs[i], bottom_need_backward[i],
                          bottom_vecs[i]);
      backward_time_per_layer[i] += timer.MicroSeconds();
    }
    backward_time += backward_timer.MicroSeconds();
    LOG(INFO) << "Iteration: " << j + 1 << " forward-backward time: "
              << iter_timer.MilliSeconds() << " ms.";
  }
  LOG(INFO) << "Average time per layer: ";
  for (int i = 0; i < layers.size(); ++i) {  // average forward/backward time per layer
    const caffe::string& layername = layers[i]->layer_param().name();
    LOG(INFO) << std::setfill(' ') << std::setw(10) << layername <<
        "\tforward: " << forward_time_per_layer[i] / 1000 /
        FLAGS_iterations << " ms.";
    LOG(INFO) << std::setfill(' ') << std::setw(10) << layername <<
        "\tbackward: " << backward_time_per_layer[i] / 1000 /
        FLAGS_iterations << " ms.";
  }
  total_timer.Stop();
  LOG(INFO) << "Average Forward pass: " << forward_time / 1000 /
      FLAGS_iterations << " ms.";
  LOG(INFO) << "Average Backward pass: " << backward_time / 1000 /
      FLAGS_iterations << " ms.";
  LOG(INFO) << "Average Forward-Backward: " << total_timer.MilliSeconds() /
      FLAGS_iterations << " ms.";
  LOG(INFO) << "Total Time: " << total_timer.MilliSeconds() << " ms.";
  LOG(INFO) << "*** Benchmark ends ***";
  return 0;
}
RegisterBrewFunction(time);
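A typical invocation, timing 10 forward-backward passes (the model file is a placeholder):

    caffe time -model train_val.prototxt -iterations 10 -gpu 0

Since time() calls each layer's Forward()/Backward() directly and never loads a caffemodel, the benchmark runs with freshly initialized weights; the timings do not depend on trained parameters.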

int main(int argc, char** argv) {
  // Write logs to file as usual, but also echo them to the terminal (stderr).
  FLAGS_alsologtostderr = 1;
  // Set the Caffe version string.
  gflags::SetVersionString(AS_STRING(CAFFE_VERSION));
  // Usage message.
  gflags::SetUsageMessage("command line brew\n"
      "usage: caffe <command> <args>\n\n"
      "commands:\n"
      "  train           train or finetune a model\n"
      "  test            score a model\n"
      "  device_query    show GPU diagnostic information\n"
      "  time            benchmark model execution time");
  // Parse the command-line flags with gflags and initialize glog for logging.
  caffe::GlobalInit(&argc, &argv);

  if (argc == 2) {
#ifdef WITH_PYTHON_LAYER
    try {
#endif
      return GetBrewFunction(caffe::string(argv[1]))();
#ifdef WITH_PYTHON_LAYER
    } catch (bp::error_already_set) {
      PyErr_Print();
      return 1;
    }
#endif
  } else {
    gflags::ShowUsageWithFlagsRestrict(argv[0], "tools/caffe");
  }
}
Tags: Caffe, deep learning, source code