
Caffe Source Code Reading Notes [Basic Modules]: Caffe.cpp


Overview

So far we have walked through all of Caffe's basic modules, from the lowest-level SyncedMemory up to the top-level Solver. How does Caffe finally tie them all together? This article explains how Caffe, starting from the main function, carries out the whole training and testing process.

Commands supported by Caffe

train: train or fine-tune a model

test: evaluate a model on a test set

device_query: print diagnostic information about the GPUs

time: benchmark the execution time of a model
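
Each of these commands is an ordinary function inside tools/caffe.cpp; main() looks the command name up in a map of function pointers and calls it. The following is a simplified sketch of that registration and dispatch mechanism (error handling, usage printing and the version/help flags are trimmed), so the names follow the real file but the body of main() is abbreviated.

// Every command has the signature int(void); caffe.cpp keeps a map from
// command name to function pointer.
typedef int (*BrewFunction)();
typedef std::map<caffe::string, BrewFunction> BrewMap;
BrewMap g_brew_map;

// RegisterBrewFunction(train); defines a static object whose constructor
// inserts "train" -> &train into g_brew_map at program start-up.
#define RegisterBrewFunction(func) \
namespace { \
class __Registerer_##func { \
 public: \
  __Registerer_##func() { \
    g_brew_map[#func] = &func; \
  } \
}; \
__Registerer_##func g_registerer_##func; \
}

// main() initializes gflags/glog, then dispatches argv[1], so
// "caffe train --solver=..." ends up calling train().
int main(int argc, char** argv) {
  caffe::GlobalInit(&argc, &argv);
  if (argc == 2) {
    return g_brew_map[caffe::string(argv[1])]();  // simplified: the real code validates the name first
  }
  gflags::ShowUsageWithFlagsRestrict(argv[0], "tools/caffe");
  return 1;
}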

The train() function

int train() {
  vector<string> stages = get_stages_from_flags();
  caffe::SolverParameter solver_param;
  // Read the solver configuration from the file named by FLAGS_solver
  caffe::ReadSolverParamsFromTextFileOrDie(FLAGS_solver, &solver_param);
  solver_param.mutable_train_state()->set_level(FLAGS_level);
  for (int i = 0; i < stages.size(); i++) {
    solver_param.mutable_train_state()->add_stage(stages[i]);
  }
  vector<int> gpus;
  get_gpus(&gpus);
  if (gpus.size() == 0) {
    Caffe::set_mode(Caffe::CPU);  // no GPU available, fall back to CPU
  } else {
    solver_param.set_device_id(gpus[0]);  // make gpus[0] the current GPU
    Caffe::SetDevice(gpus[0]);
    Caffe::set_mode(Caffe::GPU);
    Caffe::set_solver_count(gpus.size());
  }

  caffe::SignalHandler signal_handler(
      GetRequestedAction(FLAGS_sigint_effect),   // what to do on SIGINT, default: stop training
      GetRequestedAction(FLAGS_sighup_effect));  // what to do on SIGHUP, default: take a snapshot
  // Create a Solver through the solver factory
  shared_ptr<caffe::Solver<float> >
      solver(caffe::SolverRegistry<float>::CreateSolver(solver_param));
  solver->SetActionFunction(signal_handler.GetActionFunction());

  if (FLAGS_snapshot.size()) {
    solver->Restore(FLAGS_snapshot.c_str());  // a snapshot exists: resume training from it
  } else if (FLAGS_weights.size()) {
    CopyLayers(solver.get(), FLAGS_weights);  // pre-trained weights exist: copy them into the net (see the CopyLayers sketch below)
  }

  if (gpus.size() > 1) {  // more than one GPU: train in parallel across GPUs
    caffe::P2PSync<float> sync(solver, NULL, solver->param());
    sync.Run(gpus);
  } else {
    solver->Solve();  // otherwise optimize the network parameters on this device
  }
  return 0;
}
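
CopyLayers(), called above when --weights is given, is a small helper defined in the same file. The sketch below follows the original helper closely: --weights may name several .caffemodel files separated by commas, and each of them is copied by layer name into the training net and into every test net.

// Copy the weight files listed in model_list (comma separated) into the
// solver's train net and all of its test nets; layers are matched by name.
void CopyLayers(caffe::Solver<float>* solver, const std::string& model_list) {
  std::vector<std::string> model_names;
  boost::split(model_names, model_list, boost::is_any_of(","));
  for (int i = 0; i < model_names.size(); ++i) {
    LOG(INFO) << "Finetuning from " << model_names[i];
    solver->net()->CopyTrainedLayersFrom(model_names[i]);
    for (int j = 0; j < solver->test_nets().size(); ++j) {
      solver->test_nets()[j]->CopyTrainedLayersFrom(model_names[i]);
    }
  }
}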


The test() function

int test() {
  vector<string> stages = get_stages_from_flags();
  vector<int> gpus;
  get_gpus(&gpus);
  if (gpus.size() != 0) {
    Caffe::SetDevice(gpus[0]);  // use GPU gpus[0]
    Caffe::set_mode(Caffe::GPU);
  } else {
    Caffe::set_mode(Caffe::CPU);  // use the CPU
  }
  // Build a network in the TEST phase
  Net<float> caffe_net(FLAGS_model, caffe::TEST, FLAGS_level, &stages);
  caffe_net.CopyTrainedLayersFrom(FLAGS_weights);  // copy the trained weights into the network
  vector<int> test_score_output_id;
  vector<float> test_score;
  float loss = 0;
  for (int i = 0; i < FLAGS_iterations; ++i) {
    float iter_loss;
    const vector<Blob<float>*>& result = caffe_net.Forward(&iter_loss);  // run a forward pass
    loss += iter_loss;  // accumulate the loss so it can be averaged later
    int idx = 0;
    for (int j = 0; j < result.size(); ++j) {
      const float* result_vec = result[j]->cpu_data();
      for (int k = 0; k < result[j]->count(); ++k, ++idx) {
        const float score = result_vec[k];
        if (i == 0) {
          test_score.push_back(score);
          test_score_output_id.push_back(j);
        } else {
          test_score[idx] += score;  // accumulate the output blob values so they can be averaged later
        }
      }
    }
  }
  loss /= FLAGS_iterations;  // divide by the number of iterations to get the average loss
  for (int i = 0; i < test_score.size(); ++i) {
    const std::string& output_name = caffe_net.blob_names()[
        caffe_net.output_blob_indices()[test_score_output_id[i]]];
    const float loss_weight = caffe_net.blob_loss_weights()[
        caffe_net.output_blob_indices()[test_score_output_id[i]]];
    std::ostringstream loss_msg_stream;
    const float mean_score = test_score[i] / FLAGS_iterations;  // divide by the number of iterations to get the average score
    if (loss_weight) {  // report the blob value weighted by its loss_weight
      loss_msg_stream << " (* " << loss_weight
          << " = " << loss_weight * mean_score << " loss)";
    }
    LOG(INFO) << output_name << " = " << mean_score << loss_msg_stream.str();
  }
  return 0;
}


The device_query() function

int device_query() {
  vector<int> gpus;
  // get_gpus() returns the ids of the GPUs to query; with --gpu all it expands to
  // 0, 1, ..., count-1 using cudaGetDeviceCount (see the get_gpus sketch below)
  get_gpus(&gpus);
  for (int i = 0; i < gpus.size(); ++i) {
    caffe::Caffe::SetDevice(gpus[i]);  // make gpus[i] the current GPU
    caffe::Caffe::DeviceQuery();       // print information about the current GPU
  }
  return 0;
}
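
get_gpus(), used by every command above, turns the --gpu flag into a list of device ids. The sketch below follows the helper in tools/caffe.cpp with the CPU_ONLY branch left out: --gpu all expands to every device reported by cudaGetDeviceCount, otherwise the flag is parsed as a comma-separated list of ids.

// Translate the --gpu flag into a list of GPU device ids.
static void get_gpus(vector<int>* gpus) {
  if (FLAGS_gpu == "all") {
    int count = 0;
    CUDA_CHECK(cudaGetDeviceCount(&count));  // number of visible GPUs
    for (int i = 0; i < count; ++i) {
      gpus->push_back(i);
    }
  } else if (FLAGS_gpu.size()) {
    // e.g. --gpu 0,2 selects devices 0 and 2
    vector<string> strings;
    boost::split(strings, FLAGS_gpu, boost::is_any_of(","));
    for (int i = 0; i < strings.size(); ++i) {
      gpus->push_back(boost::lexical_cast<int>(strings[i]));
    }
  }
}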
// Make device_id the current GPU
void Caffe::SetDevice(const int device_id) {
  int current_device;
  CUDA_CHECK(cudaGetDevice(&current_device));  // GPU id currently used by this thread
  if (current_device == device_id) {  // already the requested device, nothing to do
    return;
  }
  CUDA_CHECK(cudaSetDevice(device_id));  // switch this thread to GPU device_id
  // Get() returns a thread-local Caffe instance (see the sketch below); if
  // cublas_handle_ and curand_generator_ are not null they must be released first
  if (Get().cublas_handle_) CUBLAS_CHECK(cublasDestroy(Get().cublas_handle_));
  if (Get().curand_generator_) {
    CURAND_CHECK(curandDestroyGenerator(Get().curand_generator_));
  }
  // Create a new cublas_handle_ and curand_generator_ and store them in the thread-local instance
  CUBLAS_CHECK(cublasCreate(&Get().cublas_handle_));
  CURAND_CHECK(curandCreateGenerator(&Get().curand_generator_,
      CURAND_RNG_PSEUDO_DEFAULT));
  CURAND_CHECK(curandSetPseudoRandomGeneratorSeed(Get().curand_generator_,
      cluster_seedgen()));
}
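
Get() returns a per-thread singleton, which is why SetDevice can destroy and recreate the cuBLAS handle and cuRAND generator without affecting other threads. A simplified sketch of how the thread-local instance is kept, based on the corresponding code in src/caffe/common.cpp:

// One Caffe object per thread, so each thread owns its own cublas_handle_
// and curand_generator_.
static boost::thread_specific_ptr<Caffe> thread_instance_;

Caffe& Caffe::Get() {
  if (!thread_instance_.get()) {  // first call from this thread: create the instance
    thread_instance_.reset(new Caffe());
  }
  return *(thread_instance_.get());
}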
// Query the current device
void Caffe::DeviceQuery() {
  cudaDeviceProp prop;
  int device;
  if (cudaSuccess != cudaGetDevice(&device)) {  // id of the current device
    printf("No cuda device present.\n");
    return;
  }
  CUDA_CHECK(cudaGetDeviceProperties(&prop, device));  // fetch the device properties
  // Print the properties
  LOG(INFO) << "Device id:                     " << device;
  LOG(INFO) << "Major revision number:         " << prop.major;
  LOG(INFO) << "Minor revision number:         " << prop.minor;
  LOG(INFO) << "Name:                          " << prop.name;
  LOG(INFO) << "Total global memory:           " << prop.totalGlobalMem;
  LOG(INFO) << "Total shared memory per block: " << prop.sharedMemPerBlock;
  LOG(INFO) << "Total registers per block:     " << prop.regsPerBlock;
  LOG(INFO) << "Warp size:                     " << prop.warpSize;
  LOG(INFO) << "Maximum memory pitch:          " << prop.memPitch;
  LOG(INFO) << "Maximum threads per block:     " << prop.maxThreadsPerBlock;
  LOG(INFO) << "Maximum dimension of block:    "
      << prop.maxThreadsDim[0] << ", " << prop.maxThreadsDim[1] << ", "
      << prop.maxThreadsDim[2];
  LOG(INFO) << "Maximum dimension of grid:     "
      << prop.maxGridSize[0] << ", " << prop.maxGridSize[1] << ", "
      << prop.maxGridSize[2];
  LOG(INFO) << "Clock rate:                    " << prop.clockRate;
  LOG(INFO) << "Total constant memory:         " << prop.totalConstMem;
  LOG(INFO) << "Texture alignment:             " << prop.textureAlignment;
  LOG(INFO) << "Concurrent copy and execution: "
      << (prop.deviceOverlap ? "Yes" : "No");
  LOG(INFO) << "Number of multiprocessors:     " << prop.multiProcessorCount;
  LOG(INFO) << "Kernel execution timeout:      "
      << (prop.kernelExecTimeoutEnabled ? "Yes" : "No");
  return;
}


The time() function

int time() {
  caffe::Phase phase = get_phase_from_flags(caffe::TRAIN);
  vector<string> stages = get_stages_from_flags();
  vector<int> gpus;
  get_gpus(&gpus);
  if (gpus.size() != 0) {
    // use GPU gpus[0]
    Caffe::SetDevice(gpus[0]);
    Caffe::set_mode(Caffe::GPU);
  } else {
    // use the CPU
    Caffe::set_mode(Caffe::CPU);
  }
  Net<float> caffe_net(FLAGS_model, phase, FLAGS_level, &stages);
  float initial_loss;
  // Run one forward and one backward pass first so that all memory is allocated
  // up front; this makes the timing results more stable
  caffe_net.Forward(&initial_loss);  // forward pass; for the speed benchmark the network is assumed to take no input blobs
  caffe_net.Backward();              // backward pass
  // Per-layer references taken from the net (these declarations appear in the
  // original tools/caffe.cpp and were omitted from the excerpt)
  const vector<shared_ptr<Layer<float> > >& layers = caffe_net.layers();
  const vector<vector<Blob<float>*> >& bottom_vecs = caffe_net.bottom_vecs();
  const vector<vector<Blob<float>*> >& top_vecs = caffe_net.top_vecs();
  const vector<vector<bool> >& bottom_need_backward =
      caffe_net.bottom_need_backward();
  Timer total_timer, forward_timer, backward_timer, timer;
  total_timer.Start();
  std::vector<double> forward_time_per_layer(layers.size(), 0.0);
  std::vector<double> backward_time_per_layer(layers.size(), 0.0);
  double forward_time = 0.0;
  double backward_time = 0.0;
  // Run FLAGS_iterations iterations
  for (int j = 0; j < FLAGS_iterations; ++j) {
    Timer iter_timer;
    iter_timer.Start();
    forward_timer.Start();
    // Forward each layer separately and record the time spent in it
    for (int i = 0; i < layers.size(); ++i) {
      timer.Start();
      layers[i]->Forward(bottom_vecs[i], top_vecs[i]);
      forward_time_per_layer[i] += timer.MicroSeconds();
    }
    forward_time += forward_timer.MicroSeconds();
    backward_timer.Start();
    // Backward each layer separately and record the time spent in it
    for (int i = layers.size() - 1; i >= 0; --i) {
      timer.Start();
      layers[i]->Backward(top_vecs[i], bottom_need_backward[i],
          bottom_vecs[i]);
      backward_time_per_layer[i] += timer.MicroSeconds();
    }
    backward_time += backward_timer.MicroSeconds();
  }
  // Report the average forward and backward time of every layer
  LOG(INFO) << "Average time per layer: ";
  for (int i = 0; i < layers.size(); ++i) {
    const caffe::string& layername = layers[i]->layer_param().name();
    LOG(INFO) << std::setfill(' ') << std::setw(10) << layername <<
        "\tforward: " << forward_time_per_layer[i] / 1000 /
        FLAGS_iterations << " ms.";  // average forward time of this layer
    LOG(INFO) << std::setfill(' ') << std::setw(10) << layername <<
        "\tbackward: " << backward_time_per_layer[i] / 1000 /
        FLAGS_iterations << " ms.";  // average backward time of this layer
  }
  total_timer.Stop();
  LOG(INFO) << "Average Forward pass: " << forward_time / 1000 /
      FLAGS_iterations << " ms.";  // average forward time of the whole network
  LOG(INFO) << "Average Backward pass: " << backward_time / 1000 /
      FLAGS_iterations << " ms.";  // average backward time of the whole network
  LOG(INFO) << "Average Forward-Backward: " << total_timer.MilliSeconds() /
      FLAGS_iterations << " ms.";  // average forward-plus-backward time of the whole network
  LOG(INFO) << "Total Time: " << total_timer.MilliSeconds() << " ms.";
  LOG(INFO) << "*** Benchmark ends ***";
  return 0;
}