Caffe 源码阅读笔记 [基本模块] Caffe.cpp
2016-09-28 22:18
591 查看
概述
到目前为止,我们已经把所有Caffe的基本模块从最基本的SyncedMemory到最上层的Solver都过了一遍。那么Caffe最后是怎么把他们串在一起的呢?这一篇主要讲解Caffe从main函数开始是怎么完成整个训练过程和测试过程的。Caffe支持的命令:
train: 训练或者调整一个模型
test: 在测试集上测试一个模型
device_query : 打印GPU的调试信息
time: 压测一个模型的执行时间
Train函数
int train() { vector<string> stages = get_stages_from_flags(); caffe::SolverParameter solver_param; // 从FLAGS_solver文件里面读取solver配置 caffe::ReadSolverParamsFromTextFileOrDie(FLAGS_solver, &solver_param); solver_param.mutable_train_state()->set_level(FLAGS_level); for (int i = 0; i < stages.size(); i++) { solver_param.mutable_train_state()->add_stage(stages[i]); } vector<int> gpus; get_gpus(&gpus); if (gpus.size() == 0) { Caffe::set_mode(Caffe::CPU); // 如果没有GPU,使用CPU } else { solver_param.set_device_id(gpus[0]); // 设置当前GPU为gpus[0] Caffe::SetDevice(gpus[0]); Caffe::set_mode(Caffe::GPU); Caffe::set_solver_count(gpus.size()); } caffe::SignalHandler signal_handler( GetRequestedAction(FLAGS_sigint_effect), //注册收到SIGINT信号时做什么,默认是停止训练 GetRequestedAction(FLAGS_sighup_effect)); //注册收到SIGHUP信号时做什么,默认是做Snapshot // 通过SolverFactory创建一个Solver类 shared_ptr<caffe::Solver<float> > solver(caffe::SolverRegistry<float>::CreateSolver(solver_param)); solver->SetActionFunction(signal_handler.GetActionFunction()); if (FLAGS_snapshot.size()) { solver->Restore(FLAGS_snapshot.c_str()); // 如果已经有snapshot,从snapshot开始训练 } else if (FLAGS_weights.size()) { CopyLayers(solver.get(), FLAGS_weights); // 如果已经有保存的参数,则拷贝到网络里开始训练 } if (gpus.size() > 1) { // 如果GPU个数多于一个,则开始GPU并行训练 caffe::P2PSync<float> sync(solver, NULL, solver->param()); sync.Run(gpus); } else { solver->Solve(); // 否则进行网络参数优化 } return 0; }
Test函数
int test() { vector<string> stages = get_stages_from_flags(); vector<int> gpus; get_gpus(&gpus); if (gpus.size() != 0) { Caffe::SetDevice(gpus[0]); // 使用GPU gpus[0] Caffe::set_mode(Caffe::GPU); } else { Caffe::set_mode(Caffe::CPU); // 使用CPU } Net<float> caffe_net(FLAGS_model, caffe::TEST, FLAGS_level, &stages); // 创建一个测试网络 caffe_net.CopyTrainedLayersFrom(FLAGS_weights); // 把训练好的参数拷贝到网络里 vector<int> test_score_output_id; vector<float> test_score; float loss = 0; for (int i = 0; i < FLAGS_iterations; ++i) { float iter_loss; const vector<Blob<float>*>& result = caffe_net.Forward(&iter_loss); // 进行前向传播 loss += iter_loss; // 累计Loss值以求平均值 int idx = 0; for (int j = 0; j < result.size(); ++j) { const float* result_vec = result[j]->cpu_data(); for (int k = 0; k < result[j]->count(); ++k, ++idx) { const float score = result_vec[k]; if (i == 0) { test_score.push_back(score); test_score_output_id.push_back(j); } else { test_score[idx] += score; // 累计网络输出的blob的值以求平均值 } } } } loss /= FLAGS_iterations; // 除以迭代的次数计算平均值 for (int i = 0; i < test_score.size(); ++i) { const float loss_weight = caffe_net.blob_loss_weights()[ caffe_net.output_blob_indices()[test_score_output_id[i]]]; const float mean_score = test_score[i] / FLAGS_iterations; // 除以迭代的次数求平均值 if (loss_weight) { // 对blob值进行loss_weight加权 loss_msg_stream << " (* " << loss_weight << " = " << loss_weight * mean_score << " loss)"; } } return 0; }
device_query函数
int device_query() { vector<int> gpus; // 通过cudaGetDeviceCount(&count)得到GPU个数,然后返回GPU id 0,1,...,count-1 get_gpus(&gpus); for (int i = 0; i < gpus.size(); ++i) { caffe::Caffe::SetDevice(gpus[i]); // 设置当前GPU为gpus[i] caffe::Caffe::DeviceQuery(); // 获得当前GPU信息 } return 0; } // 设置当前GPU为device_id void Caffe::SetDevice(const int device_id) { int current_device; CUDA_CHECK(cudaGetDevice(¤t_device)); // 得到当前线程使用的GPU id if (current_device == device_id) { // 如果已经是了,则退出 return; } CUDA_CHECK(cudaSetDevice(device_id)); // 设置这个线程使用GPU device_id // Get()返回一个Caffe类型的Thread local,如果cublas_handle_和curand_generator_不为null,需要释放它们 if (Get().cublas_handle_) CUBLAS_CHECK(cublasDestroy(Get().cublas_handle_ c171 )); if (Get().curand_generator_) { CURAND_CHECK(curandDestroyGenerator(Get().curand_generator_)); } // 重新建立一个新的cublas_handle_和curand_generator_并存在Thread local里 CUBLAS_CHECK(cublasCreate(&Get().cublas_handle_)); CURAND_CHECK(curandCreateGenerator(&Get().curand_generator_, CURAND_RNG_PSEUDO_DEFAULT)); CURAND_CHECK(curandSetPseudoRandomGeneratorSeed(Get().curand_generator_, cluster_seedgen())); } // 进行设备的查询 void Caffe::DeviceQuery() { cudaDeviceProp prop; CUDA_CHECK(cudaGetDeviceProperties(&prop, device)); // 获取设备属性 // 答应属性内容, LOG(INFO) << "Device id: " << device; LOG(INFO) << "Major revision number: " << prop.major; LOG(INFO) << "Minor revision number: " << prop.minor; LOG(INFO) << "Name: " << prop.name; LOG(INFO) << "Total global memory: " << prop.totalGlobalMem; LOG(INFO) << "Total shared memory per block: " << prop.sharedMemPerBlock; LOG(INFO) << "Total registers per block: " << prop.regsPerBlock; LOG(INFO) << "Warp size: " << prop.warpSize; LOG(INFO) << "Maximum memory pitch: " << prop.memPitch; LOG(INFO) << "Maximum threads per block: " << prop.maxThreadsPerBlock; LOG(INFO) << "Maximum dimension of block: " << prop.maxThreadsDim[0] << ", " << prop.maxThreadsDim[1] << ", " << prop.maxThreadsDim[2]; LOG(INFO) << "Maximum dimension of grid: " << prop.maxGridSize[0] << ", " << 
prop.maxGridSize[1] << ", " << prop.maxGridSize[2]; LOG(INFO) << "Clock rate: " << prop.clockRate; LOG(INFO) << "Total constant memory: " << prop.totalConstMem; LOG(INFO) << "Texture alignment: " << prop.textureAlignment; LOG(INFO) << "Concurrent copy and execution: " << (prop.deviceOverlap ? "Yes" : "No"); LOG(INFO) << "Number of multiprocessors: " << prop.multiProcessorCount; LOG(INFO) << "Kernel execution timeout: " << (prop.kernelExecTimeoutEnabled ? "Yes" : "No"); return; }
time函数
int time() { caffe::Phase phase = get_phase_from_flags(caffe::TRAIN); vector<string> stages = get_stages_from_flags(); vector<int> gpus; get_gpus(&gpus); if (gpus.size() != 0) { // 使用GPU gpus[0] Caffe::SetDevice(gpus[0]); Caffe::set_mode(Caffe::GPU); } else { // 使用CPU Caffe::set_mode(Caffe::CPU); } Net<float> caffe_net(FLAGS_model, phase, FLAGS_level, &stages); float initial_loss; // 先做一次前向和反向传播来预先分配好内存,这样能使测试的结果更加稳定 caffe_net.Forward(&initial_loss); // 做一次前向传播。因为是速度测试,网络没有输入 caffe_net.Backward(); //再做一次反向传播 Timer total_timer, forward_timer, backward_timer, timer; total_timer.Start(); std::vector<double> forward_time_per_layer(layers.size(), 0.0); std::vector<double> backward_time_per_layer(layers.size(), 0.0); double forward_time = 0.0; double backward_time = 0.0; // 做FLAGS_iterations次迭代 for (int j = 0; j < FLAGS_iterations; ++j) { Timer iter_timer; iter_timer.Start(); forward_timer.Start(); // 对每一层分别做一次前向传播,并记录每层传播的时间 for (int i = 0; i < layers.size(); ++i) { timer.Start(); layers[i]->Forward(bottom_vecs[i], top_vecs[i]); forward_time_per_layer[i] += timer.MicroSeconds(); } forward_time += forward_timer.MicroSeconds(); backward_timer.Start(); // 对每层分别做反向传播 for (int i = layers.size() - 1; i >= 0; --i) { timer.Start(); layers[i]->Backward(top_vecs[i], bottom_need_backward[i], bottom_vecs[i]); backward_time_per_layer[i] += timer.MicroSeconds(); } backward_time += backward_timer.MicroSeconds(); } // 输出每层前向传播和反向传播需要的平均时间 LOG(INFO) << "Average time per layer: "; for (int i = 0; i < layers.size(); ++i) { const caffe::string& layername = layers[i]->layer_param().name(); LOG(INFO) << std::setfill(' ') << std::setw(10) << layername << "\tforward: " << forward_time_per_layer[i] / 1000 / FLAGS_iterations << " ms."; // 前向传播时间 LOG(INFO) << std::setfill(' ') << std::setw(10) << layername << "\tbackward: " << backward_time_per_layer[i] / 1000 / FLAGS_iterations << " ms."; // 反向传播时间 } total_timer.Stop(); LOG(INFO) << "Average Forward pass: " << forward_time / 1000 / 
FLAGS_iterations << " ms."; // 平均整个网络的前向传播时间 LOG(INFO) << "Average Backward pass: " << backward_time / 1000 / FLAGS_iterations << " ms."; // 整个网络的平均反向传播时间 LOG(INFO) << "Average Forward-Backward: " << total_timer.MilliSeconds() / FLAGS_iterations << " ms."; // 整个网络的前向加反向传播时间 LOG(INFO) << "Total Time: " << total_timer.MilliSeconds() << " ms."; LOG(INFO) << "*** Benchmark ends ***"; return 0; }
相关文章推荐
- Caffe 源码阅读笔记 [基本模块] Layer和LayerFactory
- SDL源码阅读笔记(1) 基本模块
- SDL源码阅读笔记(1) 基本模块
- SDL源码阅读笔记(1) 基本模块
- SDL源码阅读笔记(1) 基本模块
- caffe 源码阅读笔记(0):基本概述
- caffe源码阅读9-loss_layer.hpp+各cpp
- Caffe源码阅读笔记(1):Blob
- 非典型2D游戏引擎 Orx 源码阅读笔记(4) 用C实现的基本容器(List,HashTable,Tree)
- 学习笔记: 源码 caffe.cpp 初探
- Caffe 源码阅读笔记 [DB] 存储Caffe数据的LevelDB类
- SDL源码阅读笔记(3)渲染模块
- caffe源码阅读3-blob.cpp
- 非典型2D游戏引擎 Orx 源码阅读笔记(2) 基础模块与模块管理模块
- caffe源码阅读7-neuron_layers.hpp+各cpp
- Nginx 源码阅读笔记8 epoll 模块
- CI框架源码阅读笔记1 - 环境准备、基本术语和框架流程
- Yii源码阅读笔记 - 请求处理基本流程
- caffe源码阅读-插曲-math_function.cpp
- 音视频解码模块阅读笔记(三)—视频编解码的基本概念--转载