caffe源码(8)-video_data_layer.cpp
2018-01-30 15:18
417 查看
video_data_layer层是在caffe里面新添加的一个DataLayer,该层允许caffe的输入data层数据为视频。由于视频是由一帧一帧的图像组成,因此该层的定义主要参考image_data_layer层。
同样遵循:VideoData层参数定义->VideoData数据层声明->VideoData数据层实现的顺序,介绍如下:
一.caffe.proto层参数定义
层参数定义文件位于:src/caffe/proto/caffe.protomessage VideoDataParameter { //VideoData层参数设定 optional string source = 1;//训练和测试所用的txt文档的位置 optional uint32 batch_size = 4 [default = 1]; optional uint32 rand_skip = 7 [default = 0]; optional bool shuffle = 8 [default = false]; optional uint32 new_height = 9 [default = 0]; optional uint32 new_width = 10 [default = 0]; optional uint32 new_length = 13;//new_length:一个视频最小片段包含几张图片文件,帧数 optional bool is_color = 11 [default = true]; optional float scale = 2 [default = 1]; optional string mean_file = 3; optional uint32 crop_size = 5 [default = 0]; optional bool mirror = 6 [default = false]; optional string root_folder = 12 [default = ""]; optional bool show_data = 14 [default = false]; optional bool use_image = 15 [default = false]; optional bool use_temporal_jitter = 16 [default = false]; optional uint32 sampling_rate = 17 [default = 1]; optional uint32 max_sampling_rate = 18 [default = 1]; optional bool use_sampling_rate_jitter = 19 [default = false]; //use_multiple_label和num_of_labels两个参数需要同时设定 optional bool use_multiple_label = 20 [default = false]; optional uint32 num_of_labels = 21 [default = 1]; }
二.VideoData层声明
层声明文件位于:include/caffe/layers/video_data_layer.hpp#ifndef CAFFE_VIDEO_DATA_LAYER_HPP_ #define CAFFE_VIDEO_DATA_LAYER_HPP_ #include <string> #include <utility> #include <vector> #include "caffe/blob.hpp" #include "caffe/data_transformer.hpp" #include "caffe/internal_thread.hpp" #include "caffe/layer.hpp" #include "caffe/layers/base_data_layer.hpp" #include "caffe/proto/caffe.pb.h" namespace caffe { //类声明 //VideoDataLayer层寄存于BasePrefetchingDataLayer template <typename Dtype> class VideoDataLayer : public BasePrefetchingDataLayer<Dtype> { public: //显式构造函数 explicit VideoDataLayer(const LayerParameter& param) : BasePrefetchingDataLayer<Dtype>(param) {} virtual ~VideoDataLayer(); virtual void DataLayerSetUp(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top); virtual inline const char* type() const { return "VideoData"; } virtual inline int ExactNumBottomBlobs() const { return 0; } virtual inline int ExactNumTopBlobs() const { return 3; } protected: //随机数生成器,声明在include/caffe/caffe.hpp中 //caffe里的随机数是一个很重要的概念,最主要用于初始化权重,shuffle(洗牌)的时候也要用到 shared_ptr<Caffe::RNG> prefetch_rng_; virtual void Shuff 4000 leClips(); virtual void load_batch(Batch<Dtype>* batch); //从param_获取VideoData层参数 vector<string> file_list_; vector<int> start_frm_list_; vector<int> label_list_; vector<int> individual_sampling_rate_list_; vector<vector<int> > multiple_label_list_; vector<float> overlap_list_; vector<int> shuffle_index_; int lines_id_; }; } // namespace caffe #endif // CAFFE_VIDEO_DATA_LAYER_HPP_
三.VideoData层实现
层实现文件位于:src/caffe/layers/video_data_layer.cpp#ifdef USE_OPENCV #include <opencv2/core/core.hpp> #include <fstream> // NOLINT(readability/streams) #include <iostream> // NOLINT(readability/streams) #include <string> #include <utility> #include <vector> #include "caffe/data_transformer.hpp" #include "caffe/layers/base_data_layer.hpp" #include "caffe/layers/video_data_layer.hpp" #include "caffe/util/benchmark.hpp" #include "caffe/util/io.hpp" #include "caffe/util/image_io.hpp" #include "caffe/util/math_functions.hpp" #include "caffe/util/rng.hpp" namespace caffe { template <typename Dtype> VideoDataLayer<Dtype>::~VideoDataLayer<Dtype>() { this->StopInternalThread(); } template <typename Dtype> void VideoDataLayer<Dtype>::DataLayerSetUp(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) { //从param_获取层参数 const int new_length = this->layer_param_.video_data_param().new_length(); const int new_height = this->layer_param_.video_data_param().new_height(); const int new_width = this->layer_param_.video_data_param().new_width(); string root_folder = this->layer_param_.video_data_param().root_folder(); //caffe中只能处理height和width大小一致的图片 CHECK((new_height == 0 && new_width == 0) || (new_height > 0 && new_width > 0)) << "Current implementation requires " "new_height and new_width to be set at the same time."; // Read the list file //从param_中读取视频数据文件及参数 const string& source = this->layer_param_.video_data_param().source(); const bool use_temporal_jitter = this->layer_param_.video_data_param().use_temporal_jitter(); const bool use_image = this->layer_param_.video_data_param().use_image(); const int sampling_rate = this->layer_param_.video_data_param().sampling_rate(); const bool use_multiple_label = this->layer_param_.video_data_param().use_multiple_label(); if (use_multiple_label) {//若指定使用多标签,必须设置num_of_labels参数 CHECK(this->layer_param_.video_data_param().has_num_of_labels()) << "number of labels must be set together with use multiple labels"; } const int 
num_of_labels = this->layer_param_.video_data_param().num_of_labels(); LOG(INFO) << "Opening file " << source; //打开视频数据文件流 std::ifstream infile(source.c_str()); int count = 0; string filename, labels; int start_frm, label, individual_sampling_rate; // 030317 float overlap; // 030317 if (!use_multiple_label) { if ((!use_image) && use_temporal_jitter){ while (infile >> filename >> label) { file_list_.push_back(filename); label_list_.push_back(label); shuffle_index_.push_back(count); count++; } } else { // only modify the case of 1. image seq & 2. single label 030317 //文件内容格式:文件位置->文件名称->开始帧->标签->帧率->重叠度 while (infile >> filename >> start_frm >> label >> individual_sampling_rate >> overlap) { //获取文件内容 file_list_.push_back(filename); start_frm_list_.push_back(start_frm); label_list_.push_back(label); overlap_list_.push_back(overlap); individual_sampling_rate_list_.push_back(individual_sampling_rate); shuffle_index_.push_back(count); count++; } } } else { if ((!use_image) && use_temporal_jitter){ while (infile >> filename >> labels) { file_list_.push_back(filename); shuffle_index_.push_back(count); vector<int> label_set; int tmp_int; stringstream sstream(labels); while (sstream >> tmp_int) { label_set.push_back(tmp_int); if (sstream.peek() == ',') sstream.ignore(); } multiple_label_list_.push_back(label_set); label_list_.push_back(label_set[0]); count++; } } else { //文件内容格式:文件位置->文件名称->开始帧->标签 while (infile >> filename >> start_frm >> labels) { file_list_.push_back(filename); start_frm_list_.push_back(start_frm); shuffle_index_.push_back(count); vector<int> label_set; int tmp_int; stringstream sstream(labels); while (sstream >> tmp_int) { label_set.push_back(tmp_int); if (sstream.peek() == ',') sstream.ignore(); } multiple_label_list_.push_back(label_set); label_list_.push_back(label_set[0]); count++; } } } infile.close(); if (this->layer_param_.video_data_param().shuffle()) {//若指定了shuffle参数 // randomly shuffle data //产生随机数,对data进行随机shuffle LOG(INFO) << "Shuffling 
data"; const unsigned int prefetch_rng_seed = caffe_rng_rand(); prefetch_rng_.reset(new Caffe::RNG(prefetch_rng_seed)); ShuffleClips(); } if (count==0){ LOG(INFO) << "Failed to read the clip list" << std::endl; } lines_id_ = 0; LOG(INFO) << "A total of " << shuffle_index_.size() << " video chunks."; // Check if we would need to randomly skip a few data points //是否需要执行随机跳过skip操作 if (this->layer_param_.video_data_param().rand_skip()) { unsigned int skip = caffe_rng_rand() % this->layer_param_.video_data_param().rand_skip(); LOG(INFO) << "Skipping first " << skip << " data points."; CHECK_GT(shuffle_index_.size(), skip) << "Not enough points to skip"; lines_id_ = skip; } // Read a data point, and use it to initialize the top blob. VolumeDatum datum; int id = shuffle_index_[lines_id_]; if (!use_image){//没有使用图像,读取video数据到datum if (use_temporal_jitter){ CHECK(ReadVideoToVolumeDatum((root_folder + file_list_[0]).c_str(), 0, label_list_[0], new_length, new_height, new_width, sampling_rate, &datum)); } else CHECK(ReadVideoToVolumeDatum((root_folder + file_list_[id]).c_str(), start_frm_list_[id], label_list_[id], new_length, new_height, new_width, sampling_rate, &datum)); } else{//使用图像,读取image数据到datum CHECK(ReadImageSequenceToVolumeDatum((root_folder + file_list_[id]).c_str(), start_frm_list_[id], label_list_[id], new_length, new_height, new_width, sampling_rate, &datum, overlap_list_[id])); // 030317 no change -> overlap added } // Use data_transformer to infer the expected blob shape from a cv_image. //用data_transform推断datum的形状 vector<int> top_shape = this->data_transformer_->InferBlobShape(datum); this->transformed_data_.Reshape(top_shape); // Reshape prefetch_data and top[0] according to the batch_size. 
//根据batch_size对prefetch_data和top[0]进行Reshape const int batch_size = this->layer_param_.video_data_param().batch_size(); CHECK_GT(batch_size, 0) << "Positive batch size required";//batch_size必须大于0 top_shape[0] = batch_size; for (int i = 0; i < this->prefetch_.size(); ++i) { this->prefetch_[i]->data_.Reshape(top_shape); } top[0]->Reshape(top_shape); LOG(INFO) << "output data size: " << top[0]->shape(0) << "," << top[0]->shape(1) << "," << top[0]->shape(2) << "," << top[0]->shape(3) << "," << top[0]->shape(4); // label vector<int> label_shape; label e304 _shape.push_back(batch_size); if (use_multiple_label) label_shape.push_back(num_of_labels); top[1]->Reshape(label_shape); for (int i = 0; i < this->prefetch_.size(); ++i) { this->prefetch_[i]->label_.Reshape(label_shape); } // overlap 030317 vector<int> overlap_shape; overlap_shape.push_back(batch_size); top[2]->Reshape(overlap_shape); for (int i = 0; i < this->prefetch_.size(); ++i) { this->prefetch_[i]->overlap_.Reshape(overlap_shape); } } template <typename Dtype> void VideoDataLayer<Dtype>::ShuffleClips() { caffe::rng_t* prefetch_rng = static_cast<caffe::rng_t*>(prefetch_rng_->generator()); shuffle(shuffle_index_.begin(), shuffle_index_.end(), prefetch_rng); } // This function is called on prefetch thread template <typename Dtype> void VideoDataLayer<Dtype>::load_batch(Batch<Dtype>* batch) { CPUTimer batch_timer; batch_timer.Start(); double read_time = 0; double trans_time = 0; CPUTimer timer; CHECK(batch->data_.count()); CHECK(this->transformed_data_.count()); //从param_获取层参数定义 VideoDataParameter video_data_param = this->layer_param_.video_data_param(); const int batch_size = video_data_param.batch_size(); const int new_length = video_data_param.new_length(); const int new_height = video_data_param.new_height(); const int new_width = video_data_param.new_width(); string root_folder = video_data_param.root_folder(); const bool use_image = video_data_param.use_image(); const bool use_temporal_jitter = 
video_data_param.use_temporal_jitter(); int sampling_rate = video_data_param.sampling_rate(); const int max_sampling_rate = video_data_param.max_sampling_rate(); const bool use_sampling_rate_jitter = video_data_param.use_sampling_rate_jitter(); const bool show_data = video_data_param.show_data(); const bool use_multiple_label = this->layer_param_.video_data_param().use_multiple_label(); if (use_multiple_label) { CHECK(this->layer_param_.video_data_param().has_num_of_labels()) << "number of labels must be set together with use multiple labels"; } const int num_of_labels = this->layer_param_.video_data_param().num_of_labels(); // Reshape according to the first image of each batch // on single input batches allows for inputs of varying dimension. // Read a data point, and use it to initialize the top blob. VolumeDatum datum; int id = shuffle_index_[lines_id_]; if (!use_image){ if (use_temporal_jitter){ ReadVideoToVolumeDatum((root_folder + file_list_[0]).c_str(), 0, label_list_[0], new_length, new_height, new_width, sampling_rate, &datum); } else { ReadVideoToVolumeDatum((root_folder + file_list_[id]).c_str(), start_frm_list_[id], label_list_[id], new_length, new_height, new_width, sampling_rate, &datum); } } else { // LOG(INFO) << "read video from " << file_list_[id].c_str(); CHECK(ReadImageSequenceToVolumeDatum((root_folder + file_list_[id]).c_str(), start_frm_list_[id], label_list_[id], new_length, new_height, new_width, sampling_rate, &datum, overlap_list_[id])); // 030317 no change -> overlap added } // Use data_transformer to infer the expected blob shape from a cv_img. vector<int> top_shape = this->data_transformer_->InferBlobShape(datum); this->transformed_data_.Reshape(top_shape); // Reshape batch according to the batch_size. 
top_shape[0] = batch_size; batch->data_.Reshape(top_shape); Dtype* prefetch_data = batch->data_.mutable_cpu_data(); Dtype* prefetch_label = batch->label_.mutable_cpu_data(); Dtype* prefetch_overlap = batch->overlap_.mutable_cpu_data(); // 030317 // datum scales const int dataset_size = shuffle_index_.size(); for (int item_id = 0; item_id < batch_size; ++item_id) { // get a blob if (use_sampling_rate_jitter) { sampling_rate = caffe::caffe_rng_rand() % (max_sampling_rate) + 1; } timer.Start(); CHECK_GT(dataset_size, lines_id_); bool read_status; int id = this->shuffle_index_[this->lines_id_]; if (!use_image){ if (!use_temporal_jitter){ read_status = ReadVideoToVolumeDatum((root_folder + this->file_list_[id]).c_str(), this->start_frm_list_[id], this->label_list_[id], new_length, new_height, new_width, sampling_rate, &datum); }else{ read_status = ReadVideoToVolumeDatum((root_folder + this->file_list_[id]).c_str(), -1, this->label_list_[id], new_length, new_height, new_width, sampling_rate, &datum); } } else { if (!use_temporal_jitter) { // LOG(INFO) << "read video from " << this->file_list_[id].c_str(); read_status = ReadImageSequenceToVolumeDatum((root_folder + this->file_list_[id]).c_str(), this->start_frm_list_[id], this->label_list_[id], new_length, new_height, new_width, sampling_rate*this->individual_sampling_rate_list_[id], &datum, overlap_list_[id]); // 030317 -> overlap added } else { int num_of_frames = this->start_frm_list_[id]; int use_start_frame; if (num_of_frames < new_length * sampling_rate){ LOG(INFO) << "not enough frames; having " << num_of_frames; read_status = false; } else { if (this->phase_ == TRAIN) use_start_frame = caffe_rng_rand()%(num_of_frames-new_length*sampling_rate+1) + 1; else use_start_frame = 0; read_status = ReadImageSequenceToVolumeDatum((root_folder + this->file_list_[id]).c_str(), use_start_frame, this->label_list_[id], new_length, new_height, new_width, sampling_rate*this->individual_sampling_rate_list_[id], &datum, 
overlap_list_[id]); // 030317 -> overlap added } } } if (this->phase_ == TEST){ CHECK(read_status) << "Testing must not miss any example"; } if (!read_status) { this->lines_id_++; if (this->lines_id_ >= dataset_size) { // We have reached the end. Restart from the first. LOG(INFO) << "Restarting data prefetching from start."; this->lines_id_ = 0; if (this->layer_param_.video_data_param().shuffle()){ ShuffleClips(); } } item_id--; continue; } read_time += timer.MicroSeconds(); timer.Start(); // Apply transformations (mirror, crop...) to the video vector<int> shape_vec(5, 0); shape_vec[0] = item_id; int offset = batch->data_.offset(shape_vec); this->transformed_data_.set_cpu_data(prefetch_data + offset); this->data_transformer_->VideoTransform(datum, &(this->transformed_data_)); trans_time += timer.MicroSeconds(); if (!use_multiple_label) { prefetch_label[item_id] = datum.label(); prefetch_overlap[item_id] = datum.overlap(); // 030317 //LOG(INFO) << "datum.overlap(): " << datum.overlap(); // 030317 } else { caffe_set<Dtype>(num_of_labels, Dtype(0), prefetch_label + item_id * num_of_labels); for (int index= 0; index < this->multiple_label_list_[id].size(); index++) { prefetch_label[item_id * num_of_labels + this->multiple_label_list_[id][index]] = Dtype(1); } } // Show visualization if (show_data){ const Dtype* data_buffer = (Dtype*)(prefetch_data + offset); int image_size, channel_size; image_size = top_shape[3] * top_shape[4]; channel_size = top_shape[2] * image_size; for (int l = 0; l < top_shape[2]; ++l) { for (int c = 0; c < top_shape[1]; ++c) { cv::Mat img; char ch_name[64]; BufferToGrayImage(data_buffer + c * channel_size + l * image_size, top_shape[3], top_shape[4], &img); sprintf(ch_name, "Channel %d", c); cv::namedWindow(ch_name, CV_WINDOW_AUTOSIZE); cv::imshow(ch_name, img); } cv::waitKey(100); } } // go to the next iter this->lines_id_++; if (lines_id_ >= dataset_size) { // We have reached the end. Restart from the first. 
DLOG(INFO) << "Restarting data prefetching from start."; lines_id_ = 0; if (this->layer_param_.video_data_param().shuffle()) { ShuffleClips(); } } } batch_timer.Stop(); DLOG(INFO) << "Prefetch batch: " << batch_timer.MilliSeconds() << " ms."; DLOG(INFO) << " Read time: " << read_time / 1000 << " ms."; DLOG(INFO) << "Transform time: " << trans_time / 1000 << " ms."; // std::ofstream out("profile_inference.log", std::ofstream::out | std::ofstream::app); // out << "data CPU " << batch_timer.MilliSeconds() << "\n"; // out.close(); } INSTANTIATE_CLASS(VideoDataLayer); REGISTER_LAYER_CLASS(VideoData); } // namespace caffe #endif // USE_OPENCV
相关文章推荐
- caffe源码解读(5)-image_data_layer.cpp
- caffe源码分析--data_layer.cpp
- caffe源码之VideoDataLayer
- caffe源码解读(11)-triplet_loss_layer.cpp
- 代码笔记:caffe-reid中reid_data_layer源码解析
- caffe源码分析:softmax_layer.cpp && softmax_loss_layer.cpp
- caffe源码分析--poolinger_layer.cpp
- caffe源码阅读9-loss_layer.hpp+各cpp
- Caffe源码:pooling_layer.cpp
- caffe源码深入学习4:支持魔改的layer:layer.hpp与layer.cpp
- Caffe源码解析4: Data_layer
- caffe源码解读(7)-data_transformer.cpp
- Caffe源码解析4: Data_layer
- Caffe源码解析4: Data_layer
- caffe源码分析--softmax_layer.cpp
- caffe源代码分析--data_layer.cpp
- Caffe源码:Softmax_loss_layer.cpp
- Caffe源码解析4: Data_layer
- Caffe源码解析4: Data_layer
- Caffe源码解析4: Data_layer