您的位置:首页 > Web前端

caffe源码(8)-video_data_layer.cpp

2018-01-30 15:18 417 查看
video_data_layer层是在caffe里面新添加的一个DataLayer,该层允许caffe的输入data层数据为视频。由于视频是由一帧一帧的图像组成,因此该层的定义主要参考image_data_layer层。

同样遵循:VideoData层参数定义->VideoData数据层声明->VideoData数据层实现的顺序,介绍如下:

一.caffe.proto层参数定义

层参数定义文件位于:src/caffe/proto/caffe.proto

message VideoDataParameter {
// Parameter settings for the VideoData layer.
optional string source = 1;// Location of the txt list file used for training and testing.
optional uint32 batch_size = 4 [default = 1];
optional uint32 rand_skip = 7 [default = 0];
optional bool shuffle = 8 [default = false];
optional uint32 new_height = 9 [default = 0];
optional uint32 new_width = 10 [default = 0];
optional uint32 new_length = 13;// new_length: number of image files (frames) in the smallest video clip.
optional bool is_color = 11 [default = true];
optional float scale = 2 [default = 1];
optional string mean_file = 3;
optional uint32 crop_size = 5 [default = 0];
optional bool mirror = 6 [default = false];
optional string root_folder = 12 [default = ""];
optional bool show_data = 14 [default = false];
optional bool use_image = 15 [default = false];
optional bool use_temporal_jitter = 16 [default = false];
optional uint32 sampling_rate = 17 [default = 1];
optional uint32 max_sampling_rate = 18 [default = 1];
optional bool use_sampling_rate_jitter = 19 [default = false];
// use_multiple_label and num_of_labels must be set together.
optional bool use_multiple_label = 20 [default = false];
optional uint32 num_of_labels = 21 [default = 1];
}


二.VideoData层声明

层声明文件位于:include/caffe/layers/video_data_layer.hpp

#ifndef CAFFE_VIDEO_DATA_LAYER_HPP_
#define CAFFE_VIDEO_DATA_LAYER_HPP_
#include <string>
#include <utility>
#include <vector>
#include "caffe/blob.hpp"
#include "caffe/data_transformer.hpp"
#include "caffe/internal_thread.hpp"
#include "caffe/layer.hpp"
#include "caffe/layers/base_data_layer.hpp"
#include "caffe/proto/caffe.pb.h"

namespace caffe {
// Class declaration.
// VideoDataLayer inherits from BasePrefetchingDataLayer and declares exactly
// zero bottom blobs and three top blobs.
template <typename Dtype>
class VideoDataLayer : public BasePrefetchingDataLayer<Dtype> {
 public:
  // Explicit constructor: forwards the layer parameter to the base class.
  explicit VideoDataLayer(const LayerParameter& param)
      : BasePrefetchingDataLayer<Dtype>(param) {}
  virtual ~VideoDataLayer();
  virtual void DataLayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);

  virtual inline const char* type() const { return "VideoData"; }
  virtual inline int ExactNumBottomBlobs() const { return 0; }
  virtual inline int ExactNumTopBlobs() const { return 3; }

 protected:
  // Random number generator handle; ShuffleClips() draws from it.
  shared_ptr<Caffe::RNG> prefetch_rng_;
  // Declaration repaired: the original text was split by a stray "4000"
  // token left behind by the page extraction.
  virtual void ShuffleClips();
  virtual void load_batch(Batch<Dtype>* batch);

  // Per-clip lists; entry i of each list describes clip i.
  vector<string> file_list_;
  vector<int> start_frm_list_;
  vector<int> label_list_;
  vector<int> individual_sampling_rate_list_;
  vector<vector<int> > multiple_label_list_;
  vector<float> overlap_list_;
  // Order in which clips are visited.
  vector<int> shuffle_index_;
  // Current position inside shuffle_index_.
  int lines_id_;
};
}  // namespace caffe
#endif  // CAFFE_VIDEO_DATA_LAYER_HPP_


三.VideoData层实现

层实现文件位于:src/caffe/layers/video_data_layer.cpp

#ifdef USE_OPENCV
#include <opencv2/core/core.hpp>
#include <fstream>  // NOLINT(readability/streams)
#include <iostream>  // NOLINT(readability/streams)
#include <string>
#include <utility>
#include <vector>
#include "caffe/data_transformer.hpp"
#include "caffe/layers/base_data_layer.hpp"
#include "caffe/layers/video_data_layer.hpp"
#include "caffe/util/benchmark.hpp"
#include "caffe/util/io.hpp"
#include "caffe/util/image_io.hpp"
#include "caffe/util/math_functions.hpp"
#include "caffe/util/rng.hpp"

namespace caffe {

// Destructor: asks the layer to stop its internal thread before the object
// goes away.
template <typename Dtype>
VideoDataLayer<Dtype>::~VideoDataLayer() {
  this->StopInternalThread();
}

// Sets the layer up from video_data_param:
//   1. reads the clip list file named by `source` into the per-clip lists,
//   2. optionally shuffles the visiting order and skips a random number of
//      leading entries,
//   3. loads one clip to infer the data shape,
//   4. reshapes top[0] (data), top[1] (label), top[2] (overlap) and the
//      matching prefetch buffers for `batch_size` items.
//
// bottom: not read by this function.
// top:    the three output blobs, reshaped here.
template <typename Dtype>
void VideoDataLayer<Dtype>::DataLayerSetUp(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  // Fetch the layer parameters.
  const int new_length = this->layer_param_.video_data_param().new_length();
  const int new_height = this->layer_param_.video_data_param().new_height();
  const int new_width  = this->layer_param_.video_data_param().new_width();
  string root_folder = this->layer_param_.video_data_param().root_folder();
  // new_height and new_width must be either both zero or both positive.
  CHECK((new_height == 0 && new_width == 0) ||
      (new_height > 0 && new_width > 0)) << "Current implementation requires "
      "new_height and new_width to be set at the same time.";

  // Read the list file and the options that decide its line format.
  const string& source = this->layer_param_.video_data_param().source();
  const bool use_temporal_jitter =
      this->layer_param_.video_data_param().use_temporal_jitter();
  const bool use_image = this->layer_param_.video_data_param().use_image();
  const int sampling_rate =
      this->layer_param_.video_data_param().sampling_rate();
  const bool use_multiple_label =
      this->layer_param_.video_data_param().use_multiple_label();
  if (use_multiple_label) {
    // num_of_labels has to be given explicitly when multiple labels are used.
    CHECK(this->layer_param_.video_data_param().has_num_of_labels()) <<
        "number of labels must be set together with use multiple labels";
  }
  const int num_of_labels =
      this->layer_param_.video_data_param().num_of_labels();

  LOG(INFO) << "Opening file " << source;
  std::ifstream infile(source.c_str());
  int count = 0;
  string filename, labels;
  int start_frm, label, individual_sampling_rate;  // 030317
  float overlap;  // 030317

  if (!use_multiple_label) {
    if ((!use_image) && use_temporal_jitter) {
      // Each line: <filename> <label>
      while (infile >> filename >> label) {
        file_list_.push_back(filename);
        label_list_.push_back(label);
        shuffle_index_.push_back(count);
        count++;
      }
    } else {
      // only modify the case of 1. image seq & 2. single label 030317
      // Each line: <filename> <start_frame> <label>
      //            <individual_sampling_rate> <overlap>
      while (infile >> filename >> start_frm >> label
                    >> individual_sampling_rate >> overlap) {
        file_list_.push_back(filename);
        start_frm_list_.push_back(start_frm);
        label_list_.push_back(label);
        overlap_list_.push_back(overlap);
        individual_sampling_rate_list_.push_back(individual_sampling_rate);
        shuffle_index_.push_back(count);
        count++;
      }
    }
  } else {
    if ((!use_image) && use_temporal_jitter) {
      // Each line: <filename> <label[,label...]>
      while (infile >> filename >> labels) {
        file_list_.push_back(filename);
        shuffle_index_.push_back(count);
        vector<int> label_set;
        int tmp_int;
        stringstream sstream(labels);
        while (sstream >> tmp_int) {
          label_set.push_back(tmp_int);
          if (sstream.peek() == ',')
            sstream.ignore();
        }
        // label_set[0] is read below; an unparsable label field would
        // otherwise index an empty vector.
        CHECK(!label_set.empty()) << "No label could be parsed for "
            << filename;
        multiple_label_list_.push_back(label_set);
        label_list_.push_back(label_set[0]);
        count++;
      }
    } else {
      // Each line: <filename> <start_frame> <label[,label...]>
      while (infile >> filename >> start_frm >> labels) {
        file_list_.push_back(filename);
        start_frm_list_.push_back(start_frm);
        shuffle_index_.push_back(count);
        // This line format carries no per-clip sampling rate or overlap, but
        // the image-sequence read path indexes both lists by clip id. Store
        // neutral values (rate 1, overlap 0) so those lookups stay in range.
        individual_sampling_rate_list_.push_back(1);
        overlap_list_.push_back(0.0f);
        vector<int> label_set;
        int tmp_int;
        stringstream sstream(labels);
        while (sstream >> tmp_int) {
          label_set.push_back(tmp_int);
          if (sstream.peek() == ',')
            sstream.ignore();
        }
        CHECK(!label_set.empty()) << "No label could be parsed for "
            << filename;
        multiple_label_list_.push_back(label_set);
        label_list_.push_back(label_set[0]);
        count++;
      }
    }
  }
  infile.close();

  // An empty list (missing file, wrong line format) cannot be recovered from:
  // the code below indexes shuffle_index_ and the per-clip lists.
  CHECK_GT(count, 0) << "Failed to read the clip list: " << source;

  if (this->layer_param_.video_data_param().shuffle()) {
    // Seed a generator and randomly shuffle the visiting order.
    LOG(INFO) << "Shuffling data";
    const unsigned int prefetch_rng_seed = caffe_rng_rand();
    prefetch_rng_.reset(new Caffe::RNG(prefetch_rng_seed));
    ShuffleClips();
  }

  lines_id_ = 0;
  LOG(INFO) << "A total of " << shuffle_index_.size() << " video chunks.";

  // Check if we would need to randomly skip a few data points.
  if (this->layer_param_.video_data_param().rand_skip()) {
    unsigned int skip = caffe_rng_rand() %
        this->layer_param_.video_data_param().rand_skip();
    LOG(INFO) << "Skipping first " << skip << " data points.";
    CHECK_GT(shuffle_index_.size(), skip) << "Not enough points to skip";
    lines_id_ = skip;
  }

  // Read a data point, and use it to initialize the top blob.
  VolumeDatum datum;
  int id = shuffle_index_[lines_id_];
  if (!use_image) {
    // Video input.
    if (use_temporal_jitter) {
      CHECK(ReadVideoToVolumeDatum((root_folder + file_list_[0]).c_str(), 0,
          label_list_[0], new_length, new_height, new_width, sampling_rate,
          &datum));
    } else {
      CHECK(ReadVideoToVolumeDatum((root_folder + file_list_[id]).c_str(),
          start_frm_list_[id], label_list_[id], new_length, new_height,
          new_width, sampling_rate, &datum));
    }
  } else {
    // Image-sequence input.  030317 no change -> overlap added
    CHECK(ReadImageSequenceToVolumeDatum(
        (root_folder + file_list_[id]).c_str(), start_frm_list_[id],
        label_list_[id], new_length, new_height, new_width, sampling_rate,
        &datum, overlap_list_[id]));
  }

  // Use data_transformer to infer the expected blob shape from the datum.
  vector<int> top_shape = this->data_transformer_->InferBlobShape(datum);
  this->transformed_data_.Reshape(top_shape);

  // Reshape prefetch_data and top[0] according to the batch_size.
  const int batch_size = this->layer_param_.video_data_param().batch_size();
  CHECK_GT(batch_size, 0) << "Positive batch size required";
  top_shape[0] = batch_size;
  for (int i = 0; i < this->prefetch_.size(); ++i) {
    this->prefetch_[i]->data_.Reshape(top_shape);
  }
  top[0]->Reshape(top_shape);

  LOG(INFO) << "output data size: " << top[0]->shape(0) << ","
      << top[0]->shape(1) << "," << top[0]->shape(2) << ","
      << top[0]->shape(3) << "," << top[0]->shape(4);

  // label: one entry per item, or num_of_labels entries per item when
  // multiple labels are used.
  vector<int> label_shape;
  label_shape.push_back(batch_size);
  if (use_multiple_label)
    label_shape.push_back(num_of_labels);
  top[1]->Reshape(label_shape);

  for (int i = 0; i < this->prefetch_.size(); ++i) {
    this->prefetch_[i]->label_.Reshape(label_shape);
  }

  // overlap 030317: one entry per item.
  vector<int> overlap_shape;
  overlap_shape.push_back(batch_size);
  top[2]->Reshape(overlap_shape);

  for (int i = 0; i < this->prefetch_.size(); ++i) {
    this->prefetch_[i]->overlap_.Reshape(overlap_shape);
  }
}

// Randomly permutes shuffle_index_ (the clip visiting order) using the
// generator held by prefetch_rng_.
template <typename Dtype>
void VideoDataLayer<Dtype>::ShuffleClips() {
  caffe::rng_t* const rng_engine =
      static_cast<caffe::rng_t*>(prefetch_rng_->generator());
  shuffle(shuffle_index_.begin(), shuffle_index_.end(), rng_engine);
}

// This function is called on prefetch thread.
//
// Fills `batch` with batch_size clips: each clip is read (video file or image
// sequence), transformed into its slot of batch->data_, and its label(s) and
// overlap are written to batch->label_ / batch->overlap_. Clips that fail to
// read are skipped during training and are fatal during testing. lines_id_
// advances through shuffle_index_ and wraps (re-shuffling if requested) at the
// end of the list.
template <typename Dtype>
void VideoDataLayer<Dtype>::load_batch(Batch<Dtype>* batch) {
  CPUTimer batch_timer;
  batch_timer.Start();
  double read_time = 0;
  double trans_time = 0;
  CPUTimer timer;
  CHECK(batch->data_.count());
  CHECK(this->transformed_data_.count());

  // Layer parameters; bound by reference to avoid copying the message and the
  // root folder string for every batch.
  const VideoDataParameter& video_data_param =
      this->layer_param_.video_data_param();
  const int batch_size = video_data_param.batch_size();
  const int new_length = video_data_param.new_length();
  const int new_height = video_data_param.new_height();
  const int new_width = video_data_param.new_width();
  const string& root_folder = video_data_param.root_folder();
  const bool use_image = video_data_param.use_image();
  const bool use_temporal_jitter = video_data_param.use_temporal_jitter();
  int sampling_rate = video_data_param.sampling_rate();
  const int max_sampling_rate = video_data_param.max_sampling_rate();
  const bool use_sampling_rate_jitter =
      video_data_param.use_sampling_rate_jitter();
  const bool show_data = video_data_param.show_data();
  const bool use_multiple_label = video_data_param.use_multiple_label();
  if (use_multiple_label) {
    CHECK(video_data_param.has_num_of_labels()) <<
        "number of labels must be set together with use multiple labels";
  }
  const int num_of_labels = video_data_param.num_of_labels();
  if (use_sampling_rate_jitter) {
    // The jittered rate is drawn as rand % max_sampling_rate + 1; a zero
    // modulus would be undefined behaviour.
    CHECK_GT(max_sampling_rate, 0)
        << "max_sampling_rate must be positive for sampling rate jitter";
  }

  // Reshape according to the first clip of each batch.
  // Read a data point, and use it to initialize the top blob.
  VolumeDatum datum;
  const int probe_id = shuffle_index_[lines_id_];
  if (!use_image) {
    // The read result is checked here, as it is for the image branch below:
    // an unread datum would yield a bogus shape for the whole batch.
    if (use_temporal_jitter) {
      CHECK(ReadVideoToVolumeDatum((root_folder + file_list_[0]).c_str(), 0,
          label_list_[0], new_length, new_height, new_width, sampling_rate,
          &datum));
    } else {
      CHECK(ReadVideoToVolumeDatum(
          (root_folder + file_list_[probe_id]).c_str(),
          start_frm_list_[probe_id], label_list_[probe_id], new_length,
          new_height, new_width, sampling_rate, &datum));
    }
  } else {
    // LOG(INFO) << "read video from " << file_list_[probe_id].c_str();
    // 030317 no change -> overlap added
    CHECK(ReadImageSequenceToVolumeDatum(
        (root_folder + file_list_[probe_id]).c_str(),
        start_frm_list_[probe_id], label_list_[probe_id], new_length,
        new_height, new_width, sampling_rate, &datum,
        overlap_list_[probe_id]));
  }

  // Use data_transformer to infer the expected blob shape from the datum.
  vector<int> top_shape = this->data_transformer_->InferBlobShape(datum);
  this->transformed_data_.Reshape(top_shape);

  // Reshape batch according to the batch_size.
  top_shape[0] = batch_size;
  batch->data_.Reshape(top_shape);

  Dtype* prefetch_data = batch->data_.mutable_cpu_data();
  Dtype* prefetch_label = batch->label_.mutable_cpu_data();
  Dtype* prefetch_overlap = batch->overlap_.mutable_cpu_data();  // 030317

  // datum scales
  const int dataset_size = shuffle_index_.size();
  for (int item_id = 0; item_id < batch_size; ++item_id) {
    // get a blob
    if (use_sampling_rate_jitter) {
      sampling_rate = caffe::caffe_rng_rand() % (max_sampling_rate) + 1;
    }
    timer.Start();
    CHECK_GT(dataset_size, lines_id_);
    bool read_status;
    const int id = this->shuffle_index_[this->lines_id_];
    if (!use_image) {
      if (!use_temporal_jitter) {
        read_status = ReadVideoToVolumeDatum(
            (root_folder + this->file_list_[id]).c_str(),
            this->start_frm_list_[id], this->label_list_[id], new_length,
            new_height, new_width, sampling_rate, &datum);
      } else {
        // A start frame of -1 is passed when temporal jitter is enabled.
        read_status = ReadVideoToVolumeDatum(
            (root_folder + this->file_list_[id]).c_str(), -1,
            this->label_list_[id], new_length, new_height, new_width,
            sampling_rate, &datum);
      }
    } else {
      if (!use_temporal_jitter) {
        // LOG(INFO) << "read video from " << this->file_list_[id].c_str();
        // 030317 -> overlap added
        read_status = ReadImageSequenceToVolumeDatum(
            (root_folder + this->file_list_[id]).c_str(),
            this->start_frm_list_[id], this->label_list_[id], new_length,
            new_height, new_width,
            sampling_rate * this->individual_sampling_rate_list_[id], &datum,
            overlap_list_[id]);
      } else {
        // With temporal jitter the start-frame column is used as the number
        // of frames available in the sequence.
        int num_of_frames = this->start_frm_list_[id];
        int use_start_frame;
        if (num_of_frames < new_length * sampling_rate) {
          LOG(INFO) << "not enough frames; having " << num_of_frames;
          read_status = false;
        } else {
          if (this->phase_ == TRAIN)
            use_start_frame = caffe_rng_rand() %
                (num_of_frames - new_length * sampling_rate + 1) + 1;
          else
            use_start_frame = 0;
          // 030317 -> overlap added
          read_status = ReadImageSequenceToVolumeDatum(
              (root_folder + this->file_list_[id]).c_str(), use_start_frame,
              this->label_list_[id], new_length, new_height, new_width,
              sampling_rate * this->individual_sampling_rate_list_[id],
              &datum, overlap_list_[id]);
        }
      }
    }

    if (this->phase_ == TEST) {
      CHECK(read_status) << "Testing must not miss any example";
    }

    if (!read_status) {
      // Skip the unreadable clip and retry the same batch slot with the next.
      this->lines_id_++;
      if (this->lines_id_ >= dataset_size) {
        // We have reached the end. Restart from the first.
        LOG(INFO) << "Restarting data prefetching from start.";
        this->lines_id_ = 0;
        if (video_data_param.shuffle()) {
          ShuffleClips();
        }
      }
      item_id--;
      continue;
    }
    read_time += timer.MicroSeconds();
    timer.Start();

    // Apply transformations (mirror, crop...) to the video, writing directly
    // into this item's slot of the batch buffer.
    vector<int> shape_vec(5, 0);
    shape_vec[0] = item_id;
    int offset = batch->data_.offset(shape_vec);
    this->transformed_data_.set_cpu_data(prefetch_data + offset);
    this->data_transformer_->VideoTransform(datum, &(this->transformed_data_));
    trans_time += timer.MicroSeconds();

    if (!use_multiple_label) {
      prefetch_label[item_id] = datum.label();
      prefetch_overlap[item_id] = datum.overlap();  // 030317
      // LOG(INFO) << "datum.overlap(): " << datum.overlap(); // 030317
    } else {
      // Multi-hot encoding: clear the item's row, then set one entry per
      // label. Each label is range-checked because it indexes the row.
      Dtype* label_row = prefetch_label + item_id * num_of_labels;
      caffe_set<Dtype>(num_of_labels, Dtype(0), label_row);
      const vector<int>& item_labels = this->multiple_label_list_[id];
      for (size_t index = 0; index < item_labels.size(); ++index) {
        const int label_idx = item_labels[index];
        CHECK_GE(label_idx, 0) << "Negative label for "
            << this->file_list_[id];
        CHECK_LT(label_idx, num_of_labels) << "Label exceeds num_of_labels for "
            << this->file_list_[id];
        label_row[label_idx] = Dtype(1);
      }
    }

    // Show visualization
    if (show_data) {
      const Dtype* data_buffer = prefetch_data + offset;
      int image_size, channel_size;
      image_size = top_shape[3] * top_shape[4];
      channel_size = top_shape[2] * image_size;
      for (int l = 0; l < top_shape[2]; ++l) {
        for (int c = 0; c < top_shape[1]; ++c) {
          cv::Mat img;
          char ch_name[64];
          BufferToGrayImage(data_buffer + c * channel_size + l * image_size,
              top_shape[3], top_shape[4], &img);
          sprintf(ch_name, "Channel %d", c);
          cv::namedWindow(ch_name, CV_WINDOW_AUTOSIZE);
          cv::imshow(ch_name, img);
        }
        cv::waitKey(100);
      }
    }

    // go to the next iter
    this->lines_id_++;
    if (lines_id_ >= dataset_size) {
      // We have reached the end. Restart from the first.
      DLOG(INFO) << "Restarting data prefetching from start.";
      lines_id_ = 0;
      if (video_data_param.shuffle()) {
        ShuffleClips();
      }
    }
  }

  batch_timer.Stop();
  DLOG(INFO) << "Prefetch batch: " << batch_timer.MilliSeconds() << " ms.";
  DLOG(INFO) << "     Read time: " << read_time / 1000 << " ms.";
  DLOG(INFO) << "Transform time: " << trans_time / 1000 << " ms.";

  // std::ofstream out("profile_inference.log", std::ofstream::out | std::ofstream::app);
  // out << "data CPU " << batch_timer.MilliSeconds() << "\n";
  // out.close();
}

// Caffe macro applied to the layer class template; presumably emits the
// explicit template instantiations for the supported Dtype types — confirm
// against the macro definition in the Caffe headers.
INSTANTIATE_CLASS(VideoDataLayer);
// Caffe macro; presumably registers the layer under the type name
// "VideoData" (the string returned by type()) — confirm against the layer
// factory header.
REGISTER_LAYER_CLASS(VideoData);

}  // namespace caffe
#endif  // USE_OPENCV
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签:  源码 视频