您的位置:首页 > 其它

faster rcnn在ubuntu环境下使用GPU模式并用cuDNN v5加速

2017-03-14 17:18 495 查看
本人第一次写博客,水平低,请大家见谅!

博主这段时间跳进了faster rcnn这个坑里面,差点没爬起来。这篇文章主要讲在faster rcnn配置过程中让cuDNN v5跑起来。主要讲gpu模式中,使用cudnn v5加速,关于其他的配置问题,建议大家仔细阅读官网资料,https://github.com/rbgirshick/py-faster-rcnn。毕竟每个人的电脑配置都不同。

本人使用的是CUDA8.0,cuDNN v5.1.首先要做到:

1.确认自己编译没有问题(就是不使用cuDNN的时候能编译通过)

2.CUDA8.0,cuDNN v5安装好了,并且没有问题。

原文地址:

https://github.com/ShuangLI59/person_search/pull/8/commits/622ff2b2f4a20bbc91f4828f3e85ffd6f22d1c75#diff-a3c8b6ea171736e9ddcf84d93b7d6364R75

废话不多说,如果要使用CUDNN,需要在makefile.config中使下面选项打开:

USE_CUDNN := 1

然后就需要修改下列文件:

1: caffe-fast-rcnn/include/caffe/layers/cudnn_relu_layer.hpp

在第40行+(表示添加)

cudnnHandle_t             handle_;
cudnnTensorDescriptor_t bottom_desc_;
cudnnTensorDescriptor_t top_desc_;
+  cudnnActivationDescriptor_t activ_desc_;
};


2: caffe-fast-rcnn/include/caffe/layers/cudnn_sigmoid_layer.hpp

在第40行+

cudnnHandle_t             handle_;
cudnnTensorDescriptor_t bottom_desc_;
cudnnTensorDescriptor_t top_desc_;
+  cudnnActivationDescriptor_t activ_desc_;
};


3: caffe-fast-rcnn/include/caffe/layers/cudnn_tanh_layer.hpp

在第40行+

cudnnHandle_t             handle_;
cudnnTensorDescriptor_t bottom_desc_;
cudnnTensorDescriptor_t top_desc_;
+  cudnnActivationDescriptor_t activ_desc_;
};


4: caffe-fast-rcnn/include/caffe/util/cudnn.hpp

在第94行添加('+'表示添加),第96行删除('-'表示删除),并在新生成的第96行后面继续添加:

CUDNN_CHECK(cudnnCreateFilterDescriptor(desc));
+#if CUDNN_VERSION_MIN(5, 0, 0)
CUDNN_CHECK(cudnnSetFilter4dDescriptor(*desc, dataType<Dtype>::type,
-      n, c, h, w));
+      CUDNN_TENSOR_NCHW, n, c, h, w));
+#else
+  CUDNN_CHECK(cudnnSetFilter4dDescriptor_v4(*desc, dataType<Dtype>::type,
+      CUDNN_TENSOR_NCHW, n, c, h, w));
+#endif
}


删除131,132行,并重新添加内容

CUDNN_CHECK(cudnnCreatePoolingDescriptor(pool_desc));
-  CUDNN_CHECK(cudnnSetPooling2dDescriptor(*pool_desc, *mode, h, w,
-        pad_h, pad_w, stride_h, stride_w));
+#if CUDNN_VERSION_MIN(5, 0, 0)
+  CUDNN_CHECK(cudnnSetPooling2dDescriptor(*pool_desc, *mode,
+        CUDNN_PROPAGATE_NAN, h, w, pad_h, pad_w, stride_h, stride_w));
+#else
+  CUDNN_CHECK(cudnnSetPooling2dDescriptor_v4(*pool_desc, *mode,
+        CUDNN_PROPAGATE_NAN, h, w, pad_h, pad_w, stride_h, stride_w));
+#endif
+}
+
+template <typename Dtype>
+inline void createActivationDescriptor(cudnnActivationDescriptor_t* activ_desc,
+    cudnnActivationMode_t mode) {
+  CUDNN_CHECK(cudnnCreateActivationDescriptor(activ_desc));
+  CUDNN_CHECK(cudnnSetActivationDescriptor(*activ_desc, mode,
+                                           CUDNN_PROPAGATE_NAN, Dtype(0)));
}


5: caffe-fast-rcnn/src/caffe/layers/cudnn_conv_layer.cu

删除33行,38-44行

if (this->bias_term_) {
const Dtype* bias_data = this->blobs_[1]->gpu_data();
-#if CUDNN_VERSION_MIN(4, 0, 0)
CUDNN_CHECK(cudnnAddTensor(handle_[g],
cudnn::dataType<Dtype>::one,
bias_desc_, bias_data + bias_offset_ * g,
cudnn::dataType<Dtype>::one,
top_descs_[i], top_data + top_offset_ * g));
-#else
-        CUDNN_CHECK(cudnnAddTensor(handle_[g], CUDNN_ADD_SAME_C,
-              cudnn::dataType<Dtype>::one,
-              bias_desc_, bias_data + bias_offset_ * g,
-              cudnn::dataType<Dtype>::one,
-              top_descs_[i], top_data + top_offset_ * g));
-#endif
}
}


在第77行删除并添加:

if (this->param_propagate_down_[0]) {
const Dtype* bottom_data = bottom[i]->gpu_data();
-        CUDNN_CHECK(cudnnConvolutionBackwardFilter_v3(
+        CUDNN_CHECK(cudnnConvolutionBackwardFilter(
handle_[1*this->group_ + g],
cudnn::dataType<Dtype>::one,
bottom_descs_[i], bottom_data + bottom_offset_ * g,


在第95行删除并添加:

Dtype* bottom_diff = bottom[i]->mutable_gpu_diff();
-        CUDNN_CHECK(cudnnConvolutionBackwardData_v3(
+        CUDNN_CHECK(cudnnConvolutionBackwardData(
handle_[2*this->group_ + g],
cudnn::dataType<Dtype>::one,


6: caffe-fast-rcnn/src/caffe/layers/cudnn_relu_layer.cpp

在16行添加

cudnn::createTensor4dDesc<Dtype>(&top_desc_);
+  cudnn::createActivationDescriptor<Dtype>(&activ_desc_, CUDNN_ACTIVATION_RELU);
handles_setup_ = true;
}


7: caffe-fast-rcnn/src/caffe/layers/cudnn_relu_layer.cu

在第18行,20行,25行删除添加:

const Dtype* bottom_data = bottom[0]->gpu_data();
Dtype* top_data = top[0]->mutable_gpu_data();
+#if CUDNN_VERSION_MIN(5, 0, 0)
CUDNN_CHECK(cudnnActivationForward(this->handle_,
-        CUDNN_ACTIVATION_RELU,
+        activ_desc_,
cudnn::dataType<Dtype>::one,
this->bottom_desc_, bottom_data,
cudnn::dataType<Dtype>::zero,
this->top_desc_, top_data));
+#else
+  CUDNN_CHECK(cudnnActivationForward_v4(this->handle_,
+        activ_desc_,
+        cudnn::dataType<Dtype>::one,
+        this->bottom_desc_, bottom_data,
+        cudnn::dataType<Dtype>::zero,
+        this->top_desc_, top_data));
+#endif
}

template <typename Dtype>


在52,54,60行删除添加:

const Dtype* top_diff = top[0]->gpu_diff();
const Dtype* bottom_data = bottom[0]->gpu_data();
Dtype* bottom_diff = bottom[0]->mutable_gpu_diff();
+#if CUDNN_VERSION_MIN(5, 0, 0)
CUDNN_CHECK(cudnnActivationBackward(this->handle_,
-        CUDNN_ACTIVATION_RELU,
+        activ_desc_,
cudnn::dataType<Dtype>::one,
this->top_desc_, top_data, this->top_desc_, top_diff,
this->bottom_desc_, bottom_data,
cudnn::dataType<Dtype>::zero,
this->bottom_desc_, bottom_diff));
+#else
+  CUDNN_CHECK(cudnnActivationBackward_v4(this->handle_,
+        activ_desc_,
+        cudnn::dataType<Dtype>::one,
+        this->top_desc_, top_data, this->top_desc_, top_diff,
+        this->bottom_desc_, bottom_data,
+        cudnn::dataType<Dtype>::zero,
+        this->bottom_desc_, bottom_diff));
+#endif
}


8: caffe-fast-rcnn/src/caffe/layers/cudnn_sigmoid_layer.cpp

15,16行添加:

cudnn::createTensor4dDesc<Dtype>(&bottom_desc_);
cudnn::createTensor4dDesc<Dtype>(&top_desc_);
+  cudnn::createActivationDescriptor<Dtype>(&activ_desc_,
+      CUDNN_ACTIVATION_SIGMOID);
handles_setup_ = true;


9: caffe-fast-rcnn/src/caffe/layers/cudnn_sigmoid_layer.cu

在13,15,20行添加删除:

const Dtype* bottom_data = bottom[0]->gpu_data();
Dtype* top_data = top[0]->mutable_gpu_data();
+#if CUDNN_VERSION_MIN(5, 0, 0)
CUDNN_CHECK(cudnnActivationForward(this->handle_,
-        CUDNN_ACTIVATION_SIGMOID,
+        activ_desc_,
cudnn::dataType<Dtype>::one,
this->bottom_desc_, bottom_data,
cudnn::dataType<Dtype>::zero,
this->top_desc_, top_data));
+#else
+  CUDNN_CHECK(cudnnActivationForward_v4(this->handle_,
+        activ_desc_,
+        cudnn::dataType<Dtype>::one,
+        this->bottom_desc_, bottom_data,
+        cudnn::dataType<Dtype>::zero,
+        this->top_desc_, top_data));
+#endif
}


在42,44,50添加删除:

Dtype* bottom_diff = bottom[0]->mutable_gpu_diff();
+#if CUDNN_VERSION_MIN(5, 0, 0)
CUDNN_CHECK(cudnnActivationBackward(this->handle_,
-        CUDNN_ACTIVATION_SIGMOID,
+        activ_desc_,
cudnn::dataType<Dtype>::one,
this->top_desc_, top_data, this->top_desc_, top_diff,
this->bottom_desc_, bottom_data,
cudnn::dataType<Dtype>::zero,
this->bottom_desc_, bottom_diff));
+#else
+  CUDNN_CHECK(cudnnActivationBackward_v4(this->handle_,
+        activ_desc_,
+        cudnn::dataType<Dtype>::one,
+        this->top_desc_, top_data, this->top_desc_, top_diff,
+        this->bottom_desc_, bottom_data,
+        cudnn::dataType<Dtype>::zero,
+        this->bottom_desc_, bottom_diff));
+#endif
}


10: caffe-fast-rcnn/src/caffe/layers/cudnn_tanh_layer.cpp

在16行添加:

cudnn::createTensor4dDesc<Dtype>(&bottom_desc_);
cudnn::createTensor4dDesc<Dtype>(&top_desc_);
+  cudnn::createActivationDescriptor<Dtype>(&activ_desc_, CUDNN_ACTIVATION_TANH);
handles_setup_ = true;


11: caffe-fast-rcnn/src/caffe/layers/cudnn_tanh_layer.cu

在13,15,20行添加删除:

const Dtype* bottom_data = bottom[0]->gpu_data();
Dtype* top_data = top[0]->mutable_gpu_data();
+#if CUDNN_VERSION_MIN(5, 0, 0)
CUDNN_CHECK(cudnnActivationForward(this->handle_,
-        CUDNN_ACTIVATION_TANH,
+        activ_desc_,
cudnn::dataType<Dtype>::one,
this->bottom_desc_, bottom_data,
cudnn::dataType<Dtype>::zero,
this->top_desc_, top_data));
+#else
+  CUDNN_CHECK(cudnnActivationForward_v4(this->handle_,
+        activ_desc_,
+        cudnn::dataType<Dtype>::one,
+        this->bottom_desc_, bottom_data,
+        cudnn::dataType<Dtype>::zero,
+        this->top_desc_, top_data));
+#endif
}


在43,45,51行添加删除:

const Dtype* bottom_data = bottom[0]->gpu_data();
Dtype* bottom_diff = bottom[0]->mutable_gpu_diff();

+#if CUDNN_VERSION_MIN(5, 0, 0)
CUDNN_CHECK(cudnnActivationBackward(this->handle_,
-        CUDNN_ACTIVATION_TANH,
+        activ_desc_,
cudnn::dataType<Dtype>::one,
this->top_desc_, top_data, this->top_desc_, top_diff,
this->bottom_desc_, bottom_data,
cudnn::dataType<Dtype>::zero,
this->bottom_desc_, bottom_diff));
+#else
+  CUDNN_CHECK(cudnnActivationBackward_v4(this->handle_,
+        activ_desc_,
+        cudnn::dataType<Dtype>::one,
+        this->top_desc_, top_data, this->top_desc_, top_diff,
+        this->bottom_desc_, bottom_data,
+        cudnn::dataType<Dtype>::zero,
+        this->bottom_desc_, bottom_diff));
+#endif
}


修改完成后重新编译Caffe(记住先make clean,再make)。

后面我碰到的一个问题是

‘numpy.float64’ object cannot be interpreted as an index

主要原因是版本不支持问题,解决方法:sudo pip install -U numpy==1.11.0

希望大家能把GPU模式跑起来,毕竟cpu和gpu不是一个数量级的.cudnn还能够减少消耗的显存。

有什么安装,编译问题大家可以一起探讨!
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: