您的位置：首页 > 编程语言
r-cnn系列代码编译及解读(3)

2017-07-24 11:09 330 查看
本文基于 Joseph Redmon 提出的 YOLO2 算法，针对其算法代码实现做以下工作：

1）完成代码的安装与编译

2）使用自己的数据训练 darknet 模型

3）修改源代码（C代码），增加检测功能的python接口
Darknet安装

与之前的 fast-rcnn 及 faster-rcnn 不一样，官方 YOLO2 的实现（也包括 YOLO1）不再基于 caffe 框架，而是用 C 编写了一个新的深度学习框架 darknet

带来的好处是，其他依赖少，安装比较简单；但通用性不好，并且官方竟然没有给出 detect 的接口。

基于此，网上有很多的修改版本，参考这篇文章：

其中py-yolo2项目给 darknet 添加了 python 接口，但实测似乎没有效果；

YAD2K项目使用了keras与tensorflow-gpu，使用时需要对 darknet 的模型做转换，但模型训练接口没找到

最终还是决定使用 darknet 的实现，在源代码中添加 detect 函数供 python 调用。

安装 darknet 非常简单：

1）clone 工程
git clone https://github.com/pjreddie/darknet
2）修改 makefile，添加 cuda 和 opencv 支持

GPU=1
CUDNN=1
OPENCV=1


3）make 编译，然后测试

#下载权值文件
wget http://pjreddie.com/media/files/yolo.weights
#测试
./darknet detect cfg/yolo.cfg yolo.weights data/dog.jpg


使用自己的数据训练
参考这篇文章和这篇文章。

1）与 faster-rcnn 一样，使用 voc 格式的数据，按文中的“训练数据格式化”方法准备数据

2）修改 scripts/voc_label.py 的 classes 为自己数据的类名，并修改数据的路径

这个脚本会生成一个包含每张图片完整路径的文件 train.txt

并对每张图片生成一个 label/xxx.txt，记录其 box 和所属类：

1 0.727864583333 0.82861328125 0.246744791667 0.3251953125
2 0.34814453125 0.832763671875 0.3115234375 0.33349609375


3）修改配置文件

a.新建 data/my_voc.names ，按行写下数据的类别名

b.新建 cfg/my_voc.data，模仿 voc.data，修改 classes 数目，修改 train 和 names 的路径

c.修改网络参数

这里使用 cfg/tiny-yolo-voc.cfg

修改最后一层 convolutional 的 filters=（classes+coords+1）*5

和接下来的 region 的 classes=类别数

4）下载预训练模型文件

wget http://pjreddie.com/media/files/darknet.conv.weights
5）训练

./darknet detector train ./cfg/my_voc.data ./cfg/my_tiny-yolo-voc.cfg darknet.conv.weights


这里有一点需要注意：label 目录并没有显式指定。darknet 会根据 train.txt 中每张图片的路径自动推导标注文件的路径（把路径中的 images 或 JPEGImages 替换为 labels，并把图片扩展名替换为 .txt），因此标注目录需要与图片目录位于同一级目录下

训练结果保存在 backup/ 下

同样用 darknet 做测试

./darknet detector test cfg/my_voc.data cfg/my_tiny-yolo-voc.cfg backup/my_tiny-yolo-voc.backup test/JPEGImages/00135.jpg


添加 detect 功能的 python 接口
源代码里 python/darknet.py 提供了 classify 和 detect 的函数接口，但实际上作者仅实现了 classify， detect 的代码内容与其一模一样（也不知道作者怎么想的，作为一个主打检测的算法。。。）

那么基本思路就有了：在 c 文件中添加 detect 函数，修改 makefile 将该函数编译进库文件 libdarknet.so

添加detect函数接口
分析源代码，发现作者将所有函数接口都放在 include/darknet.h 里。因此也在该头文件声明：

//////////////////////////////////////////////////////////////////////////////////
// One detection record. detect_img fills a global array of these and returns it.
typedef struct{
int validate_number; // number of valid records; detect_img stores it in element 0 only
float prob;          // confidence of this detection
box_label box;       // class id plus left/right/top/bottom of this detection
} boxs_label;
// Returns w*h*n of the network's last layer, i.e. the capacity of the result array.
int get_last_layer_size(network *net);
// Runs detection on im and returns the global result array.
boxs_label* detect_img(network *net, image im, float thresh, float hier_thresh);
// Converts an interleaved 8-bit buffer (h rows, w columns, c channels) into an image.
image np_to_image(unsigned char* data, int w, int h, int c);
//////////////////////////////////////////////////////////////////////////////////


1）其中结构体 boxs_label 是 detect 的返回值，记录此次检测的目标个数，相应目标的置信度和位置信息

为了避免内存泄露，将该返回值作为全局变量，首先在读取网络模型的代码中申请空间，每次检测仅修改其内容：

// Modified src/network.c
extern boxs_label* detect_result;

/*
 * Build a network from a cfg file, optionally load its weights, and size the
 * global detection buffer for it.
 *
 * cfg     - path to the network configuration file
 * weights - path to the weights file; skipped when NULL or empty
 * clear   - when non-zero, resets the network's "seen" counter to 0
 *
 * Returns the parsed network by value. As a side effect detect_result is
 * (re)allocated with one slot per candidate box of the last layer (w*h*n).
 */
network load_network(char *cfg, char *weights, int clear)
{
    network net = parse_network_cfg(cfg);
    if(weights && weights[0] != 0){
        load_weights(&net, weights);
    }
    if(clear) *net.seen = 0;

    layer l = net.layers[net.n-1];

    // Release the buffer left by an earlier load so repeated loads do not leak;
    // free(NULL) is a no-op, so the first call is safe as well.
    free(detect_result);
    detect_result = calloc(l.w*l.h*l.n, sizeof *detect_result);

    return net;
}


同时在 src/ 下新建文件 extern_var.h 和 extern_var.c

// extern_var.c
#include "darknet.h"
#include "extern_var.h"

// Global detection result buffer: allocated in load_network, filled by detect_img.
boxs_label* detect_result;

// extern_var.h
// (empty)


2）get_last_layer_size 是辅助函数，python 里需要借助它得到网络输出的大小；detect_img 是检测主函数。

在 examples/detector.c 里添加其实现：

extern boxs_label* detect_result;

/*
 * Return w*h*n of the network's last layer. detect_img produces at most this
 * many records, so callers use it to size the result array.
 */
int get_last_layer_size(network *net)
{
    layer last = net->layers[net->n - 1];
    int cells = last.w * last.h;
    return cells * last.n;
}

/*
 * Run detection on one image and store the results in the global
 * detect_result array (modeled on test_detector and on draw_detections in
 * src/image.c).
 *
 * net         - network to run; its batch size is set to 1
 * im          - input image; its pixel data is freed before returning, so the
 *               caller must not use or free it afterwards
 * thresh      - minimum class probability for a box to be reported
 * hier_thresh - hierarchy threshold forwarded to get_region_boxes
 *
 * Returns detect_result. Element 0's validate_number holds the number of
 * valid records; records 0..validate_number-1 carry prob and box (class id and
 * left/right/top/bottom clamped to the image). The buffer is shared and is
 * overwritten by the next call.
 */
boxs_label* detect_img(network *net, image im, float thresh, float hier_thresh)
{
    const float nms = .3;
    int i, j;

    set_batch_network(net, 1);
    srand(2222222);

    image sized = letterbox_image(im, net->w, net->h);
    layer l = net->layers[net->n-1];
    int total = l.w*l.h*l.n;

    box *boxes = calloc(total, sizeof(box));
    float **probs = calloc(total, sizeof(float *));
    // Each row holds floats, so size the rows by sizeof(float), not by a pointer size.
    for(j = 0; j < total; ++j) probs[j] = calloc(l.classes + 1, sizeof(float));
    float **masks = 0;
    if (l.coords > 4){
        masks = calloc(total, sizeof(float *));
        for(j = 0; j < total; ++j) masks[j] = calloc(l.coords-4, sizeof(float));
    }

    double start = what_time_is_it_now();
    network_predict(*net, sized.data);
    printf("Predicted in %f seconds.\n", what_time_is_it_now()-start);
    get_region_boxes(l, im.w, im.h, net->w, net->h, thresh, probs, boxes, masks, 0, 0, hier_thresh, 1);
    if (nms) do_nms_obj(boxes, probs, total, l.classes, nms);

    int count = 0;
    detect_result[0].validate_number = 0;
    for(i = 0; i < total; ++i){
        int class = max_index(probs[i], l.classes);
        float prob = probs[i][class];
        if(prob > thresh){
            box b = boxes[i];
            // Box centre/size are scaled by the image size to get pixel edges.
            int left  = (b.x-b.w/2.)*im.w;
            int right = (b.x+b.w/2.)*im.w;
            int top   = (b.y-b.h/2.)*im.h;
            int bot   = (b.y+b.h/2.)*im.h;

            // Clamp the box to the image bounds.
            if(left < 0) left = 0;
            if(right > im.w-1) right = im.w-1;
            if(top < 0) top = 0;
            if(bot > im.h-1) bot = im.h-1;

            detect_result[0].validate_number = count + 1;
            detect_result[count].prob = prob;
            detect_result[count].box.id = class;
            detect_result[count].box.left = left;
            detect_result[count].box.right = right;
            detect_result[count].box.top = top;
            detect_result[count].box.bottom = bot;
            count++;
        }
    }

    free_image(im);
    free_image(sized);
    free(boxes);
    free_ptrs((void **)probs, total);
    // masks is only allocated when the layer has more than 4 coords; it was leaked before.
    if (masks) free_ptrs((void **)masks, total);

    return detect_result;
}


3）np_to_image 实现 c 代码中的 numpy 格式到 image 格式的转换

针对视频检测，往往是 python 里获得图像流，格式为 numpy.ndarray，需要借助这个接口将其转换为 darknet 的 image 格式

// Added to src/image.c
/*
 * Convert an interleaved 8-bit pixel buffer (h rows of w pixels, c bytes per
 * pixel) into a newly allocated image stored plane by plane, with every value
 * divided by 255.
 *
 * NOTE(review): channels are copied in the order given. Frames from OpenCV are
 * BGR -- confirm whether the network expects RGB and swap if so.
 */
image np_to_image(unsigned char* data, int w, int h, int c)
{
    image out = make_image(w, h, c);
    int row_stride = c*w;
    int plane = w*h;
    int row, col, ch;

    for(ch = 0; ch < c; ++ch){
        float *dst = out.data + ch*plane;
        for(row = 0; row < h; ++row){
            unsigned char *src = data + row*row_stride + ch;
            for(col = 0; col < w; ++col){
                dst[row*w + col] = src[col*c]/255.;
            }
        }
    }

    return out;
}


修改 makefile
作者的 MakeFile 里将编译目标分为了 OBJ 和 EXECOBJA，前者会编译进库文件，后者仅编译进可执行文件

将 EXECOBJA 的 “detector.o”挪到 OBJ 下，因为 detect_img 在该编译单元实现；同时在 OBJ 下添加 “extern_var.o”

修改完成后编译就可以了：

make clean
make


修改 darknet.py 实现检测
1）首先需要定义 detect_img 的返回值，与前面的结构体 boxs_label 保持一致

class BOX_LABEL(Structure):
    """ctypes mirror of the C box_label struct embedded in each detection record."""
    # Field order and types must match the C struct exactly.
    _fields_ = [
        ("id", c_int),
        ("x", c_float),
        ("y", c_float),
        ("w", c_float),
        ("h", c_float),
        ("left", c_float),
        ("right", c_float),
        ("top", c_float),
        ("bottom", c_float),
    ]

class BOXS_LABEL(Structure):
    """ctypes mirror of the C boxs_label struct returned by detect_img."""
    # Field order and types must match the C struct exactly.
    _fields_ = [
        ("validate_number", c_int),  # number of valid records; read from element 0
        ("prob", c_float),           # confidence of this detection
        ("box", BOX_LABEL),          # class id and box edges
    ]


2）实现 detect

def detect(net, out_size, names, im, thresh=0.24, hier_thresh=0.5):
    """Run lib.detect_img on an image and collect the reported objects.

    net         -- network handle returned by load_net
    out_size    -- capacity of the C result array (see net_output_size)
    names       -- list of class names indexed by class id; each is stripped
    im          -- IMAGE passed to C (detect_img frees its pixel data)
    thresh      -- probability threshold passed to detect_img
    hier_thresh -- hierarchy threshold passed to detect_img

    Returns a list of [name, prob, (left, right, top, bottom)] entries and
    prints one line per detection.
    """
    detect_im = lib.detect_img
    detect_im.argtypes = [c_void_p, IMAGE, c_float, c_float]
    # The C side returns a pointer to an array of out_size records.
    detect_im.restype = POINTER(BOXS_LABEL * out_size)
    records = detect_im(net, im, thresh, hier_thresh).contents

    objects_list = []
    # Element 0 carries the number of valid records.
    for idx in range(records[0].validate_number):
        record = records[idx]
        box = record.box
        found = [names[box.id].strip(), record.prob,
                 (box.left, box.right, box.top, box.bottom)]
        objects_list.append(found)
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print('detect \'%s\' in (left:%d, right:%d, top:%d, bottom:%d) with prob %.3f'
              % (found[0], found[2][0], found[2][1], found[2][2], found[2][3], found[1]))

    return objects_list
#################################################################################

def draw_objects(image, objects):
    """Draw a box and a 'name-prob' caption on image for every detected object.

    objects is a list of [name, prob, (left, right, top, bottom)] entries as
    produced by detect. The image is modified in place and also returned.
    """
    for label, prob, (left, right, top, bottom) in objects:
        cv2.rectangle(image, (int(left), int(top)), (int(right), int(bottom)),
                      (0, 0, 255), thickness=4)
        text = '%s-%.4f' % (label, prob)
        # Caption goes 5 pixels above the top-left corner of the box.
        cv2.putText(image, text, (int(left), int(top - 5)),
                    cv2.FONT_HERSHEY_SIMPLEX, 2, (0, 255, 0), thickness=4)
    return image


3）实现 numpy.ndarray 到 image 格式的转换

这里参考这篇文章，使用 numpy.ctypes.data_as 得到数据的 ctypes 指针，供 c 调用

此外，这里还介绍了 Numpy-C-API 的使用，但由于涉及到 ctypes，比较麻烦，故选用上面的方式

def load_img_from_np(img_np, w, h, c):
    """Convert a numpy image array into a darknet IMAGE via lib.np_to_image.

    img_np -- array with h rows, w columns and c channels; cast to uint8
    w, h, c -- width, height and channel count passed to the C function

    Returns the IMAGE struct produced by the C side.
    """
    load_image_np = lib.np_to_image
    # The C parameter is `unsigned char*`, so the pointer type is c_ubyte (not c_uint).
    load_image_np.argtypes = [POINTER(c_ubyte), c_int, c_int, c_int]
    load_image_np.restype = IMAGE

    # C indexes the buffer as row-major interleaved bytes, so force that layout.
    pixels = np.ascontiguousarray(img_np.astype(np.uint8))
    return load_image_np(pixels.ctypes.data_as(POINTER(c_ubyte)), w, h, c)


4）完整的 python/darknet.py

from ctypes import *
import numpy as np
import cv2

class IMAGE(Structure):
    """ctypes mirror of the C image struct exchanged with libdarknet."""
    # Field order and types must match the C struct exactly.
    _fields_ = [
        ("w", c_int),
        ("h", c_int),
        ("c", c_int),
        ("data", POINTER(c_float)),
    ]

class METADATA(Structure):
    """ctypes struct returned by lib.get_metadata: class count and class names."""
    _fields_ = [
        ("classes", c_int),
        ("names", POINTER(c_char_p)),
    ]

class BOX_LABEL(Structure):
    """ctypes mirror of the C box_label struct embedded in each detection record."""
    # Field order and types must match the C struct exactly.
    _fields_ = [
        ("id", c_int),
        ("x", c_float),
        ("y", c_float),
        ("w", c_float),
        ("h", c_float),
        ("left", c_float),
        ("right", c_float),
        ("top", c_float),
        ("bottom", c_float),
    ]

class BOXS_LABEL(Structure):
    """ctypes mirror of the C boxs_label struct returned by detect_img."""
    # Field order and types must match the C struct exactly.
    _fields_ = [
        ("validate_number", c_int),  # number of valid records; read from element 0
        ("prob", c_float),           # confidence of this detection
        ("box", BOX_LABEL),          # class id and box edges
    ]

# Load the darknet shared library; the absolute path is specific to the author's machine.
lib = CDLL("/home/cothink_tech/darknet/libdarknet.so", RTLD_GLOBAL)
# Declare the C signatures of the network size accessors (handle in, int out).
lib.network_width.argtypes = [c_void_p]
lib.network_width.restype = c_int
lib.network_height.argtypes = [c_void_p]
lib.network_height.restype = c_int

def load_meta(f):
    """Call lib.get_metadata on path f and return the resulting METADATA struct."""
    get_metadata = lib.get_metadata
    get_metadata.argtypes = [c_char_p]
    get_metadata.restype = METADATA
    return get_metadata(f)

def load_net(cfg, weights):
    """Load a network through lib.load_network_p and return its opaque handle.

    cfg and weights are file paths; the third C argument (clear) is passed as 0.
    """
    load_network = lib.load_network_p
    load_network.argtypes = [c_char_p, c_char_p, c_int]
    # The handle is only ever passed back to C, so keep it as a void pointer.
    load_network.restype = c_void_p
    return load_network(cfg, weights, 0)

def load_img(f):
    """Load the image file f through lib.load_image_color and return an IMAGE.

    Both integer arguments of the C function are passed as 0.
    """
    load_image = lib.load_image_color
    load_image.argtypes = [c_char_p, c_int, c_int]
    load_image.restype = IMAGE
    return load_image(f, 0, 0)

def load_img_from_np(img_np, w, h, c):
    """Convert a numpy image array into a darknet IMAGE via lib.np_to_image.

    img_np -- array with h rows, w columns and c channels; cast to uint8
    w, h, c -- width, height and channel count passed to the C function

    Returns the IMAGE struct produced by the C side.
    """
    load_image_np = lib.np_to_image
    # The C parameter is `unsigned char*`, so the pointer type is c_ubyte (not c_uint).
    load_image_np.argtypes = [POINTER(c_ubyte), c_int, c_int, c_int]
    load_image_np.restype = IMAGE

    # C indexes the buffer as row-major interleaved bytes, so force that layout.
    pixels = np.ascontiguousarray(img_np.astype(np.uint8))
    return load_image_np(pixels.ctypes.data_as(POINTER(c_ubyte)), w, h, c)

def letterbox_img(im, w, h):
    """Call lib.letterbox_image(im, w, h) and return the resulting IMAGE."""
    letterbox = lib.letterbox_image
    letterbox.argtypes = [IMAGE, c_int, c_int]
    letterbox.restype = IMAGE
    return letterbox(im, w, h)

def predict(net, im):
    """Run lib.network_predict_image on im and return the output as a float pointer."""
    pred = lib.network_predict_image
    pred.argtypes = [c_void_p, IMAGE]
    pred.restype = POINTER(c_float)
    return pred(net, im)

def classify(net, meta, im):
    """Return (class name, score) pairs for im, highest score first.

    One pair is produced for each of the meta.classes entries, read from the
    output of predict.
    """
    out = predict(net, im)
    scored = [(meta.names[i], out[i]) for i in range(meta.classes)]
    # Negated key sorts descending while keeping the original order of ties.
    return sorted(scored, key=lambda pair: -pair[1])

def detect(net, out_size, names, im, thresh=0.24, hier_thresh=0.5):
    """Run lib.detect_img on an image and collect the reported objects.

    net         -- network handle returned by load_net
    out_size    -- capacity of the C result array (see net_output_size)
    names       -- list of class names indexed by class id; each is stripped
    im          -- IMAGE passed to C (detect_img frees its pixel data)
    thresh      -- probability threshold passed to detect_img
    hier_thresh -- hierarchy threshold passed to detect_img

    Returns a list of [name, prob, (left, right, top, bottom)] entries and
    prints one line per detection.
    """
    detect_im = lib.detect_img
    detect_im.argtypes = [c_void_p, IMAGE, c_float, c_float]
    # The C side returns a pointer to an array of out_size records.
    detect_im.restype = POINTER(BOXS_LABEL * out_size)
    records = detect_im(net, im, thresh, hier_thresh).contents

    objects_list = []
    # Element 0 carries the number of valid records.
    for idx in range(records[0].validate_number):
        record = records[idx]
        box = record.box
        found = [names[box.id].strip(), record.prob,
                 (box.left, box.right, box.top, box.bottom)]
        objects_list.append(found)
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print('detect \'%s\' in (left:%d, right:%d, top:%d, bottom:%d) with prob %.3f'
              % (found[0], found[2][0], found[2][1], found[2][2], found[2][3], found[1]))

    return objects_list

def net_size(net):
    """Return (width, height) of the network as reported by libdarknet."""
    net_w = lib.network_width
    net_h = lib.network_height
    for accessor in (net_w, net_h):
        accessor.argtypes = [c_void_p]
        accessor.restype = c_int
    return (net_w(net), net_h(net))

def net_output_size(net):
    """Return lib.get_last_layer_size(net), the capacity of detect's result array."""
    last_layer_size = lib.get_last_layer_size
    last_layer_size.argtypes = [c_void_p]
    last_layer_size.restype = c_int
    return last_layer_size(net)

def read_names(f):
    """Read the class-name file f and return its lines.

    Lines keep their trailing newline; detect strips each name before use.
    """
    with open(f, 'r') as names_file:
        return names_file.readlines()

def draw_objects(image, objects):
    """Draw a box and a 'name-prob' caption on image for every detected object.

    objects is a list of [name, prob, (left, right, top, bottom)] entries as
    produced by detect. The image is modified in place and also returned.
    """
    for label, prob, (left, right, top, bottom) in objects:
        cv2.rectangle(image, (int(left), int(top)), (int(right), int(bottom)),
                      (0, 0, 255), thickness=4)
        text = '%s-%.4f' % (label, prob)
        # Caption goes 5 pixels above the top-left corner of the box.
        cv2.putText(image, text, (int(left), int(top - 5)),
                    cv2.FONT_HERSHEY_SIMPLEX, 2, (0, 255, 0), thickness=4)
    return image

if __name__ == "__main__":
    # Demo: load the trained network, then detect and draw objects on every
    # frame of a video file.
    net = load_net("cfg/my_tiny-yolo-voc.cfg", "/home/xxx/darknet/backup/my_tiny-yolo-voc.backup")
    out_size = net_output_size(net)
    names = read_names("data/my_voc.names")

    # Single-image variant, kept for reference. The drawing step needs the
    # image as a numpy array, so it is read with cv2 as well:
    #
    #   path = "/home/xxx/py-faster-rcnn/trainval/JPEGImages/00135.jpg"
    #   im = load_img(path)
    #   im_np = cv2.imread(path)
    #   objects = detect(net, out_size, names, im)
    #   img = draw_objects(im_np, objects)
    #
    #   resz = cv2.resize(img, (768, 512))
    #   cv2.imshow('detect', resz)
    #   cv2.waitKey(0)

    video_file = "/home/xxx/py-faster-rcnn/trainval/test.avi"
    cap = cv2.VideoCapture(video_file)
    try:
        if cap.isOpened():
            while True:
                ret, im_np = cap.read()
                if not ret:
                    break
                # shape is (rows, cols, channels) -> pass width, height, channels.
                image = load_img_from_np(im_np, im_np.shape[1], im_np.shape[0], im_np.shape[2])

                objects = detect(net, out_size, names, image)
                img = draw_objects(im_np, objects)

                resz = cv2.resize(img, (768, 512))
                cv2.imshow('detect', resz)
                cv2.waitKey(1)
    finally:
        # Release the capture and close the preview window on every exit path.
        cap.release()
        cv2.destroyAllWindows()
内容来自用户分享和网络整理，不保证内容的准确性，如有侵权内容，可联系管理员处理
标签：
相关文章推荐
新的分享
章节导航