【技术层】##目标检测##源码笔记##YOLO源码,darknet 轻量级深度学习框架,源码笔记 [1] introduction
参考:
https://pjreddie.com/darknet/yolo/
https://github.com/AlexeyAB/darknet
YOLO
one-stage object detection algorithm
作者给出的框架c,cuda写的,轻量高效深度学习框架,caffe的架构也差不多
yolo也train过几十次了,源码写的还是很不错的
Getting started
深度学习目标检测等基础知识:
CNN卷积神经网络基本操作(卷积,池化),softmax,NMS等基本操作;VOC COCO 数据集格式
可以参考这一篇综述:
目标检测综述 arXiv1809.02165 Deep Learning for Generic Object Detection: A Surveyhttps://blog.csdn.net/husterjwx/article/details/88086110
数据输入
[code]./darknet detector test cfg/coco.data cfg/yolov3.cfg yolov3.weights data/dog.jpg
[code]/* Note C语言规定main函数的参数只能有两个,习惯上这两个参数写为argc和argv int argc//参数个数 char **argv //argc个指针数组,用于存放程序参数 如果没有这两个参数,那么默认的argc为1,argc为 ./darknet (就是可执行文件文件名) 注意这里的argc,argv是从操作系统命令行获得的 */ #include<stdio.h> void main(int argc,char**argv) { int i; printf("argc=%d\n",argc); for(i=0;i<argc;i++) { printf("i=%d:%s\n",i,*(argv+i)); } } jiangwenxiang@amax:~/darknet/c_practice$ ./practice_02 jwx is huster argc=4 i=0:./practice_02 i=1:jwx i=2:is i=3:huster
cfg/coco.data:
[code]classes= 80 train = /home/pjreddie/data/coco/trainvalno5k.txt #valid = coco_testdev valid = data/coco_val_5k.list names = data/coco.names backup = /home/pjreddie/backup/ eval=coco
存train val 图片和groundtruth的txt的地址,目标的name文件的地址
cfg/yolov3.cfg::网络配置文件,整体架构network和layer两个struct结构体构成
[code][net] # Testing batch=1 subdivisions=1 # Training # batch=64 # subdivisions=8 width=416 height=416 channels=3 momentum=0.9 decay=0.0005 angle=0 saturation = 1.5 exposure = 1.5 hue=.1 learning_rate=0.001 burn_in=1000 max_batches = 500200 policy=steps steps=400000,450000 scales=.1,.1 [convolutional] batch_normalize=1 filters=32 size=3 stride=1 pad=1 activation=leaky [maxpool] size=2 stride=2
yolov3.weights:网络权重文件
data/dog.jpg:测试图片
weights 用来存训练好的CNN的权重等
整体架构
测试部分
examples/detector.c
./darknet detector test cfg/coco.data cfg/yolov3.cfg yolov3.weights data/dog.jpg
[code]int main(int argc, char **argv) { //test_resize("data/bad.jpg"); //test_box(); //test_convolutional_layer(); if(argc < 2){ fprintf(stderr, "usage: %s <function>\n", argv[0]); //操作系统参数少于2个就打印错误信息 return 0; } gpu_index = find_int_arg(argc, argv, "-i", 0); //找到 -i 0,1,2,3 后面的GPU序号 仅仅一个GPU序号 -i 8 表示GPU序号8 if(find_arg(argc, argv, "-nogpu")) { gpu_index = -1; } #ifndef GPU gpu_index = -1; #else if(gpu_index >= 0){ cuda_set_device(gpu_index); } #endif if (0 == strcmp(argv[1], "average")){ average(argc, argv); } else if (0 == strcmp(argv[1], "yolo")){ run_yolo(argc, argv); } else if (0 == strcmp(argv[1], "super")){ run_super(argc, argv); } else if (0 == strcmp(argv[1], "lsd")){ run_lsd(argc, argv); } else if (0 == strcmp(argv[1], "detector")){ run_detector(argc, argv); } else if (0 == strcmp(argv[1], "detect")){ float thresh = find_float_arg(argc, argv, "-thresh", .5); char *filename = (argc > 4) ? argv[4]: 0; char *outfile = find_char_arg(argc, argv, "-out", 0); int fullscreen = find_arg(argc, argv, "-fullscreen"); test_detector("cfg/coco.data", argv[2], argv[3], filename, thresh, .5, outfile, fullscreen); } else if (0 == strcmp(argv[1], "cifar")){ run_cifar(argc, argv); } else if (0 == strcmp(argv[1], "go")){ run_go(argc, argv); } else if (0 == strcmp(argv[1], "rnn")){ run_char_rnn(argc, argv); } else if (0 == strcmp(argv[1], "coco")){ run_coco(argc, argv); } else if (0 == strcmp(argv[1], "classify")){ predict_classifier("cfg/imagenet1k.data", argv[2], argv[3], argv[4], 5); } else if (0 == strcmp(argv[1], "classifier")){ run_classifier(argc, argv); } else if (0 == strcmp(argv[1], "regressor")){ run_regressor(argc, argv); } else if (0 == strcmp(argv[1], "isegmenter")){ run_isegmenter(argc, argv); } else if (0 == strcmp(argv[1], "segmenter")){ run_segmenter(argc, argv); } else if (0 == strcmp(argv[1], "art")){ run_art(argc, argv); } else if (0 == strcmp(argv[1], "tag")){ run_tag(argc, argv); } else if (0 == strcmp(argv[1], "3d")){ composite_3d(argv[2], argv[3], argv[4], (argc > 5) ? atof(argv[5]) : 0); } else if (0 == strcmp(argv[1], "test")){ test_resize(argv[2]); } else if (0 == strcmp(argv[1], "captcha")){ run_captcha(argc, argv); } else if (0 == strcmp(argv[1], "nightmare")){ run_nightmare(argc, argv); } else if (0 == strcmp(argv[1], "rgbgr")){ rgbgr_net(argv[2], argv[3], argv[4]); } else if (0 == strcmp(argv[1], "reset")){ reset_normalize_net(argv[2], argv[3], argv[4]); } else if (0 == strcmp(argv[1], "denormalize")){ denormalize_net(argv[2], argv[3], argv[4]); } else if (0 == strcmp(argv[1], "statistics")){ statistics_net(argv[2], argv[3]); } else if (0 == strcmp(argv[1], "normalize")){ normalize_net(argv[2], argv[3], argv[4]); } else if (0 == strcmp(argv[1], "rescale")){ rescale_net(argv[2], argv[3], argv[4]); } else if (0 == strcmp(argv[1], "ops")){ operations(argv[2]); } else if (0 == strcmp(argv[1], "speed")){ speed(argv[2], (argc > 3 && argv[3]) ? atoi(argv[3]) : 0); } else if (0 == strcmp(argv[1], "oneoff")){ oneoff(argv[2], argv[3], argv[4]); } else if (0 == strcmp(argv[1], "oneoff2")){ oneoff2(argv[2], argv[3], argv[4], atoi(argv[5])); } else if (0 == strcmp(argv[1], "print")){ print_weights(argv[2], argv[3], atoi(argv[4])); } else if (0 == strcmp(argv[1], "partial")){ partial(argv[2], argv[3], argv[4], atoi(argv[5])); } else if (0 == strcmp(argv[1], "average")){ average(argc, argv); } else if (0 == strcmp(argv[1], "visualize")){ visualize(argv[2], (argc > 3) ? argv[3] : 0); } else if (0 == strcmp(argv[1], "mkimg")){ mkimg(argv[2], argv[3], atoi(argv[4]), atoi(argv[5]), atoi(argv[6]), argv[7]); } else if (0 == strcmp(argv[1], "imtest")){ test_resize(argv[2]); } else { fprintf(stderr, "Not an option: %s\n", argv[1]); } return 0; }
run_detector(argc, argv);
./darknet detector test cfg/coco.data cfg/yolov3.cfg yolov3.weights data/dog.jpg
[code]void run_detector(int argc, char **argv) { char *prefix = find_char_arg(argc, argv, "-prefix", 0); float thresh = find_float_arg(argc, argv, "-thresh", .5); float hier_thresh = find_float_arg(argc, argv, "-hier", .5); int cam_index = find_int_arg(argc, argv, "-c", 0); int frame_skip = find_int_arg(argc, argv, "-s", 0); int avg = find_int_arg(argc, argv, "-avg", 3); if(argc < 4){ fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); return; } char *gpu_list = find_char_arg(argc, argv, "-gpus", 0); char *outfile = find_char_arg(argc, argv, "-out", 0); int *gpus = 0; int gpu = 0; int ngpus = 0; if(gpu_list){ printf("%s\n", gpu_list); int len = strlen(gpu_list); ngpus = 1; int i; for(i = 0; i < len; ++i){ if (gpu_list[i] == ',') ++ngpus; } gpus = calloc(ngpus, sizeof(int)); for(i = 0; i < ngpus; ++i){ gpus[i] = atoi(gpu_list); gpu_list = strchr(gpu_list, ',')+1; } } else { gpu = gpu_index; gpus = &gpu; ngpus = 1; } int clear = find_arg(argc, argv, "-clear"); int fullscreen = find_arg(argc, argv, "-fullscreen"); int width = find_int_arg(argc, argv, "-w", 0); int height = find_int_arg(argc, argv, "-h", 0); int fps = find_int_arg(argc, argv, "-fps", 0); //int class = find_int_arg(argc, argv, "-class", 0); char *datacfg = argv[3]; char *cfg = argv[4]; //.name cfg weight filename要按照顺序写,这里不是写的find_char_arg char *weights = (argc > 5) ? argv[5] : 0; char *filename = (argc > 6) ? argv[6]: 0; if(0==strcmp(argv[2], "test")) test_detector(datacfg, cfg, weights, filename, thresh, hier_thresh, outfile, fullscreen); else if(0==strcmp(argv[2], "train")) train_detector(datacfg, cfg, weights, gpus, ngpus, clear); else if(0==strcmp(argv[2], "valid")) validate_detector(datacfg, cfg, weights, outfile); else if(0==strcmp(argv[2], "valid2")) validate_detector_flip(datacfg, cfg, weights, outfile); else if(0==strcmp(argv[2], "recall")) validate_detector_recall(cfg, weights); else if(0==strcmp(argv[2], "demo")) { list *options = read_data_cfg(datacfg); int classes = option_find_int(options, "classes", 20); char *name_list = option_find_str(options, "names", "data/names.list"); char **names = get_labels(name_list); demo(cfg, weights, thresh, cam_index, filename, names, classes, frame_skip, prefix, avg, hier_thresh, width, height, fps, fullscreen); } //else if(0==strcmp(argv[2], "extract")) extract_detector(datacfg, cfg, weights, cam_index, filename, class, thresh, frame_skip); //else if(0==strcmp(argv[2], "censor")) censor_detector(datacfg, cfg, weights, cam_index, filename, class, thresh, frame_skip); }
到这个
if(0==strcmp(argv[2], "test")) test_detector(datacfg, cfg, weights, filename, thresh, hier_thresh, outfile, fullscreen);
[code]void test_detector(char *datacfg, char *cfgfile, char *weightfile, char *filename, float thresh, float hier_thresh, char *outfile, int fullscreen) { list *options = read_data_cfg(datacfg); //读cfg char *name_list = option_find_str(options, "names", "data/names.list"); char **names = get_labels(name_list); image **alphabet = load_alphabet(); network *net = load_network(cfgfile, weightfile, 0); //初始化network和weights set_batch_network(net, 1); srand(2222222); double time; char buff[256]; char *input = buff; float nms=.45; while(1){ if(filename){ strncpy(input, filename, 256); } else { printf("Enter Image Path: "); fflush(stdout); input = fgets(input, 256, stdin); if(!input) return; strtok(input, "\n"); } image im = load_image_color(input,0,0); image sized = letterbox_image(im, net->w, net->h); //image sized = resize_image(im, net->w, net->h); //image sized2 = resize_max(im, net->w); //image sized = crop_image(sized2, -((net->w - sized2.w)/2), -((net->h - sized2.h)/2), net->w, net->h); //resize_network(net, sized.w, sized.h); layer l = net->layers[net->n-1]; float *X = sized.data; time=what_time_is_it_now(); network_predict(net, X);//前传 printf("%s: Predicted in %f seconds.\n", input, what_time_is_it_now()-time); int nboxes = 0; detection *dets = get_network_boxes(net, im.w, im.h, thresh, hier_thresh, 0, 1, &nboxes); //printf("%d\n", nboxes); //if (nms) do_nms_obj(boxes, probs, l.w*l.h*l.n, l.classes, nms); if (nms) do_nms_sort(dets, nboxes, l.classes, nms); draw_detections(im, dets, nboxes, thresh, names, alphabet, l.classes); free_detections(dets, nboxes); if(outfile){ save_image(im, outfile); } else{ save_image(im, "predictions"); #ifdef OPENCV cvNamedWindow("predictions", CV_WINDOW_NORMAL); if(fullscreen){ cvSetWindowProperty("predictions", CV_WND_PROP_FULLSCREEN, CV_WINDOW_FULLSCREEN); } show_image(im, "predictions", 0); #endif } free_image(im); free_image(sized); if (filename) break; } }
搬砖去了,有空再更
- 深度学习(七十一)darknet 源码阅读
- NetApp DataONTAP核心技术学习笔记(一)
- Ado.Net 实体框架学习笔记4
- 深度学习(七)caffe源码c++学习笔记
- ASP.NET学习笔记--缓存技术
- 学习ASP.NET MVC5框架揭秘笔记-IIS/ASP.NET管道(二)
- 框架学习笔记:深度解析StrangeIoC内部运行机制
- 聚焦CSDN技术主题月:深度学习框架的重构与思考专场回顾
- .NET技术学习笔记:
- C#学习笔记④——.NET的OOP技术相关
- 学习ASP.NET MVC5框架揭秘笔记-ASP.NET MVC是如何运行的(二)
- 学习ASP.NET MVC5框架揭秘笔记-ASP.NET路由(七)
- 框架源码学习笔记
- 深度学习框架的比较(MXNet, Caffe, TensorFlow, Torch, Theano)
- 深度学习框架Caffe学习笔记(2)-MNIST手写数字识别例程
- 学习ASP.NET MVC5框架揭秘笔记-ASP.NET路由(十一)
- WCF学习笔记之 - 搭建WCF技术知识框架
- WCF学习笔记之 - 搭建WCF技术知识框架
- Quartz.NET(作业调度框架) 学习笔记(三)【Cron 表达式】
- ASP.NET 2.0高级编程学习笔记-第3章 应用程序和页面框架