Struck跟踪算法介绍及代码解读(三)
2014-09-24 09:44
197 查看
博客:http://blog.csdn.net/qianxin_dh
邮箱:qianxin_dh@163.com本人水平有限,代码理解难免有不正确的地方,希望大家能够见谅。
main.cpp
/* * Struck: Structured Output Tracking with Kernels * * Code to accompany the paper: * Struck: Structured Output Tracking with Kernels * Sam Hare, Amir Saffari, Philip H. S. Torr * International Conference on Computer Vision (ICCV), 2011 * * Copyright (C) 2011 Sam Hare, Oxford Brookes University, Oxford, UK * * This file is part of Struck. * * Struck is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * Struck is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with Struck. If not, see <http://www.gnu.org/licenses/>. * */ #include "Tracker.h" #include "Config.h" #include <iostream> #include <fstream> #include <opencv/cv.h> #include <opencv/highgui.h> #include "vot.hpp" using namespace std; using namespace cv; static const int kLiveBoxWidth = 80; static const int kLiveBoxHeight = 80; void rectangle(Mat& rMat, const FloatRect& rRect, const Scalar& rColour) { IntRect r(rRect); rectangle(rMat, Point(r.XMin(), r.YMin()), Point(r.XMax(), r.YMax()), rColour); } int main(int argc, char* argv[]) { // 读取文件对程序参数进行初始化 string configPath = "config.txt"; if (argc > 1) { configPath = argv[1]; } Config conf(configPath); //Config类主要读取config.txt中的参数 if (conf.features.size() == 0) { cout << "error: no features specified in config" << endl; return EXIT_FAILURE; } Tracker tracker(conf); //Check if --challenge was passed as an argument bool challengeMode = false; for (int i = 1; i < argc; i++) { if (strcmp("--challenge", argv[i]) == 0) { //判断是否有挑战模式(vot挑战) challengeMode = true; } } if (challengeMode) { //VOT(Visual object tracking)挑战,它提供了一个公共平台,目标是比较各种跟踪算法再短期跟踪内的性能,讨论视觉跟踪领域的发展。 //load region, images and prepare for output Mat frameOrig; Mat frame; VOT vot_io("region.txt", "images.txt", "output.txt"); vot_io.getNextImage(frameOrig); resize(frameOrig, frame, Size(conf.frameWidth, conf.frameHeight)); cv::Rect initPos = vot_io.getInitRectangle(); vot_io.outputBoundingBox(initPos); float scaleW = (float)conf.frameWidth/frameOrig.cols; float scaleH = (float)conf.frameHeight/frameOrig.rows; FloatRect initBB_vot = FloatRect(initPos.x*scaleW, initPos.y*scaleH, initPos.width*scaleW, initPos.height*scaleH); tracker.Initialise(frame, initBB_vot); while (vot_io.getNextImage(frameOrig) == 1){ resize(frameOrig, frame, Size(conf.frameWidth, conf.frameHeight)); tracker.Track(frame); const FloatRect& bb = tracker.GetBB(); float x = bb.XMin()/scaleW; float y = bb.YMin()/scaleH; float w = bb.Width()/scaleW; float h = bb.Height()/scaleH; cv::Rect output = cv::Rect(x,y,w,h); vot_io.outputBoundingBox(output); } return 0; } ofstream outFile; if (conf.resultsPath != "") { outFile.open(conf.resultsPath.c_str(), ios::out); //将程序写入resultpath if (!outFile) { cout << "error: could not open results file: " << conf.resultsPath << endl; return EXIT_FAILURE; } } // if no sequence specified then use the camera bool useCamera = (conf.sequenceName == ""); VideoCapture cap; int startFrame = -1; int endFrame = -1; FloatRect initBB; string imgFormat; float scaleW = 1.f; float scaleH = 1.f; if (useCamera) { if (!cap.open(0)) { cout << "error: could not start camera capture" << endl; return EXIT_FAILURE; } startFrame = 0; endFrame = INT_MAX; /* maximum (signed) int value */ Mat tmp; cap >> tmp; scaleW = (float)conf.frameWidth/tmp.cols; scaleH = (float)conf.frameHeight/tmp.rows; initBB = IntRect(conf.frameWidth/2-kLiveBoxWidth/2, conf.frameHeight/2-kLiveBoxHeight/2, kLiveBoxWidth, kLiveBoxHeight); cout << "press 'i' to initialise tracker" << endl; } else { // parse frames file string framesFilePath = conf.sequenceBasePath+"/"+conf.sequenceName+"/"+conf.sequenceName+"_frames.txt"; //girl_frames.txt的文件路径,该文件放在girl文件夹里,内容为0,501。 ifstream framesFile(framesFilePath.c_str(), ios::in); if (!framesFile) { cout << "error: could not open sequence frames file: " << framesFilePath << endl; return EXIT_FAILURE; } string framesLine; getline(framesFile, framesLine); sscanf(framesLine.c_str(), "%d,%d", &startFrame, &endFrame); //startFrame=0;endFrame=501; if (framesFile.fail() || startFrame == -1 || endFrame == -1) { cout << "error: could not parse sequence frames file" << endl; return EXIT_FAILURE; } imgFormat = conf.sequenceBasePath+"/"+conf.sequenceName+"/imgs/img%05d.png"; // read first frame to get size char imgPath[256]; sprintf(imgPath, imgFormat.c_str(), startFrame); //sprintf把格式化的数据写入某个字符串缓冲区(imgPath); Mat tmp = cv::imread(imgPath, 0); scaleW = (float)conf.frameWidth/tmp.cols; //=1; scaleH = (float)conf.frameHeight/tmp.rows; //=1; // read init box from ground truth file string gtFilePath = conf.sequenceBasePath+"/"+conf.sequenceName+"/"+conf.sequenceName+"_gt.txt"; //读取girl_gt.txt文件 ifstream gtFile(gtFilePath.c_str(), ios::in); if (!gtFile) { cout << "error: could not open sequence gt file: " << gtFilePath << endl; return EXIT_FAILURE; } string gtLine; getline(gtFile, gtLine); float xmin = -1.f; float ymin = -1.f; float width = -1.f; float height = -1.f; sscanf(gtLine.c_str(), "%f,%f,%f,%f", &xmin, &ymin, &width, &height); //128,46,104,127 if (gtFile.fail() || xmin < 0.f || ymin < 0.f || width < 0.f || height < 0.f) { cout << "error: could not parse sequence gt file" << endl; return EXIT_FAILURE; } initBB = FloatRect(xmin*scaleW, ymin*scaleH, width*scaleW, height*scaleH); } if (!conf.quietMode) { namedWindow("result"); } Mat result(conf.frameHeight, conf.frameWidth, CV_8UC3); bool paused = false; bool doInitialise = false; srand(conf.seed); for (int frameInd = startFrame; frameInd <= endFrame; ++frameInd) //逐帧处理 { Mat frame; if (useCamera) //若使用摄像头 { Mat frameOrig; cap >> frameOrig; resize(frameOrig, frame, Size(conf.frameWidth, conf.frameHeight)); flip(frame, frame, 1); frame.copyTo(result); if (doInitialise) { if (tracker.IsInitialised()) { tracker.Reset(); } else { tracker.Initialise(frame, initBB); } doInitialise = false; } else if (!tracker.IsInitialised()) { rectangle(result, initBB, CV_RGB(255, 255, 255)); } } else //若读取图片序列 { char imgPath[256]; sprintf(imgPath, imgFormat.c_str(), frameInd); Mat frameOrig = cv::imread(imgPath, 0); if (frameOrig.empty()) { cout << "error: could not read frame: " << imgPath << endl; return EXIT_FAILURE; } resize(frameOrig, frame, Size(conf.frameWidth, conf.frameHeight)); //将读取的每帧图像统一为320*240; cvtColor(frame, result, CV_GRAY2RGB); if (frameInd == startFrame) { tracker.Initialise(frame, initBB); //对第一帧进行初始化 } } if (tracker.IsInitialised()) { tracker.Track(frame); //开始跟踪 if (!conf.quietMode && conf.debugMode) { tracker.Debug(); //用于显示样本图像 } rectangle(result, tracker.GetBB(), CV_RGB(0, 255, 0)); if (outFile) { const FloatRect& bb = tracker.GetBB(); outFile << bb.XMin()/scaleW << "," << bb.YMin()/scaleH << "," << bb.Width()/scaleW << "," << bb.Height()/scaleH << endl; } //输出跟踪结果坐标 } if (!conf.quietMode) { imshow("result", result); //显示跟踪画面 int key = waitKey(paused ? 0 : 1); if (key != -1) { if (key == 27 || key == 113) // esc q { break; } else if (key == 112) // p { paused = !paused; } else if (key == 105 && useCamera) { doInitialise = true; } } if (conf.debugMode && frameInd == endFrame) { cout << "\n\nend of sequence, press any key to exit" << endl; waitKey(); } } } if (outFile.is_open()) { outFile.close(); } return EXIT_SUCCESS; }
Tracker.cpp
#include "Tracker.h" #include "Config.h" #include "ImageRep.h" #include "Sampler.h" #include "Sample.h" #include "GraphUtils/GraphUtils.h" #include "HaarFeatures.h" #include "RawFeatures.h" #include "HistogramFeatures.h" #include "MultiFeatures.h" #include "Kernels.h" #include "LaRank.h" #include <opencv/cv.h> #include <opencv/highgui.h> #include <Eigen/Core> #include <vector> #include <algorithm> using namespace cv; using namespace std; using namespace Eigen; Tracker::Tracker(const Config& conf) : //构造函数,对参数进行初始化 m_config(conf), m_initialised(false), m_pLearner(0), m_debugImage(2*conf.searchRadius+1, 2*conf.searchRadius+1, CV_32FC1), m_needsIntegralImage(false) { Reset(); } Tracker::~Tracker() { delete m_pLearner; for (int i = 0; i < (int)m_features.size(); ++i) { delete m_features[i]; delete m_kernels[i]; } } void Tracker::Reset() //因为初始化为haar特征核高斯核函数,所以m_needsIntegralImage = true,m_needsIntegralHist = false; { m_initialised = false; m_debugImage.setTo(0); if (m_pLearner) delete m_pLearner; for (int i = 0; i < (int)m_features.size(); ++i) { delete m_features[i]; delete m_kernels[i]; } m_features.clear(); m_kernels.clear(); m_needsIntegralImage = false; m_needsIntegralHist = false; int numFeatures = m_config.features.size(); vector<int> featureCounts; for (int i = 0; i < numFeatures; ++i) { switch (m_config.features[i].feature) { case Config::kFeatureTypeHaar: m_features.push_back(new HaarFeatures(m_config)); m_needsIntegralImage = true; break; case Config::kFeatureTypeRaw: m_features.push_back(new RawFeatures(m_config)); break; case Config::kFeatureTypeHistogram: m_features.push_back(new HistogramFeatures(m_config)); m_needsIntegralHist = true; break; } featureCounts.push_back(m_features.back()->GetCount()); switch (m_config.features[i].kernel) { case Config::kKernelTypeLinear: m_kernels.push_back(new LinearKernel()); break; case Config::kKernelTypeGaussian: m_kernels.push_back(new GaussianKernel(m_config.features[i].params[0])); break; case Config::kKernelTypeIntersection: m_kernels.push_back(new IntersectionKernel()); break; case Config::kKernelTypeChi2: m_kernels.push_back(new Chi2Kernel()); break; } } if (numFeatures > 1) { MultiFeatures* f = new MultiFeatures(m_features); m_features.push_back(f); MultiKernel* k = new MultiKernel(m_kernels, featureCounts); m_kernels.push_back(k); } m_pLearner = new LaRank(m_config, *m_features.back(), *m_kernels.back()); } void Tracker::Initialise(const cv::Mat& frame, FloatRect bb) { m_bb = IntRect(bb);//将目标框坐标转为int型 //该类主要实现了积分图计算 ImageRep image(frame, m_needsIntegralImage, m_needsIntegralHist); //后两个参数分别为true,false for (int i = 0; i < 1; ++i) { UpdateLearner(image);// 更新预测函数 } m_initialised = true; } void Tracker::Track(const cv::Mat& frame) { assert(m_initialised); ImageRep image(frame, m_needsIntegralImage, m_needsIntegralHist); //获得当前帧的积分图 vector<FloatRect> rects = Sampler::PixelSamples(m_bb, m_config.searchRadius); //抽样 vector<FloatRect> keptRects; keptRects.reserve(rects.size()); for (int i = 0; i < (int)rects.size(); ++i) { if (!rects[i].IsInside(image.GetRect())) continue; keptRects.push_back(rects[i]); //将超出图像范围的框舍弃,剩余的保留在keptRects中 } MultiSample sample(image, keptRects); //多样本类,主要包括样本框以及ImageRep image vector<double> scores; m_pLearner->Eval(sample, scores); //scores里存放的是论文中公式(10)后半部分 double bestScore = -DBL_MAX; int bestInd = -1; for (int i = 0; i < (int)keptRects.size(); ++i) { if (scores[i] > bestScore) { bestScore = scores[i]; bestInd = i; //找到bestScore } } UpdateDebugImage(keptRects, m_bb, scores);//更新debug图像,用于显示 if (bestInd != -1) { m_bb = keptRects[bestInd]; UpdateLearner(image); #if VERBOSE cout << "track score: " << bestScore << endl; #endif } } void Tracker::UpdateDebugImage(const vector<FloatRect>& samples, const FloatRect& centre, const vector<double>& scores) { double mn = VectorXd::Map(&scores[0], scores.size()).minCoeff(); //Map:将现存的结构映射到Eigen的数据结构里,进行计算 double mx = VectorXd::Map(&scores[0], scores.size()).maxCoeff(); //R.minCoeff()=min(R(:)), R.maxCoeff()=max(R(:)); m_debugImage.setTo(0); //置为全黑色 for (int i = 0; i < (int)samples.size(); ++i) { int x = (int)(samples[i].XMin() - centre.XMin()); int y = (int)(samples[i].YMin() - centre.YMin()); m_debugImage.at<float>(m_config.searchRadius+y,m_config.searchRadius+x)=(float)((scores[i]-mn)/(mx-mn));//scores得分越大的框,会在m_debugImage上具有越大的值,即该点越亮(类似于置信图) } } void Tracker::Debug() { imshow("tracker", m_debugImage); //显示m_debugImage图像 m_pLearner->Debug(); } void Tracker::UpdateLearner(const ImageRep& image) //更新预测函数 { // note these return the centre sample at index 0 vector<FloatRect> rects = Sampler::RadialSamples(m_bb, 2*m_config.searchRadius, 5, 16);//5*16=80,加上一个原始rect,共包含81个rect //vector<FloatRect> rects = Sampler::PixelSamples(m_bb, 2*m_config.searchRadius, true); vector<FloatRect> keptRects; keptRects.push_back(rects[0]); // 原始目标框 for (int i = 1; i < (int)rects.size(); ++i) { if (!rects[i].IsInside(image.GetRect())) continue; //判断生成的样本框是否超出图像范围,超出的舍弃 keptRects.push_back(rects[i]); } #if VERBOSE cout << keptRects.size() << " samples" << endl; #endif MultiSample sample(image, keptRects); //多样本类对象sample,包含ImageRep& image,以及保留下来样本框 m_pLearner->Update(sample, 0); //更新,在LaRank类下实现 }
LaRank.h
#ifndef LARANK_H #define LARANK_H #include "Rect.h" #include "Sample.h" #include <vector> #include <Eigen/Core> #include <opencv/cv.h> class Config; class Features; class Kernel; class LaRank //文献《Solving multiclass support vector machine with LaRank》,该类实现了struck算法的主要步骤 { public: LaRank(const Config& conf, const Features& features, const Kernel& kernel); //初始化参数,特征值,核 ~LaRank(); virtual void Eval(const MultiSample& x, std::vector<double>& results); virtual void Update(const MultiSample& x, int y); virtual void Debug(); private: struct SupportPattern { std::vector<Eigen::VectorXd> x; //特征值 std::vector<FloatRect> yv; //变化关系 std::vector<cv::Mat> images; //图像片 int y; //索引值 int refCount; //统计sp的个数? }; struct SupportVector { SupportPattern* x; int y; double b; //beta double g; //gradient cv::Mat image; }; const Config& m_config; const Features& m_features; const Kernel& m_kernel; std::vector<SupportPattern*> m_sps; std::vector<SupportVector*> m_svs; cv::Mat m_debugImage; double m_C; Eigen::MatrixXd m_K; inline double Loss(const FloatRect& y1, const FloatRect& y2) const //损失函数 { // overlap loss return 1.0-y1.Overlap(y2); // squared distance loss //double dx = y1.XMin()-y2.XMin(); //double dy = y1.YMin()-y2.YMin(); //return dx*dx+dy*dy; } double ComputeDual() const; void SMOStep(int ipos, int ineg); std::pair<int, double> MinGradient(int ind); void ProcessNew(int ind); void Reprocess(); void ProcessOld(); void Optimize(); int AddSupportVector(SupportPattern* x, int y, double g); void RemoveSupportVector(int ind); void RemoveSupportVectors(int ind1, int ind2); void SwapSupportVectors(int ind1, int ind2); void BudgetMaintenance(); void BudgetMaintenanceRemove(); double Evaluate(const Eigen::VectorXd& x, const FloatRect& y) const; void UpdateDebugImage(); }; #endif
LaRank.cpp
#include "LaRank.h" #include "Config.h" #include "Features.h" #include "Kernels.h" #include "Sample.h" #include "Rect.h" #include "GraphUtils/GraphUtils.h" #include <Eigen/Array> #include <opencv/highgui.h> static const int kTileSize = 30; using namespace cv; using namespace std; using namespace Eigen; static const int kMaxSVs = 2000; // TODO (only used when no budget) LaRank::LaRank(const Config& conf, const Features& features, const Kernel& kernel) : m_config(conf), m_features(features), m_kernel(kernel), m_C(conf.svmC) { int N = conf.svmBudgetSize > 0 ? conf.svmBudgetSize+2 : kMaxSVs; //N=100+2,特征向量的个数不能超过这个阈值 m_K = MatrixXd::Zero(N, N); //m_K表示核矩阵,102*102 m_debugImage = Mat(800, 600, CV_8UC3); } LaRank::~LaRank() { } double LaRank::Evaluate(const Eigen::VectorXd& x, const FloatRect& y) const //论文中公式10后半部分计算,即F { double f = 0.0; for (int i = 0; i < (int)m_svs.size(); ++i) { const SupportVector& sv = *m_svs[i]; f += sv.b*m_kernel.Eval(x, sv.x->x[sv.y]); //beta*高斯核 } return f; } void LaRank::Eval(const MultiSample& sample, std::vector<double>& results) { const FloatRect& centre(sample.GetRects()[0]); //原始目标框 vector<VectorXd> fvs; const_cast<Features&>(m_features).Eval(sample, fvs); //fvs存放haar特征值 results.resize(fvs.size()); for (int i = 0; i < (int)fvs.size(); ++i) { // express y in coord frame of centre sample FloatRect y(sample.GetRects()[i]); y.Translate(-centre.XMin(), -centre.YMin()); //将每个框的横纵坐标分别减去原始目标框的横纵坐标 results[i] = Evaluate(fvs[i], y); //计算每个框的F函数,结果保存在results中。 } } void LaRank::Update(const MultiSample& sample, int y) { // add new support pattern SupportPattern* sp = new SupportPattern; //定义一个sp const vector<FloatRect>& rects = sample.GetRects(); //获得所有的样本框 FloatRect centre = rects[y]; //原始目标框 for (int i = 0; i < (int)rects.size(); ++i) { // express r in coord frame of centre sample FloatRect r = rects[i]; r.Translate(-centre.XMin(), -centre.YMin()); //这就表示帧间目标位置变化关系 sp->yv.push_back(r); if (!m_config.quietMode && m_config.debugMode) { // store a thumbnail for each sample Mat im(kTileSize, kTileSize, CV_8UC1); IntRect rect = rects[i]; cv::Rect roi(rect.XMin(), rect.YMin(), rect.Width(), rect.Height()); //感兴趣的区域是那些抽取的样本区域 cv::resize(sample.GetImage().GetImage(0)(roi), im, im.size()); //0表示通道数,将感兴趣区域统一为30*30,并保存在sp里的images sp->images.push_back(im); } } // evaluate features for each sample sp->x.resize(rects.size()); //有多少个感兴趣的框,就有多少个特征值向量。 const_cast<Features&>(m_features).Eval(sample, sp->x); //将每个样本框计算得到的haar特征存入sp->x,这里关于haar特征的代码不再列出,我将代码提取出来单独写出一篇博客《http://blog.csdn.net/qianxin_dh/article/details/39268113》 sp->y = y; sp->refCount = 0; m_sps.push_back(sp); //存储sp ProcessNew((int)m_sps.size()-1); //执行该步骤,添加支持向量,并对beta值进行调整 BudgetMaintenance(); //保证支持向量没有超出限定阈值 for (int i = 0; i < 10; ++i) { Reprocess(); //包括processold:增加新的sv;optimize:在现有的sv基础上调整beta值 BudgetMaintenance(); } } void LaRank::BudgetMaintenance() { if (m_config.svmBudgetSize > 0) { while ((int)m_svs.size() > m_config.svmBudgetSize) { BudgetMaintenanceRemove(); //支持向量的个数超出阈值后,找到对于F函数影响最小的负sv,并移除。 } } } void LaRank::Reprocess() { ProcessOld(); //每个processold步骤伴随着10个optimize步骤。 for (int i = 0; i < 10; ++i) { Optimize(); } } double LaRank::ComputeDual() const { double d = 0.0; for (int i = 0; i < (int)m_svs.size(); ++i) { const SupportVector* sv = m_svs[i]; d -= sv->b*Loss(sv->x->yv[sv->y], sv->x->yv[sv->x->y]); for (int j = 0; j < (int)m_svs.size(); ++j) { d -= 0.5*sv->b*m_svs[j]->b*m_K(i,j); } } return d; } void LaRank::SMOStep(int ipos, int ineg) { if (ipos == ineg) return; SupportVector* svp = m_svs[ipos]; //定义一个正支持向量 SupportVector* svn = m_svs[ineg]; //定义一个负支持向量 assert(svp->x == svn->x); SupportPattern* sp = svp->x; //定义一个支持模式sp,将正支持向量的支持模式赋予sp #if VERBOSE cout << "SMO: gpos:" << svp->g << " gneg:" << svn->g << endl; #endif if ((svp->g - svn->g) < 1e-5) { #if VERBOSE cout << "SMO: skipping" << endl; #endif } else { //论文中的Algorithm步骤 double kii = m_K(ipos, ipos) + m_K(ineg, ineg) - 2*m_K(ipos, ineg); double lu = (svp->g-svn->g)/kii; // no need to clamp against 0 since we'd have skipped in that case double l = min(lu, m_C*(int)(svp->y == sp->y) - svp->b); svp->b += l; svn->b -= l; // update gradients for (int i = 0; i < (int)m_svs.size(); ++i) { SupportVector* svi = m_svs[i]; svi->g -= l*(m_K(i, ipos) - m_K(i, ineg)); } #if VERBOSE cout << "SMO: " << ipos << "," << ineg << " -- " << svp->b << "," << svn->b << " (" << l << ")" << endl; #endif } // check if we should remove either sv now if (fabs(svp->b) < 1e-8) //beta为0,该向量被移除 { RemoveSupportVector(ipos); if (ineg == (int)m_svs.size()) { // ineg and ipos will have been swapped during sv removal ineg = ipos; } } if (fabs(svn->b) < 1e-8) //beta=0,该向量被移除 { RemoveSupportVector(ineg); } } pair<int, double> LaRank::MinGradient(int ind) { const SupportPattern* sp = m_sps[ind]; pair<int, double> minGrad(-1, DBL_MAX); for (int i = 0; i < (int)sp->yv.size(); ++i) { double grad = -Loss(sp->yv[i], sp->yv[sp->y]) - Evaluate(sp->x[i], sp->yv[i]);//通过公式10找到最小梯度对应的样本框 if (grad < minGrad.second) { minGrad.first = i; minGrad.second = grad; } } return minGrad; } void LaRank::ProcessNew(int ind) //可以添加新的支持向量,增加的正负支持向量(sv)具有相同的支持模式(sp) { // gradient is -f(x,y) since loss=0 int ip = AddSupportVector(m_sps[ind], m_sps[ind]->y, -Evaluate(m_sps[ind]->x[m_sps[ind]->y],m_sps[ind]->yv[m_sps[ind]->y])); //处理当前新样本,将上一帧目标位置作为正向量加入 pair<int, double> minGrad = MinGradient(ind); //int,double分别是具有最小梯度的样本框存放的位置,最小梯度的数值 int in = AddSupportVector(m_sps[ind], minGrad.first, minGrad.second); //将当前具有最小梯度的样本作为负向量加入 SMOStep(ip, in); //Algorithm 1,更新beta和gradient值 } void LaRank::ProcessOld() //可以添加新的支持向量 { if (m_sps.size() == 0) return; // choose pattern to process int ind = rand() % m_sps.size(); //随机选取sp // find existing sv with largest grad and nonzero beta int ip = -1; double maxGrad = -DBL_MAX; for (int i = 0; i < (int)m_svs.size(); ++i) { if (m_svs[i]->x != m_sps[ind]) continue; const SupportVector* svi = m_svs[i]; if (svi->g > maxGrad && svi->b < m_C*(int)(svi->y == m_sps[ind]->y)) //找出符合该条件的,作为y+,后一个条件保证了y+是从现存的sv中找出,因此不会增加新的向量 { ip = i; maxGrad = svi->g; } } assert(ip != -1); if (ip == -1) return; // find potentially new sv with smallest grad pair<int, double> minGrad = MinGradient(ind); int in = -1; for (int i = 0; i < (int)m_svs.size(); ++i) { if (m_svs[i]->x != m_sps[ind]) continue; //找出满足该条件的,作为y- if (m_svs[i]->y == minGrad.first) { in = i; break; } } if (in == -1) { // add new sv in = AddSupportVector(m_sps[ind], minGrad.first, minGrad.second); //将该样本作为负sv加入 } SMOStep(ip, in); //更新beta和gradient的值 } void LaRank::Optimize() // { if (m_sps.size() == 0) return; // choose pattern to optimize int ind = rand() % m_sps.size(); //随机处理现存的sp int ip = -1; int in = -1; double maxGrad = -DBL_MAX; double minGrad = DBL_MAX; for (int i = 0; i < (int)m_svs.size(); ++i) { if (m_svs[i]->x != m_sps[ind]) continue; const SupportVector* svi = m_svs[i]; if(svi->g>maxGrad&&svi->b<m_C*(int)(svi->y==m_sps->[y])) //将满足该条件的作为y+ { ip = i; maxGrad = svi->g; } if (svi->g < minGrad) //将满足该条件的作为y- { in = i; minGrad = svi->g; } } assert(ip != -1 && in != -1); if (ip == -1 || in == -1) { // this shouldn't happen cout << "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!" << endl; return; } SMOStep(ip, in); //更新beta和gradient } int LaRank::AddSupportVector(SupportPattern* x, int y, double g) { SupportVector* sv = new SupportVector; sv->b = 0.0; //beta初始化为0 sv->x = x; sv->y = y; sv->g = g; int ind = (int)m_svs.size(); m_svs.push_back(sv); x->refCount++; #if VERBOSE cout << "Adding SV: " << ind << endl; #endif // update kernel matrix for (int i = 0; i < ind; ++i) //计算核矩阵 { m_K(i,ind) = m_kernel.Eval(m_svs[i]->x->x[m_svs[i]->y], x->x[y]); m_K(ind,i) = m_K(i,ind); } m_K(ind,ind) = m_kernel.Eval(x->x[y]); return ind; } void LaRank::SwapSupportVectors(int ind1, int ind2) { SupportVector* tmp = m_svs[ind1]; m_svs[ind1] = m_svs[ind2]; m_svs[ind2] = tmp; VectorXd row1 = m_K.row(ind1); m_K.row(ind1) = m_K.row(ind2); m_K.row(ind2) = row1; VectorXd col1 = m_K.col(ind1); m_K.col(ind1) = m_K.col(ind2); m_K.col(ind2) = col1; } void LaRank::RemoveSupportVector(int ind) { #if VERBOSE cout << "Removing SV: " << ind << endl; #endif m_svs[ind]->x->refCount--; if (m_svs[ind]->x->refCount == 0) { // also remove the support pattern for (int i = 0; i < (int)m_sps.size(); ++i) { if (m_sps[i] == m_svs[ind]->x) { delete m_sps[i]; m_sps.erase(m_sps.begin()+i); break; } } } // make sure the support vector is at the back, this // lets us keep the kernel matrix cached and valid if (ind < (int)m_svs.size()-1) { SwapSupportVectors(ind, (int)m_svs.size()-1); ind = (int)m_svs.size()-1; } delete m_svs[ind]; m_svs.pop_back(); } void LaRank::BudgetMaintenanceRemove() { // find negative sv with smallest effect on discriminant function if removed double minVal = DBL_MAX; int in = -1; int ip = -1; for (int i = 0; i < (int)m_svs.size(); ++i) { if (m_svs[i]->b < 0.0) //找到负sv { // find corresponding positive sv int j = -1; for (int k = 0; k < (int)m_svs.size(); ++k) { if (m_svs[k]->b > 0.0 && m_svs[k]->x == m_svs[i]->x) //找到同一支持模式下的正sv { j = k; break; } } double val = m_svs[i]->b*m_svs[i]->b*(m_K(i,i) + m_K(j,j) - 2.0*m_K(i,j)); if (val < minVal) //找到对F影响最小的sv { minVal = val; in = i; ip = j; } } } // adjust weight of positive sv to compensate for removal of negative m_svs[ip]->b += m_svs[in]->b; //将负sv移除,其相应的beta值需补偿到正sv上。 // remove negative sv RemoveSupportVector(in); if (ip == (int)m_svs.size()) { // ip and in will have been swapped during support vector removal ip = in; } if (m_svs[ip]->b < 1e-8) //beta值为0,移除该向量 { // also remove positive sv RemoveSupportVector(ip); } // update gradients // TODO: this could be made cheaper by just adjusting incrementally rather than recomputing for (int i = 0; i < (int)m_svs.size(); ++i) { SupportVector& svi = *m_svs[i]; svi.g = -Loss(svi.x->yv[svi.y],svi.x->yv[svi.x->y]) - Evaluate(svi.x->x[svi.y], svi.x->yv[svi.y]); } } void LaRank::Debug() { cout << m_sps.size() << "/" << m_svs.size() << " support patterns/vectors" << endl; UpdateDebugImage(); imshow("learner", m_debugImage); } void LaRank::UpdateDebugImage() //该函数主要用于样本显示,与算法关系不大,这里不做分析了 { m_debugImage.setTo(0); int n = (int)m_svs.size(); if (n == 0) return; const int kCanvasSize = 600; int gridSize = (int)sqrtf((float)(n-1)) + 1; int tileSize = (int)((float)kCanvasSize/gridSize); if (tileSize < 5) { cout << "too many support vectors to display" << endl; return; } Mat temp(tileSize, tileSize, CV_8UC1); int x = 0; int y = 0; int ind = 0; float vals[kMaxSVs]; memset(vals, 0, sizeof(float)*n); int drawOrder[kMaxSVs]; for (int set = 0; set < 2; ++set) { for (int i = 0; i < n; ++i) { if (((set == 0) ? 1 : -1)*m_svs[i]->b < 0.0) continue; drawOrder[ind] = i; vals[ind] = (float)m_svs[i]->b; ++ind; Mat I = m_debugImage(cv::Rect(x, y, tileSize, tileSize)); resize(m_svs[i]->x->images[m_svs[i]->y], temp, temp.size()); cvtColor(temp, I, CV_GRAY2RGB); double w = 1.0; rectangle(I, Point(0, 0), Point(tileSize-1, tileSize-1), (m_svs[i]->b > 0.0) ? CV_RGB(0, (uchar)(255*w), 0) : CV_RGB((uchar)(255*w), 0, 0), 3); x += tileSize; if ((x+tileSize) > kCanvasSize) { y += tileSize; x = 0; } } } const int kKernelPixelSize = 2; int kernelSize = kKernelPixelSize*n; double kmin = m_K.minCoeff(); double kmax = m_K.maxCoeff(); if (kernelSize < m_debugImage.cols && kernelSize < m_debugImage.rows) { Mat K = m_debugImage(cv::Rect(m_debugImage.cols-kernelSize, m_debugImage.rows-kernelSize, kernelSize, kernelSize)); for (int i = 0; i < n; ++i) { for (int j = 0; j < n; ++j) { Mat Kij = K(cv::Rect(j*kKernelPixelSize, i*kKernelPixelSize, kKernelPixelSize, kKernelPixelSize)); uchar v = (uchar)(255*(m_K(drawOrder[i], drawOrder[j])-kmin)/(kmax-kmin)); Kij.setTo(Scalar(v, v, v)); } } } else { kernelSize = 0; } Mat I = m_debugImage(cv::Rect(0, m_debugImage.rows - 200, m_debugImage.cols-kernelSize, 200)); I.setTo(Scalar(255,255,255)); IplImage II = I; setGraphColor(0); drawFloatGraph(vals, n, &II, 0.f, 0.f, I.cols, I.rows); }
相关文章推荐
- Struck跟踪算法介绍及代码解读(一)
- Struck跟踪算法介绍及代码解读(二)
- 时间序列数据库KDB 与Java结合使用介绍 -- 1 KDB Java代码解读
- 【导读】本文介绍如何利用带进度条的ASP无组件实现断点续传下载,给出详细代码
- TabBars代码解读之——Tabbar栏与其他窗口之间的关系
- 分形介绍 && 一个简单的Kotch curve实现代码
- TabBars代码解读之——代码注释与恢复功能
- TabBars代码解读之——Tabbar栏与其他窗口之间的关系
- 介绍一个完全托管代码的对象数据库DB4O
- 条码打印机代码解读!
- Java代码缺陷自动分析工具介绍
- IOS 身份证校验详细介绍及示例代码
- 解读ASP.NET Portal Starter Kit(3)——代码文件篇
- 接口介绍(在.NET下编写中文代码程序)
- prototype1.3.1源代码解读
- 解读WINCE 5.0 KITL代码流程
- 介绍一个VC中代码缩进的工具
- TabBars代码解读之——Visual Studio的自动化接口
- 本文介绍了ASP.NET常用的一些代码
- vlc 代码分析(2)——vlc中vlm介绍