您的位置：首页 > 编程语言

Struck跟踪算法介绍及代码解读(三）

2014-09-24 09:44 197 查看

博客：http://blog.csdn.net/qianxin_dh

邮箱：qianxin_dh@163.com

本人水平有限，代码理解难免有不正确的地方，希望大家能够见谅。

main.cpp

/*
* Struck: Structured Output Tracking with Kernels
*
* Code to accompany the paper:
*   Struck: Structured Output Tracking with Kernels
*   Sam Hare, Amir Saffari, Philip H. S. Torr
*   International Conference on Computer Vision (ICCV), 2011
*
* Copyright (C) 2011 Sam Hare, Oxford Brookes University, Oxford, UK
*
* This file is part of Struck.
*
* Struck is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Struck is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Struck.  If not, see <http://www.gnu.org/licenses/>.
*
*/

#include "Tracker.h"
#include "Config.h"

#include <iostream>
#include <fstream>

#include <opencv/cv.h>
#include <opencv/highgui.h>

#include "vot.hpp"

using namespace std;
using namespace cv;

static const int kLiveBoxWidth = 80;
static const int kLiveBoxHeight = 80;

void rectangle(Mat& rMat, const FloatRect& rRect, const Scalar& rColour)
{
IntRect r(rRect);
rectangle(rMat, Point(r.XMin(), r.YMin()), Point(r.XMax(), r.YMax()), rColour);
}

int main(int argc, char* argv[])
{
// 读取文件对程序参数进行初始化
string configPath = "config.txt";
if (argc > 1)
{
configPath = argv[1];
}
Config conf(configPath);       //Config类主要读取config.txt中的参数

if (conf.features.size() == 0)
{
cout << "error: no features specified in config" << endl;
return EXIT_FAILURE;
}

Tracker tracker(conf);

//Check if --challenge was passed as an argument
bool challengeMode = false;
for (int i = 1; i < argc; i++) {
if (strcmp("--challenge", argv[i]) == 0) {        //判断是否有挑战模式（vot挑战）
challengeMode = true;
}
}

if (challengeMode) {    //VOT(Visual object tracking)挑战，它提供了一个公共平台，目标是比较各种跟踪算法再短期跟踪内的性能，讨论视觉跟踪领域的发展。
//load region, images and prepare for output
Mat frameOrig;
Mat frame;
VOT vot_io("region.txt", "images.txt", "output.txt");
vot_io.getNextImage(frameOrig);
resize(frameOrig, frame, Size(conf.frameWidth, conf.frameHeight));
cv::Rect initPos = vot_io.getInitRectangle();
vot_io.outputBoundingBox(initPos);
float scaleW = (float)conf.frameWidth/frameOrig.cols;
float scaleH = (float)conf.frameHeight/frameOrig.rows;

FloatRect initBB_vot = FloatRect(initPos.x*scaleW, initPos.y*scaleH, initPos.width*scaleW, initPos.height*scaleH);
tracker.Initialise(frame, initBB_vot);

while (vot_io.getNextImage(frameOrig) == 1){
resize(frameOrig, frame, Size(conf.frameWidth, conf.frameHeight));

tracker.Track(frame);
const FloatRect& bb = tracker.GetBB();
float x = bb.XMin()/scaleW;
float y = bb.YMin()/scaleH;
float w = bb.Width()/scaleW;
float h = bb.Height()/scaleH;

cv::Rect output = cv::Rect(x,y,w,h);

vot_io.outputBoundingBox(output);
}

return 0;
}

ofstream outFile;
if (conf.resultsPath != "")
{
outFile.open(conf.resultsPath.c_str(), ios::out);    //将程序写入resultpath
if (!outFile)
{
cout << "error: could not open results file: " << conf.resultsPath << endl;
return EXIT_FAILURE;
}
}

// if no sequence specified then use the camera
bool useCamera = (conf.sequenceName == "");

VideoCapture cap;

int startFrame = -1;
int endFrame = -1;
FloatRect initBB;
string imgFormat;
float scaleW = 1.f;
float scaleH = 1.f;

if (useCamera)
{
if (!cap.open(0))
{
cout << "error: could not start camera capture" << endl;
return EXIT_FAILURE;
}
startFrame = 0;
endFrame = INT_MAX;        /* maximum (signed) int value */
Mat tmp;
cap >> tmp;
scaleW = (float)conf.frameWidth/tmp.cols;
scaleH = (float)conf.frameHeight/tmp.rows;

initBB = IntRect(conf.frameWidth/2-kLiveBoxWidth/2, conf.frameHeight/2-kLiveBoxHeight/2, kLiveBoxWidth, kLiveBoxHeight);
cout << "press 'i' to initialise tracker" << endl;
}
else
{
// parse frames file
string framesFilePath = conf.sequenceBasePath+"/"+conf.sequenceName+"/"+conf.sequenceName+"_frames.txt";  //girl_frames.txt的文件路径，该文件放在girl文件夹里，内容为0，501。
ifstream framesFile(framesFilePath.c_str(), ios::in);
if (!framesFile)
{
cout << "error: could not open sequence frames file: " << framesFilePath << endl;
return EXIT_FAILURE;
}

string framesLine;
getline(framesFile, framesLine);
sscanf(framesLine.c_str(), "%d,%d", &startFrame, &endFrame);   //startFrame=0；endFrame=501；

if (framesFile.fail() || startFrame == -1 || endFrame == -1)
{
cout << "error: could not parse sequence frames file" << endl;
return EXIT_FAILURE;
}

imgFormat = conf.sequenceBasePath+"/"+conf.sequenceName+"/imgs/img%05d.png";

// read first frame to get size
char imgPath[256];
sprintf(imgPath, imgFormat.c_str(), startFrame);  //sprintf把格式化的数据写入某个字符串缓冲区(imgPath);
Mat tmp = cv::imread(imgPath, 0);
scaleW = (float)conf.frameWidth/tmp.cols;   //=1;
scaleH = (float)conf.frameHeight/tmp.rows; //=1;

// read init box from ground truth file
string gtFilePath = conf.sequenceBasePath+"/"+conf.sequenceName+"/"+conf.sequenceName+"_gt.txt";  //读取girl_gt.txt文件
ifstream gtFile(gtFilePath.c_str(), ios::in);
if (!gtFile)
{
cout << "error: could not open sequence gt file: " << gtFilePath << endl;
return EXIT_FAILURE;
}

string gtLine;
getline(gtFile, gtLine);
float xmin = -1.f;
float ymin = -1.f;
float width = -1.f;
float height = -1.f;
sscanf(gtLine.c_str(), "%f,%f,%f,%f", &xmin, &ymin, &width, &height);  //128,46,104,127

if (gtFile.fail() || xmin < 0.f || ymin < 0.f || width < 0.f || height < 0.f)
{
cout << "error: could not parse sequence gt file" << endl;
return EXIT_FAILURE;
}
initBB = FloatRect(xmin*scaleW, ymin*scaleH, width*scaleW, height*scaleH);
}

if (!conf.quietMode)
{
namedWindow("result");
}

Mat result(conf.frameHeight, conf.frameWidth, CV_8UC3);
bool paused = false;
bool doInitialise = false;
srand(conf.seed);

for (int frameInd = startFrame; frameInd <= endFrame; ++frameInd)    //逐帧处理
{
Mat frame;
if (useCamera)   //若使用摄像头
{
Mat frameOrig;
cap >> frameOrig;
resize(frameOrig, frame, Size(conf.frameWidth, conf.frameHeight));
flip(frame, frame, 1);
frame.copyTo(result);
if (doInitialise)
{
if (tracker.IsInitialised())
{
tracker.Reset();
}
else
{
tracker.Initialise(frame, initBB);
}
doInitialise = false;
}
else if (!tracker.IsInitialised())
{
rectangle(result, initBB, CV_RGB(255, 255, 255));
}
}
else    //若读取图片序列
{
char imgPath[256];
sprintf(imgPath, imgFormat.c_str(), frameInd);
Mat frameOrig = cv::imread(imgPath, 0);
if (frameOrig.empty())
{
cout << "error: could not read frame: " << imgPath << endl;
return EXIT_FAILURE;
}

resize(frameOrig, frame, Size(conf.frameWidth, conf.frameHeight));  //将读取的每帧图像统一为320*240;
cvtColor(frame, result, CV_GRAY2RGB);

if (frameInd == startFrame)
{
tracker.Initialise(frame, initBB);                 //对第一帧进行初始化
}
}

if (tracker.IsInitialised())
{
tracker.Track(frame);                     //开始跟踪

if (!conf.quietMode && conf.debugMode)
{
tracker.Debug();  //用于显示样本图像
}

rectangle(result, tracker.GetBB(), CV_RGB(0, 255, 0));

if (outFile)
{
const FloatRect& bb = tracker.GetBB();
outFile << bb.XMin()/scaleW << "," << bb.YMin()/scaleH << "," << bb.Width()/scaleW << "," << bb.Height()/scaleH << endl;
}   //输出跟踪结果坐标
}

if (!conf.quietMode)
{
imshow("result", result);   //显示跟踪画面
int key = waitKey(paused ? 0 : 1);

if (key != -1)
{
if (key == 27 || key == 113) // esc q
{
break;
}
else if (key == 112) // p
{
paused = !paused;
}
else if (key == 105 && useCamera)
{
doInitialise = true;
}
}
if (conf.debugMode && frameInd == endFrame)
{
cout << "\n\nend of sequence, press any key to exit" << endl;
waitKey();
}
}
}

if (outFile.is_open())
{
outFile.close();
}

return EXIT_SUCCESS;
}

Tracker.cpp

#include "Tracker.h"
#include "Config.h"
#include "ImageRep.h"
#include "Sampler.h"
#include "Sample.h"
#include "GraphUtils/GraphUtils.h"

#include "HaarFeatures.h"
#include "RawFeatures.h"
#include "HistogramFeatures.h"
#include "MultiFeatures.h"

#include "Kernels.h"

#include "LaRank.h"

#include <opencv/cv.h>
#include <opencv/highgui.h>

#include <Eigen/Core>

#include <vector>
#include <algorithm>

using namespace cv;
using namespace std;
using namespace Eigen;

Tracker::Tracker(const Config& conf) :      //构造函数，对参数进行初始化
m_config(conf),
m_initialised(false),
m_pLearner(0),
m_debugImage(2*conf.searchRadius+1, 2*conf.searchRadius+1, CV_32FC1),
m_needsIntegralImage(false)
{
Reset();
}

Tracker::~Tracker()
{
delete m_pLearner;
for (int i = 0; i < (int)m_features.size(); ++i)
{
delete m_features[i];
delete m_kernels[i];
}
}

void Tracker::Reset()               //因为初始化为haar特征核高斯核函数，所以m_needsIntegralImage = true，m_needsIntegralHist = false;
{
m_initialised = false;
m_debugImage.setTo(0);
if (m_pLearner) delete m_pLearner;
for (int i = 0; i < (int)m_features.size(); ++i)
{
delete m_features[i];
delete m_kernels[i];
}
m_features.clear();
m_kernels.clear();

m_needsIntegralImage = false;
m_needsIntegralHist = false;

int numFeatures = m_config.features.size();
vector<int> featureCounts;
for (int i = 0; i < numFeatures; ++i)
{
switch (m_config.features[i].feature)
{
case Config::kFeatureTypeHaar:
m_features.push_back(new HaarFeatures(m_config));
m_needsIntegralImage = true;
break;
case Config::kFeatureTypeRaw:
m_features.push_back(new RawFeatures(m_config));
break;
case Config::kFeatureTypeHistogram:
m_features.push_back(new HistogramFeatures(m_config));
m_needsIntegralHist = true;
break;
}
featureCounts.push_back(m_features.back()->GetCount());

switch (m_config.features[i].kernel)
{
case Config::kKernelTypeLinear:
m_kernels.push_back(new LinearKernel());
break;
case Config::kKernelTypeGaussian:
m_kernels.push_back(new GaussianKernel(m_config.features[i].params[0]));
break;
case Config::kKernelTypeIntersection:
m_kernels.push_back(new IntersectionKernel());
break;
case Config::kKernelTypeChi2:
m_kernels.push_back(new Chi2Kernel());
break;
}
}

if (numFeatures > 1)
{
MultiFeatures* f = new MultiFeatures(m_features);
m_features.push_back(f);

MultiKernel* k = new MultiKernel(m_kernels, featureCounts);
m_kernels.push_back(k);
}

m_pLearner = new LaRank(m_config, *m_features.back(), *m_kernels.back());
}

void Tracker::Initialise(const cv::Mat& frame, FloatRect bb)
{
m_bb = IntRect(bb);//将目标框坐标转为int型
//该类主要实现了积分图计算
ImageRep image(frame, m_needsIntegralImage, m_needsIntegralHist);  //后两个参数分别为true，false
for (int i = 0; i < 1; ++i)
{
UpdateLearner(image);// 更新预测函数
}
m_initialised = true;
}

void Tracker::Track(const cv::Mat& frame)
{
assert(m_initialised);

ImageRep image(frame, m_needsIntegralImage, m_needsIntegralHist);   //获得当前帧的积分图

vector<FloatRect> rects = Sampler::PixelSamples(m_bb, m_config.searchRadius);  //抽样

vector<FloatRect> keptRects;
keptRects.reserve(rects.size());
for (int i = 0; i < (int)rects.size(); ++i)
{
if (!rects[i].IsInside(image.GetRect())) continue;
keptRects.push_back(rects[i]);        //将超出图像范围的框舍弃，剩余的保留在keptRects中
}

MultiSample sample(image, keptRects);     //多样本类，主要包括样本框以及ImageRep image

vector<double> scores;
m_pLearner->Eval(sample, scores);     //scores里存放的是论文中公式（10）后半部分

double bestScore = -DBL_MAX;
int bestInd = -1;
for (int i = 0; i < (int)keptRects.size(); ++i)
{
if (scores[i] > bestScore)
{
bestScore = scores[i];
bestInd = i;              //找到bestScore
}
}

UpdateDebugImage(keptRects, m_bb, scores);//更新debug图像，用于显示

if (bestInd != -1)
{
m_bb = keptRects[bestInd];
UpdateLearner(image);
#if VERBOSE
cout << "track score: " << bestScore << endl;
#endif
}
}

void Tracker::UpdateDebugImage(const vector<FloatRect>& samples, const FloatRect& centre, const vector<double>& scores)
{
double mn = VectorXd::Map(&scores[0], scores.size()).minCoeff();   //Map:将现存的结构映射到Eigen的数据结构里，进行计算
double mx = VectorXd::Map(&scores[0], scores.size()).maxCoeff();   //R.minCoeff()=min(R(:)), R.maxCoeff()=max(R(:));
m_debugImage.setTo(0);     //置为全黑色
for (int i = 0; i < (int)samples.size(); ++i)
{
int x = (int)(samples[i].XMin() - centre.XMin());
int y = (int)(samples[i].YMin() - centre.YMin());
m_debugImage.at<float>(m_config.searchRadius+y,m_config.searchRadius+x)=(float)((scores[i]-mn)/(mx-mn));//scores得分越大的框，会在m_debugImage上具有越大的值，即该点越亮（类似于置信图）
}
}

void Tracker::Debug()
{
imshow("tracker", m_debugImage);   //显示m_debugImage图像
m_pLearner->Debug();
}

void Tracker::UpdateLearner(const ImageRep& image)     //更新预测函数
{
// note these return the centre sample at index 0
vector<FloatRect> rects = Sampler::RadialSamples(m_bb, 2*m_config.searchRadius, 5, 16);//5*16=80,加上一个原始rect，共包含81个rect
//vector<FloatRect> rects = Sampler::PixelSamples(m_bb, 2*m_config.searchRadius, true);

vector<FloatRect> keptRects;
keptRects.push_back(rects[0]); // 原始目标框
for (int i = 1; i < (int)rects.size(); ++i)
{
if (!rects[i].IsInside(image.GetRect())) continue;   //判断生成的样本框是否超出图像范围，超出的舍弃
keptRects.push_back(rects[i]);
}

#if VERBOSE
cout << keptRects.size() << " samples" << endl;
#endif

MultiSample sample(image, keptRects);      //多样本类对象sample，包含ImageRep& image，以及保留下来样本框
m_pLearner->Update(sample, 0);       //更新，在LaRank类下实现
}

LaRank.h

#ifndef LARANK_H
#define LARANK_H

#include "Rect.h"
#include "Sample.h"

#include <vector>
#include <Eigen/Core>

#include <opencv/cv.h>

class Config;
class Features;
class Kernel;

class LaRank   //文献《Solving multiclass support vector machine with LaRank》，该类实现了struck算法的主要步骤
{
public:
LaRank(const Config& conf, const Features& features, const Kernel& kernel);  //初始化参数，特征值，核
~LaRank();

virtual void Eval(const MultiSample& x, std::vector<double>& results);
virtual void Update(const MultiSample& x, int y);

virtual void Debug();

private:

struct SupportPattern
{
std::vector<Eigen::VectorXd> x;   //特征值
std::vector<FloatRect> yv;        //变化关系
std::vector<cv::Mat> images;      //图像片
int y;                            //索引值
int refCount;                    //统计sp的个数？
};

struct SupportVector
{
SupportPattern* x;
int y;
double b;                //beta
double g;                 //gradient
cv::Mat image;
};

const Config& m_config;
const Features& m_features;
const Kernel& m_kernel;

std::vector<SupportPattern*> m_sps;
std::vector<SupportVector*> m_svs;

cv::Mat m_debugImage;

double m_C;
Eigen::MatrixXd m_K;

inline double Loss(const FloatRect& y1, const FloatRect& y2) const         //损失函数
{
// overlap loss
return 1.0-y1.Overlap(y2);
// squared distance loss
//double dx = y1.XMin()-y2.XMin();
//double dy = y1.YMin()-y2.YMin();
//return dx*dx+dy*dy;
}

double ComputeDual() const;

void SMOStep(int ipos, int ineg);
std::pair<int, double> MinGradient(int ind);
void ProcessNew(int ind);
void Reprocess();
void ProcessOld();
void Optimize();

int AddSupportVector(SupportPattern* x, int y, double g);
void RemoveSupportVector(int ind);
void RemoveSupportVectors(int ind1, int ind2);
void SwapSupportVectors(int ind1, int ind2);

void BudgetMaintenance();
void BudgetMaintenanceRemove();

double Evaluate(const Eigen::VectorXd& x, const FloatRect& y) const;
void UpdateDebugImage();
};

#endif

LaRank.cpp

#include "LaRank.h"

#include "Config.h"
#include "Features.h"
#include "Kernels.h"
#include "Sample.h"
#include "Rect.h"
#include "GraphUtils/GraphUtils.h"

#include <Eigen/Array>

#include <opencv/highgui.h>
static const int kTileSize = 30;
using namespace cv;

using namespace std;
using namespace Eigen;

static const int kMaxSVs = 2000; // TODO (only used when no budget)

LaRank::LaRank(const Config& conf, const Features& features, const Kernel& kernel) :
m_config(conf),
m_features(features),
m_kernel(kernel),
m_C(conf.svmC)
{
int N = conf.svmBudgetSize > 0 ? conf.svmBudgetSize+2 : kMaxSVs;     //N=100+2，特征向量的个数不能超过这个阈值
m_K = MatrixXd::Zero(N, N);            //m_K表示核矩阵，102*102
m_debugImage = Mat(800, 600, CV_8UC3);
}

LaRank::~LaRank()
{
}

double LaRank::Evaluate(const Eigen::VectorXd& x, const FloatRect& y) const  //论文中公式10后半部分计算，即F
{
double f = 0.0;
for (int i = 0; i < (int)m_svs.size(); ++i)
{
const SupportVector& sv = *m_svs[i];
f += sv.b*m_kernel.Eval(x, sv.x->x[sv.y]);       //beta*高斯核
}
return f;
}

void LaRank::Eval(const MultiSample& sample, std::vector<double>& results)
{
const FloatRect& centre(sample.GetRects()[0]);       //原始目标框
vector<VectorXd> fvs;
const_cast<Features&>(m_features).Eval(sample, fvs);     //fvs存放haar特征值
results.resize(fvs.size());
for (int i = 0; i < (int)fvs.size(); ++i)
{
// express y in coord frame of centre sample
FloatRect y(sample.GetRects()[i]);
y.Translate(-centre.XMin(), -centre.YMin());     //将每个框的横纵坐标分别减去原始目标框的横纵坐标
results[i] = Evaluate(fvs[i], y);         //计算每个框的F函数，结果保存在results中。
}
}

void LaRank::Update(const MultiSample& sample, int y)
{
// add new support pattern
SupportPattern* sp = new SupportPattern;        //定义一个sp
const vector<FloatRect>& rects = sample.GetRects();      //获得所有的样本框
FloatRect centre = rects[y];                     //原始目标框
for (int i = 0; i < (int)rects.size(); ++i)
{
// express r in coord frame of centre sample
FloatRect r = rects[i];
r.Translate(-centre.XMin(), -centre.YMin());   //这就表示帧间目标位置变化关系
sp->yv.push_back(r);
if (!m_config.quietMode && m_config.debugMode)
{
// store a thumbnail for each sample
Mat im(kTileSize, kTileSize, CV_8UC1);
IntRect rect = rects[i];
cv::Rect roi(rect.XMin(), rect.YMin(), rect.Width(), rect.Height());  //感兴趣的区域是那些抽取的样本区域
cv::resize(sample.GetImage().GetImage(0)(roi), im, im.size());       //0表示通道数，将感兴趣区域统一为30*30，并保存在sp里的images
sp->images.push_back(im);
}
}
// evaluate features for each sample
sp->x.resize(rects.size());    //有多少个感兴趣的框，就有多少个特征值向量。
const_cast<Features&>(m_features).Eval(sample, sp->x);    //将每个样本框计算得到的haar特征存入sp->x，这里关于haar特征的代码不再列出，我将代码提取出来单独写出一篇博客《http://blog.csdn.net/qianxin_dh/article/details/39268113》
sp->y = y;
sp->refCount = 0;
m_sps.push_back(sp);   //存储sp

ProcessNew((int)m_sps.size()-1);  //执行该步骤，添加支持向量，并对beta值进行调整
BudgetMaintenance();       //保证支持向量没有超出限定阈值

for (int i = 0; i < 10; ++i)
{
Reprocess();           //包括processold：增加新的sv；optimize：在现有的sv基础上调整beta值
BudgetMaintenance();
}
}

void LaRank::BudgetMaintenance()
{
if (m_config.svmBudgetSize > 0)
{
while ((int)m_svs.size() > m_config.svmBudgetSize)
{
BudgetMaintenanceRemove();  //支持向量的个数超出阈值后，找到对于F函数影响最小的负sv，并移除。
}
}
}

void LaRank::Reprocess()
{
ProcessOld();       //每个processold步骤伴随着10个optimize步骤。
for (int i = 0; i < 10; ++i)
{
Optimize();
}
}

double LaRank::ComputeDual() const
{
double d = 0.0;
for (int i = 0; i < (int)m_svs.size(); ++i)
{
const SupportVector* sv = m_svs[i];
d -= sv->b*Loss(sv->x->yv[sv->y], sv->x->yv[sv->x->y]);
for (int j = 0; j < (int)m_svs.size(); ++j)
{
d -= 0.5*sv->b*m_svs[j]->b*m_K(i,j);
}
}
return d;
}

void LaRank::SMOStep(int ipos, int ineg)
{
if (ipos == ineg) return;

SupportVector* svp = m_svs[ipos];    //定义一个正支持向量
SupportVector* svn = m_svs[ineg];    //定义一个负支持向量
assert(svp->x == svn->x);
SupportPattern* sp = svp->x;    //定义一个支持模式sp，将正支持向量的支持模式赋予sp

#if VERBOSE
cout << "SMO: gpos:" << svp->g << " gneg:" << svn->g << endl;
#endif
if ((svp->g - svn->g) < 1e-5)
{
#if VERBOSE
cout << "SMO: skipping" << endl;
#endif
}
else
{   //论文中的Algorithm步骤
double kii = m_K(ipos, ipos) + m_K(ineg, ineg) - 2*m_K(ipos, ineg);
double lu = (svp->g-svn->g)/kii;
// no need to clamp against 0 since we'd have skipped in that case
double l = min(lu, m_C*(int)(svp->y == sp->y) - svp->b);

svp->b += l;
svn->b -= l;

// update gradients
for (int i = 0; i < (int)m_svs.size(); ++i)
{
SupportVector* svi = m_svs[i];
svi->g -= l*(m_K(i, ipos) - m_K(i, ineg));
}
#if VERBOSE
cout << "SMO: " << ipos << "," << ineg << " -- " << svp->b << "," << svn->b << " (" << l << ")" << endl;
#endif
}

// check if we should remove either sv now

if (fabs(svp->b) < 1e-8)         //beta为0，该向量被移除
{
RemoveSupportVector(ipos);
if (ineg == (int)m_svs.size())
{
// ineg and ipos will have been swapped during sv removal
ineg = ipos;
}
}

if (fabs(svn->b) < 1e-8)  //beta＝0，该向量被移除
{
RemoveSupportVector(ineg);
}
}

pair<int, double> LaRank::MinGradient(int ind)
{
const SupportPattern* sp = m_sps[ind];
pair<int, double> minGrad(-1, DBL_MAX);
for (int i = 0; i < (int)sp->yv.size(); ++i)
{
double grad = -Loss(sp->yv[i], sp->yv[sp->y]) - Evaluate(sp->x[i], sp->yv[i]);//通过公式10找到最小梯度对应的样本框
if (grad < minGrad.second)
{
minGrad.first = i;
minGrad.second = grad;
}
}
return minGrad;
}

void LaRank::ProcessNew(int ind)  //可以添加新的支持向量，增加的正负支持向量(sv)具有相同的支持模式(sp)
{
// gradient is -f(x,y) since loss=0
int ip = AddSupportVector(m_sps[ind], m_sps[ind]->y, -Evaluate(m_sps[ind]->x[m_sps[ind]->y],m_sps[ind]->yv[m_sps[ind]->y]));  //处理当前新样本，将上一帧目标位置作为正向量加入

pair<int, double> minGrad = MinGradient(ind);  //int，double分别是具有最小梯度的样本框存放的位置，最小梯度的数值
int in = AddSupportVector(m_sps[ind], minGrad.first, minGrad.second);    //将当前具有最小梯度的样本作为负向量加入

SMOStep(ip, in);   //Algorithm 1,更新beta和gradient值
}

void LaRank::ProcessOld()  //可以添加新的支持向量
{
if (m_sps.size() == 0) return;

// choose pattern to process
int ind = rand() % m_sps.size();   //随机选取sp

// find existing sv with largest grad and nonzero beta
int ip = -1;
double maxGrad = -DBL_MAX;
for (int i = 0; i < (int)m_svs.size(); ++i)
{
if (m_svs[i]->x != m_sps[ind]) continue;

const SupportVector* svi = m_svs[i];
if (svi->g > maxGrad && svi->b < m_C*(int)(svi->y == m_sps[ind]->y))   //找出符合该条件的，作为y＋，后一个条件保证了y+是从现存的sv中找出，因此不会增加新的向量
{
ip = i;
maxGrad = svi->g;
}
}
assert(ip != -1);
if (ip == -1) return;

// find potentially new sv with smallest grad
pair<int, double> minGrad = MinGradient(ind);
int in = -1;
for (int i = 0; i < (int)m_svs.size(); ++i)
{
if (m_svs[i]->x != m_sps[ind]) continue;              //找出满足该条件的，作为y－

if (m_svs[i]->y == minGrad.first)
{
in = i;
break;
}
}
if (in == -1)
{
// add new sv
in = AddSupportVector(m_sps[ind], minGrad.first, minGrad.second);  //将该样本作为负sv加入
}

SMOStep(ip, in);    //更新beta和gradient的值
}

void LaRank::Optimize()    //
{
if (m_sps.size() == 0) return;

// choose pattern to optimize
int ind = rand() % m_sps.size();   //随机处理现存的sp

int ip = -1;
int in = -1;
double maxGrad = -DBL_MAX;
double minGrad = DBL_MAX;
for (int i = 0; i < (int)m_svs.size(); ++i)
{
if (m_svs[i]->x != m_sps[ind]) continue;

const SupportVector* svi = m_svs[i];
if(svi->g>maxGrad&&svi->b<m_C*(int)(svi->y==m_sps->[y]))   //将满足该条件的作为y+
{
ip = i;
maxGrad = svi->g;
}
if (svi->g < minGrad)                       //将满足该条件的作为y－
{
in = i;
minGrad = svi->g;
}
}
assert(ip != -1 && in != -1);
if (ip == -1 || in == -1)
{
// this shouldn't happen
cout << "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!" << endl;
return;
}

SMOStep(ip, in);         //更新beta和gradient
}

int LaRank::AddSupportVector(SupportPattern* x, int y, double g)
{
SupportVector* sv = new SupportVector;
sv->b = 0.0;        //beta初始化为0
sv->x = x;
sv->y = y;
sv->g = g;

int ind = (int)m_svs.size();
m_svs.push_back(sv);
x->refCount++;

#if VERBOSE
cout << "Adding SV: " << ind << endl;
#endif

// update kernel matrix
for (int i = 0; i < ind; ++i)    //计算核矩阵
{
m_K(i,ind) = m_kernel.Eval(m_svs[i]->x->x[m_svs[i]->y], x->x[y]);
m_K(ind,i) = m_K(i,ind);
}
m_K(ind,ind) = m_kernel.Eval(x->x[y]);

return ind;
}

void LaRank::SwapSupportVectors(int ind1, int ind2)
{
SupportVector* tmp = m_svs[ind1];
m_svs[ind1] = m_svs[ind2];
m_svs[ind2] = tmp;

VectorXd row1 = m_K.row(ind1);
m_K.row(ind1) = m_K.row(ind2);
m_K.row(ind2) = row1;

VectorXd col1 = m_K.col(ind1);
m_K.col(ind1) = m_K.col(ind2);
m_K.col(ind2) = col1;
}

void LaRank::RemoveSupportVector(int ind)
{
#if VERBOSE
cout << "Removing SV: " << ind << endl;
#endif

m_svs[ind]->x->refCount--;
if (m_svs[ind]->x->refCount == 0)
{
// also remove the support pattern
for (int i = 0; i < (int)m_sps.size(); ++i)
{
if (m_sps[i] == m_svs[ind]->x)
{
delete m_sps[i];
m_sps.erase(m_sps.begin()+i);
break;
}
}
}

// make sure the support vector is at the back, this
// lets us keep the kernel matrix cached and valid
if (ind < (int)m_svs.size()-1)
{
SwapSupportVectors(ind, (int)m_svs.size()-1);
ind = (int)m_svs.size()-1;
}
delete m_svs[ind];
m_svs.pop_back();
}

void LaRank::BudgetMaintenanceRemove()
{
// find negative sv with smallest effect on discriminant function if removed
double minVal = DBL_MAX;
int in = -1;
int ip = -1;
for (int i = 0; i < (int)m_svs.size(); ++i)
{
if (m_svs[i]->b < 0.0)           //找到负sv
{
// find corresponding positive sv
int j = -1;
for (int k = 0; k < (int)m_svs.size(); ++k)
{
if (m_svs[k]->b > 0.0 && m_svs[k]->x == m_svs[i]->x)   //找到同一支持模式下的正sv
{
j = k;
break;
}
}
double val = m_svs[i]->b*m_svs[i]->b*(m_K(i,i) + m_K(j,j) - 2.0*m_K(i,j));
if (val < minVal)       　　//找到对F影响最小的sv
{
minVal = val;
in = i;
ip = j;
}
}
}

// adjust weight of positive sv to compensate for removal of negative
m_svs[ip]->b += m_svs[in]->b;    //将负sv移除，其相应的beta值需补偿到正sv上。

// remove negative sv
RemoveSupportVector(in);
if (ip == (int)m_svs.size())
{
// ip and in will have been swapped during support vector removal
ip = in;
}

if (m_svs[ip]->b < 1e-8)     //beta值为0，移除该向量
{
// also remove positive sv
RemoveSupportVector(ip);
}

// update gradients
// TODO: this could be made cheaper by just adjusting incrementally rather than recomputing
for (int i = 0; i < (int)m_svs.size(); ++i)
{
SupportVector& svi = *m_svs[i];
svi.g = -Loss(svi.x->yv[svi.y],svi.x->yv[svi.x->y]) - Evaluate(svi.x->x[svi.y], svi.x->yv[svi.y]);
}
}

void LaRank::Debug()
{
cout << m_sps.size() << "/" << m_svs.size() << " support patterns/vectors" << endl;
UpdateDebugImage();
imshow("learner", m_debugImage);
}

void LaRank::UpdateDebugImage()    //该函数主要用于样本显示，与算法关系不大，这里不做分析了
{
m_debugImage.setTo(0);

int n = (int)m_svs.size();

if (n == 0) return;

const int kCanvasSize = 600;
int gridSize = (int)sqrtf((float)(n-1)) + 1;
int tileSize = (int)((float)kCanvasSize/gridSize);

if (tileSize < 5)
{
cout << "too many support vectors to display" << endl;
return;
}

Mat temp(tileSize, tileSize, CV_8UC1);
int x = 0;
int y = 0;
int ind = 0;
float vals[kMaxSVs];
memset(vals, 0, sizeof(float)*n);
int drawOrder[kMaxSVs];

for (int set = 0; set < 2; ++set)
{
for (int i = 0; i < n; ++i)
{
if (((set == 0) ? 1 : -1)*m_svs[i]->b < 0.0) continue;

drawOrder[ind] = i;
vals[ind] = (float)m_svs[i]->b;
++ind;

Mat I = m_debugImage(cv::Rect(x, y, tileSize, tileSize));
resize(m_svs[i]->x->images[m_svs[i]->y], temp, temp.size());
cvtColor(temp, I, CV_GRAY2RGB);
double w = 1.0;
rectangle(I, Point(0, 0), Point(tileSize-1, tileSize-1), (m_svs[i]->b > 0.0) ? CV_RGB(0, (uchar)(255*w), 0) : CV_RGB((uchar)(255*w), 0, 0), 3);
x += tileSize;
if ((x+tileSize) > kCanvasSize)
{
y += tileSize;
x = 0;
}
}
}

const int kKernelPixelSize = 2;
int kernelSize = kKernelPixelSize*n;

double kmin = m_K.minCoeff();
double kmax = m_K.maxCoeff();

if (kernelSize < m_debugImage.cols && kernelSize < m_debugImage.rows)
{
Mat K = m_debugImage(cv::Rect(m_debugImage.cols-kernelSize, m_debugImage.rows-kernelSize, kernelSize, kernelSize));
for (int i = 0; i < n; ++i)
{
for (int j = 0; j < n; ++j)
{
Mat Kij = K(cv::Rect(j*kKernelPixelSize, i*kKernelPixelSize, kKernelPixelSize, kKernelPixelSize));
uchar v = (uchar)(255*(m_K(drawOrder[i], drawOrder[j])-kmin)/(kmax-kmin));
Kij.setTo(Scalar(v, v, v));
}
}
}
else
{
kernelSize = 0;
}

Mat I = m_debugImage(cv::Rect(0, m_debugImage.rows - 200, m_debugImage.cols-kernelSize, 200));
I.setTo(Scalar(255,255,255));
IplImage II = I;
setGraphColor(0);
drawFloatGraph(vals, n, &II, 0.f, 0.f, I.cols, I.rows);
}

内容来自用户分享和网络整理，不保证内容的准确性，如有侵权内容，可联系管理员处理

标签：

相关文章推荐

新的分享

章节导航