您的位置:首页 > 其它

Latent Structural SVM

2016-04-19 17:14 162 查看

隐结构SVM(Latent Structural SVM)

令 $x\in\mathcal{X}$、$y\in\{-1,1\}$ 和 $h\in\mathcal{H}$ 分别表示问题中的输入、输出和隐变量。在我们的上下文中,x、y、h 分别对应一幅图片、它的标签和一个窗口(即 bounding box)。沿用 [1] 中的设定,我们在联合的输入/输出/隐变量空间 $\mathcal{O}$ 中考虑线性预测规则。我们定义一个输入/输出/隐变量映射 $\Phi(x,y,h):\mathcal{X}\times\mathcal{Y}\times\mathcal{H}\rightarrow\mathcal{O}\subset\mathbb{R}^d$,使得

$$\Phi(x,y,h)=\begin{cases}
\overrightarrow{0} & \text{if } y\leqslant 0\\
\phi(x,h) & \text{if } y>0
\end{cases}$$

这里 $\overrightarrow{0}$ 是 d 维的零向量,$\phi(x,h)$ 是从图像 $x$ 中截取的窗口 $h$ 的特征表示,例如我们实验中使用的 4096 维 DeCAF 特征 [2]。

预测

$$y^*=\arg\max_{y\in\mathcal{Y}}\left(\max_{h\in\mathcal{H}}w\cdot\Phi(x,y,h)\right).$$

目标函数由训练样本上的经验损失加上一个 2 范数正则项构成:

$$\mathcal{L}(w;S;\lambda)=\frac{1}{2\lambda}\lVert w \rVert^2+\sum_{i=1}^n l(w,x^i,y^i)$$

max-margin function

$$l_m(w,x^i,y^i)=\max_{y,h}\left(w\cdot\Phi(x^i,y,h)+\Delta(y^i,y)\right)-\max_{h}w\cdot\Phi(x^i,y^i,h)$$

soft-max function

$$l_{s}(w,x^i,y^i)=\log\sum_{y,h}\exp\left(w\cdot\Phi(x^i,y,h)+\Delta(y^i,y)\right)-\log\sum_{h}\exp\left(w\cdot\Phi(x^i,y^i,h)\right)$$

% MATLAB driver code
% Define the objective function.
% featTrain.x holds one matrix per image, one column per window
% (the number of windows is given in parentheses):
% [0.6303411;-0.7763183;1](1)
% [0.8341424,0.4734712;0.5515491,0.8808093;1,1](2)
% [-0.4017371,-0.9285000,-0.2376855;-0.9157550,-0.3713324,-0.9713422;1,1,1](3)
% [0.8490803,0.8840563,0.9590592;0.5282639,0.4673803,0.2832058;1,1,1](3)
% [0.7834677;-0.6214325;1](1)
% [-0.5194708,-0.1672711,-0.6840945;-0.8544882,-0.9859110,-0.7293934;1,1,1](3)
% [-0.5074486,-0.6840267,-0.8343239;-0.8616820,-0.7294570,-0.5512746;1,1,1](3)
% [0.4799071,0.6633653,0.4913931;0.8773193,0.7482957,0.8709378;1,1,1](3)
% [0.8006670,0.9598586;0.5991096,0.2804843;1,1](2)
% [0.6355712,0.8618172;0.7720423,0.5072189;1,1](2)
%
% labelTrain
% -1    1   -1  1   1   -1  -1  1   1   1
% lambda = single(1e-5);
% beta = single(1)

funObj = @(w)SLSVMLossC2(w,featTrain,labelTrain,lambda,beta);
% How the MATLAB variables map to the variables in SLSVMLossC2.c
% MATLAB     :  C
% w          -> w
% featTrain  -> tmp (one struct element at a time)
% labelTrain -> y
% lambda     -> lambda (= 1e-5 in this example)
% beta       -> beta (= 1 in this example)
% nfields is the number of fields of the struct array
% /* get input arguments */
% Get the number of fields of the struct array:
% nfields(=1) = mxGetNumberOfFields(prhs[1]);
% Get the number of elements in the array:
% NStructElems(=10) = mxGetNumberOfElements(prhs[1]);
% nImgs=10 is the number of labels, i.e. the number of images
% nVars=3 is the number of weights (w)
% Learn the soft-max latent SVM weight vector
W = minFunc(funObj,W0,options);


/* Hakan Bilen
* August 5, 2015
*
* Implementation of soft-max latent SVM in
* "Weakly Supervised Object Detection with Posterior Regularization" in
* BMVC 2014.
*
* Warning : posterior regularization for symmetry and mutual exclusion are
* not implemented in this file!
* This code produces two outputs:
* f: the objective (loss) value
* g: the gradient
*/

#include <float.h>
#include <limits.h>
#include <math.h>
#include <omp.h>
#include "mex.h"

/* This function may not exit gracefully on bad input! */

/* Returns log(sum_i exp(vec[i])) over the first dim entries of vec; the largest
   entry is subtracted before exponentiation and added back afterwards. */
float myLogSumExp(const float * vec, int dim) ;
/* Writes into out[0..dim-1] the values exp(in[i] - max(in)) scaled by the
   reciprocal of their sum. */
void computeProb(const float * in, int dim, float * out) ;

/*
 * MEX entry point for the soft-max latent SVM objective.
 *
 * MATLAB usage:  [f, g] = SLSVMLossC2(w, feats, labels, lambda, beta)
 *
 *   prhs[0]  w       real, full, double vector of nVars weights; the last
 *                    entry is left out of the regularizer (presumably a bias
 *                    term -- confirm with the caller)
 *   prhs[1]  feats   struct array with one element per label; field 0 of each
 *                    element is a real single-precision matrix with nVars rows
 *                    and one column per box
 *   prhs[2]  labels  real, full, double vector: > 0 positive, < 0 negative,
 *                    == 0 means the image is skipped
 *   prhs[3]  lambda  scalar weight of the squared-norm regularizer
 *   prhs[4]  beta    scalar that multiplies the box scores and the margins;
 *                    the summed loss is divided by it again
 *
 *   plhs[0]  f       objective value (double scalar)
 *   plhs[1]  g       nVars-by-1 gradient (only returned when requested)
 *
 * All input validation happens before the OpenMP loop so that no MEX API
 * function (in particular mexErrMsgIdAndTxt, which does not return) is called
 * from inside the parallel region.
 */
void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[])
{
    int i;

    /* ---- argument checks ------------------------------------------------ */
    if (nrhs < 5) {
        mexErrMsgIdAndTxt("MATLAB:SLSVMC2:nrhs",
                          "Five inputs required: w, feats, labels, lambda, beta.");
    }
    if (nlhs > 2) {
        mexErrMsgIdAndTxt("MATLAB:SLSVMC2:nlhs",
                          "At most two outputs (f, g) are produced.");
    }
    /* w and the labels are read through mxGetPr, so they must be full real doubles */
    if (!mxIsDouble(prhs[0]) || mxIsComplex(prhs[0]) || mxIsSparse(prhs[0]) ||
        !mxIsDouble(prhs[2]) || mxIsComplex(prhs[2]) || mxIsSparse(prhs[2])) {
        mexErrMsgIdAndTxt("MATLAB:SLSVMC2:wrongType",
                          "w and labels must be real, full, double arrays.");
    }
    if (!mxIsNumeric(prhs[3]) || mxIsEmpty(prhs[3]) ||
        !mxIsNumeric(prhs[4]) || mxIsEmpty(prhs[4])) {
        mexErrMsgIdAndTxt("MATLAB:SLSVMC2:wrongType",
                          "lambda and beta must be numeric scalars.");
    }
    if (!mxIsStruct(prhs[1])) {
        mexErrMsgIdAndTxt("MATLAB:SLSVMC2:inputNotStruct",
                          "Input must be a structure.");
    }

    /* ---- input pointers and sizes --------------------------------------- */
    const double *w = mxGetPr(prhs[0]);
    const double *y = mxGetPr(prhs[2]);
    const float lambda = (float)mxGetScalar(prhs[3]);
    const float beta   = (float)mxGetScalar(prhs[4]);

    const int nImgs = (int)mxGetNumberOfElements(prhs[2]);
    const int nVars = (int)mxGetNumberOfElements(prhs[0]);

    if (mxGetNumberOfElements(prhs[1]) != (size_t)nImgs) {
        mexErrMsgIdAndTxt("MATLAB:SLSVMC2:WrongNumImgs",
                          "Wrong number of images!");
    }

    /* ---- gather per-image feature pointers and box counts (serial) ------ */
    int *nBoxes         = mxCalloc(nImgs, sizeof *nBoxes);
    size_t *boxOffset   = mxCalloc((size_t)nImgs + 1, sizeof *boxOffset);
    const float **feats = mxCalloc(nImgs, sizeof *feats);

    boxOffset[0] = 0;
    for (i = 0; i < nImgs; i++) {
        const mxArray *tmp = mxGetFieldByNumber(prhs[1], i, 0);
        if (tmp == NULL) {
            mexPrintf("%s%d\t%s%d\n", "FIELD: ", 1, "STRUCT INDEX :", i + 1);
            mexErrMsgIdAndTxt("MATLAB:data:fieldEmpty",
                              "Above field is empty!");
        }
        /* the data is read as float below, so anything but real single would be misread */
        if (!mxIsSingle(tmp) || mxIsComplex(tmp)) {
            mexErrMsgIdAndTxt("MATLAB:SLSVMC2:wrongType",
                              "Features must be real single-precision matrices.");
        }
        /* number of boxes (columns) of image i */
        nBoxes[i] = (int)mxGetDimensions(tmp)[1];

        if (mxGetDimensions(tmp)[0] != (size_t)nVars) {
            mexErrMsgIdAndTxt("MATLAB:SLSVMC2:wrongDim", "Wrong feature dimensionality!");
        }

        feats[i] = (const float *)mxGetData(tmp);
        /* running total of boxes over images 0..i; used as offset into the scratch buffers */
        boxOffset[i + 1] = boxOffset[i] + (size_t)nBoxes[i];
    }

    /* ---- count positives and negatives ---------------------------------- */
    int np = 0;
    int nn = 0;
    for (i = 0; i < nImgs; i++) {
        if (y[i] > 0) {
            np++;
        } else if (y[i] < 0) {
            nn++;
        }
    }

    if (nn == 0 || np == 0) {
        mexErrMsgIdAndTxt("MATLAB:data:wlabel",
                          "No pos or neg label!");
    }

    /* Checks that used to run inside the parallel loop. */
    for (i = 0; i < nImgs; i++) {
        if (y[i] == 0) {
            continue;
        }
        if (nBoxes[i] == 0) {
            mexErrMsgIdAndTxt("MATLAB:SLSVMC2:zeroval", "zero num bb");
        }
        /* neither > 0 nor < 0 nor == 0: the label is NaN */
        if (!(y[i] > 0) && !(y[i] < 0)) {
            mexErrMsgIdAndTxt("MATLAB:SLSVMC2:wlabel", "wrong label");
        }
    }

    /* balanced loss for pos and neg */
    const float nLabeled = (float)(np + nn);
    const float lpos2neg = (float)(0.5 * nLabeled / np);
    const float lneg2pos = (float)(0.5 * nLabeled / nn);

    /* ---- scratch memory (zero-initialised by mxCalloc) ------------------ */
    const size_t totalBoxes = boxOffset[nImgs];
    float *fs        = mxCalloc(nImgs, sizeof *fs);
    float *gs        = mxCalloc((size_t)nImgs * (size_t)nVars, sizeof *gs);
    float *scores    = mxCalloc(totalBoxes, sizeof *scores);
    float *concProbs = mxCalloc(totalBoxes, sizeof *concProbs);
    float *augScores = mxCalloc(2 * totalBoxes, sizeof *augScores);
    float *convProbs = mxCalloc(2 * totalBoxes, sizeof *convProbs);

    /* ---- per-image loss and gradient ------------------------------------ */
    /* Each iteration writes only to its own slices of the scratch buffers. */
    #pragma omp parallel for schedule(dynamic) private(i)
    for (i = 0; i < nImgs; i++) {
        if (y[i] == 0) {
            continue;
        }

        const int nB     = nBoxes[i];
        const int isPos  = (y[i] > 0);
        const float *x   = feats[i];
        float *sc        = scores    + boxOffset[i];
        float *concP     = concProbs + boxOffset[i];
        float *aug       = augScores + 2 * boxOffset[i];
        float *convP     = convProbs + 2 * boxOffset[i];
        float *gi        = gs + (size_t)i * (size_t)nVars;
        float concScore;

        /* score of every box: beta * <w, x(:,b)> */
        for (int b = 0; b < nB; b++) {
            float s = 0.f;
            for (int d = 0; d < nVars; d++) {
                s += (float)w[d] * x[(size_t)nVars * b + d];
            }
            sc[b] = s * beta;
        }

        /* concave part */
        if (isPos) {
            concScore = myLogSumExp(sc, nB);
            computeProb(sc, nB, concP);
        } else {
            concScore = logf((float)nB);
            /* concP stays all-zero (mxCalloc) for negative images */
        }

        /* convex part: first nB entries belong to y = +1, last nB to y = -1 */
        if (isPos) {
            for (int b = 0; b < nB; b++) {
                aug[b]      = sc[b];
                aug[b + nB] = beta * lpos2neg;
            }
        } else {
            for (int b = 0; b < nB; b++) {
                aug[b]      = sc[b] + beta * lneg2pos;
                aug[b + nB] = 0;
            }
        }

        computeProb(aug, 2 * nB, convP);

        for (int b = 0; b < nB; b++) {
            const float difp = convP[b] - concP[b];
            for (int d = 0; d < nVars; d++) {
                gi[d] += x[(size_t)nVars * b + d] * difp;
            }
        }
        fs[i] = myLogSumExp(aug, 2 * nB) - concScore;
    }

    mxFree(augScores);
    mxFree(scores);
    mxFree(convProbs);
    mxFree(concProbs);

    /* ---- reduce over images --------------------------------------------- */
    mxArray *gArr = mxCreateDoubleMatrix(nVars, 1, mxREAL); /* zero-filled */
    double *g = mxGetPr(gArr);
    double f = 0.0; /* must start at zero: it is only ever accumulated into */

    for (i = 0; i < nImgs; i++) {
        if (y[i] == 0) {
            continue;
        }
        f += fs[i];
        for (int d = 0; d < nVars; d++) {
            g[d] += gs[(size_t)i * (size_t)nVars + d];
        }
    }
    for (int d = 0; d < nVars; d++) {
        g[d] /= nLabeled;
    }
    f /= beta * nLabeled;

    /* add regularization (the last weight is not regularized) */
    for (int d = 0; d < nVars - 1; d++) {
        f += 0.5 * lambda * w[d] * w[d];
        g[d] += lambda * w[d];
    }

    mxFree(feats);
    mxFree(boxOffset);
    mxFree(nBoxes);
    mxFree(gs);
    mxFree(fs);

    /* ---- outputs -------------------------------------------------------- */
    plhs[0] = mxCreateDoubleScalar(f);
    if (nlhs > 1) {
        plhs[1] = gArr;
    } else {
        mxDestroyArray(gArr);
    }
}

/*---------------------------------------------------------------------------*/
float myLogSumExp(const float * vec, int dim) {

float maxScore = -FLT_MAX ;
int i=0;
for (i=0;i<dim;i++) {
if(maxScore<vec[i])
maxScore = vec[i];
}
float sumScore = 0.f;
for (i=0;i<dim;i++) {
sumScore += expf(vec[i]-maxScore);
}
return logf(sumScore)+maxScore;
}
/*---------------------------------------------------------------------------*/
/*
 * Fills out[0..dim-1] with exp(in[i] - max(in)) divided by the sum of those
 * exponentials, i.e. a normalized exponential of the input scores.
 *
 * Subtracting the largest score keeps expf() from overflowing.
 */
void computeProb(const float * in, int dim, float * out) {
    float top = -FLT_MAX;
    float sumExp = 0.f;
    float scale;
    int k;

    /* largest score among the inputs */
    for (k = 0; k < dim; ++k) {
        if (in[k] > top) {
            top = in[k];
        }
    }

    /* normalizer */
    for (k = 0; k < dim; ++k) {
        sumExp += expf(in[k] - top);
    }
    mxAssert(sumExp>0.f,"");

    /* multiply by the reciprocal instead of dividing every element */
    scale = 1.f / sumExp;
    for (k = 0; k < dim; ++k) {
        out[k] = expf(in[k] - top) * scale;
    }
}


[1] C. John Yu and T. Joachims. Learning structural svms with latent variables. In ICML, pages 1169–1176, 2009.

[2] Chaitanya Desai, Deva Ramanan, and Charless C Fowlkes. Discriminative models for multi-class object layout. International journal of computer vision, 95(1):1–12, 2011.
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: