Latent Structural SVM
2016-04-19 17:14
162 查看
隐结构SVM(Latent Structural SVM)
令 $x\in\mathcal{X}$、$y\in\mathcal{Y}=\{-1,1\}$ 和 $h\in\mathcal{H}$ 分别表示问题中的输入、输出和隐变量。在我们的上下文中,x、y、h 分别对应一幅图片、它的标签和一个窗口(即 bounding box)。沿用[1]中的设定,我们在联合的输入/输出/隐变量空间 $\mathcal{O}$ 中考虑线性的预测规则。我们定义一个输入/输出/隐变量映射 $\Phi:\mathcal{X}\times\mathcal{Y}\times\mathcal{H}\rightarrow\mathcal{O}\subset\mathbb{R}^d$,使得
$$\Phi(x,y,h)=\begin{cases}
\overrightarrow{0} & \text{if } y\leqslant 0\\
\phi(x,h) & \text{if } y>0
\end{cases}$$
这里 $\overrightarrow{0}$ 是 d 维的零向量,$\phi(x,h)$ 是从图像 $x$ 中截取的窗口 $h$ 的特征表示。例如,在我们的实验中使用 4096 维的 DeCAF 特征[2]。
预测
$$y^*=\arg\max_{y\in\mathcal{Y}}\left(\max_{h\in\mathcal{H}} w\cdot\Phi(x,y,h)\right).$$
训练集 $S=\{(x^i,y^i)\}_{i=1}^{n}$ 上的目标函数由经验损失加上一个 2 范数正则项构成(正则项系数与下文代码中的 0.5 * lambda * w[d] * w[d] 一致):
$$\mathcal{L}(w;S;\lambda)=\frac{\lambda}{2}\lVert w \rVert^2+\sum_{i=1}^n l(w,x^i,y^i)$$
max-margin function
$$l_m(w,x^i,y^i)=\max_{y,h}\left(w\cdot\Phi(x^i,y,h)+\Delta(y^i,y)\right)-\max_{h}\, w\cdot\Phi(x^i,y^i,h)$$
soft-max function
$$l_{s}(w,x^i,y^i)=\log\sum_{y,h}\exp\left(w\cdot\Phi(x^i,y,h)+\Delta(y^i,y)\right)-\log\sum_{h}\exp\left(w\cdot\Phi(x^i,y^i,h)\right)$$
% matlab code
% Define the objective function.
%
% Example training data. featTrain is a struct array with one element per
% image; field x holds one column per window (the count is in parentheses):
% featTrain.x
%   [0.6303411;-0.7763183;1]                                                    (1 window)
%   [0.8341424,0.4734712;0.5515491,0.8808093;1,1]                               (2)
%   [-0.4017371,-0.9285000,-0.2376855;-0.9157550,-0.3713324,-0.9713422;1,1,1]   (3)
%   [0.8490803,0.8840563,0.9590592;0.5282639,0.4673803,0.2832058;1,1,1]         (3)
%   [0.7834677;-0.6214325;1]                                                    (1)
%   [-0.5194708,-0.1672711,-0.6840945;-0.8544882,-0.9859110,-0.7293934;1,1,1]   (3)
%   [-0.5074486,-0.6840267,-0.8343239;-0.8616820,-0.7294570,-0.5512746;1,1,1]   (3)
%   [0.4799071,0.6633653,0.4913931;0.8773193,0.7482957,0.8709378;1,1,1]         (3)
%   [0.8006670,0.9598586;0.5991096,0.2804843;1,1]                               (2)
%   [0.6355712,0.8618172;0.7720423,0.5072189;1,1]                               (2)
%
% labelTrain
%   -1 1 -1 1 1 -1 -1 1 1 1
% lambda = single(1e-5);
% beta = single(1)
funObj = @(w)SLSVMLossC2(w,featTrain,labelTrain,lambda,beta);

% How the MATLAB variables map to the variables in SLSVMLossC2.c
%   matlab     :  C
%   w          -> w
%   featTrain  -> tmp
%   labelTrain -> y
%   lambda     -> lambda = 1e-5
%   beta       -> beta = 1
%
% Quantities computed in the C file for this example:
%   /* get input arguments */
%   number of fields of the struct array (the feature field):
%     nfields(=1) = mxGetNumberOfFields(prhs[1]);
%   number of elements of the struct array:
%     NStructElems(=10) = mxGetNumberOfElements(prhs[1]);
%   nImgs = 10 is the number of labels, i.e. the number of images
%   nVars = 3 is the number of weights (entries of w)

% learn soft-max latent svm vector
W = minFunc(funObj,W0,options);
/* Hakan Bilen * August 5, 2015 * * Implementation of soft-max latent SVM in * "Weakly Supervised Object Detection with Posterior Regularization" in * BMVC 2014. * * Warning : posterior regularization for symmetry and mutual exclusion are * not implemented in this file! * 该代码一共有两个返回量 * f:惩罚 * g:梯度 */ #include <math.h> #include <limits.h> #include <omp.h> #include "mex.h" /* This function may not exit gracefully on bad input! */ float myLogSumExp(const float * vec, int dim) ; void computeProb(const float * in, int dim, float * out) ; void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]) { /* Variable Declarations */ double *w, f, *g, *y; int nVars, nImgs; float lambda, beta; float lpos2neg,lneg2pos; /* Get Input Pointers */ w = mxGetPr(prhs[0]); y = mxGetPr(prhs[2]); lambda = mxGetScalar(prhs[3]); beta = mxGetScalar(prhs[4]); float np = 0; float nn = 0; nImgs = (int)mxGetNumberOfElements(prhs[2]); nVars = (int)mxGetNumberOfElements(prhs[0]); int ifield, nfields; mwIndex jstruct; mwSize NStructElems; mwSize ndim; if(!mxIsStruct(prhs[1])) mexErrMsgIdAndTxt( "MATLAB:SLSVMC2:inputNotStruct", "Input must be a structure."); /* get input arguments */ nfields = mxGetNumberOfFields(prhs[1]); NStructElems = mxGetNumberOfElements(prhs[1]); if (NStructElems!=nImgs) mexErrMsgIdAndTxt( "MATLAB:SLSVMC2:WrongNumImgs", "Wrong number of images!"); /*number of features (boxes) for each image */ int * nBoxes = mxCalloc(nImgs,sizeof(int)); int * cumNBoxes = mxCalloc(nImgs+1,sizeof(int)); cumNBoxes[0] = 0; int i,b,d; for(i=1;i<=nImgs;i++) { const mxArray *tmp = mxGetFieldByNumber(prhs[1], i-1, 0); if(tmp == NULL) { mexPrintf("%s%d\t%s%d\n", "FIELD: ", ifield+1, "STRUCT INDEX :", 1); mexErrMsgIdAndTxt( "MATLAB:data:fieldEmpty", "Above field is empty!"); } nBoxes[i-1] = (int)mxGetDimensions(tmp)[1]; //这里是第i个样本中窗口的个数 if (mxGetDimensions(tmp)[0]!=nVars) mexErrMsgIdAndTxt("MATLAB:SLSVMC2:wrongDim","Wrong feature dimensionality!"); cumNBoxes[i] = cumNBoxes[i-1] + 
nBoxes[i-1]; //这里cumNBoxes是前i个样本中获取总窗口的数量 } /* mexPrintf("X[0,end] %f\n",X[4096]); mexPrintf("nImgs %d nVars %d\n",nImgs,nVars); mexPrintf("X[2,0] %f\n",X[2*4097]); mexPrintf("X[2,end] %f\n",X[3*4097-1]); */ /* Allocated Memory for Function Variables */ /* plhs[0] = mxCreateDoubleScalar(0); */ plhs[1] = mxCreateDoubleMatrix(nVars,1,mxREAL); g = mxGetPr(plhs[1]); float * fs = mxCalloc(nImgs,sizeof(float)); float * gs = mxCalloc(nImgs*nVars,sizeof(float)); /* get number of positives and negatives */ for(i=0;i<nImgs;i++) { if(y[i]>0) { np++; //np是正样本的个数(6) } else if(y[i]<0) { nn++; //nn是负样本的个数(4) } } if (nn==0 || np==0) mexErrMsgIdAndTxt( "MATLAB:data:wlabel", "No pos or neg label!"); /* balanced loss for pos and neg */ lpos2neg = 0.5 * (np+nn) / np; lneg2pos = 0.5 * (np+nn) / nn; float ** convProbs = (float **)mxCalloc(nImgs,sizeof(float*)); float ** concProbs = (float **)mxCalloc(nImgs,sizeof(float*)); float ** scores = (float **)mxCalloc(nImgs,sizeof(float*)); float ** augScores = (float **)mxCalloc(nImgs,sizeof(float*)); for(i=0;i<nImgs;i++) { convProbs[i] = (float *)mxCalloc(2*(int)nBoxes[i],sizeof(float)); concProbs[i] = (float *)mxCalloc((int)nBoxes[i],sizeof(float)); scores[i] = (float *)mxCalloc((int)nBoxes[i],sizeof(float)); augScores[i] = (float *)mxCalloc(2*(int)nBoxes[i],sizeof(float)); } #pragma omp parallel for schedule(dynamic) private(i) for(i=0;i<nImgs;i++) { if(y[i]==0) continue; const mxArray *tmp = mxGetFieldByNumber(prhs[1], i, 0); if(tmp == NULL) { mexPrintf("%s%d\t%s%d\n", "FIELD: ", ifield+1, "STRUCT INDEX :", 1); mexErrMsgIdAndTxt( "MATLAB:data:fieldEmpty", "Above field is empty!"); } const float * x = (float *)mxGetData(tmp); //应该是获取第i幅图像的窗口数量 int nB = (int)nBoxes[i]; if((int)mxGetDimensions(tmp)[1]!=nB) mexErrMsgIdAndTxt("MATLAB:SLSVMC2:empty","mxGetDimensions(tmp)[1]!=nB"); if(nB==0) mexErrMsgIdAndTxt("MATLAB:SLSVMC2:zeroval","zero num bb"); if (mxGetDimensions(tmp)[0]!=nVars) mexErrMsgIdAndTxt("MATLAB:SLSVMC2:wrongdim","wrong feat dim"); 
/* mexPrintf("y[%d] = %f nB %d\n",i,y[i],nB); */ float concScore = 0; int b, d; for(b=0;b<nB;b++) { for(d=0;d<nVars;d++) { //对每个窗口进行得分相加 scores[i][b] += (float)w[d] * x[nVars*b+d]; } //计算第i幅图像的第b个窗口的得分 scores[i][b] *= beta; } //凹的部分 /* concave part */ if(y[i]>0) { concScore = myLogSumExp(scores[i],nB); computeProb(scores[i],nB,concProbs[i]); } else if(y[i]<0) { concScore = logf((float)nB); for(b=0;b<nB;b++) { concProbs[i][b] = 0; } } else { mexErrMsgIdAndTxt("MATLAB:SLSVMC2:wlabel","wrong label"); } //凸的部分 /* convex part */ if(y[i]>0) { for(b=0;b<nB;b++) { augScores[i][b] = scores[i][b]; augScores[i][b+nB] = beta * lpos2neg; } } else if(y[i]<0) { for(b=0;b<nB;b++) { augScores[i][b] = scores[i][b] + beta * lneg2pos; augScores[i][b+nB] = 0; } } else { mexErrMsgIdAndTxt("MATLAB:SLSVMC2:wlabel","wrong label"); } computeProb(augScores[i],2*nB,convProbs[i]); for(b=0;b<nB;b++) { float difp = (convProbs[i][b]-concProbs[i][b]); for(d=0;d<nVars;d++) { gs[i*nVars + d] += x[nVars*b+d] * difp; } } float convScore = myLogSumExp(augScores[i],2*nB); fs[i] = convScore - concScore; } for(i=0;i<nImgs;i++) { mxFree(augScores[i]); mxFree(scores[i]); mxFree(convProbs[i]); mxFree(concProbs[i]); } mxFree(augScores); mxFree(scores); mxFree(convProbs); mxFree(concProbs); /* sum objval and grads over all images */ for(i=0;i<nImgs;i++) { if(y[i]==0) continue; f += fs[i]; for(d=0;d<nVars;d++) { g[d] += gs[i*nVars+d]; } } for(d=0;d<nVars;d++) { g[d] /= (nn+np); } f /= beta * (nn+np); /* add regularization */ for(d=0;d<nVars-1;d++) { f += 0.5 * lambda * w[d] * w[d] ; } for(d=0;d<nVars-1;d++) { g[d] += lambda * w[d] ; } mxFree(cumNBoxes); mxFree(nBoxes); mxFree(gs); mxFree(fs); /* mxFree(gconcProbs); mxFree(gconvProbs); mxFree(gscores); mxFree(gaugScores); */ plhs[0] = mxCreateDoubleScalar(f); } /*---------------------------------------------------------------------------*/ float myLogSumExp(const float * vec, int dim) { float maxScore = -FLT_MAX ; int i=0; for (i=0;i<dim;i++) { 
if(maxScore<vec[i]) maxScore = vec[i]; } float sumScore = 0.f; for (i=0;i<dim;i++) { sumScore += expf(vec[i]-maxScore); } return logf(sumScore)+maxScore; } /*---------------------------------------------------------------------------*/ void computeProb(const float * in, int dim, float * out) { float maxScore = -FLT_MAX ; int i=0; for (i=0;i<dim;i++) { if(maxScore<in[i]) maxScore = in[i]; } //获取当前图片中的最大得分记为maxScore float sumExp = 0.f; for (i=0;i<dim;i++) { sumExp += expf(in[i]-maxScore); } mxAssert(sumExp>0.f,""); const float rSumExp = 1.f / sumExp; for (i=0;i<dim;i++) { out[i] = expf(in[i]-maxScore) * rSumExp; } }
[1] C. John Yu and T. Joachims. Learning structural svms with latent variables. In ICML, pages 1169–1176, 2009.
[2] Chaitanya Desai, Deva Ramanan, and Charless C Fowlkes. Discriminative models for multi-class object layout. International journal of computer vision, 95(1):1–12, 2011.
相关文章推荐
- jquery ajax提交表单
- LINQ To SQL 语法及实例大全
- MyEclipse从数据库反向生成实体类通过Hibernate的方式----mysql数据库实例
- (转)【深入浅出jQuery】源码浅析2--奇技淫巧
- 算法Sedgewick第四版-第1章基础-013一用stack实现自动补全表达式括号
- Android activity组件
- 实现点击图片的放大缩小
- 根据不同条件插入不同表SQL
- 一个int数的二进制有多少个一
- 二叉树的链式存储
- dentry 和inode整理
- 让app中链接跳转跳转到淘宝店主页,如果存在淘宝app
- 传递引用的特例
- android ImageView亮度变化
- Android中的Serialable和Parcelable的区别
- CentOS7版本安装ntp服务
- EditText hint带图片的提示
- 26. Remove Duplicates from Sorted Array
- EventBus使用详解(二)——EventBus使用进阶
- andfix增量升级更新