
Implementing Logistic Regression for the Third Time (C++): Implementation (2)

2014-01-18 14:19
2. SGD Training

The SGD weight update works essentially the same way as in binary LR; the difference is that binary LR trains only one weight vector, while K-class LR trains K-1 weight vectors. The function interface is as follows:

// train by SGD on the sample file
bool TrainSGDOnSampleFile (
	const char * sFileName, int iClassNum, int iFeatureNum,	// about the samples
	double dLearningRate,									// about the learning
	int iMaxLoop, double dMinImproveRatio					// about the stop criteria
	);
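
For reference, the model these weight vectors parameterize can be written in the same notation as the code comments further below, with theta_k the weight vector of class k and x the feature vector:

P(y = k | x) = exp (theta_k * x) / {1.0 + sum_{j=1..K-1} exp (theta_j * x)}   for k < K
P(y = K | x) = 1.0 / {1.0 + sum_{j=1..K-1} exp (theta_j * x)}

The K-th class (the default class) has no weight vector of its own, which is why only K-1 vectors need training.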


It calls the following private functions:

// initialize the theta matrix with iClassNum and iFeatureNum
bool InitThetaMatrix (int iClassNum, int iFeatureNum);
// calculate the model function output for iClassIndex by feature vector
double CalcFuncOutByFeaVec (vector<FeaValNode> & FeaValNodeVec, int iClassIndex);
// calculate the model function output for all the classes, and return the class index with max probability
int CalcFuncOutByFeaVecForAllClass (vector<FeaValNode> & FeaValNodeVec, vector<double> & ClassProbVec);
// calculate the gradient and update the theta matrix, it returns the cost
double UpdateThetaMatrix (Sample & theSample, vector<double> & ClassProbVec, double dLearningRate);


These functions respectively initialize the weight matrix, compute the model output of a given class under the current LR parameters, compute the predicted probabilities of all classes, and update the weights.
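
The Sample and FeaValNode types and the ThetaMatrix member are defined elsewhere in the series; a minimal sketch consistent with how the code below uses them (field names taken from that code, the exact layout is an assumption) could be:

#include <vector>
using namespace std;

// one (feature id, feature value) pair of a sparse feature vector
struct FeaValNode
{
	int iFeatureId;		// column index into one row of ThetaMatrix
	double dValue;		// the feature value
};

// one training sample: a class label plus its sparse feature vector
struct Sample
{
	int iClass;							// 0-based class index
	vector<FeaValNode> FeaValNodeVec;	// the features of this sample
};

// inside class LogisticRegression (sketch):
// vector< vector<double> > ThetaMatrix;	// one row of iFeatureNum weights per non-default class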

The SGD training loop is implemented as follows:

// the sample format: classid feature1_value feature2_value...
bool LogisticRegression::TrainSGDOnSampleFile (
	const char * sFileName, int iClassNum, int iFeatureNum,	// about the samples
	double dLearningRate = 0.05,							// about the learning
	int iMaxLoop = 1, double dMinImproveRatio = 0.01		// about the stop criteria
	)
{
	ifstream in (sFileName);
	if (!in)
	{
		cerr << "Can not open the file of " << sFileName << endl;
		return false;
	}

	if (!InitThetaMatrix (iClassNum, iFeatureNum))
		return false;

	double dCost = 0.0;
	double dPreCost = 100.0;
	for (int iLoop = 0; iLoop < iMaxLoop; iLoop++)
	{
		int iSampleNum = 0;
		int iErrNum = 0;
		string sLine;
		while (getline (in, sLine))
		{
			Sample theSample;
			if (ReadSampleFrmLine (sLine, theSample))
			{
				vector<double> ClassProbVec;
				int iPredClassIndex = CalcFuncOutByFeaVecForAllClass (theSample.FeaValNodeVec, ClassProbVec);
				if (iPredClassIndex != theSample.iClass)
					iErrNum++;

				dCost += UpdateThetaMatrix (theSample, ClassProbVec, dLearningRate);
				iSampleNum++;
			}
		}

		dCost /= iSampleNum;
		double dTmpRatio = (dPreCost - dCost) / dPreCost;
		double dTmpErrRate = (double)iErrNum / iSampleNum;

		// show info on screen
		cout << "In loop " << iLoop << ": current cost (" << dCost << ") previous cost (" << dPreCost << ") ratio (" << dTmpRatio << ") " << endl;
		cout << "And Error rate : " << dTmpErrRate << endl;

		if (dTmpRatio < dMinImproveRatio)
			break;
		else
		{
			dPreCost = dCost;
			dCost = 0.0;
			// reset the current reading position of file
			in.clear();
			in.seekg (0, ios::beg);
		}
	}

	return true;
}
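
ReadSampleFrmLine is not shown in this part. Going by the sample format stated in the comment above ("classid feature1_value feature2_value..."), a compatible parser could look like the sketch below; the actual implementation in the series may differ, for instance by reading a sparse id:value format, and the signature here is inferred from the call site.

#include <sstream>
#include <string>

// sketch: parse "classid value1 value2 ...", where the i-th value is the value of feature id i
bool LogisticRegression::ReadSampleFrmLine (string & sLine, Sample & theSample)
{
	istringstream iss (sLine);
	if (!(iss >> theSample.iClass))
		return false;

	theSample.FeaValNodeVec.clear();
	FeaValNode theNode;
	theNode.iFeatureId = 0;
	while (iss >> theNode.dValue)
	{
		theSample.FeaValNodeVec.push_back (theNode);
		theNode.iFeatureId++;
	}
	return !theSample.FeaValNodeVec.empty();
}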


The per-class model output is computed as follows:

// returns f(x) = exp (W*X) for a non-default class, and 1.0 for the default class (index iClassNum-1)
double LogisticRegression::CalcFuncOutByFeaVec(vector<FeaValNode> & FeaValNodeVec, int iClassIndex)
{
	if (iClassIndex >= iClassNum || iClassIndex < 0)	// wrong situation
		return 0.0;

	if (iClassIndex == (iClassNum-1))	// the default class (here is the class with max index)
		return 1.0;

	double dX = 0.0;
	vector<FeaValNode>::iterator p = FeaValNodeVec.begin();
	while (p != FeaValNodeVec.end())
	{
		if (p->iFeatureId < (int)ThetaMatrix.at(iClassIndex).size())	// all input is evil
			dX += ThetaMatrix[iClassIndex][p->iFeatureId] * p->dValue;
		p++;
	}

	double dY = exp (dX);
	return dY;
}


Two points to note: 1. among the K classes, the K-th class serves as the default class; 2. the value returned here is not a sigmoid value but a raw exponential. The final probabilities are computed in the following code:

// the class probability is calculated by :
// f(x) = exp (W*X) / {1.0 + sum_exp (W*X)} as long as iClassIndex < K
// f(x) = 1.0 / {1.0 + sum_exp (W*X)} as long as iClassIndex == K
int LogisticRegression::CalcFuncOutByFeaVecForAllClass (vector<FeaValNode> & FeaValNodeVec, vector<double> & ClassProbVec)
{
	ClassProbVec.clear();
	ClassProbVec.resize (iClassNum, 0.0);

	// start at 0.0: the default class contributes its 1.0 inside the loop below
	double dSum = 0.0;
	for (int i=0; i<iClassNum; i++)
	{
		ClassProbVec.at(i) = CalcFuncOutByFeaVec (FeaValNodeVec, i);
		dSum += ClassProbVec.at(i);
	}

	double dMaxProb = 0.0;
	int iClassMaxProb = -1;
	for (int i=0; i<iClassNum; i++)
	{
		ClassProbVec.at(i) /= dSum;
		if (ClassProbVec.at(i) > dMaxProb)
		{
			dMaxProb = ClassProbVec.at(i);
			iClassMaxProb = i;
		}
	}
	return iClassMaxProb;
}
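
One caveat the original code does not address: exp (dX) in CalcFuncOutByFeaVec can overflow once the weighted sums grow large. A standard remedy is to subtract the maximum score before exponentiating, which leaves the probabilities unchanged. A self-contained sketch of the idea (StableSoftmax is a hypothetical helper, not part of the original class):

#include <algorithm>
#include <cmath>
#include <vector>
using namespace std;

// given the raw scores theta_k * x of the K-1 non-default classes (the
// default class has an implicit score of 0.0 = log 1.0), fill ProbVec with
// the K softmax probabilities without risking overflow in exp()
void StableSoftmax (const vector<double> & ScoreVec, vector<double> & ProbVec)
{
	// find the largest score; the default class contributes a fixed score of 0.0
	double dMax = 0.0;
	for (size_t i = 0; i < ScoreVec.size(); i++)
		dMax = max (dMax, ScoreVec[i]);

	// exponentiate shifted scores; every exponent is <= 0, so exp() cannot overflow
	ProbVec.assign (ScoreVec.size() + 1, 0.0);
	double dSum = 0.0;
	for (size_t i = 0; i < ScoreVec.size(); i++)
	{
		ProbVec[i] = exp (ScoreVec[i] - dMax);
		dSum += ProbVec[i];
	}
	ProbVec[ScoreVec.size()] = exp (0.0 - dMax);	// the default class term
	dSum += ProbVec[ScoreVec.size()];

	for (size_t i = 0; i < ProbVec.size(); i++)
		ProbVec[i] /= dSum;
}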

The probabilities computed this way are in fact softmax probabilities. The weight update function:

// the update formula is : theta_new = theta_old - dLearningRate * (P(class_i|x) - I[i == true class]) * x
double LogisticRegression::UpdateThetaMatrix (Sample & theSample, vector<double> & ClassProbVec, double dLearningRate)
{
	double dCost = 0.0;
	// only the K-1 non-default classes carry weights, so the loop skips the default class
	for (int i=0; i<iClassNum-1; i++)
	{
		if (i == theSample.iClass)
		{
			vector<FeaValNode>::iterator p = theSample.FeaValNodeVec.begin();
			while (p != theSample.FeaValNodeVec.end())
			{
				if (p->iFeatureId < (int)ThetaMatrix[i].size())
				{
					double dGradient = (ClassProbVec[i] - 1.0) * p->dValue;
					double dDelta = dGradient * dLearningRate;
					ThetaMatrix[i][p->iFeatureId] -= dDelta;
				}
				p++;
			}
			// cost += -log(dY) when the sample class is class i, otherwise cost += -log(1.0 - dY)
			dCost -= log (ClassProbVec[i]);
		}
		else
		{
			vector<FeaValNode>::iterator p = theSample.FeaValNodeVec.begin();
			while (p != theSample.FeaValNodeVec.end())
			{
				if (p->iFeatureId < (int)ThetaMatrix[i].size())
				{
					double dGradient = ClassProbVec[i] * p->dValue;
					double dDelta = dGradient * dLearningRate;
					ThetaMatrix[i][p->iFeatureId] -= dDelta;
				}
				p++;
			}
			// cost += -log(dY) when the sample class is class i, otherwise cost += -log(1.0 - dY)
			dCost -= log (1.0 - ClassProbVec[i]);
		}
	}

	return dCost;
}
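
To tie the pieces together, a hypothetical driver; the file name and parameter values here are made up for illustration, and the class name LogisticRegression follows the snippets above:

#include <iostream>

int main ()
{
	LogisticRegression theLR;

	// 3 classes, 1000 features; learning rate 0.05, at most 10 passes,
	// stop once the cost improves by less than 1% between passes
	if (!theLR.TrainSGDOnSampleFile ("train.txt", 3, 1000, 0.05, 10, 0.01))
	{
		std::cerr << "training failed" << std::endl;
		return 1;
	}
	return 0;
}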


Done.

Please credit the source when reposting: http://blog.csdn.net/xceman1997/article/details/18449317