
Training and Testing a Logistic Regression Model in C++

2015-06-03 14:09
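
The program below implements a small logistic regression classifier: it loads a training file and a test file, fits the weight vector with batch gradient ascent (a stochastic variant, stocGradAscent, is included as well), and reports the error rate on the held-out test set. Both input files are expected to contain one sample per line, with the class label (0 or 1) as the last field on the line.
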
#include <iostream>
#include <fstream>
#include <sstream>
#include <string>
#include <vector>
#include <algorithm>
#include <cmath>
#include <ctime>
#include <random>

using namespace std;

// Load a whitespace-separated data file: every field on a line is a feature,
// except the last one, which is the class label (0 or 1). A constant 1.0 is
// prepended to each sample as the bias term.
void loadDataset(vector<vector<double>> &dataMat, vector<int> &labelMat, const string &filename)
{
    ifstream file(filename);
    string line;
    while (getline(file, line))
    {
        istringstream record(line);
        vector<double> data;
        data.push_back(1.0);            // bias term x0 = 1
        double temp = 0.0;
        while (record >> temp)
            data.push_back(temp);
        if (data.size() < 2)            // skip blank or malformed lines
            continue;
        labelMat.push_back(int(temp));  // last field on the line is the label
        data.pop_back();                // remove the label from the feature vector
        dataMat.push_back(data);
    }
}
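
loadDataset therefore expects plain text in which every field on a line is numeric and the last field is the label; everything before it becomes a feature, prefixed with the constant 1.0 for the bias term. As an illustration with made-up values (not the real horse-colic data), a file containing

    0.5  1.2  3.4  1
    2.0  0.7  1.1  0

would yield the feature vectors {1.0, 0.5, 1.2, 3.4} and {1.0, 2.0, 0.7, 1.1} with labels 1 and 0.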

// Dot product of two equally sized vectors.
double scalarProduct(vector<double> &w, vector<double> &x)
{
    double ret = 0.0;
    for (size_t i = 0; i < w.size(); i++)
        ret += w[i] * x[i];
    return ret;
}

double sigmoid(double z)
{
    return 1.0 / (1.0 + exp(-z));
}

// Return the transpose of dataMat (rows become columns).
vector<vector<double>> matTranspose(vector<vector<double>> &dataMat)
{
    vector<vector<double>> ret(dataMat[0].size(), vector<double>(dataMat.size(), 0));
    for (size_t i = 0; i < ret.size(); i++)
        for (size_t j = 0; j < ret[0].size(); j++)
            ret[i][j] = dataMat[j][i];
    return ret;
}

// Batch gradient ascent: every cycle uses the whole training set to update
// the weight vector.
void gradAscent(vector<double> &weight,
                vector<vector<double>> &dataMat, vector<int> &labelMat)
{
    int maxCycles = 500;
    double alpha = 0.001;                               // fixed step size
    vector<vector<double>> dataMatT = matTranspose(dataMat);
    while (maxCycles > 0)
    {
        vector<double> h;                               // predictions sigmoid(x·w)
        vector<double> error;                           // residuals label - prediction
        for (auto &data : dataMat)
            h.push_back(sigmoid(scalarProduct(data, weight)));
        for (size_t i = 0; i < labelMat.size(); i++)
        {
            double dist = labelMat[i] - h[i];
            if (fabs(dist) < 1e-10)                     // treat tiny residuals as zero
                dist = 0;
            error.push_back(dist);
        }
        for (size_t i = 0; i < weight.size(); i++)      // w += alpha * X^T * error
            weight[i] += alpha * scalarProduct(dataMatT[i], error);
        maxCycles--;
    }
}
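
For reference, each cycle of gradAscent performs the standard batch logistic regression update, in matrix notation

    w := w + alpha * X^T * (y - sigmoid(X * w))

where X is the data matrix (one sample per row, bias column included), y is the label vector and alpha the fixed step size; scalarProduct(dataMatT[i], error) computes the i-th component of X^T * (y - h).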

// Stochastic gradient ascent: the weights are updated one (randomly chosen)
// sample at a time, with a step size that decays over the iterations.
void stocGradAscent(vector<double> &weight,
                    vector<vector<double>> &dataMat, vector<int> &labelMat, int numIter = 150)
{
    vector<int> randIndex;
    for (size_t i = 0; i < dataMat.size(); i++)
        randIndex.push_back(i);

    mt19937 gen(random_device{}());
    for (int k = 0; k < numIter; k++)
    {
        shuffle(randIndex.begin(), randIndex.end(), gen);   // visit samples in random order
        for (size_t i = 0; i < dataMat.size(); i++)
        {
            // Decaying step size; the constant 0.01 keeps it from reaching zero.
            double alpha = 4.0 / (1.0 + k + i) + 0.01;
            double h = sigmoid(scalarProduct(dataMat[randIndex[i]], weight));
            double error = labelMat[randIndex[i]] - h;
            for (size_t j = 0; j < weight.size(); j++)
                weight[j] += alpha * error * dataMat[randIndex[i]][j];
        }
    }
}
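
The step size 4.0/(1.0+k+i)+0.01 shrinks as the pass index k and the sample index i grow, which damps the oscillations caused by single-sample updates, while the constant 0.01 keeps later samples from being ignored entirely. A minimal sketch of training with the stochastic version instead of the batch one (reusing the trainMat/trainLabel/testMat/testLabel variables set up in main below):

    vector<double> weight2(trainMat[0].size(), 1.0);
    stocGradAscent(weight2, trainMat, trainLabel);          // 150 passes by default
    double err2 = testResult(testMat, testLabel, weight2);  // error rate on the test set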

// Classify a sample: predict 1 when the estimated probability sigmoid(w·x)
// exceeds 0.5 (equivalently, when w·x > 0), otherwise 0.
int classify(vector<double> &data, vector<double> &weights)
{
    if (sigmoid(scalarProduct(data, weights)) > 0.5)
        return 1;
    else
        return 0;
}

// Return the error rate of the classifier on a labelled test set.
double testResult(vector<vector<double>> &testDataMat,
                  vector<int> &testDataLabel, vector<double> &weight)
{
    double errCount = 0.0;
    double dataSize = testDataMat.size();
    for (size_t i = 0; i < testDataMat.size(); i++)
        if (classify(testDataMat[i], weight) != testDataLabel[i])
            errCount += 1.0;
    return errCount / dataSize;
}

int main()
{
    // Load the training and test sets.
    vector<vector<double>> trainMat;
    vector<int> trainLabel;
    string trainFile("horseColicTraining.txt");
    loadDataset(trainMat, trainLabel, trainFile);

    vector<vector<double>> testMat;
    vector<int> testLabel;
    string testFile("horseColicTest.txt");
    loadDataset(testMat, testLabel, testFile);

    // Train with batch gradient ascent and evaluate on the test set.
    vector<double> weight(trainMat[0].size(), 1);

    clock_t start_time = clock();
    gradAscent(weight, trainMat, trainLabel);
    double err = testResult(testMat, testLabel, weight);
    clock_t end_time = clock();

    for (auto v : weight)
        cout << v << endl;

    cout << "the error rate is: " << err << endl;
    cout << "time elapsed: " << double(end_time - start_time) / CLOCKS_PER_SEC << "s" << endl;

    /*
    // Stochastic variant, for comparison:
    vector<double> weight2(trainMat[0].size(), 1);
    clock_t start_time2 = clock();
    stocGradAscent(weight2, trainMat, trainLabel);
    clock_t end_time2 = clock();
    */
}
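
Assuming the source is saved as logistic.cpp (any filename works) and the two data files are in the working directory, the program can be built and run with, for example:

    g++ -std=c++11 -O2 logistic.cpp -o logistic
    ./logistic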