Logistic回归模型的训练与测试,C++ 实现
2015-06-03 14:09
477 查看
// Logistic regression trained with batch or stochastic gradient ascent and
// evaluated on a held-out test file. Each input line holds whitespace-separated
// numeric features followed by the class label (0 or 1) as the last value.

#include <iostream>
#include <fstream>
#include <sstream>
#include <string>
#include <vector>
#include <cstring>
#include <stdio.h>
#include <algorithm>
#include <cmath>
#include <cstddef>
#include <numeric>
#include <random>
#include <utility>

/// @brief Appends the samples found in @p filename to @p dataMat / @p labelMat.
///
/// Every accepted row is stored as {1.0, feature_1, ..., feature_n}; the leading
/// 1.0 is the bias term. The last numeric value on the line is truncated to int
/// and stored as the label.
///
/// Lines that contain no numeric value are skipped, as are lines whose column
/// count differs from the rows already present in @p dataMat, so later dot
/// products never run past the end of a row. If the file cannot be opened a
/// message is written to std::cerr and the outputs are left untouched.
///
/// @param dataMat   Receives one feature row (with bias term) per sample.
/// @param labelMat  Receives one label per sample, parallel to @p dataMat.
/// @param filename  Path of the text file to read.
void loadDataset(std::vector<std::vector<double>> &dataMat,
                 std::vector<int> &labelMat,
                 const std::string &filename)
{
    std::ifstream file(filename);
    if (!file) {
        std::cerr << "loadDataset: cannot open '" << filename << "'\n";
        return;
    }

    std::string line;
    while (std::getline(file, line)) {
        std::istringstream record(line);
        std::vector<double> row{1.0};  // bias term
        double value = 0.0;
        while (record >> value)
            row.push_back(value);

        // Only the bias term is present: blank line or no leading number.
        if (row.size() < 2)
            continue;

        // The last successfully parsed value is the label, not a feature.
        const int label = static_cast<int>(row.back());
        row.pop_back();

        if (!dataMat.empty() && row.size() != dataMat.front().size()) {
            std::cerr << "loadDataset: skipping line with unexpected column count in '"
                      << filename << "'\n";
            continue;
        }

        dataMat.push_back(std::move(row));
        labelMat.push_back(label);
    }
}

/// @brief Dot product of @p w and @p x.
///
/// Only the common prefix of the two vectors is used, so vectors of different
/// length never cause an out-of-bounds read.
double scalarProduct(const std::vector<double> &w, const std::vector<double> &x)
{
    const std::size_t n = std::min(w.size(), x.size());
    return std::inner_product(w.begin(),
                              w.begin() + static_cast<std::ptrdiff_t>(n),
                              x.begin(),
                              0.0);
}

/// @brief Logistic function 1 / (1 + e^-z).
double sigmoid(double z)
{
    return 1.0 / (1.0 + std::exp(-z));
}

/// @brief Returns the transpose of @p dataMat.
///
/// The column count is taken from the first row. An empty input yields an empty
/// result; entries missing from a shorter row are left at 0.
std::vector<std::vector<double>> matTranspose(const std::vector<std::vector<double>> &dataMat)
{
    if (dataMat.empty())
        return {};

    const std::size_t rows = dataMat.size();
    const std::size_t cols = dataMat.front().size();
    std::vector<std::vector<double>> ret(cols, std::vector<double>(rows, 0.0));

    for (std::size_t r = 0; r < rows; ++r) {
        const std::size_t width = std::min(cols, dataMat[r].size());
        for (std::size_t c = 0; c < width; ++c)
            ret[c][r] = dataMat[r][c];
    }
    return ret;
}

/// @brief Batch gradient ascent on the logistic log-likelihood.
///
/// Each cycle computes the residual (label - prediction) for every sample with
/// the current weights, then moves every weight by alpha times the dot product
/// of its feature column with the residual vector.
///
/// @param weight     Initial weights on entry, trained weights on return.
/// @param dataMat    Feature rows (including the bias term).
/// @param labelMat   Labels parallel to @p dataMat.
/// @param maxCycles  Number of full passes over the data.
/// @param alpha      Step size.
void gradAscent(std::vector<double> &weight,
                const std::vector<std::vector<double>> &dataMat,
                const std::vector<int> &labelMat,
                int maxCycles = 500,
                double alpha = 0.001)
{
    const std::size_t sampleCount = std::min(dataMat.size(), labelMat.size());
    if (sampleCount == 0 || weight.empty())
        return;

    // Transposed once so that each feature column is a contiguous vector.
    const std::vector<std::vector<double>> dataMatT = matTranspose(dataMat);
    const std::size_t dims = std::min(weight.size(), dataMatT.size());
    std::vector<double> error(sampleCount, 0.0);

    for (int cycle = 0; cycle < maxCycles; ++cycle) {
        // All residuals are computed before any weight changes.
        for (std::size_t i = 0; i < sampleCount; ++i) {
            const double h = sigmoid(scalarProduct(dataMat[i], weight));
            double dist = labelMat[i] - h;
            // Residuals below 1e-10 in magnitude are treated as exactly zero.
            if (std::fabs(dist) < 1e-10)
                dist = 0.0;
            error[i] = dist;
        }

        for (std::size_t j = 0; j < dims; ++j)
            weight[j] += alpha * scalarProduct(dataMatT[j], error);
    }
}

/// @brief Stochastic gradient ascent: one weight update per sample, visiting
///        the samples in a freshly shuffled order on every pass.
///
/// The step size decays as 4 / (1 + pass + position) + 0.01. The division is
/// done in floating point; with integer operands the first term would drop to
/// 0 as soon as 1 + pass + position exceeded 4.
///
/// @param weight    Initial weights on entry, trained weights on return.
/// @param dataMat   Feature rows (including the bias term).
/// @param labelMat  Labels parallel to @p dataMat.
/// @param numIter   Number of passes over the data.
/// @param seed      Seed of the shuffling engine; a fixed seed makes runs
///                  reproducible.
void stocGradAscent(std::vector<double> &weight,
                    const std::vector<std::vector<double>> &dataMat,
                    const std::vector<int> &labelMat,
                    int numIter = 150,
                    unsigned int seed = std::mt19937::default_seed)
{
    const std::size_t sampleCount = std::min(dataMat.size(), labelMat.size());
    if (sampleCount == 0)
        return;

    std::vector<std::size_t> randIndex(sampleCount);
    std::iota(randIndex.begin(), randIndex.end(), std::size_t{0});
    std::mt19937 rng(seed);

    for (int k = 0; k < numIter; ++k) {
        // std::shuffle replaces std::random_shuffle, which C++17 removed.
        std::shuffle(randIndex.begin(), randIndex.end(), rng);

        for (std::size_t i = 0; i < sampleCount; ++i) {
            const double alpha = 4.0 / (1.0 + k + static_cast<double>(i)) + 0.01;
            const std::vector<double> &sample = dataMat[randIndex[i]];
            const double error =
                labelMat[randIndex[i]] - sigmoid(scalarProduct(sample, weight));

            const std::size_t dims = std::min(weight.size(), sample.size());
            for (std::size_t j = 0; j < dims; ++j)
                weight[j] += alpha * error * sample[j];
        }
    }
}

/// @brief Predicts the class (0 or 1) of one sample.
///
/// The linear score is passed through the sigmoid before it is compared with
/// 0.5, so the decision boundary lies at probability 0.5 (score 0).
int classify(const std::vector<double> &data, const std::vector<double> &weights)
{
    return sigmoid(scalarProduct(data, weights)) > 0.5 ? 1 : 0;
}

/// @brief Fraction of test samples that @p weight misclassifies.
///
/// @return A value in [0, 1]; 0.0 when there are no samples to evaluate.
double testResult(const std::vector<std::vector<double>> &testDataMat,
                  const std::vector<int> &testDataLabel,
                  const std::vector<double> &weight)
{
    const std::size_t sampleCount = std::min(testDataMat.size(), testDataLabel.size());
    if (sampleCount == 0)
        return 0.0;

    std::size_t errCount = 0;
    for (std::size_t i = 0; i < sampleCount; ++i) {
        if (classify(testDataMat[i], weight) != testDataLabel[i])
            ++errCount;
    }
    return static_cast<double>(errCount) / static_cast<double>(sampleCount);
}

/// Trains on horseColicTraining.txt, evaluates on horseColicTest.txt, then
/// prints the learned weights (one per line) followed by the test error rate.
int main()
{
    std::vector<std::vector<double>> trainMat;
    std::vector<int> trainLabel;
    loadDataset(trainMat, trainLabel, "horseColicTraining.txt");

    std::vector<std::vector<double>> testMat;
    std::vector<int> testLabel;
    loadDataset(testMat, testLabel, "horseColicTest.txt");

    if (trainMat.empty()) {
        std::cerr << "no training samples loaded from horseColicTraining.txt\n";
        return 1;
    }
    if (testMat.empty()) {
        std::cerr << "no test samples loaded from horseColicTest.txt\n";
        return 1;
    }

    // One weight per column, bias term included, all starting at 1.
    std::vector<double> weight(trainMat.front().size(), 1.0);

    gradAscent(weight, trainMat, trainLabel);
    // To try the stochastic variant instead, replace the call above with:
    //     stocGradAscent(weight, trainMat, trainLabel);

    const double err = testResult(testMat, testLabel, weight);

    for (const double v : weight)
        std::cout << v << '\n';
    std::cout << "the error rate is: " << err << std::endl;
    return 0;
}
相关文章推荐
- C++学习之Pair
- 【Scramble String】cpp
- NYOJ 46 最少乘法次数
- NYOJ 45 棋盘覆盖
- NYOJ 722 数独
- C++实现事件机制
- NYOJ 76 超级台阶
- 在Golang中使用C语言代码实例
- NYOJ 36 最长公共子序列 (还是dp)
- NYOJ 37 回文字符串
- NYOJ 467 中缀式变后缀式
- NYOJ 44 子串和 (经典的dp问题)
- vc++6.0设定UNICODE编译环境
- vc++6.0配置和使用GDI+
- C++高级编程(第3版)
- Effective C++条款40
- c++ ofstream & ifstream文件流操作
- c++ ofstream & ifstream文件流操作
- struct字节对齐问题
- c++学习二