您的位置:首页 > 编程语言 > C语言/C++

决策树ID3算法(C++实现)

2017-12-11 22:21 246 查看
样本从网上找的。

ID3.h:

#ifndef ID3_h
#define ID3_h

#include <math.h>

#include <cstdio>
#include <cstring>
#include <iostream>
#include <string>
#include <vector>

using namespace std;

using namespace std;

// Training table: one row per sample. Column 0 holds the class label and the
// remaining columns hold the attribute values, all encoded as small ints.
// A real type alias replaces the former macro so the name obeys normal
// scoping rules and does not depend on `using namespace std`.
using Dataset = std::vector< std::vector<int> >;

const int Pro_num = 5;   // columns per sample: 1 label column + 4 attribute columns
const int Data_num = 14; // number of training samples
const int Ans_num = 2;   // number of distinct class labels
// Number of distinct values per column; entry 0 belongs to the label column
// and is left at 0 (e.g. column 1, outlook, has 3 possible values).
const int Pre_zinum[Pro_num] = {0, 3, 3, 2, 2};

// A (sub)set of training samples plus the record of which attribute columns
// have already been consumed by splits on the path leading to it.
class treedata{
public:
    // Sample rows; column 0 is the class label, the other columns are attributes.
    Dataset data;
    // use[i] == 1 once attribute column i has been split on. Zero-initialised
    // so a freshly constructed treedata never carries indeterminate flags.
    int use[Pro_num] = {};
};

// One node of the decision tree.
// Every scalar member has a default so that `Node n;` is as well-defined as
// `new Node()`; the defaults equal the zero values `new Node()` produced before.
class Node{
public:
    // Attribute column this node splits on next; 0 means the node already
    // holds an answer (leaf).
    int At = 0;
    // Class label reported when At == 0.
    int ans = 0;
    // Value of the parent's split attribute that selects this node
    // (the tree builder is handed -1 for the root).
    int pre = 0;
    // Child nodes, one per value of attribute `At`. Stored as raw pointers;
    // this class does not delete them.
    std::vector<Node*> child;
};

// ID3 decision-tree builder, printer and classifier.
class Tree{
public:
    // Resets the node counter and fills in the attribute/value display names.
    void init();
    // Recursively builds the subtree rooted at `root` from the samples in `now`;
    // `pre` is the parent's attribute value that leads to `root`.
    void CreateTree(Node* root,treedata now,int pre);
    // Returns the attribute column with the highest information gain.
    int Biggest(treedata now);
    // Computes the entropy of the labels after partitioning on column `xing`.
    double Entropy(treedata now,int xing);
    // Returns the most frequent class label in `now`.
    int most(treedata now);
    // Prints the subtree rooted at `root`; `pre` is the parent's node number.
    void output(Node *root,int pre);
    // Classifies one sample row by walking the tree from `root`.
    int ask(int data[],Node *root);

    // kind[n] is the attribute column that printed node number n splits on.
    // Zero-initialised so output() never reads an indeterminate entry.
    int kind[1111] = {};
    // Running node number handed out by output(); starts at 0 even if init()
    // has not been called yet.
    int fsn = 0;
    // S[column][value] is the display name of an attribute value.
    // Only S[0][0] starts as "0"; init() fills the real names.
    std::string S[5][4]={"0"};
    // Pres[column] is the display name of an attribute column.
    // Only Pres[0] starts as "0"; init() fills the real names.
    std::string Pres[Pro_num]={"0"};

};

//enum outlook {SUNNY, OVERCAST, RAIN };
//enum temp {HOT, MILD, COOL };
//enum hum {HIGH, NORMAL };
//enum windy {WEAK, STRONG };

// Resets the node counter and loads the display names used by output().
// Slot 0 of both tables (the label column) is left untouched.
void Tree::init()
{
    // output() numbers nodes by incrementing this counter.
    fsn = 0;

    // Display names of the four attribute columns, in column order 1..4.
    static const char* const columnNames[4] = {"outlook", "temp", "hum", "windy"};

    // Display names of each column's values; a null entry means the column
    // has fewer than three values and that slot must not be written.
    static const char* const valueNames[4][3] = {
        {"SUNNY", "OVERCAST", "RAIN"},
        {"HOT", "MILD", "COOL"},
        {"HIGH", "NORMAL", nullptr},
        {"WEAK", "STRONG", nullptr},
    };

    for (int column = 1; column <= 4; ++column) {
        Pres[column] = columnNames[column - 1];
        for (int value = 0; value < 3; ++value) {
            const char* const text = valueNames[column - 1][value];
            if (text != nullptr) {
                S[column][value] = text;
            }
        }
    }
}

// Prints the subtree rooted at `root`, one line per node, in pre-order.
//
// root: node to print; a null pointer prints nothing.
// pre:  node number of the parent as printed earlier (the caller passes 0
//       for the root).
//
// Each node receives the next value of the running counter `fsn` as its
// number. For split nodes the chosen attribute is remembered in kind[number]
// so children can translate their incoming branch value into a name.
void Tree::output(Node *root,int pre)
{
    if (root == nullptr) {
        return;
    }

    fsn++;
    // Remember this node's own number: fsn keeps growing while the children
    // are printed, but they must all report this node as their parent.
    const int now = fsn;

    if (root->At == 0) {
        printf("节点%02d : 父亲节点:%d 答案节点:%d", fsn, pre, root->ans);
    } else {
        kind[fsn] = root->At;
        printf("节点%02d : 父亲节点:%d", fsn, pre);
        cout << " 接下来以" << Pres[root->At] << "属性分";
    }

    // A root has no incoming branch (pre == -1). The original answer-node path
    // skipped this check and indexed S[...][-1] when the root itself was a
    // leaf; both node kinds now share the same guarded tail.
    if (root->pre == -1) {
        cout << " 这个节点是根节点" << endl;
    } else {
        cout << " 这个节点选择了属性" << S[kind[pre]][root->pre] << endl;
    }

    for (size_t i = 0; i < root->child.size(); i++) {
        output(root->child[i], now);
    }
}

// Returns the most frequent class label (column 0) among the rows of `now`.
// Ties go to the smallest label; an empty data set yields 0.
int Tree::most(treedata now)
{
    // The counters must start at zero: the original left them uninitialised,
    // so the result depended on whatever happened to be on the stack.
    int num[Ans_num] = {};
    for (const std::vector<int>& row : now.data) {
        if (row.empty()) {
            continue;
        }
        const int label = row[0];
        // Ignore labels outside [0, Ans_num) rather than writing out of bounds.
        if (label >= 0 && label < Ans_num) {
            ++num[label];
        }
    }

    int Max = 0;
    int ans = 0;
    for (int i = 0; i < Ans_num; i++) {
        if (Max < num[i]) {
            Max = num[i];
            ans = i;
        }
    }
    return ans;
}

int Tree::ask(int now[],Node *root)
{
if(root->At==0){
return root->ans;
}
int to=root->At;
for(int i=0;i<root->child.size();i++){
if(root->child[i]->pre==now[to]){
return ask(now,root->child[i]);
break;
}
}
return -2;
}

double Tree::Entropy(treedata now,int xing)
{
int x=Pre_zinum[xing];//xing属性的选项个数
double pi[x];
int num[x];
memset(num,0,sizeof num);
memset(pi,0,sizeof pi);
int kk[x][Ans_num];
memset(kk,0,sizeof kk);
for(int i=0;i<now.data.size();i++){
num[now.data[i][xing]]++;
kk[now.data[i][xing]][now.data[i][0]]++;
}
for(int i=0;i<x;i++){
double pxing=0;
for(int j=0;j<Ans_num;j++){
double pp=kk[i][j]*1.0/num[i];
if(pp==0) continue;
pxing-=pp*log2(pp);
}
pi[i]=pxing;
}
double ans_p=0;
for(int i=0;i<x;i++){
ans_p+=num[i]*1.0/now.data.size()*pi[i];
}
return ans_p;

}

int Tree::Biggest(treedata now)
{
double all_p=0;
int num[Ans_num];memset(num,0,sizeof num);
int flag[Pro_num];memset(flag,0,sizeof flag);
for(int i=1;i<Pro_num;i++){
flag[i]=now.use[i];
}
for(int i=0;i<now.data.size();i++){
num[now.data[i][0]]++;
}
for(int i=0;i<Ans_num;i++){
double pp = num[i]*1.0/(1.0*now.data.size());
if(pp==0) continue;
all_p-=pp*log2(pp);
}
int Ans=0;
double p_now=0;
for(int i=1;i<Pro_num;i++){
if(flag[i]) continue;
double P_xing = all_p-Entropy(now, i);
if(P_xing>p_now){
p_now=P_xing;
Ans=i;
}
}
return Ans;

}

void Tree::CreateTree(Node* root,treedata now,int pre)
{
root->pre=pre;
int flag=1;
int aim=now.data[0][0];
//只有一种答案,建树终止
for(int i=0;i<now.data.size();i++){
if(aim!=now.data[i][0]){
flag=0;
break;
}
}
if(flag){
root->At=0;
root->ans=aim;
return;
}

int num=0;
for(int i=1;i<Pro_num;i++){
if(now.use[i]==1){
num++;
}
}

//属性都用过了,建树终止,答案为最普遍的输出
if(num==Pro_num-1){
root->At=0;
Node* a;
a = new Node();
a->ans=most(now);
a->At=0;
root->child.push_back(a);
return;
}
//选出信息增益最高的属性建树
int xing=Biggest(now);
int x=Pre_zinum[xing];
root->At= xing;
for(int i=0;i<x;i++){
treedata newdata;
newdata.data.clear();
for(int j=0;j<Pro_num;j++){
newdata.use[j]=now.use[j];
}
newdata.use[xing]=1;
for(int j=0;j<now.data.size();j++){
if(now.data[j][xing]==i){
newdata.data.push_back(now.data[j]);
}
}
Node* to;
to = new Node();
CreateTree(to, newdata, i);;
root->child.push_back(to);
}

}

#endif /* ID3_h */


main.cpp:

#include <iostream>
#include <string>
#include <math.h>
#include <vector>
#include "ID3.h"

using namespace std;

//#define Dataset vector< vector<int> >  //数据
//const int Pro_num = 5;//属性的数量
//const int Data_num = 14;//数据的数量
//const int Ans_num = 2;//答案的数量
//const int Pre_zinum[Pro_num]={0,3,3,2,2}; // 属性的种数 //比如outlook有3种

// Integer codes for the values of each attribute column. They stay plain
// (unscoped) enums so the enumerators convert implicitly to int when used
// inside the sample table.
enum outlook {
    SUNNY = 0,
    OVERCAST = 1,
    RAIN = 2
};

enum temp {
    HOT = 0,
    MILD = 1,
    COOL = 2
};

enum hum {
    HIGH = 0,
    NORMAL = 1
};

enum windy {
    WEAK = 0,
    STRONG = 1
};

int main()
{
int samples[14][5] =
{
{0,SUNNY   ,       HOT ,      HIGH  ,       WEAK  },
{0,SUNNY   ,       HOT ,      HIGH  ,       STRONG},
{1,OVERCAST,       HOT ,      HIGH  ,       WEAK  },
{1,RAIN    ,       MILD,      HIGH  ,       WEAK  },
{1,RAIN    ,       COOL,      NORMAL,       WEAK  },
{0,RAIN    ,       COOL,      NORMAL,       STRONG},
{1,OVERCAST,       COOL,      NORMAL,       STRONG},
{0,SUNNY   ,       MILD,      HIGH  ,       WEAK  },
{1,SUNNY   ,       COOL,      NORMAL,       WEAK  },
{1,RAIN    ,       MILD,      NORMAL,       WEAK  },
{1,SUNNY   ,       MILD,      NORMAL,       STRONG},
{1,OVERCAST,       MILD,      HIGH  ,       STRONG},
{1,OVERCAST,       HOT ,      NORMAL,       WEAK  },
{0,RAIN    ,       MILD,      HIGH  ,       STRONG}
};
vector<vector<int> > array;
vector<int> g;
for(int i=0;i<Data_num;i++){
g.clear();
for(int j=0;j<Pro_num;j++){
g.push_back(samples[i][j]);
}
array.push_back(g);
}
treedata now;
now.data=array;
memset(now.use,0,sizeof now.use);
Tree Decision_Tree;
Node*root;
root = new Node();
Decision_Tree.init();
Decision_Tree.CreateTree(root,now,-1);
//int ceshi[5]={-1,SUNNY,COOL,HIGH,WEAK};
Decision_Tree.output(root, 0);
for(int i=0;i<Data_num;i++){
printf("%d\n",Decision_Tree.ask(samples[i],root));
}

}
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: