您的位置:首页 > 编程语言 > Python开发

python之实战----决策树(ID3,C4.5,CART)战sin(x)+随机噪声

2017-10-23 14:17 656 查看
ID3是采用了信息增益作为特征的选择的度量(越大越好),而C4.5采用的是信息增益比(越大越好),CART分类树采用基尼指数选择最优特征(越小越好)。

1.回归决策树:

#-*- coding=utf-8 -*-
import numpy  as np
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection  import train_test_split
import matplotlib.pyplot as plt
def creat_data(n):
    """Generate a noisy sin(x) regression data set and split it.

    Draws ``n`` inputs uniformly from [0, 5), uses sin(x) as the target and
    perturbs every fifth target with uniform noise in (-1.5, 1.5].

    Args:
        n: Number of samples to generate.

    Returns:
        The result of ``train_test_split``: X_train, X_test, y_train, y_test,
        with 25% of the samples held out (``random_state=1``).
    """
    np.random.seed(0)  # fixed seed so every run produces the same data
    X = 5 * np.random.rand(n, 1)  # n x 1 column of inputs
    y = np.sin(X).ravel()
    # y[::5] holds ceil(n / 5) elements, so size the noise from the slice
    # itself. int(n / 5) is one short whenever n is not a multiple of 5 and
    # the in-place add then fails with a shape mismatch.
    noise_num = y[::5].size
    y[::5] += 3 * (0.5 - np.random.rand(noise_num))
    return train_test_split(X, y, test_size=0.25, random_state=1)

def test_DecisionTreeRegressor(*data):
    """Fit a DecisionTreeRegressor, print its scores and plot the fit.

    Args:
        *data: Exactly four positional values, in the order
            ``X_train, X_test, y_train, y_test``.
    """
    X_train, X_test, y_train, y_test = data
    regr = DecisionTreeRegressor()
    regr.fit(X_train, y_train)
    print("the train score :%f" % (regr.score(X_train, y_train)))
    print("the test score:%f " % regr.score(X_test, y_test))

    # Plot the samples together with the fitted prediction curve.
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    # np.newaxis turns the 1-D grid into a 2-D column vector, the shape
    # predict() is given here.
    X = np.arange(0.0, 5.0, 0.01)[:, np.newaxis]
    Y = regr.predict(X)
    ax.scatter(X_train, y_train, label="train Sample", c='r')
    ax.scatter(X_test, y_test, label="test Sample", c='b')
    # Keyword names are case-sensitive: the property is ``linewidth``;
    # ``Linewidth`` is rejected as an unknown Line2D property.
    ax.plot(X, Y, label="Predict_value", linewidth=1, alpha=0.5)
    ax.set_xlabel("Data")
    ax.set_ylabel("Target")
    ax.set_title("Decision Tree Regressor")
    ax.legend(loc='upper right', framealpha=1)  # framealpha sets legend opacity
    plt.show()
if __name__ == '__main__':
    # Build the 200-sample noisy sine data set, then run the regression demo.
    X_train, X_test, y_train, y_test = creat_data(200)
    test_DecisionTreeRegressor(X_train, X_test, y_train, y_test)






2.分类决策树战离散iris--------比较评价准则criterion和划分splitter

#-*- coding=utf-8 -*-
import numpy  as np
from sklearn import datasets
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection  import train_test_split
import matplotlib.pyplot as plt
def load_data():
    """Load the iris data set and return a stratified train/test split.

    Returns:
        The result of ``train_test_split``: X_train, X_test, y_train, y_test,
        with 25% of the samples held out (``random_state=0``) and the split
        stratified on the target labels.
    """
    iris = datasets.load_iris()
    features, labels = iris.data, iris.target
    return train_test_split(
        features,
        labels,
        test_size=0.25,
        random_state=0,
        stratify=labels,
    )

def creat_data(n):
    """Generate a noisy sin(x) regression data set and split it.

    Draws ``n`` inputs uniformly from [0, 5), uses sin(x) as the target and
    perturbs every fifth target with uniform noise in (-1.5, 1.5].

    Args:
        n: Number of samples to generate.

    Returns:
        The result of ``train_test_split``: X_train, X_test, y_train, y_test,
        with 25% of the samples held out (``random_state=1``).
    """
    np.random.seed(0)  # fixed seed so every run produces the same data
    X = 5 * np.random.rand(n, 1)  # n x 1 column of inputs
    y = np.sin(X).ravel()
    # Every fifth target gets noise. y[::5] holds ceil(n / 5) elements, so
    # take the count from the slice; int(n / 5) is one short when n is not a
    # multiple of 5 and the in-place add then fails with a shape mismatch.
    noise_num = y[::5].size
    y[::5] += 3 * (0.5 - np.random.rand(noise_num))
    return train_test_split(X, y, test_size=0.25, random_state=1)

def test_DecisionTreeClassifier(*data):
    """Fit a default DecisionTreeClassifier and print train/test scores.

    Args:
        *data: Exactly four positional values, in the order
            ``X_train, X_test, y_train, y_test``.
    """
    train_X, test_X, train_y, test_y = data
    model = DecisionTreeClassifier()
    model.fit(train_X, train_y)
    train_acc = model.score(train_X, train_y)
    print("the train score :%f" % train_acc)
    test_acc = model.score(test_X, test_y)
    print("the test score:%f " % test_acc)
def test_DecisionTreeClassifier_criterion(*data):
    """Compare DecisionTreeClassifier criteria and splitter strategies.

    Fits one classifier per ``criterion`` value ('gini', 'entropy') and then
    one per ``splitter`` value ('best', 'random'), printing the train and
    test score of each.

    Args:
        *data: Exactly four positional values, in the order
            ``X_train, X_test, y_train, y_test``.
    """
    X_train, X_test, y_train, y_test = data

    for criterion in ('gini', 'entropy'):
        clf = DecisionTreeClassifier(criterion=criterion)
        clf.fit(X_train, y_train)
        # The label used to be misspelled "criterrion" in the output.
        print('criterion: %s' % criterion)
        print("the train score :%f" % (clf.score(X_train, y_train)))
        print("the test score:%f\n " % clf.score(X_test, y_test))

    for splitter in ('best', 'random'):
        clf = DecisionTreeClassifier(splitter=splitter)
        clf.fit(X_train, y_train)
        print('splitter: %s' % splitter)
        print("the train score :%f" % (clf.score(X_train, y_train)))
        print("the test score:%f\n " % clf.score(X_test, y_test))
if __name__ == '__main__':
    # Split iris, then compare criterion and splitter settings on it.
    X_train, X_test, y_train, y_test = load_data()
    test_DecisionTreeClassifier_criterion(X_train, X_test, y_train, y_test)



3.决策树分类随树深度增加的效果

#-*- coding=utf-8 -*-
import numpy  as np
from sklearn import datasets
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection  import train_test_split
import matplotlib.pyplot as plt
def load_data():
    """Load the iris data set and return a stratified train/test split.

    Returns:
        The result of ``train_test_split``: X_train, X_test, y_train, y_test,
        with 25% of the samples held out (``random_state=0``) and the split
        stratified on the target labels.
    """
    iris = datasets.load_iris()
    samples = iris.data
    targets = iris.target
    split_options = dict(test_size=0.25, random_state=0, stratify=targets)
    return train_test_split(samples, targets, **split_options)

def test_DecisionTreeClassifier_depth(maxdepth, *data):
    """Plot train/test score of DecisionTreeClassifier against max_depth.

    Args:
        maxdepth: Exclusive upper bound; depths 1 .. maxdepth - 1 are tried.
            It is declared before ``*data`` so it can be passed positionally.
        *data: Exactly four positional values, in the order
            ``X_train, X_test, y_train, y_test``.
    """
    X_train, X_test, y_train, y_test = data
    depths = np.arange(1, maxdepth)

    train_score, test_score = [], []
    for depth in depths:
        tree = DecisionTreeClassifier(max_depth=depth)
        tree.fit(X_train, y_train)
        train_score.append(tree.score(X_train, y_train))
        test_score.append(tree.score(X_test, y_test))

    # Draw both score curves on a single axes.
    figure = plt.figure()
    axes = figure.add_subplot(1, 1, 1)
    axes.plot(depths, train_score, label="train score", marker='*', color='r')
    axes.plot(depths, test_score, label="test score", marker='o', color='b')
    axes.set_xlabel("maxdepth")
    axes.set_ylabel("score")
    axes.set_title("Decision Tree Classifier_depth")
    axes.legend(loc="best", framealpha=0.5)
    plt.show()
if __name__ == '__main__':
    # Split iris, then sweep max_depth over 1..99 and plot the scores.
    X_train, X_test, y_train, y_test = load_data()
    test_DecisionTreeClassifier_depth(100, X_train, X_test, y_train, y_test)



4.最后再画图

#-*- coding=utf-8 -*-
from sklearn.tree import export_graphviz
import numpy  as np
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection  import train_test_split
import matplotlib.pyplot as plt
from sklearn import datasets
def load_data():
    """Load the iris data set and return a stratified train/test split.

    Returns:
        The result of ``train_test_split``: X_train, X_test, y_train, y_test,
        with 25% of the samples held out (``random_state=0``) and the split
        stratified on the target labels.
    """
    dataset = datasets.load_iris()
    X_all, y_all = dataset.data, dataset.target
    return train_test_split(
        X_all, y_all, test_size=0.25, random_state=0, stratify=y_all
    )
# Fit a default decision tree on the iris training split.
X_train, X_test, y_train, y_test = load_data()
clf = DecisionTreeClassifier()
clf.fit(X_train, y_train)
# Write the tree to the file "out" on drive F:. The original author notes
# this file generally cannot be opened directly.
export_graphviz(clf, out_file="F:/out")




下载Graphviz

安装Graphviz后,新建一个文件夹,建立一个bat文件取名【GenerateJpgsBatch.bat】,代码如下:

:: Set the paths used by the dot.exe call below
set dotPath=D:\graph
set sourcePath=F:

rem Test run: render the file "out" under sourcePath into XXX.jpg
%dotPath%\bin\dot.exe -Tjpg %sourcePath%\out -o %sourcePath%\XXX.jpg

pause
其中dotPath为Graphviz的安装目录(dot.exe位于其bin子目录下),sourcePath为dot文件及生成图片所在的目录。

上面的代码就是用dot.exe,将export_graphviz生成的名为out的dot文件,转换为图片XXX.jpg。

内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: