支持向量机,K近邻模型,回归树在不同配置下的性能比较
2018-02-04 22:49
423 查看
1 延续上一篇,还是采用美国波士顿房价测试数据,对支持向量机,K近邻模型以及回归树采用不同的参数配置,进行同一模型在不同配置下的回归预测性能的评估
2 实验代码及结果截图
# Load the Boston house-price dataset.
from sklearn.datasets import load_boston
boston = load_boston()
# Train/test split (sklearn.cross_validation is the legacy pre-0.18 module name).
from sklearn.cross_validation import train_test_split
import numpy as np
X = boston.data
y = boston.target
# Hold out a random 25% of the samples as the test set.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=33, test_size=0.25)
# Standardize features and target to zero mean / unit variance.
from sklearn.preprocessing import StandardScaler
ss_X = StandardScaler()
ss_y = StandardScaler()
X_train = ss_X.fit_transform(X_train)
# BUG FIX: the test data must be transformed with the scaler fitted on the
# training data; the original called fit_transform here, which re-estimates
# mean/std from the test set (data leakage) and puts train and test in
# inconsistent feature spaces.
X_test = ss_X.transform(X_test)
# reshape(-1, 1): StandardScaler expects a 2-D column, not a 1-D target vector.
y_train = ss_y.fit_transform(y_train.reshape(-1, 1))
y_test = ss_y.transform(y_test.reshape(-1, 1))
from sklearn.svm import SVR
#使用线性核函数配置向量机进行回归训练并预测
linear_svr=SVR(kernel='linear')
linear_svr.fit(X_train, y_train)
linear_svr_y_predict=linear_svr.predict(X_test)
#多项式核函数配置
poly_svr=SVR(kernel='poly')
poly_svr.fit(X_train, y_train)
poly_svr_y_predict=poly_svr.predict(X_test)
#径向基核函数配置
rbf_svr=SVR(kernel='rbf')
rbf_svr.fit(X_train, y_train)
rbf_svr_y_predict=rbf_svr.predict(X_test)
#回归性能的评估
from sklearn.metrics import r2_score,mean_absolute_error,mean_squared_error
print '三种不同配置下的支持向量机的回归分析'
print 'linear SVR'
print 'R-squared:',linear_svr.score(X_test,y_test)
print 'mean_squared:',mean_squared_error(ss_y.inverse_transform(y_test), ss_y.inverse_transform(linear_svr_y_predict))
print 'mean_absolute:',mean_absolute_error(ss_y.inverse_transform(y_test), ss_y.inverse_transform(linear_svr_y_predict))
print 'Poly SVR'
print 'R-squared:',poly_svr.score(X_test,y_test)
print 'mean_squared:',mean_squared_error(ss_y.inverse_transform(y_test), ss_y.inverse_transform(poly_svr_y_predict))
print 'mean_absolute:',mean_absolute_error(ss_y.inverse_transform(y_test), ss_y.inverse_transform(poly_svr_y_predict))
print 'rbf SVR'
print 'R-squared:',rbf_svr.score(
4000
X_test,y_test)
print 'mean_squared:',mean_squared_error(ss_y.inverse_transform(y_test), ss_y.inverse_transform(rbf_svr_y_predict))
print 'mean_absolute:',mean_absolute_error(ss_y.inverse_transform(y_test), ss_y.inverse_transform(rbf_svr_y_predict))
#两种配置下K近邻模型的回归性能分析
#导入K近邻模型
from sklearn.neighbors import KNeighborsRegressor
#初始化,使预测的方式为平均回归:weights='uniform'
uni_knr=KNeighborsRegressor(weights='uniform')
uni_knr.fit(X_train, y_train)
uni_knr_y_predict=uni_knr.predict(X_test)
#初始化预测方式为根据距离加权回归:weight='distance'
dis_knr=KNeighborsRegressor(weights='distance')
dis_knr.fit(X_train, y_train)
dis_knr_y_predict=dis_knr.predict(X_test)
#性能评估
print '两种配置下K近邻模型的回归性能分析'
print '平均回归配置'
print 'R-squared',uni_knr.score(X_test,y_test)
print 'mean_squared',mean_squared_error(ss_y.inverse_transform(y_test), ss_y.inverse_transform(uni_knr_y_predict))
print 'mean_absolute',mean_absolute_error(ss_y.inverse_transform(y_test), ss_y.inverse_transform(uni_knr_y_predict))
print '距离加权回归'
print 'R-squared',dis_knr.score(X_test,y_test)
print 'mean_squared',mean_squared_error(ss_y.inverse_transform(y_test), ss_y.inverse_transform(dis_knr_y_predict))
print 'mean_absolute',mean_absolute_error(ss_y.inverse_transform(y_test), ss_y.inverse_transform(dis_knr_y_predict))
#回归树
#模型导入
from sklearn.tree import DecisionTreeRegressor
dtr=DecisionTreeRegressor()
#构建回归树
dtr.fit(X_train, y_train)
#单一回归树
dtr_y_predict=dtr.predict(X_test)
#评估
print '单一回归树'
print 'R-squared:',dtr.score(X_test,y_test)
print 'mean_squared:',mean_squared_error(ss_y.inverse_transform(y_test), ss_y.inverse_transform(dtr_y_predict))
print 'mean_absolute:',mean_absolute_error(ss_y.inverse_transform(y_test), ss_y.inverse_transform(dtr_y_predict))
#集成模型
#模型导入
from sklearn.ensemble import RandomForestRegressor,ExtraTreesRegressor,GradientBoostingRegressor
#模型训练及预测
rfr=RandomForestRegressor()
rfr.fit(X_train,y_train)
rfr_y_predict=rfr.predict(X_test)
etr=ExtraTreesRegressor()
etr.fit(X_train,y_train)
etr_y_predict=etr.predict(X_test)
gbr=GradientBoostingRegressor()
gbr.fit(X_train, y_train)
gbr_y_predict=gbr.predict(X_test)
print '随机回归森林'
print 'R-squared:',rfr.score(X_test,y_test)
print 'mean_squared:',mean_squared_error(ss_y.inverse_transform(y_test), ss_y.inverse_transform(rfr_y_predict))
print 'mean_absolute:',mean_absolute_error(ss_y.inverse_transform(y_test),ss_y.inverse_transform(rfr_y_predict))
print '极端回归森林'
print 'R-squared:',etr.score(X_test,y_test)
print 'mean_squared:',mean_squared_error(ss_y.inverse_transform(y_test), ss_y.inverse_transform(etr_y_predict))
print 'mean_absolute:',mean_absolute_error(ss_y.inverse_transform(y_test),ss_y.inverse_transform(etr_y_predict))
#每种特征对预测目标的贡献度
print '每种特征对预测目标的贡献度'
print np.sort(zip(etr.feature_importances_,boston.feature_names),axis=0)
#梯度提升回归树
print '梯度提升回归树'
print 'R-squared:',gbr.score(X_test,y_test)
print 'mean_squared:',mean_squared_error(ss_y.inverse_transform(y_test), ss_y.inverse_transform(gbr_y_predict))
print 'mean_absolute:',mean_absolute_error(ss_y.inverse_transform(y_test),ss_y.inverse_transform(gbr_y_predict))
2 实验代码及结果截图
# Load the Boston house-price dataset.
from sklearn.datasets import load_boston
boston = load_boston()
# Train/test split (sklearn.cross_validation is the legacy pre-0.18 module name).
from sklearn.cross_validation import train_test_split
import numpy as np
X = boston.data
y = boston.target
# Hold out a random 25% of the samples as the test set.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=33, test_size=0.25)
# Standardize features and target to zero mean / unit variance.
from sklearn.preprocessing import StandardScaler
ss_X = StandardScaler()
ss_y = StandardScaler()
X_train = ss_X.fit_transform(X_train)
# BUG FIX: the test data must be transformed with the scaler fitted on the
# training data; the original called fit_transform here, which re-estimates
# mean/std from the test set (data leakage) and puts train and test in
# inconsistent feature spaces.
X_test = ss_X.transform(X_test)
# reshape(-1, 1): StandardScaler expects a 2-D column, not a 1-D target vector.
y_train = ss_y.fit_transform(y_train.reshape(-1, 1))
y_test = ss_y.transform(y_test.reshape(-1, 1))
from sklearn.svm import SVR
#使用线性核函数配置向量机进行回归训练并预测
linear_svr=SVR(kernel='linear')
linear_svr.fit(X_train, y_train)
linear_svr_y_predict=linear_svr.predict(X_test)
#多项式核函数配置
poly_svr=SVR(kernel='poly')
poly_svr.fit(X_train, y_train)
poly_svr_y_predict=poly_svr.predict(X_test)
#径向基核函数配置
rbf_svr=SVR(kernel='rbf')
rbf_svr.fit(X_train, y_train)
rbf_svr_y_predict=rbf_svr.predict(X_test)
#回归性能的评估
from sklearn.metrics import r2_score,mean_absolute_error,mean_squared_error
print '三种不同配置下的支持向量机的回归分析'
print 'linear SVR'
print 'R-squared:',linear_svr.score(X_test,y_test)
print 'mean_squared:',mean_squared_error(ss_y.inverse_transform(y_test), ss_y.inverse_transform(linear_svr_y_predict))
print 'mean_absolute:',mean_absolute_error(ss_y.inverse_transform(y_test), ss_y.inverse_transform(linear_svr_y_predict))
print 'Poly SVR'
print 'R-squared:',poly_svr.score(X_test,y_test)
print 'mean_squared:',mean_squared_error(ss_y.inverse_transform(y_test), ss_y.inverse_transform(poly_svr_y_predict))
print 'mean_absolute:',mean_absolute_error(ss_y.inverse_transform(y_test), ss_y.inverse_transform(poly_svr_y_predict))
print 'rbf SVR'
print 'R-squared:',rbf_svr.score(
4000
X_test,y_test)
print 'mean_squared:',mean_squared_error(ss_y.inverse_transform(y_test), ss_y.inverse_transform(rbf_svr_y_predict))
print 'mean_absolute:',mean_absolute_error(ss_y.inverse_transform(y_test), ss_y.inverse_transform(rbf_svr_y_predict))
#两种配置下K近邻模型的回归性能分析
#导入K近邻模型
from sklearn.neighbors import KNeighborsRegressor
#初始化,使预测的方式为平均回归:weights='uniform'
uni_knr=KNeighborsRegressor(weights='uniform')
uni_knr.fit(X_train, y_train)
uni_knr_y_predict=uni_knr.predict(X_test)
#初始化预测方式为根据距离加权回归:weight='distance'
dis_knr=KNeighborsRegressor(weights='distance')
dis_knr.fit(X_train, y_train)
dis_knr_y_predict=dis_knr.predict(X_test)
#性能评估
print '两种配置下K近邻模型的回归性能分析'
print '平均回归配置'
print 'R-squared',uni_knr.score(X_test,y_test)
print 'mean_squared',mean_squared_error(ss_y.inverse_transform(y_test), ss_y.inverse_transform(uni_knr_y_predict))
print 'mean_absolute',mean_absolute_error(ss_y.inverse_transform(y_test), ss_y.inverse_transform(uni_knr_y_predict))
print '距离加权回归'
print 'R-squared',dis_knr.score(X_test,y_test)
print 'mean_squared',mean_squared_error(ss_y.inverse_transform(y_test), ss_y.inverse_transform(dis_knr_y_predict))
print 'mean_absolute',mean_absolute_error(ss_y.inverse_transform(y_test), ss_y.inverse_transform(dis_knr_y_predict))
#回归树
#模型导入
from sklearn.tree import DecisionTreeRegressor
dtr=DecisionTreeRegressor()
#构建回归树
dtr.fit(X_train, y_train)
#单一回归树
dtr_y_predict=dtr.predict(X_test)
#评估
print '单一回归树'
print 'R-squared:',dtr.score(X_test,y_test)
print 'mean_squared:',mean_squared_error(ss_y.inverse_transform(y_test), ss_y.inverse_transform(dtr_y_predict))
print 'mean_absolute:',mean_absolute_error(ss_y.inverse_transform(y_test), ss_y.inverse_transform(dtr_y_predict))
#集成模型
#模型导入
from sklearn.ensemble import RandomForestRegressor,ExtraTreesRegressor,GradientBoostingRegressor
#模型训练及预测
rfr=RandomForestRegressor()
rfr.fit(X_train,y_train)
rfr_y_predict=rfr.predict(X_test)
etr=ExtraTreesRegressor()
etr.fit(X_train,y_train)
etr_y_predict=etr.predict(X_test)
gbr=GradientBoostingRegressor()
gbr.fit(X_train, y_train)
gbr_y_predict=gbr.predict(X_test)
print '随机回归森林'
print 'R-squared:',rfr.score(X_test,y_test)
print 'mean_squared:',mean_squared_error(ss_y.inverse_transform(y_test), ss_y.inverse_transform(rfr_y_predict))
print 'mean_absolute:',mean_absolute_error(ss_y.inverse_transform(y_test),ss_y.inverse_transform(rfr_y_predict))
print '极端回归森林'
print 'R-squared:',etr.score(X_test,y_test)
print 'mean_squared:',mean_squared_error(ss_y.inverse_transform(y_test), ss_y.inverse_transform(etr_y_predict))
print 'mean_absolute:',mean_absolute_error(ss_y.inverse_transform(y_test),ss_y.inverse_transform(etr_y_predict))
#每种特征对预测目标的贡献度
print '每种特征对预测目标的贡献度'
print np.sort(zip(etr.feature_importances_,boston.feature_names),axis=0)
#梯度提升回归树
print '梯度提升回归树'
print 'R-squared:',gbr.score(X_test,y_test)
print 'mean_squared:',mean_squared_error(ss_y.inverse_transform(y_test), ss_y.inverse_transform(gbr_y_predict))
print 'mean_absolute:',mean_absolute_error(ss_y.inverse_transform(y_test),ss_y.inverse_transform(gbr_y_predict))
相关文章推荐
- 使用两种不同配置的K近邻回归模型对美国波士顿房价数据进行回归预测
- 使用三种不同核函数配置的支持向量机(回归)模型对美国波士顿地区房价进行预测
- 使用两种不同配置的K近邻(回归)模型对美国波士顿房价数据进行回归预测
- K近邻回归模型对Boston房价进行预测,同时对性能进行评估(1.使用普通的算术平均法2.考虑距离差异进行加权平均)
- Java不同压缩算法的性能比较
- Go语言中三种不同md5计算方式的性能比较
- Java不同压缩算法的性能比较
- 朴素、Select、Poll和Epoll网络编程模型实现和分析——Poll、Epoll模型处理长连接性能比较
- 五种不同的 URL 参数解析方法的性能比较
- Spring-不同配置方式的比较
- 树模型和线性回归 在回归问题中的比较
- mysql性能测试及不同版本的比较
- R语言评测回归模型的性能
- TensorFlow 深度学习框架(6)-- mnist 数字识别及不同模型效果比较
- 详细剖析市面手机基带/射频/处理器配置, 比较各家手机性能
- mysql性能测试及不同版本的比较(转)
- oracle技术之oracle数据泵不同工作方式性能比较(五)
- 《Spark机器学习》笔记——Spark回归模型(最小二乘回归、决策树回归,模型性能评估、目标变量变换、参数调优)
- 五种不同的 URL 参数解析方法的性能比较
- Bean不同配置方式比较