算法实践(三)【任务2 - 模型评估】
2018-12-27 17:46
330 查看
【任务2 - 模型评估】
记录7个模型(在Task1的基础上)关于accuracy、precision、recall、F1-score、AUC值的评分表格,并画出ROC曲线
# -*- coding: utf-8 -*-
"""Task 2 - model evaluation.

Trains 7 classifiers (from Task 1) on the same standardized split and, for
each, reports accuracy / precision / recall / F1 / AUC on train and test
sets and plots the ROC curves.
"""
# Load libraries
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn import metrics
from sklearn.linear_model import LogisticRegression
from sklearn.svm import LinearSVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier

# Read the data (file is GBK-encoded)
data_all = pd.read_csv('data_all.csv', encoding='gbk')

# Split features / label.
# NOTE: DataFrame.as_matrix() was removed in pandas 1.0 — use to_numpy().
x = data_all.drop(columns=["status"]).to_numpy()
y = data_all["status"].to_numpy().ravel()
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.3, random_state=2018)

# Standardize: fit the scaler on the training set only so no test-set
# statistics leak into the transform.
scaler = StandardScaler()
scaler.fit(x_train)
x_train_standard = scaler.transform(x_train)
x_test_standard = scaler.transform(x_test)


# Scoring function
def get_scores(y_train, y_test, y_train_predict, y_test_predict,
               y_train_proba, y_test_proba):
    """Print train/test accuracy, precision, recall, F1 and AUC, and plot
    the train and test ROC curves.

    Parameters
    ----------
    y_train, y_test : true labels for the two splits.
    y_train_predict, y_test_predict : hard class predictions.
    y_train_proba, y_test_proba : continuous scores for the positive class
        (predict_proba column 1, or a decision_function margin — both are
        valid inputs to roc_auc_score / roc_curve).
    """
    # Accuracy
    train_accuracy = metrics.accuracy_score(y_train, y_train_predict)
    test_accuracy = metrics.accuracy_score(y_test, y_test_predict)
    # Precision
    train_precision = metrics.precision_score(y_train, y_train_predict)
    test_precision = metrics.precision_score(y_test, y_test_predict)
    # Recall
    train_recall = metrics.recall_score(y_train, y_train_predict)
    test_recall = metrics.recall_score(y_test, y_test_predict)
    # F1-score
    train_f1_score = metrics.f1_score(y_train, y_train_predict)
    test_f1_score = metrics.f1_score(y_test, y_test_predict)
    # AUC (computed from the continuous scores, not the hard predictions)
    train_auc = metrics.roc_auc_score(y_train, y_train_proba)
    test_auc = metrics.roc_auc_score(y_test, y_test_proba)
    # ROC
    train_fprs, train_tprs, train_thresholds = metrics.roc_curve(
        y_train, y_train_proba)
    test_fprs, test_tprs, test_thresholds = metrics.roc_curve(
        y_test, y_test_proba)
    plt.plot(train_fprs, train_tprs)
    plt.plot(test_fprs, test_tprs)
    plt.title("ROC Curve")
    plt.xlabel("FPR")
    plt.ylabel("TPR")
    plt.show()
    # Print the scores
    print("训练集准确率:", train_accuracy)
    print("测试集准确率:", test_accuracy)
    print("训练集精准率:", train_precision)
    print("测试集精准率:", test_precision)
    print("训练集召回率:", train_recall)
    print("测试集召回率:", test_recall)
    print("训练集F1-score:", train_f1_score)
    print("测试集F1-score:", test_f1_score)
    print("训练集AUC:", train_auc)
    print("测试集AUC:", test_auc)


def _positive_scores(model, x_std):
    """Continuous positive-class scores for ROC/AUC.

    LinearSVC has no predict_proba, so fall back to decision_function —
    exactly the score source the original per-model code used.
    """
    if hasattr(model, "predict_proba"):
        return model.predict_proba(x_std)[:, 1]
    return model.decision_function(x_std)


# The seven models from Task 1, evaluated with identical code instead of
# seven copy-pasted blocks. Order matches the result listing below:
# logistic regression, SVM, decision tree, random forest, GBDT, XGBoost,
# LightGBM.
models = [
    LogisticRegression(random_state=2018),
    LinearSVC(random_state=2018),
    DecisionTreeClassifier(random_state=2018),
    RandomForestClassifier(n_estimators=100, oob_score=True,
                           random_state=2018),
    GradientBoostingClassifier(random_state=2018),
    XGBClassifier(random_state=2018),
    LGBMClassifier(random_state=2018),
]

for model in models:
    model.fit(x_train_standard, y_train)
    y_train_predict = model.predict(x_train_standard)
    y_test_predict = model.predict(x_test_standard)
    y_train_proba = _positive_scores(model, x_train_standard)
    y_test_proba = _positive_scores(model, x_test_standard)
    get_scores(y_train, y_test, y_train_predict, y_test_predict,
               y_train_proba, y_test_proba)
运行结果:
1.逻辑回归 训练集准确率: 0.8049293657950105 测试集准确率: 0.7876664330763841 训练集精准率: 0.7069351230425056 测试集精准率: 0.6609195402298851 训练集召回率: 0.37889688249400477 测试集召回率: 0.3203342618384401 训练集F1-score: 0.4933645589383294 测试集F1-score: 0.4315196998123827 训练集AUC: 0.8198240444948492 测试集AUC: 0.7657402480882185 2.svm 训练集准确率: 0.8025247971145176 测试集准确率: 0.7813594954449895 训练集精准率: 0.7251908396946565 测试集精准率: 0.6535947712418301 训练集召回率: 0.34172661870503596 测试集召回率: 0.2785515320334262 训练集F1-score: 0.4645476772616136 测试集F1-score: 0.390625 训练集AUC: 0.8210514620794339 测试集AUC: 0.7677954784931093 3.决策树 训练集准确率: 1.0 测试集准确率: 0.6853538892782061 训练集精准率: 1.0 测试集精准率: 0.38402061855670105 训练集召回率: 1.0 测试集召回率: 0.415041782729805 训练集F1-score: 1.0 测试集F1-score: 0.3989290495314592 训练集AUC: 1.0 测试集AUC: 0.5956295055971123 4.随机森林 训练集准确率: 1.0 测试集准确率: 0.779957953749124 训练集精准率: 1.0 测试集精准率: 0.6691729323308271 训练集召回率: 1.0 测试集召回率: 0.2479108635097493 训练集F1-score: 1.0 测试集F1-score: 0.36178861788617883 训练集AUC: 1.0 测试集AUC: 0.7489736888777607 5.GBDT 训练集准确率: 0.8623384430417794 测试集准确率: 0.7806587245970568 训练集精准率: 0.8836734693877552 测试集精准率: 0.6116504854368932 训练集召回率: 0.5191846522781774 测试集召回率: 0.35097493036211697 训练集F1-score: 0.6540785498489425 测试集F1-score: 0.44601769911504424 训练集AUC: 0.9207464353427005 测试集AUC: 0.7633146589047812 6.XGBoost 训练集准确率: 0.8539224526600541 测试集准确率: 0.7841625788367204 训练集精准率: 0.875 测试集精准率: 0.624390243902439 训练集召回率: 0.486810551558753 测试集召回率: 0.3565459610027855 训练集F1-score: 0.6255778120184899 测试集F1-score: 0.45390070921985815 训练集AUC: 0.9174710772897927 测试集AUC: 0.7708522424963224 7.LightGBM 训练集准确率: 0.9957920048091373 测试集准确率: 0.7701471618780659 训练集精准率: 1.0 测试集精准率: 0.5688888888888889 训练集召回率: 0.9832134292565947 测试集召回率: 0.3565459610027855 训练集F1-score: 0.9915356711003627 测试集F1-score: 0.4383561643835616 训练集AUC: 0.9999826853318788 测试集AUC: 0.7535210165566023
ROC曲线:
- 逻辑回归
- svm
- 决策树
- 随机森林
- GBDT
- XGBoost
- LightGBM
相关文章推荐
- 算法实践(四)【任务3 - 模型调优】
- 算法实践进阶(三)【任务3 - 模型融合】
- 社交中的用户价值、状态评估及算法匹配模型
- 美团推荐算法实践:机器学习重排序模型成亮点
- R语言︱机器学习模型评估方案(以随机森林算法为例)
- 1.3 KNN算法学习——模型评估与选择
- 挖掘DBLP作者合作关系,FP-Growth算法实践(3):挖掘任务、思路简介
- 美团推荐算法实践:机器学习重排序模型成亮点
- 美团推荐算法实践:机器学习重排序模型成亮点
- 1.5 KNN算法学习——KNN算法分类模型的实现与分类准确度评估
- 算法实践-任务调度-最小惩罚算法-贪心算法
- 编程实践--KNN分类算法--手写数字识别任务
- Coursera机器学习算法评估模型选择算法优化相关概念记录
- 车牌检测回归任务(三、目标检测模型评估)
- 图像配准建立仿射变换模型并用RANSAC算法评估
- 三维模型变形算法 理论和实践 C#版本pdf
- caffe初步实践---------使用训练好的模型完成语义分割任务
- 车牌检测回归任务(二、定义网络模型、训练算法)
- 美团推荐算法实践:机器学习重排序模型成亮点
- 事务信息系统-并发控制与恢复的理论, 算法与实践-计算模型, 并发控制部分