Accord C4.5决策树算法(C# C4.5决策树算法)
2016-01-26 12:41
1001 查看
算法直接调用Accord.net Framework封装好的方法,所以要下载Accord机器学习的DLL,下载地址见Accord官网。C4.5算法要引用的包括:
using Accord.IO;
using Accord.MachineLearning.DecisionTrees;
using Accord.MachineLearning.DecisionTrees.Learning;
using Accord.MachineLearning.DecisionTrees.Rules;
using Accord.Math;
using Accord.Statistics.Analysis;
using Accord.Statistics.Filters;
using AForge;
using System.Numerics;
我是写成winform窗体程序形式的,实现了五个按钮的Click()事件,分别用于:
1. MenuFileOpen_Click():导入训练数据集;
2. upAttributeType_Click():导入训练数据各属性的类别,属性类别分为Continuous(数值型)和Symbolic(符号型);
3. upTestingData_Click():导入测试数据集;
4. btnCreate_Click():建立分类模型;
5. btnTestingRun_Click():测试模型。
算法代码:
程序可以参考:https://github.com/accord-net/framework/blob/development/Unit%20Tests/Accord.Tests.MachineLearning/DecisionTrees/C45LearningTest.cs
using Accord.IO;
using Accord.MachineLearning.DecisionTrees;
using Accord.MachineLearning.DecisionTrees.Learning;
using Accord.MachineLearning.DecisionTrees.Rules;
using Accord.Math;
using Accord.Statistics.Analysis;
using Accord.Statistics.Filters;
using AForge;
using System.Numerics;
我是写成winform窗体程序形式的,实现了五个按钮的Click()事件,分别用于:
1. MenuFileOpen_Click():导入训练数据集;
2. upAttributeType_Click():导入训练数据各属性的类别,属性类别分为Continuous(数值型)和Symbolic(符号型);
3. upTestingData_Click():导入测试数据集;
4. btnCreate_Click():建立分类模型;
5. btnTestingRun_Click():测试模型。
算法代码:
using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using System.Windows.Forms;
using System.IO;
using Accord.IO;
using Accord.MachineLearning.DecisionTrees;
using Accord.MachineLearning.DecisionTrees.Learning;
using Accord.MachineLearning.DecisionTrees.Rules;
using Accord.Math;
using Accord.Statistics.Analysis;
using Accord.Statistics.Filters;
using AForge;
using System.Numerics;

namespace 粗糙集属性选择_V1._0.DecisionTrees
{
    /// <summary>
    /// Form that imports a training table, an attribute-type table and a test table
    /// from Excel worksheets, trains a C4.5 decision tree on the training table and
    /// evaluates it against the test table.
    /// </summary>
    /// <remarks>
    /// In both the training and the test table the last column is treated as the class
    /// label and every preceding column as an input attribute.
    /// </remarks>
    public partial class DecisionTreesMainForm : DockContent
    {
        // Tree produced by the last successful btnCreate_Click; null until then.
        DecisionTree tree;

        // Training data (a copy of the worksheet shown in dgvLearningSource).
        DataTable myDT = new DataTable();

        // Attribute-type table: column 0 is read as the attribute name, column 1 as its type.
        DataTable myDTAttType = new DataTable();

        // Test data (a copy of the worksheet shown in dgvTestingSource).
        DataTable myTestDT = new DataTable();

        // Codebook built from the training table by the last successful btnCreate_Click.
        Codification codebook;

        public DecisionTreesMainForm()
        {
            InitializeComponent();

            dgvLearningSource.AutoGenerateColumns = true;
            dgvPerformance.AutoGenerateColumns = false;

            openFileDialog.InitialDirectory = Path.Combine(Application.StartupPath, "Resources");
        }

        /// <summary>
        /// Learns a C4.5 decision tree from the imported training data and shows it.
        /// </summary>
        /// <remarks>
        /// The <c>tree</c> and <c>codebook</c> fields are replaced only after training has
        /// completed, so a failed run leaves the previously trained model untouched.
        /// </remarks>
        private void btnCreate_Click(object sender, EventArgs e)
        {
            if (dgvLearningSource.DataSource == null)
            {
                MessageBox.Show("请先导入训练数据!");
                return;
            }

            if (attributeTypeGridView.DataSource == null)
            {
                MessageBox.Show("请先导入数据的属性类型!");
                return;
            }

            // Commit any cell edit that is still in progress in the grid.
            // NOTE(review): the grid is bound to the worksheet table while training reads
            // myDT, a copy taken at import time (see MenuFileOpen_Click), so edits made in
            // the grid after import are not seen here. Confirm whether that is intended.
            dgvLearningSource.EndEdit();

            int columnCount = myDT.Columns.Count;
            if (columnCount < 2)
            {
                // Without at least one input and one class column the arrays below
                // would get a negative or zero size.
                MessageBox.Show("训练数据至少需要包含一个属性列和一个类别列!");
                return;
            }

            if (myDTAttType.Columns.Count < 2)
            {
                MessageBox.Show("属性类型表至少需要包含属性名和属性类型两列!");
                return;
            }

            string[] columnNames = GetColumnNames(myDT, columnCount);
            int inputCount = columnCount - 1;
            string classColumn = columnNames[inputCount];

            string[] inputColumns = new string[inputCount];
            Array.Copy(columnNames, inputColumns, inputCount);

            // Attribute name -> attribute type. The indexer is used instead of Add so
            // that a repeated attribute name does not throw; the last entry wins.
            Dictionary<string, string> attTypeMap = new Dictionary<string, string>();
            foreach (DataRow typeRow in myDTAttType.Rows)
            {
                attTypeMap[typeRow[0].ToString()] = typeRow[1].ToString();
            }

            Codification newCodebook;
            DecisionTree newTree;

            try
            {
                // Build the symbol <-> integer codebook for the training table.
                newCodebook = new Codification(myDT, columnNames);

                DecisionVariable[] attributes = new DecisionVariable[inputCount];
                for (int i = 0; i < inputCount; i++)
                {
                    string attType;
                    if (!attTypeMap.TryGetValue(inputColumns[i], out attType))
                    {
                        MessageBox.Show("属性类型表中缺少属性“" + inputColumns[i] + "”的类型!");
                        return;
                    }

                    // Only the exact text "Symbolic" selects a discrete variable;
                    // every other type value is treated as continuous.
                    if (attType.Equals("Symbolic"))
                    {
                        attributes[i] = new DecisionVariable(inputColumns[i], newCodebook[inputColumns[i]].Symbols);
                    }
                    else
                    {
                        attributes[i] = new DecisionVariable(inputColumns[i], DecisionVariableKind.Continuous);
                    }
                }

                int classCount = newCodebook[classColumn].Symbols;

                newTree = new DecisionTree(attributes, classCount);
                C45Learning c45 = new C45Learning(newTree);

                // Encode the training table and split it into inputs and class labels.
                DataTable symbols = newCodebook.Apply(myDT);
                double[][] inputs = symbols.ToArray(inputColumns);
                int[] outputs = symbols.ToArray<int>(classColumn);

                c45.Run(inputs, outputs);
            }
            catch (Exception ex)
            {
                // Stop here: continuing would dereference a missing or stale model.
                MessageBox.Show("运行出现异常!" + ex.Message);
                return;
            }

            // Publish the new model only now that training has succeeded.
            codebook = newCodebook;
            tree = newTree;

            decisionTreeView1.TreeSource = tree;
            lbStatus.Text = "树创建完成,可以点击其它选项查看结果!";
        }

        /// <summary>
        /// Classifies every row of the imported test data with the trained tree and
        /// shows the resulting confusion matrix.
        /// </summary>
        private void btnTestingRun_Click(object sender, EventArgs e)
        {
            if (tree == null)
            {
                MessageBox.Show("请先建立模型!");
                return;
            }

            if (dgvTestingSource.DataSource == null)
            {
                MessageBox.Show("请先上传测试数据!");
                return;
            }

            int rowCount = myTestDT.Rows.Count;
            int columnCount = myTestDT.Columns.Count;
            if (columnCount < 2)
            {
                MessageBox.Show("测试数据至少需要包含一个属性列和一个类别列!");
                return;
            }

            int inputCount = columnCount - 1;
            string classColumn = myTestDT.Columns[inputCount].ColumnName;

            // Only the attribute columns are fed to the tree; the class column is
            // translated separately as the expected label.
            string[] inputColumns = new string[inputCount];
            for (int i = 0; i < inputCount; i++)
            {
                inputColumns[i] = myTestDT.Columns[i].ColumnName;
            }

            int[] expected = new int[rowCount];
            int[] actual = new int[rowCount];

            try
            {
                for (int i = 0; i < rowCount; i++)
                {
                    DataRow row = myTestDT.Rows[i];

                    // Map the textual class label to the integer code used in training.
                    expected[i] = codebook.Translate(classColumn, row[inputCount].ToString());
                    actual[i] = tree.Compute(codebook.Translate(row, inputColumns));
                }
            }
            catch (Exception ex)
            {
                // Abort on the first failure: carrying on would raise one dialog per bad
                // row and score the unclassified rows as class 0.
                MessageBox.Show("出现异常!" + ex.Message);
                return;
            }

            // NOTE(review): the arguments 1 and 0 look like the positive and negative class
            // labels of a two-class matrix - confirm this is appropriate when the training
            // data has more than two classes.
            ConfusionMatrix confusionMatrix = new ConfusionMatrix(actual, expected, 1, 0);
            dgvPerformance.DataSource = new[] { confusionMatrix };
        }

        /// <summary>
        /// Imports the training data set from an Excel worksheet.
        /// </summary>
        private void MenuFileOpen_Click(object sender, EventArgs e)
        {
            DataTable tableSource = LoadWorksheetFromDialog();
            if (tableSource != null)
            {
                this.dgvLearningSource.DataSource = tableSource;
                myDT = tableSource.Copy();
            }

            lbStatus.Text = "导入训练数据集后,点击“创建树”按钮后开始创建树!";
        }

        /// <summary>
        /// Imports the attribute-type table from an Excel worksheet.
        /// </summary>
        private void upAttributeType_Click(object sender, EventArgs e)
        {
            DataTable tableSource = LoadWorksheetFromDialog();
            if (tableSource != null)
            {
                this.attributeTypeGridView.DataSource = tableSource;
                myDTAttType = tableSource.Copy();
            }
        }

        /// <summary>
        /// Imports the test data set from an Excel worksheet.
        /// </summary>
        private void upTestingData_Click(object sender, EventArgs e)
        {
            DataTable tableSource = LoadWorksheetFromDialog();
            if (tableSource != null)
            {
                this.dgvTestingSource.DataSource = tableSource;
                myTestDT = tableSource.Copy();
            }
        }

        /// <summary>
        /// Lets the user pick an Excel file and one of its worksheets.
        /// </summary>
        /// <returns>
        /// The selected worksheet, or null when either dialog is cancelled or the chosen
        /// file does not have an .xls/.xlsx extension.
        /// </returns>
        private DataTable LoadWorksheetFromDialog()
        {
            if (openFileDialog.ShowDialog(this) != DialogResult.OK)
            {
                return null;
            }

            string filename = openFileDialog.FileName;
            string extension = Path.GetExtension(filename);

            // Compare case-insensitively so that files named e.g. "DATA.XLS" are accepted.
            bool isExcel = string.Equals(extension, ".xls", StringComparison.OrdinalIgnoreCase)
                || string.Equals(extension, ".xlsx", StringComparison.OrdinalIgnoreCase);
            if (!isExcel)
            {
                return null;
            }

            ExcelReader db = new ExcelReader(filename, true, false);
            TableSelectDialog t = new TableSelectDialog(db.GetWorksheetList());
            if (t.ShowDialog(this) != DialogResult.OK)
            {
                return null;
            }

            return db.GetWorksheet(t.Selection);
        }

        /// <summary>
        /// Returns the names of the first <paramref name="count"/> columns of
        /// <paramref name="table"/>, in column order.
        /// </summary>
        private static string[] GetColumnNames(DataTable table, int count)
        {
            string[] names = new string[count];
            for (int i = 0; i < count; i++)
            {
                names[i] = table.Columns[i].ColumnName;
            }

            return names;
        }
    }
}
程序可以参考:https://github.com/accord-net/framework/blob/development/Unit%20Tests/Accord.Tests.MachineLearning/DecisionTrees/C45LearningTest.cs
相关文章推荐
- C# Best Practices - Handling Strings
- MVC项目发布,C#,.Net
- C#构造函数
- [转载]C#实现获取浏览器信息
- C#中数组、ArrayList和List三者的区别
- c#大圣之路笔记——TFS解决离职人员文件签出遗留签入问题
- C#对象克隆介绍
- C# 自定义路由配置
- C#委托delegate
- Replicate String in C#
- Ubuntu 14.04 上安装 C Sharp 开发环境。
- [C#解惑] #1 在构造函数内调用虚方法
- XSD- XML Schema Definition模式定义
- C# FileStream/StreamWriter/StreamReader的区别
- C#中的构造函数
- Windows C#自动化框架
- 三层结构实现登录功能——C#版
- C# 常用加密处理
- C#使用SharpZipLib解压Zip单个文件到内存
- C# Post方式传输报文,和处理响应