决策树分类器算法实现
2016-04-22 18:08
393 查看
# -*- coding: cp936 -*-
# Decision tree classifier.
#
# NOTE: every print call below passes exactly one parenthesised argument so
# that the module produces the same output on Python 2 and Python 3.

# Sample rows. The last column of each row is the label to predict; all the
# preceding columns are candidate split attributes.
my_data = [['slashdot', 'USA', 'yes', 18, 'None'],
           ['google', 'France', 'yes', 23, 'Premium'],
           ['digg', 'USA', 'yes', 24, 'Basic']]


class decisionnode:
    """A single node of the decision tree.

    Attributes:
        col: index of the column tested at this node.
        value: value the column is compared against.
        results: label -> count dict for a leaf; None for a decision node.
        tb: subtree followed when the test is true.
        fb: subtree followed when the test is false.
    """

    def __init__(self, col=-1, value=None, results=None, tb=None, fb=None):
        self.col = col
        self.value = value
        self.results = results
        self.tb = tb
        self.fb = fb


def divideset(rows, column, value):
    """Split rows in two according to the value found in one column.

    Numeric values (int or float) split on ``row[column] >= value``; any
    other value splits on ``row[column] == value``.

    Returns:
        A tuple ``(set1, set2)``: rows that satisfy the test, and rows that
        do not, each in their original order.
    """
    if isinstance(value, (int, float)):
        split_function = lambda row: row[column] >= value
    else:
        split_function = lambda row: row[column] == value

    set1 = []
    set2 = []
    for row in rows:
        if split_function(row):
            set1.append(row)
        else:
            set2.append(row)
    return (set1, set2)


def uniquecounts(rows):
    """Count how often each label (last column of a row) occurs.

    Returns:
        A dict mapping label -> number of rows carrying that label.
    """
    results = {}
    for row in rows:
        label = row[len(row) - 1]
        results[label] = results.get(label, 0) + 1
    return results


def entropy(rows):
    """Return the entropy (in bits) of the label distribution of rows.

    An empty list of rows yields 0.0.
    """
    from math import log

    total = len(rows)
    ent = 0.0
    for count in uniquecounts(rows).values():
        p = float(count) / total
        ent = ent - p * (log(p) / log(2))
    return ent


def buildtree(rows, scoref=entropy):
    """Recursively build a decision tree from rows.

    Every distinct value of every attribute column is tried as a split and
    the one with the largest positive gain in ``scoref`` is kept.

    Args:
        rows: list of rows; the last column of each row is the label.
        scoref: function mapping a list of rows to an impurity score.

    Returns:
        The root decisionnode. Empty input gives a bare ``decisionnode()``;
        when no split improves the score the node is a leaf whose
        ``results`` holds the label counts.
    """
    if len(rows) == 0:
        return decisionnode()

    current_score = scoref(rows)
    best_gain = 0.0
    best_criteria = None
    best_sets = None

    # The last column is the label, so it is never used as a split column.
    column_count = len(rows[0]) - 1
    for col in range(0, column_count):
        # A dict (rather than a set) keeps the candidate values in
        # first-seen order, so ties between equal gains resolve
        # predictably on interpreters with ordered dicts.
        column_values = {}
        for row in rows:
            column_values[row[col]] = 1

        for value in column_values:
            (set1, set2) = divideset(rows, col, value)
            p = float(len(set1)) / len(rows)
            gain = current_score - p * scoref(set1) - (1 - p) * scoref(set2)
            if gain > best_gain and len(set1) > 0 and len(set2) > 0:
                best_gain = gain
                best_criteria = (col, value)
                best_sets = (set1, set2)

    if best_gain > 0:
        # Pass scoref down so that subtrees are scored with the same
        # function as the root instead of falling back to the default.
        trueBranch = buildtree(best_sets[0], scoref)
        falseBranch = buildtree(best_sets[1], scoref)
        return decisionnode(col=best_criteria[0], value=best_criteria[1],
                            tb=trueBranch, fb=falseBranch)
    return decisionnode(results=uniquecounts(rows))


def printtree(tree, indent=''):
    """Print a textual rendering of tree to standard output.

    A leaf prints its results dict; a decision node prints ``col:value?``
    followed by its true ('T->') and false ('F->') subtrees.
    """
    if tree.results is not None:
        print(str(tree.results))
    else:
        print(str(tree.col) + ':' + str(tree.value) + '?')
        print(indent + 'T->')
        printtree(tree.tb, indent + ' ')
        print(indent + 'F->')
        printtree(tree.fb, indent + ' ')


def classify(observation, tree):
    """Classify observation by walking tree down to a leaf.

    Args:
        observation: sequence indexable by the column indices stored in
            the tree.
        tree: decisionnode to start from.

    Returns:
        The ``results`` dict (label -> count) of the leaf that is reached.
    """
    if tree.results is not None:
        return tree.results

    v = observation[tree.col]
    # Same test as divideset: ">=" for numbers, "==" for everything else.
    if isinstance(v, (int, float)):
        matches = v >= tree.value
    else:
        matches = v == tree.value

    if matches:
        branch = tree.tb
    else:
        branch = tree.fb
    return classify(observation, branch)
相关文章推荐
- 刘鑫成同学与老师们的对话:“学编程=敲代码?”
- mybatis自动创建代码
- RNN学习笔记(六)-GRU,LSTM 代码实现
- 解决C:\fakepath路径加密问题,图片上传之前的预览功能的实现,html5 实现图片预览功能
- Python练习 - 廖雪峰教程
- 尴尬,AS导入网络下载的demo不会打开运行+++Plugin with id 'com.android.application' not found错误解决
- dns
- HDU-4586 Play the Dice (数学)
- 状态机
- fragment与Activity交互时常遇到的问题小结
- Java并发之volatile二
- 移动web学习--淘宝 flexible.js
- 团队作业1.0+2.0
- 如何把textfield或者textview中长按出现的(全选,复制,粘贴)显示成中文
- Nginx + tomcat 实现简单集群(基于反向代理方式)
- Java 优先级的问题
- proftp权限设置
- Mybatis最入门---动态查询(if)
- Android多媒体开发:MusicPlayer篇
- hessian序列化区别