Apriori 算法的代码(Python 实现),参考《机器学习实战》
2016-10-12 10:56
429 查看
"""Apriori frequent-itemset mining and association-rule generation.

Adapted from the Apriori chapter of "Machine Learning in Action" and written
for Python 3.  Itemsets are represented as ``frozenset`` objects so they can
be used as dictionary keys.
"""
from itertools import combinations

# NOTE(review): nothing below uses numpy; the wildcard import is retained from
# the original listing.  It may shadow builtins such as ``sum``/``any``/``all``,
# so the code below deliberately avoids those names.
from numpy import *


def loadDataSet():
    """Return a small hard-coded list of transactions for experimentation."""
    return [[1, 3, 4], [2, 3, 5], [1, 2, 3, 5], [2, 5]]


def createC1(dataSet):
    """Build the candidate 1-itemsets.

    Args:
        dataSet: iterable of transactions, each an iterable of hashable,
            mutually orderable items.

    Returns:
        A list with one single-item ``frozenset`` per distinct item, ordered
        by item value.
    """
    items = set()
    for transaction in dataSet:
        items.update(transaction)
    return [frozenset([item]) for item in sorted(items)]


def scanD(D, Ck, minSupport):
    """Count candidate itemsets in the transactions and keep the frequent ones.

    Args:
        D: iterable of transactions (sets).
        Ck: iterable of candidate itemsets (frozensets).
        minSupport: minimum fraction of transactions that must contain an
            itemset for it to be kept.

    Returns:
        ``(retList, supportData)`` where ``retList`` lists the candidates
        whose support is at least ``minSupport`` and ``supportData`` maps
        every candidate that occurs in at least one transaction to its
        support.
    """
    # Materialise both inputs: callers may pass one-shot iterators (e.g. the
    # result of ``map`` on Python 3) and both are traversed more than once.
    transactions = list(D)
    candidates = list(Ck)

    ssCnt = {}
    for tid in transactions:
        for can in candidates:
            if can.issubset(tid):
                ssCnt[can] = ssCnt.get(can, 0) + 1

    numItems = float(len(transactions))
    retList = []
    supportData = {}
    for key, count in ssCnt.items():
        support = count / numItems
        if support >= minSupport:
            retList.append(key)
        # Support is recorded for every counted candidate, frequent or not.
        supportData[key] = support
    # The original prepended each hit; reversing once gives the same order.
    retList.reverse()
    return retList, supportData


def aprioriGen(Lk, k):
    """Create candidate k-itemsets from a list of (k-1)-itemsets.

    Two itemsets are merged when their first ``k - 2`` items, taken in sorted
    order, are identical.

    Args:
        Lk: sequence of frozensets, all of size ``k - 1``.
        k: size of the itemsets to generate.

    Returns:
        A list of frozensets formed by the pairwise unions that matched.
    """
    # Sort *before* slicing: a frozenset has no defined iteration order, so
    # slicing first would compare arbitrary prefixes and miss candidates.
    keyed = [(sorted(itemset)[:k - 2], itemset) for itemset in Lk]
    return [
        first | second
        for (prefix_a, first), (prefix_b, second) in combinations(keyed, 2)
        if prefix_a == prefix_b
    ]


def apriori(dataSet, minSupport=0.5):
    """Find all frequent itemsets in ``dataSet``.

    Args:
        dataSet: iterable of transactions, each an iterable of items.
        minSupport: minimum support threshold (fraction of transactions).

    Returns:
        ``(L, supportData)`` where ``L[i]`` is the list of frequent itemsets
        of size ``i + 1`` (the final entry is always an empty list, which is
        what terminates the search) and ``supportData`` maps itemsets to
        their support.
    """
    # Build real lists so the transactions can be scanned once per level.
    D = [set(transaction) for transaction in dataSet]
    C1 = createC1(D)
    L1, supportData = scanD(D, C1, minSupport)
    L = [L1]
    k = 2
    while L[k - 2]:
        Ck = aprioriGen(L[k - 2], k)
        Lk, supK = scanD(D, Ck, minSupport)
        supportData.update(supK)
        L.append(Lk)
        k += 1
    return L, supportData


def generateRules(L, supportData, minConf=0.7):
    """Generate association rules from frequent itemsets.

    Args:
        L: list of lists of frequent itemsets, as returned by ``apriori``.
        supportData: dict mapping itemsets to support, as built by ``scanD``.
        minConf: minimum confidence a rule must reach to be reported.

    Returns:
        A list of ``(antecedent, consequent, confidence)`` tuples.
    """
    bigRuleList = []
    # Start at index 1: rules need itemsets with at least two items.
    for i in range(1, len(L)):
        for freqSet in L[i]:
            H1 = [frozenset([item]) for item in freqSet]
            # Always test the single-item consequents first.  The original
            # skipped this step for itemsets of three or more items and so
            # never reported rules such as {3, 5} --> {2}.
            prunedH1 = calcConf(freqSet, H1, supportData, bigRuleList, minConf)
            # Only consequents that met minConf can be part of a larger
            # consequent that also meets it, so merge from the pruned list.
            if i > 1 and len(prunedH1) > 1:
                rulesFromConseq(freqSet, prunedH1, supportData,
                                bigRuleList, minConf)
    return bigRuleList


def calcConf(freqSet, H, supportData, brl, minConf=0.7):
    """Evaluate candidate consequents of ``freqSet`` and record passing rules.

    For each consequent in ``H`` the confidence of
    ``(freqSet - conseq) --> conseq`` is computed; rules reaching ``minConf``
    are printed and appended to ``brl``.

    Args:
        freqSet: the frequent itemset the rules are derived from.
        H: iterable of candidate consequents (frozensets).
        supportData: dict mapping itemsets to support.
        brl: list that receives ``(antecedent, consequent, confidence)``.
        minConf: minimum confidence threshold.

    Returns:
        The list of consequents whose rule met ``minConf``.

    Raises:
        KeyError: if ``freqSet`` or an antecedent is missing from
            ``supportData``.
    """
    prunedH = []
    for conseq in H:
        antecedent = freqSet - conseq
        conf = supportData[freqSet] / supportData[antecedent]
        if conf >= minConf:
            print(antecedent, '-->', conseq, 'conf:', conf)
            brl.append((antecedent, conseq, conf))
            prunedH.append(conseq)
    return prunedH


def rulesFromConseq(freqSet, H, supportData, brl, minConf=0.7):
    """Recursively grow consequents of ``freqSet`` and record passing rules.

    The consequents in ``H`` (all of size ``m``) are merged into candidates
    of size ``m + 1``, evaluated with ``calcConf``, and the survivors are
    merged again for as long as at least two remain and the antecedent would
    stay non-empty.

    Args:
        freqSet: the frequent itemset the rules are derived from.
        H: list of equally sized consequents (frozensets).
        supportData: dict mapping itemsets to support.
        brl: list that receives ``(antecedent, consequent, confidence)``.
        minConf: minimum confidence threshold.
    """
    if not H:
        # Nothing to merge; also avoids indexing into an empty list.
        return
    m = len(H[0])
    if len(freqSet) > m + 1:
        Hmp1 = aprioriGen(H, m + 1)
        Hmp1 = calcConf(freqSet, Hmp1, supportData, brl, minConf)
        # At least two surviving consequents are needed to merge further.
        if len(Hmp1) > 1:
            rulesFromConseq(freqSet, Hmp1, supportData, brl, minConf)
相关文章推荐
- Apriori算法及python代码实现
- KNN算法Python实现(代码来自机器学习实战)及注释
- # 机器学习数据挖掘关联规则挖掘Apriori算法python代码实现
- CART之回归树python代码实现
- 使用VC内嵌Python实现的一个代码检测工具
- python 中文字符串的处理实现代码
- 将C++代码全部写到头文件:)python脚本帮助自动生成相应的实现文件初始框架
- K-means和K-means++算法代码实现(Python)
- python3音乐播放器简单实现代码
- python中kmeans聚类实现代码
- python 域名分析工具实现代码
- K-近邻算法的python实现代码分享
- 手把手教你用1行代码实现人脸识别 -- Python Face_recognition
- bat和python批量重命名文件的实现代码
- 【代码】Python冒泡排序的实现
- PCA的python代码实现
- Python3一行代码实现乘法表
- crf的Python实现代码
- 用python实现一个清理工具的代码