您的位置:首页 > 编程语言 > Python开发

Apriori 算法的 Python 实现

2015-07-20 10:53 591 查看
def loadDataSet():
    """Return the small hard-coded transaction database used by the demo.

    Returns:
        A new list of transactions on every call; each transaction is a
        list of integer item ids.
    """
    return [[1, 3, 4], [2, 3, 5], [1, 2, 3, 5], [2, 5]]

def creatC1(dataSet):
    """Build the candidate 1-itemsets (C1) from a transaction database.

    Args:
        dataSet: iterable of transactions, each an iterable of hashable,
            mutually comparable items.

    Returns:
        A list with one single-item ``frozenset`` per distinct item, ordered
        by item value. frozensets are used so the candidates can serve as
        dictionary keys when their support is counted.
    """
    distinct_items = set()
    for transaction in dataSet:
        distinct_items.update(transaction)
    # Return a concrete list, not ``map(...)``: on Python 3 a map object is a
    # one-shot iterator and would be exhausted after the first transaction
    # it is scanned against.
    return [frozenset([item]) for item in sorted(distinct_items)]

def scanD(D, Ck, minSupport):
    """Count candidate itemsets in the transactions and keep the frequent ones.

    Args:
        D: iterable of transactions (each a collection of items).
        Ck: iterable of candidate itemsets (``frozenset`` objects).
        minSupport: minimum fraction of transactions (0..1) that must contain
            a candidate for it to be considered frequent.

    Returns:
        A ``(retList, supportData)`` tuple. ``retList`` holds the candidates
        whose support is at least ``minSupport``, in reverse order of first
        occurrence. ``supportData`` maps every candidate that occurred in at
        least one transaction to its support.
    """
    # Both inputs are traversed repeatedly (and D is measured with len()), so
    # one-shot iterators such as Python 3 ``map`` objects must be materialised.
    transactions = list(D)
    candidates = list(Ck)

    occurrences = {}
    for tid in transactions:
        for can in candidates:
            if can.issubset(tid):
                occurrences[can] = occurrences.get(can, 0) + 1

    numItems = float(len(transactions))
    retList = []
    supportData = {}
    for key, count in occurrences.items():
        support = count / numItems
        if support >= minSupport:
            retList.append(key)
        # Support is recorded for every counted candidate, not only the
        # frequent ones, so later lookups of any subset always succeed.
        supportData[key] = support
    # Equivalent to inserting each frequent itemset at the front, without the
    # O(n) shift per insertion.
    retList.reverse()
    return retList, supportData

def apprioriGen(Lk, k):
    """Join frequent (k-1)-itemsets into candidate k-itemsets.

    Two itemsets are merged when their first ``k - 2`` items, taken in sorted
    order, are identical; the union then has ``k`` items.

    Args:
        Lk: sequence of ``frozenset`` itemsets, each of size ``k - 1``, whose
            items are mutually comparable.
        k: size of the candidate itemsets to generate.

    Returns:
        A list of candidate ``frozenset`` itemsets.
    """
    Lk = list(Lk)
    # Sort BEFORE slicing. Slicing the raw set iteration order and sorting
    # afterwards compares arbitrary items, which can miss valid joins or
    # emit the same candidate more than once.
    prefixes = [sorted(itemset)[:k - 2] for itemset in Lk]

    retList = []
    lenLk = len(Lk)
    for i in range(lenLk):
        for j in range(i + 1, lenLk):
            if prefixes[i] == prefixes[j]:
                retList.append(Lk[i] | Lk[j])
    return retList

def appriori(dataSet, minSupport=0.5):
    """Mine all frequent itemsets from a transaction database with Apriori.

    Args:
        dataSet: iterable of transactions, each an iterable of hashable items.
        minSupport: minimum fraction of transactions an itemset must appear
            in to be kept.

    Returns:
        A ``(L, supportData)`` tuple. ``L[i]`` is the list of frequent
        itemsets of size ``i + 1``; ``supportData`` maps itemsets to their
        support as reported by ``scanD``.
    """
    # Materialise both collections: they are scanned once per level, and a
    # Python 3 ``map`` object would be empty after the first pass.
    C1 = list(creatC1(dataSet))
    D = [set(transaction) for transaction in dataSet]

    L1, supportData = scanD(D, C1, minSupport)
    L = [L1]
    k = 2
    # At least two itemsets are needed at the previous level to join anything.
    while len(L[k - 2]) > 1:
        Ck = apprioriGen(L[k - 2], k)
        Lk, supportk = scanD(D, Ck, minSupport)
        L.append(Lk)
        supportData.update(supportk)
        k += 1
    return L, supportData

def generateRule(item, supportData, minConf=0.7):
    """Print the association rules that can be derived from one frequent itemset.

    For a consequent ``Y`` that is a proper subset of ``item``, the rule
    ``item - Y ---> Y`` has confidence
    ``supportData[item] / supportData[item - Y]``; rules reaching ``minConf``
    are printed.

    Args:
        item: the frequent itemset (``frozenset``) to derive rules from.
        supportData: mapping from itemset to support; must contain ``item``
            and every antecedent that gets evaluated.
        minConf: minimum confidence for a rule to be printed.

    Returns:
        None. Rules are written to standard output.
    """
    # A rule needs a non-empty antecedent and consequent.
    if len(item) < 2:
        return

    itemSupport = supportData[item]

    def report(consequent):
        """Print the rule for ``consequent`` if confident enough; return whether it was."""
        antecedent = item - consequent
        conf = itemSupport / supportData[antecedent]
        if conf >= minConf:
            # %-formatting gives the same text on Python 2 and Python 3.
            print('%s ---> %s Conf %s' % (antecedent, consequent, conf))
            return True
        return False

    # Single-item consequents: only those that pass seed the larger ones.
    rule = []
    for element in item:
        consequent = frozenset([element])
        if report(consequent):
            rule.append(consequent)

    k = 2
    while len(rule) > 0:
        # Grow the consequents by one item per round. Consequents of size two
        # or more are all carried forward, whether or not their rule passed.
        rule = apprioriGen(rule, k)
        # Stop when nothing can be merged, or when the consequent has grown
        # into the whole itemset (the antecedent would then be empty).
        if len(rule) == 0 or rule[0] == item:
            break
        for consequent in rule:
            report(consequent)
        k += 1

def generateRules(dataSet, supportData, L, minConf=0.7):
    """Walk every frequent itemset of size two or more and print its rules.

    Args:
        dataSet: the transaction database; not used here, kept so existing
            callers keep working.
        supportData: mapping from itemset to support.
        L: list of levels, where ``L[i]`` holds the frequent itemsets of size
            ``i + 1``.
        minConf: minimum confidence passed on to ``generateRule``.

    Returns:
        None. Rules are written to standard output by ``generateRule``.
    """
    # L[0] holds single-item sets, which cannot be split into a rule.
    for itemsets in L[1:]:
        for item in itemsets:
            generateRule(item, supportData, minConf)

# Demo: mine the sample transactions at the default minimum support, show the
# support table and the frequent itemsets per level, then print every rule
# with confidence of at least 0.6.
dataSet = loadDataSet()
L, supportData = appriori(dataSet)
# Single-argument print(...) is valid on both Python 2 and Python 3.
print(supportData)
print(L)
generateRules(dataSet, supportData, L, 0.6)
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: