您的位置:首页 > 编程语言 > Python开发

Python数据挖掘入门与实践一:计算支持度和置信度

2017-05-03 19:46 1456 查看


import numpy as np

from collections import defaultdict

# First: how many rows contain our premise, i.e. that a person is buying apples?
# NOTE: the triple-quoted string below is an inert expression statement holding
# disabled example code; it is never executed. The snippet counts the rows of X
# whose column 3 equals 1 and prints that count (Python 2 print syntax).

'''num_apple_purchases=0

for sample in X:

    if sample[3]==1:    #this person bought apples

        num_apple_purchases+=1

print num_apple_purchases'''

def calS(X,n_features):
    """Compute support and confidence for every ordered pair of features.

    Each row of X is treated as one record and each column as one feature
    (in the accompanying script: one purchase per row, one product per
    column). A rule is a pair (premise, conclusion) of distinct column
    indices.

    Args:
        X: iterable of rows; each row must be indexable by column number
            for every index in range(n_features).
        n_features: number of columns to consider as premises and
            conclusions.

    Returns:
        A tuple (support, confidence):
        support -- defaultdict(int) mapping (premise, conclusion) to the
            number of rows whose premise column is not 0 and whose
            conclusion column equals 1.
        confidence -- defaultdict(float) mapping the same keys to
            support divided by the number of rows whose premise column
            is not 0.
    """
    support = defaultdict(int)
    premise_counts = defaultdict(int)

    # Debug dump of the input, kept from the original. Written as a call so
    # it parses on Python 3 too; on Python 2 a single parenthesised argument
    # prints exactly what the print statement did.
    print(X)

    for sample in X:
        # Iterate over all n_features columns; the earlier hard-coded
        # range(5) broke on narrower data and ignored columns past index 4.
        for premise in range(n_features):
            if sample[premise] == 0:
                continue
            premise_counts[premise] += 1
            for conclusion in range(n_features):
                if premise == conclusion:
                    continue
                if sample[conclusion] == 1:
                    support[(premise, conclusion)] += 1

    confidence = defaultdict(float)
    for rule, hits in support.items():
        # Convert the rule count from int to float so that Python 2 performs
        # true division instead of integer division.
        confidence[rule] = float(hits) / premise_counts[rule[0]]

    return support, confidence

def print_rule(premise,conclusion,support,confidence,features):
    """Print a human-readable summary of one (premise, conclusion) rule.

    Looks up the names of both feature indices in `features`, then prints
    three lines: the rule sentence, the rule's support, and the rule's
    confidence formatted to three decimal places. `support` and
    `confidence` are indexed with the tuple (premise, conclusion).
    """
    bought = features[premise]
    also_bought = features[conclusion]
    rule_key = (premise, conclusion)

    headline = "Rule:If a person buys {0} they will also buy {1}".format(bought, also_bought)
    print(headline)

    # Lookups are done one at a time, after the preceding line is printed,
    # so output already produced is unaffected if a later lookup fails.
    support_line = "-Support:{0}".format(support[rule_key])
    print(support_line)

    confidence_line = "-Confidence:{0:.3f}".format(confidence[rule_key])
    print(confidence_line)

if __name__ == '__main__':
    # Script entry point: load the dataset, compute support/confidence for
    # all rules, dump both tables, then report one example rule.
    # NOTE(review): the file is presumably a 0/1 matrix with one purchase
    # per row and one product per column -- confirm against the data file.
    X = np.loadtxt("affinity_dataset.txt")
    n_samples, n_features = X.shape

    # Example rule to report; with the feature list below index 1 is "milk"
    # and index 3 is "apples".
    premise = 1
    conclusion = 3

    support, confidence = calS(X, n_features)
    features = ["bread", "milk", "cheese", "apples", "bananas"]

    # Same text as the Python 2 statement `print support,confidence` (both
    # values separated by one space), but written as a call so the script
    # also runs on Python 3, consistent with print_rule.
    print("{0} {1}".format(support, confidence))

    print_rule(premise, conclusion, support, confidence, features)
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息