# Location: Home > Programming Languages > Python Development
#
# Recommendation analysis in data mining -- a Python implementation
#
# 2016-03-11 11:15, 429 views
#

#  FILTERINGDATA.py

#

#  Code file for the book Programmer's Guide to Data Mining

#  http://guidetodatamining.com

#  Ron Zacharski

#

# -*- coding:utf8 -*

from math import sqrt

# Sample data set: maps each user name to a dict of {band name: rating}.
# A band a user has not rated is simply absent from that user's dict.
users = {
    "Angelica": {
        "Blues Traveler": 3.5,
        "Broken Bells": 2.0,
        "Norah Jones": 4.5,
        "Phoenix": 5.0,
        "Slightly Stoopid": 1.5,
        "The Strokes": 2.5,
        "Vampire Weekend": 2.0,
    },
    "Bill": {
        "Blues Traveler": 2.0,
        "Broken Bells": 3.5,
        "Deadmau5": 4.0,
        "Phoenix": 2.0,
        "Slightly Stoopid": 3.5,
        "Vampire Weekend": 3.0,
    },
    "Chan": {
        "Blues Traveler": 5.0,
        "Broken Bells": 1.0,
        "Deadmau5": 1.0,
        "Norah Jones": 3.0,
        "Phoenix": 5,
        "Slightly Stoopid": 1.0,
    },
    "Dan": {
        "Blues Traveler": 3.0,
        "Broken Bells": 4.0,
        "Deadmau5": 4.5,
        "Phoenix": 3.0,
        "Slightly Stoopid": 4.5,
        "The Strokes": 4.0,
        "Vampire Weekend": 2.0,
    },
    "Hailey": {
        "Broken Bells": 4.0,
        "Deadmau5": 1.0,
        "Norah Jones": 4.0,
        "The Strokes": 4.0,
        "Vampire Weekend": 1.0,
    },
    "Jordyn": {
        "Broken Bells": 4.5,
        "Deadmau5": 4.0,
        "Norah Jones": 5.0,
        "Phoenix": 5.0,
        "Slightly Stoopid": 4.5,
        "The Strokes": 4.0,
        "Vampire Weekend": 4.0,
    },
    "Sam": {
        "Blues Traveler": 5.0,
        "Broken Bells": 2.0,
        "Norah Jones": 3.0,
        "Phoenix": 5.0,
        "Slightly Stoopid": 4.0,
        "The Strokes": 5.0,
    },
    "Veronica": {
        "Blues Traveler": 3.0,
        "Norah Jones": 5.0,
        "Phoenix": 4.0,
        "Slightly Stoopid": 2.5,
        "The Strokes": 3.0,
    },
}

def manhattan(rating1, rating2):
    """Return the mean absolute rating difference over commonly rated items.

    Both rating1 and rating2 are dictionaries of the form
    {'The Strokes': 3.0, 'Slightly Stoopid': 2.5}.

    Note that, despite the name, the summed (Manhattan) distance is divided
    by the number of shared items, so the result is an average per-item gap.

    Returns -1 when the two dictionaries have no key in common.
    """
    # Start from a float so that all-integer ratings are not floor-divided
    # when this file runs under Python 2.
    distance = 0.0
    total = 0
    for key in rating1:
        if key in rating2:
            distance += abs(rating1[key] - rating2[key])
            total += 1
    if total == 0:
        return -1  # Indicates no ratings in common
    return distance / total

def pearson(rating1, rating2):
    """Return the Pearson correlation of two users over commonly rated items.

    Both rating1 and rating2 are dictionaries of the form
    {'The Strokes': 3.0, 'Slightly Stoopid': 2.5}. Only keys present in both
    dictionaries contribute. The single-pass computational formula is used.

    Returns 0 when the correlation is undefined: the users share no item,
    or at least one of them gave the same rating to every shared item.
    """
    # Float accumulators keep the divisions below from being floor divisions
    # under Python 2 when every rating is an int.
    sum_xy = 0.0
    sum_x = 0.0
    sum_y = 0.0
    sum_x2 = 0.0
    sum_y2 = 0.0
    n = 0
    for key in rating1:
        if key in rating2:
            n += 1
            x = rating1[key]
            y = rating2[key]
            sum_xy += x * y
            sum_x += x
            sum_y += y
            sum_x2 += pow(x, 2)
            sum_y2 += pow(y, 2)
    if n == 0:
        # No overlap: avoid dividing by zero and report "no correlation".
        return 0
    # Round-off can push these terms a hair below zero, which would make
    # sqrt() raise ValueError; clamp them at zero.
    spread_x = max(sum_x2 - pow(sum_x, 2) / n, 0.0)
    spread_y = max(sum_y2 - pow(sum_y, 2) / n, 0.0)
    denominator = sqrt(spread_x) * sqrt(spread_y)
    if denominator == 0:
        return 0
    return (sum_xy - (sum_x * sum_y) / n) / denominator

            

def cosSimilarity(rating1, rating2):
    """Return the cosine similarity of two rating dicts over their shared keys.

    Only items present in both rating1 and rating2 contribute to the dot
    product and to the two squared norms.

    Returns -1 when the product of the two norms is zero, which includes
    the case where the dictionaries have no key in common.
    """
    dot = 0
    norm_sq_1 = 0
    norm_sq_2 = 0
    for item, score_1 in rating1.items():
        if item not in rating2:
            continue
        score_2 = rating2[item]
        dot += score_1 * score_2
        norm_sq_1 += score_1 ** 2
        norm_sq_2 += score_2 ** 2

    magnitude = sqrt(norm_sq_1) * sqrt(norm_sq_2)
    if not magnitude:
        return -1
    return dot / magnitude

    

def computeNearestNeighbor(username, users):
    """Rank every other user by cosine similarity to username.

    users maps a user name to that user's {item: rating} dictionary.

    Returns a list of (similarity, user) tuples sorted in ascending order,
    so the most similar user is the LAST element. The list is empty when
    users contains nobody other than username. The sorted list is also
    printed as a debugging aid.
    """
    distances = []
    for user in users:
        if user != username:
            distance = cosSimilarity(users[user], users[username])
            distances.append((distance, user))
    # The sort is ascending. With cosine similarity the best match is the
    # last element; with a Minkowski-style distance it would be the first.
    distances.sort()
    # Parenthesised single-argument form prints identically on Python 2 and 3.
    print(distances)
    return distances

def recommend(username, users):
    """Give list of recommendations for username.

    The single most similar user (the last entry returned by
    computeNearestNeighbor) is looked up, and every item that neighbor rated
    but username did not is recommended.

    Returns a list of (item, neighbor's rating) tuples sorted by rating,
    highest first. Returns an empty list when there is no other user to
    compare against. The chosen neighbor's name is printed as a debugging aid.
    """
    # first find nearest neighbor
    neighbors = computeNearestNeighbor(username, users)
    if not neighbors:
        # Nobody to compare with; indexing [-1] would raise IndexError.
        return []
    nearest = neighbors[-1][1]  # most similar user is the last element
    # Parenthesised single-argument form prints identically on Python 2 and 3.
    print(nearest)

    # now find bands neighbor rated that user didn't
    neighborRatings = users[nearest]
    userRatings = users[username]
    recommendations = [
        (artist, rating)
        for artist, rating in neighborRatings.items()
        if artist not in userRatings
    ]
    # sorted() is stable, so equally rated items keep the neighbor's order
    return sorted(recommendations,
                  key=lambda artistTuple: artistTuple[1],
                  reverse=True)

# Demo run: print the recommendations for one sample user.
# The parenthesised form works as a statement on Python 2 and as a function
# call on Python 3.
print(recommend('Veronica', users))
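# Content is shared by users and compiled from the web; its accuracy is not guaranteed. If any content infringes your rights, contact the site administrator to have it handled. (Click here to send me a message.)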