基于用户的最近邻推荐
2013-10-22 16:51
176 查看
课程 Python代码:
代码与数据下载链接:https://www.dropbox.com/s/78ifrycp9x1238i/UserUserRec.rar
__author__ = 'LiFeiteng(Email: lifeiteng0422@gmail.com)'
# -*- coding: utf-8 -*-
# User-based nearest-neighbour collaborative filtering.
#
# Ratings are read from a "user,movie,rating" CSV file into a dense
# users x movies matrix in which 0 means "not rated".  A rating is predicted
# from the mean-centred ratings of the users most similar to the target user.
# The code runs unchanged on Python 2.7 and Python 3.
import numpy as np


class UserUserRec(object):
    """User-user nearest-neighbour rating predictor.

    Attributes:
        U: number of distinct users seen so far.
        M: number of distinct movies seen so far.
        user_dict: maps a user id string to its row in ``user_ratings``.
        movie_dict: maps a movie id string to its column in ``user_ratings``.
        movie_title: maps a movie id string to its title.
        user_ratings: U x M ``np.matrix`` of ratings; 0 marks "not rated".
    """

    def __init__(self):
        self.U = 0  # user number
        self.M = 0  # movie number
        self.user_dict = {}
        self.movie_dict = {}
        self.movie_title = {}
        self.user_ratings = np.matrix([])

    def GetRatingData(self, ratings_file):
        """Load ``user,movie,rating`` lines from *ratings_file*.

        Users and movies are assigned matrix rows/columns in order of first
        appearance.  Blank lines are skipped.  When a (user, movie) pair
        occurs more than once the last rating wins.  The user and movie
        counts are printed once the file has been read.

        Raises:
            IOError/OSError: if *ratings_file* cannot be opened.
            ValueError: if a line does not have exactly three fields or the
                rating is not numeric.
        """
        entries = []
        # Read the file that was actually requested (once) and close it
        # deterministically; the indices are assigned while reading.
        with open(ratings_file) as handle:
            for line in handle:
                line = line.strip()
                if not line:
                    continue
                user, movie, rating = line.split(",")
                if user not in self.user_dict:
                    self.user_dict[user] = self.U
                    self.U += 1
                if movie not in self.movie_dict:
                    self.movie_dict[movie] = self.M
                    self.M += 1
                entries.append(
                    (self.user_dict[user], self.movie_dict[movie], float(rating))
                )
        print("%d %d" % (self.U, self.M))

        self.user_ratings = np.matrix(np.zeros([self.U, self.M]))
        for row, col, value in entries:
            self.user_ratings[row, col] = value

    def GetMovieTitles(self, movie_titles_file):
        """Load ``movie,title`` lines from *movie_titles_file*.

        Only the first comma separates the id from the title, so titles may
        themselves contain commas.  Trailing line terminators are removed
        without touching the title text.  Blank lines are skipped.

        Raises:
            IOError/OSError: if *movie_titles_file* cannot be opened.
            ValueError: if a non-blank line contains no comma.
        """
        with open(movie_titles_file) as handle:
            for line in handle:
                line = line.rstrip("\r\n")
                if not line:
                    continue
                movie, title = line.split(",", 1)
                self.movie_title[movie] = title

    def _rating_row(self, user_index):
        """Return the ratings of matrix row *user_index* as a 1-D array."""
        return np.asarray(self.user_ratings, dtype=float)[user_index, :]

    def _mean_rating(self, user_index):
        """Return the mean of the user's positive ratings, or 0.0 if none."""
        row = self._rating_row(user_index)
        rated = row > 0.0
        if not rated.any():
            return 0.0
        return float(row[rated].mean())

    def _centered_row(self, user_index):
        """Return the user's mean-centred ratings, with 0 where unrated."""
        row = self._rating_row(user_index)
        rated = row > 0.0
        if not rated.any():
            return np.zeros(row.shape)
        return np.where(rated, row - row[rated].mean(), 0.0)

    @staticmethod
    def _cosine(vec_a, vec_b):
        """Return the cosine of two 1-D vectors, or 0.0 if either is zero."""
        scale = np.linalg.norm(vec_a) * np.linalg.norm(vec_b)
        if scale == 0:
            return np.double(0.0)
        return np.double(np.dot(vec_a, vec_b) / scale)

    def CosineUserSim(self, user1, user2):
        """Return the similarity between two users.

        *user1* and *user2* are row numbers of the rating matrix.  Each
        user's ratings are centred on that user's mean rating (unrated
        entries count as 0 after centring) and the cosine of the two
        centred vectors is returned.  The result is 0.0 when either vector
        has zero norm, e.g. a user with no ratings or with identical ratings
        for every rated movie.
        """
        return self._cosine(self._centered_row(user1), self._centered_row(user2))

    def MovieScore4User(self, user, movie, neighbors=30):
        """Predict the rating *user* would give *movie*.

        The prediction is the user's own mean rating plus the
        similarity-weighted average deviation (rating minus that rater's
        mean) of the *neighbors* most similar other users who rated the
        movie.  The line ``user,movie,score,title`` is printed as well.

        Args:
            user: user id string (a key of ``user_dict``).
            movie: movie id string (a key of ``movie_dict``).
            neighbors: maximum number of most-similar raters to use.

        Returns:
            The predicted rating as ``np.double``.  When no neighbour with a
            non-zero similarity exists, the user's mean rating is returned
            (0.0 if the user has no ratings at all).

        Raises:
            KeyError: if *user* or *movie* is unknown.
        """
        target = self.user_dict[user]
        column = np.asarray(self.user_ratings, dtype=float)[:, self.movie_dict[movie]]
        target_vec = self._centered_row(target)  # loop-invariant

        # Every other user who rated the movie, in row order so that the
        # stable sort below breaks similarity ties by row number.
        candidates = []
        for rater in np.flatnonzero(column != 0.0):
            if rater == target:
                continue
            similarity = self._cosine(target_vec, self._centered_row(rater))
            candidates.append((rater, column[rater], similarity))
        candidates.sort(key=lambda entry: entry[2], reverse=True)

        weighted_sum = 0.0
        weight_total = 0.0
        for rater, rating, similarity in candidates[:max(neighbors, 0)]:
            weighted_sum += (rating - self._mean_rating(rater)) * similarity
            weight_total += abs(similarity)

        # Without any usable neighbour there is no deviation to add.
        offset = weighted_sum / weight_total if weight_total > 0 else 0.0
        score4movie = np.double(self._mean_rating(target) + offset)
        print(",".join([user, movie, format(score4movie, ".4f"),
                        self.movie_title.get(movie, "")]))
        return score4movie
# end of class UserUserRec


def main():
    """Score every ``user:movie`` line of input.txt into outfile.txt (PA3)."""
    user_user_rec = UserUserRec()
    user_user_rec.GetRatingData("ratings.csv")
    user_user_rec.GetMovieTitles("movie-titles.csv")
    with open("input.txt") as infile, open("outfile.txt", "w") as outfile:
        for line in infile:
            line = line.strip()
            if not line:
                continue
            user, movie = line.split(":")
            movie = str(int(movie))  # normalise e.g. "0011" -> "11"
            score = user_user_rec.MovieScore4User(user, movie)
            str1 = ",".join([user, movie, format(score, ".4f"),
                             user_user_rec.movie_title[movie]])
            outfile.write(str1 + "\n")


if __name__ == '__main__':
    main()
代码与数据下载链接:https://www.dropbox.com/s/78ifrycp9x1238i/UserUserRec.rar
相关文章推荐
- 最近邻居推荐系统原理和基于用户的评分预测推荐
- 【推荐系统】协同过滤之基于用户的最近邻推荐
- 推荐系统-基于用户的最近邻协同过滤算法(MovieLens数据集)
- 推荐系统-基于用户的最近邻协同过滤算法(MovieLens数据集)
- 多功能复合机基于用户认证功能的实现过程详解 推荐
- R语言实战实现基于用户的简单的推荐系统(数量较少)
- 如何将用户表示成机器可以处理的形式(基于内容的推荐)
- Postfix-2.11+Dovecot-2.0.9+MySQL+Cyrus-sasl+Extmail-1.2实现基于虚拟用户的邮件系统架构 推荐
- 基于用户点击行为的新闻个性推荐
- 基于用户的推荐和基于物品的推荐
- 实战智能推荐系统(7)-- 基于用户的协同过滤算法
- 推荐系统读书笔记一--基于用户行为的推荐
- Linux网络管理之二:基于用户级别的samba服务器配置 推荐
- 基于用户的协同过滤(user-based CF)推荐系统【2】
- 协同过滤之 一、基于用户最近邻的推荐算法
- 构建基于用户的检索和推荐
- 推荐系统(二) —— 利用用户行为数据 —— 基于领域的算法
- 推荐系统--基于用户的协同过滤算法
- 基于用户的协同过滤推荐模型
- 基于用户点击行为的新闻个性推荐