MapReduce实现QQ好友推荐
2017-10-12 18:55
288 查看
大家都知道qq用户量上亿,每个用户又有很多的好友,因此,数据量十分的庞大,如何才能实现QQ的好友推荐呢?
下面举一个例子:
A有QQ好友B
B有QQ好友C
则A,C有可能是好友。
当A登录的时候,则会向A推荐C,当C登录的时候,则会向C推荐A。
完整的 Java 实现代码如下:
package com.FriendsRecommended.findFrends;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
/**
* @author Robin
* qq好友推荐
* Hadoop好友hello,hello好友是world...依次类推。
* 那么hadoop和world有共同的好友hello,所以hadoop和world可能具有好友关系,
* world就是hadoop的推荐好友,hadoop也是world的好友推荐。
* 计算出qq文件内符合上述条件的推荐好友
*
* 好友文件 :friends.txt
* hadoop hello
* hdfs world
* tom cat
* cat dog
* hello world
* hello hdfs
*/
public class FindFriends {
    /*
     * Mapper output for the sample file (each pair is emitted in both directions):
     * hadoop hello     hello hadoop
     * hdfs world       world hdfs
     * tom cat          cat tom
     * cat dog          dog cat
     * hello world      world hello
     * hello hdfs       hdfs hello
     */
    public static class FindFriendsMapper extends Mapper<LongWritable, Text, Text, Text> {
        /**
         * Splits each input line into two user names and emits the friendship
         * in both directions, so that the reducer receives, under each user's
         * key, the full set of that user's direct friends.
         */
        @Override
        protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, Text>.Context context)
                throws IOException, InterruptedException {
            String line = value.toString().trim();
            if (line.isEmpty()) {
                return; // skip blank lines instead of crashing on array[1]
            }
            String[] array = line.split("\\s+");
            if (array.length < 2) {
                return; // skip malformed lines with fewer than two names
            }
            context.write(new Text(array[0]), new Text(array[1]));
            context.write(new Text(array[1]), new Text(array[0]));
        }
    }
    /*
     * After the shuffle phase (default grouping by key) the reducer receives,
     * for each user, that user's direct friends, e.g.:
     *   hadoop -> [hello]
     *   hello  -> [hadoop, world, hdfs]
     *   hdfs   -> [world, hello]
     *   tom    -> [cat]
     *   cat    -> [tom, dog]
     *   dog    -> [cat]
     */
    // The reducer emits the Cartesian product (minus the diagonal) of each
    // friend set: any two users sharing the common friend "key" are
    // recommended to each other.
    //
    // NOTE(review): pairs that are already direct friends (e.g. hello/world
    // in the sample data) are still emitted; this matches the article's
    // expected output, so the behavior is intentionally kept.
    public static class FindFriendsReduce extends Reducer<Text, Text, Text, Text> {
        @Override
        protected void reduce(Text key, Iterable<Text> values, Reducer<Text, Text, Text, Text>.Context context)
                throws IOException, InterruptedException {
            // Deduplicate the friend list of the current user (the key).
            Set<String> friends = new HashSet<String>();
            for (Text v : values) {
                friends.add(v.toString());
            }
            // A single friend yields no pair; at least two are needed.
            if (friends.size() > 1) {
                for (String qqName : friends) {
                    for (String otherQqName : friends) {
                        if (!qqName.equals(otherQqName)) {
                            context.write(new Text(qqName), new Text(otherQqName));
                        }
                    }
                }
            }
        }
    }
    /**
     * Configures and submits the job: removes a stale output directory,
     * wires up the mapper/reducer classes and the input/output paths, then
     * blocks until completion and exits with the job's status code.
     */
    public static void main(String[] args)
            throws IOException, ClassNotFoundException, InterruptedException, URISyntaxException {
        final String INPUT_PATH = "hdfs://192.168.24.203:9000/user/FriendsRecommended/FriendsRecommended.txt";
        final String OUTPUT_PATH = "hdfs://192.168.24.203:9000/user/FriendsRecommended/out";
        // Configuration describes the map-reduce work to the Hadoop framework.
        Configuration conf = new Configuration();
        // Delete a pre-existing output directory, otherwise the job would fail.
        final FileSystem fileSystem = FileSystem.get(new URI(INPUT_PATH), conf);
        if (fileSystem.exists(new Path(OUTPUT_PATH))) {
            fileSystem.delete(new Path(OUTPUT_PATH), true);
        }
        Job job = Job.getInstance(conf, "FindFriends");   // user-defined job name
        job.setJarByClass(FindFriends.class);
        job.setMapperClass(FindFriendsMapper.class);      // mapper for this job
        // job.setCombinerClass(IntSumReducer.class);     // no combiner: output is not summable
        job.setReducerClass(FindFriendsReduce.class);     // reducer for this job
        job.setOutputKeyClass(Text.class);                // job output key type
        job.setOutputValueClass(Text.class);              // job output value type
        FileInputFormat.addInputPath(job, new Path(INPUT_PATH));
        FileOutputFormat.setOutputPath(job, new Path(OUTPUT_PATH));
        System.exit(job.waitForCompletion(true) ? 0 : 1); // run the job
    }
}
最后计算结果:
程序运行后的输出如下:
tom dog
dog tom
hello world
world hello
hdfs world
hdfs hadoop
world hdfs
world hadoop
hadoop hdfs
hadoop world
hello hdfs
hdfs hello
下面举一个例子:
A有QQ好友B
B有QQ好友C
则A,C有可能是好友。
当A登录的时候,则会向A推荐C,当C登录的时候,则会向C推荐A。
完整的 Java 实现代码如下:
package com.FriendsRecommended.findFrends;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
/**
* @author Robin
* qq好友推荐
* Hadoop好友hello,hello好友是world...依次类推。
* 那么hadoop和world有共同的好友hello,所以hadoop和world可能具有好友关系,
* world就是hadoop的推荐好友,hadoop也是world的好友推荐。
* 计算出qq文件内符合上述条件的推荐好友
*
* 好友文件 :friends.txt
* hadoop hello
* hdfs world
* tom cat
* cat dog
* hello world
* hello hdfs
*/
public class FindFriends {
    /*
     * Mapper output for the sample file (each pair is emitted in both directions):
     * hadoop hello     hello hadoop
     * hdfs world       world hdfs
     * tom cat          cat tom
     * cat dog          dog cat
     * hello world      world hello
     * hello hdfs       hdfs hello
     */
    public static class FindFriendsMapper extends Mapper<LongWritable, Text, Text, Text> {
        /**
         * Splits each input line into two user names and emits the friendship
         * in both directions, so that the reducer receives, under each user's
         * key, the full set of that user's direct friends.
         */
        @Override
        protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, Text>.Context context)
                throws IOException, InterruptedException {
            String line = value.toString().trim();
            if (line.isEmpty()) {
                return; // skip blank lines instead of crashing on array[1]
            }
            String[] array = line.split("\\s+");
            if (array.length < 2) {
                return; // skip malformed lines with fewer than two names
            }
            context.write(new Text(array[0]), new Text(array[1]));
            context.write(new Text(array[1]), new Text(array[0]));
        }
    }
    /*
     * After the shuffle phase (default grouping by key) the reducer receives,
     * for each user, that user's direct friends, e.g.:
     *   hadoop -> [hello]
     *   hello  -> [hadoop, world, hdfs]
     *   hdfs   -> [world, hello]
     *   tom    -> [cat]
     *   cat    -> [tom, dog]
     *   dog    -> [cat]
     */
    // The reducer emits the Cartesian product (minus the diagonal) of each
    // friend set: any two users sharing the common friend "key" are
    // recommended to each other.
    //
    // NOTE(review): pairs that are already direct friends (e.g. hello/world
    // in the sample data) are still emitted; this matches the article's
    // expected output, so the behavior is intentionally kept.
    public static class FindFriendsReduce extends Reducer<Text, Text, Text, Text> {
        @Override
        protected void reduce(Text key, Iterable<Text> values, Reducer<Text, Text, Text, Text>.Context context)
                throws IOException, InterruptedException {
            // Deduplicate the friend list of the current user (the key).
            Set<String> friends = new HashSet<String>();
            for (Text v : values) {
                friends.add(v.toString());
            }
            // A single friend yields no pair; at least two are needed.
            if (friends.size() > 1) {
                for (String qqName : friends) {
                    for (String otherQqName : friends) {
                        if (!qqName.equals(otherQqName)) {
                            context.write(new Text(qqName), new Text(otherQqName));
                        }
                    }
                }
            }
        }
    }
    /**
     * Configures and submits the job: removes a stale output directory,
     * wires up the mapper/reducer classes and the input/output paths, then
     * blocks until completion and exits with the job's status code.
     */
    public static void main(String[] args)
            throws IOException, ClassNotFoundException, InterruptedException, URISyntaxException {
        final String INPUT_PATH = "hdfs://192.168.24.203:9000/user/FriendsRecommended/FriendsRecommended.txt";
        final String OUTPUT_PATH = "hdfs://192.168.24.203:9000/user/FriendsRecommended/out";
        // Configuration describes the map-reduce work to the Hadoop framework.
        Configuration conf = new Configuration();
        // Delete a pre-existing output directory, otherwise the job would fail.
        final FileSystem fileSystem = FileSystem.get(new URI(INPUT_PATH), conf);
        if (fileSystem.exists(new Path(OUTPUT_PATH))) {
            fileSystem.delete(new Path(OUTPUT_PATH), true);
        }
        Job job = Job.getInstance(conf, "FindFriends");   // user-defined job name
        job.setJarByClass(FindFriends.class);
        job.setMapperClass(FindFriendsMapper.class);      // mapper for this job
        // job.setCombinerClass(IntSumReducer.class);     // no combiner: output is not summable
        job.setReducerClass(FindFriendsReduce.class);     // reducer for this job
        job.setOutputKeyClass(Text.class);                // job output key type
        job.setOutputValueClass(Text.class);              // job output value type
        FileInputFormat.addInputPath(job, new Path(INPUT_PATH));
        FileOutputFormat.setOutputPath(job, new Path(OUTPUT_PATH));
        System.exit(job.waitForCompletion(true) ? 0 : 1); // run the job
    }
}
最后计算结果:
程序运行后的输出如下:
tom dog
dog tom
hello world
world hello
hdfs world
hdfs hadoop
world hdfs
world hadoop
hadoop hdfs
hadoop world
hello hdfs
hdfs hello
相关文章推荐
- 用hadoop2.7.1 mapreduce实现QQ好友推荐功能
- MapReduce实现QQ好友推荐
- mapreduce实现QQ好友推荐
- MapReduce实现QQ好友推荐
- 使用Python MrJob的MapReduce实现电影推荐系统
- mapreduce实现推荐系统
- 使用Python MrJob的MapReduce实现电影推荐系统
- 通过mapReduce实现基于项目的协同过滤推荐
- 基于MapReduce的ItemBase推荐算法的共现矩阵实现(一) 推荐
- Hadoop 2.6 使用MapReduce实现基于物品的推荐系统
- 用Hadoop流实现mapreduce版推荐系统基于物品的协同过滤算法
- Hadoop 2.6 使用MapReduce实现基于内容的推荐系统
- Hadoop 2.6 使用MapReduce实现基于用户的推荐系统
- MapReduce实现单表关联(微博关注推荐)
- 使用Python MrJob的MapReduce实现电影推荐系统
- MapReduce之推荐算法实现
- MapReduce实现之 好友推荐
- 使用Python MrJob的MapReduce实现电影推荐系统
- 一些算法的MapReduce实现——好友推荐
- MapReduce实现推荐系统