MapReduce实现QQ好友推荐
2017-10-12 18:55
288 查看
大家都知道qq用户量上亿,每个用户又有很多的好友,因此,数据量十分的庞大,如何才能实现QQ的好友推荐呢?
下面举一个例子:
A有QQ好友B
B有QQ好友C
则A,C有可能是好友。
当A登录的时候,则会向A推荐C,当C登录的时候,则会向C推荐A。
完整的 Java 实现代码如下:
package com.FriendsRecommended.findFrends;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
/**
* @author Robin
* qq好友推荐
* Hadoop好友hello,hello好友是world...依次类推。
* 那么hadoop和world有共同的好友hello,所以hadoop和world可能具有好友关系,
* world就是hadoop的推荐好友,hadoop也是world的好友推荐。
* 计算出qq文件内符合上述条件的推荐好友
*
* 好友文件 :friends.txt
* hadoop hello
* hdfs world
* tom cat
* cat dog
* hello world
* hello hdfs
*/
public class FindFriends {
    /*
     * Mapper output for the sample file (each pair is emitted in both directions):
     * hadoop hello     hello hadoop
     * hdfs world       world hdfs
     * tom cat          cat tom
     * cat dog          dog cat
     * hello world      world hello
     * hello hdfs       hdfs hello
     */
    public static class FindFriendsMapper extends Mapper<LongWritable, Text, Text, Text> {
        /**
         * Splits each input line into two user names and emits the friendship
         * in both directions, so that the reducer receives, under each user's
         * key, the full set of that user's direct friends.
         */
        @Override
        protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, Text>.Context context)
                throws IOException, InterruptedException {
            String line = value.toString().trim();
            if (line.isEmpty()) {
                return; // skip blank lines instead of crashing on array[1]
            }
            String[] array = line.split("\\s+");
            if (array.length < 2) {
                return; // skip malformed lines with fewer than two names
            }
            context.write(new Text(array[0]), new Text(array[1]));
            context.write(new Text(array[1]), new Text(array[0]));
        }
    }
    /*
     * After the shuffle phase (default grouping by key) the reducer receives,
     * for each user, that user's direct friends, e.g.:
     *   hadoop -> [hello]
     *   hello  -> [hadoop, world, hdfs]
     *   hdfs   -> [world, hello]
     *   tom    -> [cat]
     *   cat    -> [tom, dog]
     *   dog    -> [cat]
     */
    // The reducer emits the Cartesian product (minus the diagonal) of each
    // friend set: any two users sharing the common friend "key" are
    // recommended to each other.
    //
    // NOTE(review): pairs that are already direct friends (e.g. hello/world
    // in the sample data) are still emitted; this matches the article's
    // expected output, so the behavior is intentionally kept.
    public static class FindFriendsReduce extends Reducer<Text, Text, Text, Text> {
        @Override
        protected void reduce(Text key, Iterable<Text> values, Reducer<Text, Text, Text, Text>.Context context)
                throws IOException, InterruptedException {
            // Deduplicate the friend list of the current user (the key).
            Set<String> friends = new HashSet<String>();
            for (Text v : values) {
                friends.add(v.toString());
            }
            // A single friend yields no pair; at least two are needed.
            if (friends.size() > 1) {
                for (String qqName : friends) {
                    for (String otherQqName : friends) {
                        if (!qqName.equals(otherQqName)) {
                            context.write(new Text(qqName), new Text(otherQqName));
                        }
                    }
                }
            }
        }
    }
    /**
     * Configures and submits the job: removes a stale output directory,
     * wires up the mapper/reducer classes and the input/output paths, then
     * blocks until completion and exits with the job's status code.
     */
    public static void main(String[] args)
            throws IOException, ClassNotFoundException, InterruptedException, URISyntaxException {
        final String INPUT_PATH = "hdfs://192.168.24.203:9000/user/FriendsRecommended/FriendsRecommended.txt";
        final String OUTPUT_PATH = "hdfs://192.168.24.203:9000/user/FriendsRecommended/out";
        // Configuration describes the map-reduce work to the Hadoop framework.
        Configuration conf = new Configuration();
        // Delete a pre-existing output directory, otherwise the job would fail.
        final FileSystem fileSystem = FileSystem.get(new URI(INPUT_PATH), conf);
        if (fileSystem.exists(new Path(OUTPUT_PATH))) {
            fileSystem.delete(new Path(OUTPUT_PATH), true);
        }
        Job job = Job.getInstance(conf, "FindFriends");   // user-defined job name
        job.setJarByClass(FindFriends.class);
        job.setMapperClass(FindFriendsMapper.class);      // mapper for this job
        // job.setCombinerClass(IntSumReducer.class);     // no combiner: output is not summable
        job.setReducerClass(FindFriendsReduce.class);     // reducer for this job
        job.setOutputKeyClass(Text.class);                // job output key type
        job.setOutputValueClass(Text.class);              // job output value type
        FileInputFormat.addInputPath(job, new Path(INPUT_PATH));
        FileOutputFormat.setOutputPath(job, new Path(OUTPUT_PATH));
        System.exit(job.waitForCompletion(true) ? 0 : 1); // run the job
    }
}
最后计算结果:
程序运行后的输出如下:
tom dog
dog tom
hello world
world hello
hdfs world
hdfs hadoop
world hdfs
world hadoop
hadoop hdfs
hadoop world
hello hdfs
hdfs hello
下面举一个例子:
A有QQ好友B
B有QQ好友C
则A,C有可能是好友。
当A登录的时候,则会向A推荐C,当C登录的时候,则会向C推荐A。
完整的 Java 实现代码如下:
package com.FriendsRecommended.findFrends;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
/**
* @author Robin
* qq好友推荐
* Hadoop好友hello,hello好友是world...依次类推。
* 那么hadoop和world有共同的好友hello,所以hadoop和world可能具有好友关系,
* world就是hadoop的推荐好友,hadoop也是world的好友推荐。
* 计算出qq文件内符合上述条件的推荐好友
*
* 好友文件 :friends.txt
* hadoop hello
* hdfs world
* tom cat
* cat dog
* hello world
* hello hdfs
*/
public class FindFriends {
    /*
     * Mapper output for the sample file (each pair is emitted in both directions):
     * hadoop hello     hello hadoop
     * hdfs world       world hdfs
     * tom cat          cat tom
     * cat dog          dog cat
     * hello world      world hello
     * hello hdfs       hdfs hello
     */
    public static class FindFriendsMapper extends Mapper<LongWritable, Text, Text, Text> {
        /**
         * Splits each input line into two user names and emits the friendship
         * in both directions, so that the reducer receives, under each user's
         * key, the full set of that user's direct friends.
         */
        @Override
        protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, Text>.Context context)
                throws IOException, InterruptedException {
            String line = value.toString().trim();
            if (line.isEmpty()) {
                return; // skip blank lines instead of crashing on array[1]
            }
            String[] array = line.split("\\s+");
            if (array.length < 2) {
                return; // skip malformed lines with fewer than two names
            }
            context.write(new Text(array[0]), new Text(array[1]));
            context.write(new Text(array[1]), new Text(array[0]));
        }
    }
    /*
     * After the shuffle phase (default grouping by key) the reducer receives,
     * for each user, that user's direct friends, e.g.:
     *   hadoop -> [hello]
     *   hello  -> [hadoop, world, hdfs]
     *   hdfs   -> [world, hello]
     *   tom    -> [cat]
     *   cat    -> [tom, dog]
     *   dog    -> [cat]
     */
    // The reducer emits the Cartesian product (minus the diagonal) of each
    // friend set: any two users sharing the common friend "key" are
    // recommended to each other.
    //
    // NOTE(review): pairs that are already direct friends (e.g. hello/world
    // in the sample data) are still emitted; this matches the article's
    // expected output, so the behavior is intentionally kept.
    public static class FindFriendsReduce extends Reducer<Text, Text, Text, Text> {
        @Override
        protected void reduce(Text key, Iterable<Text> values, Reducer<Text, Text, Text, Text>.Context context)
                throws IOException, InterruptedException {
            // Deduplicate the friend list of the current user (the key).
            Set<String> friends = new HashSet<String>();
            for (Text v : values) {
                friends.add(v.toString());
            }
            // A single friend yields no pair; at least two are needed.
            if (friends.size() > 1) {
                for (String qqName : friends) {
                    for (String otherQqName : friends) {
                        if (!qqName.equals(otherQqName)) {
                            context.write(new Text(qqName), new Text(otherQqName));
                        }
                    }
                }
            }
        }
    }
    /**
     * Configures and submits the job: removes a stale output directory,
     * wires up the mapper/reducer classes and the input/output paths, then
     * blocks until completion and exits with the job's status code.
     */
    public static void main(String[] args)
            throws IOException, ClassNotFoundException, InterruptedException, URISyntaxException {
        final String INPUT_PATH = "hdfs://192.168.24.203:9000/user/FriendsRecommended/FriendsRecommended.txt";
        final String OUTPUT_PATH = "hdfs://192.168.24.203:9000/user/FriendsRecommended/out";
        // Configuration describes the map-reduce work to the Hadoop framework.
        Configuration conf = new Configuration();
        // Delete a pre-existing output directory, otherwise the job would fail.
        final FileSystem fileSystem = FileSystem.get(new URI(INPUT_PATH), conf);
        if (fileSystem.exists(new Path(OUTPUT_PATH))) {
            fileSystem.delete(new Path(OUTPUT_PATH), true);
        }
        Job job = Job.getInstance(conf, "FindFriends");   // user-defined job name
        job.setJarByClass(FindFriends.class);
        job.setMapperClass(FindFriendsMapper.class);      // mapper for this job
        // job.setCombinerClass(IntSumReducer.class);     // no combiner: output is not summable
        job.setReducerClass(FindFriendsReduce.class);     // reducer for this job
        job.setOutputKeyClass(Text.class);                // job output key type
        job.setOutputValueClass(Text.class);              // job output value type
        FileInputFormat.addInputPath(job, new Path(INPUT_PATH));
        FileOutputFormat.setOutputPath(job, new Path(OUTPUT_PATH));
        System.exit(job.waitForCompletion(true) ? 0 : 1); // run the job
    }
}
最后计算结果:
程序运行后的输出如下:
tom dog
dog tom
hello world
world hello
hdfs world
hdfs hadoop
world hdfs
world hadoop
hadoop hdfs
hadoop world
hello hdfs
hdfs hello
相关文章推荐
- 用hadoop2.7.1 mapreduce实现QQ好友推荐功能
- MapReduce实现QQ好友推荐
- mapreduce实现QQ好友推荐
- MapReduce实现QQ好友推荐
- 使用Python MrJob的MapReduce实现电影推荐系统
- mapreduce实现推荐系统
- 使用Python MrJob的MapReduce实现电影推荐系统
- 通过mapReduce实现基于项目的协同过滤推荐
- 基于MapReduce的ItemBase推荐算法的共现矩阵实现(一) 推荐
- Hadoop 2.6 使用MapReduce实现基于物品的推荐系统
- 用Hadoop流实现mapreduce版推荐系统基于物品的协同过滤算法
- Hadoop 2.6 使用MapReduce实现基于内容的推荐系统
- Hadoop 2.6 使用MapReduce实现基于用户的推荐系统
- MapReduce实现单表关联(微博关注推荐)
- 使用Python MrJob的MapReduce实现电影推荐系统
- MapReduce之推荐算法实现
- MapReduce实现之 好友推荐
- 使用Python MrJob的MapReduce实现电影推荐系统
- 一些算法的MapReduce实现——好友推荐
- MapReduce实现推荐系统