
Social Fan Data Analysis: Finding Common QQ Friends

2017-07-13 22:09
Requirement:
Below is QQ friend-list data. Before each colon is a user; after the colon are all of that user's friends (the friendship relation in this data is one-directional).

A:B,C,D,F,E,O

B:A,C,E,K

C:F,A,D,I

D:A,E,F,L

E:B,C,D,M,L

F:A,B,C,D,E,O,M

G:A,C,D,E,F

H:A,C,D,E,O

I:A,O

J:B,O

K:A,C,D

L:D,E,F

M:E,F,G

O:A,H,I,J

Find every pair of people who have at least one common friend, and list who their common friends are.
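To make the requirement concrete, here is a minimal plain-Java sketch (the class name NaiveCommonFriends and the hard-coded lists are illustrative, not part of the jobs below) that intersects the friend lists of A and B directly:

import java.util.Arrays;
import java.util.Set;
import java.util.TreeSet;

// Illustrative sketch: computes the common friends of A and B directly
// from the sample data, to show what the MapReduce pipeline computes at scale.
public class NaiveCommonFriends {
    public static void main(String[] args) {
        Set<String> friendsOfA = new TreeSet<>(Arrays.asList("B,C,D,F,E,O".split(",")));
        Set<String> friendsOfB = new TreeSet<>(Arrays.asList("A,C,E,K".split(",")));
        friendsOfA.retainAll(friendsOfB); // set intersection
        System.out.println("A-B " + friendsOfA); // prints: A-B [C, E]
    }
}

This matches the first line of the final Stage 2 output below (A-B E C, ordering aside). Doing this naively for every pair does not distribute, which is what the two MapReduce stages address.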

Stage 1 idea: first find, for each person, which people all have that person as a friend. For example, the line A:B,C,D,F,E,O makes the mapper emit (B,A), (C,A), (D,A), (F,A), (E,A), (O,A); after the shuffle, the reducer for key B receives every person whose friend list contains B.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class ShareFriendsStepOne {

    static class ShareFriendsMapper extends Mapper<LongWritable, Text, Text, Text> {

        @Override
        protected void map(LongWritable key, Text values, Context context) throws IOException, InterruptedException {
            // Input line format: A:B,C,D,F,E,O
            String value = values.toString();
            String[] person_friends = value.split(":");
            String person = person_friends[0];
            String friends = person_friends[1];

            for (String friend : friends.split(",")) {
                // Emit (friend, person): invert the relation so the reducer
                // collects every person who lists this friend
                context.write(new Text(friend), new Text(person));
            }
        }
    }

    static class ShareFriendsReduce extends Reducer<Text, Text, Text, Text> {
        // Incoming pairs look like (B,A) (C,A) (D,A) (E,A) (F,A)

        @Override
        protected void reduce(Text friend, Iterable<Text> persons, Context context)
                throws IOException, InterruptedException {

            StringBuilder sb = new StringBuilder();

            for (Text person : persons) {
                sb.append(person).append(",");
            }
            // Emit (friend, person,person,person,...)
            context.write(friend, new Text(sb.toString()));
        }
    }

    public static void main(String[] args) throws Exception {

        Configuration conf = new Configuration();
        if (args.length != 2) {
            System.err.println("Usage: ShareFriendsStepOne <input dir> <output dir>");
            System.exit(2);
        }
        String path = args[0];  // input directory to process
        String path1 = args[1]; // output directory for results

        Job job = Job.getInstance(conf);
        job.setJarByClass(ShareFriendsStepOne.class);

        job.setMapperClass(ShareFriendsMapper.class);
        job.setReducerClass(ShareFriendsReduce.class);

        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        FileInputFormat.setInputPaths(job, new Path(path));
        FileOutputFormat.setOutputPath(job, new Path(path1));

        // Submit the job to the YARN cluster and wait for completion
        boolean res = job.waitForCompletion(true);
        System.exit(res ? 0 : 1);
    }
}

Stage 1 output:

A I,K,C,B,G,F,H,O,D,

B A,F,J,E,

C A,E,B,H,F,G,K,

D G,C,K,A,L,F,E,H,

E G,M,L,H,A,F,B,D,

F L,M,D,C,G,A,

G M,

H O,

I O,C,

J O,

K B,

L D,E,

M E,F,

O A,H,I,J,F,
Stage 2: iterate over the Stage 1 output; the key is a pair of people and the value is one friend they have in common.
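One detail worth calling out before the code: the person lists from Stage 1 arrive in arbitrary shuffle order, so the pair key must be canonical or the same pair could be emitted as both B-E and E-B and its common friends split across reducers. A standalone sketch of the idea (the class and helper names PairKeyDemo and pairKey are illustrative, not part of the job below):

// Illustrative sketch: shows why pair keys must be canonical. Without a
// fixed order, (B,E) and (E,B) would be different keys on different reducers.
public class PairKeyDemo {
    static String pairKey(String a, String b) {
        // Order the two names lexicographically so both inputs yield "B-E"
        return a.compareTo(b) <= 0 ? a + "-" + b : b + "-" + a;
    }
    public static void main(String[] args) {
        System.out.println(pairKey("B", "E")); // B-E
        System.out.println(pairKey("E", "B")); // B-E
    }
}

The mapper below achieves the same effect by sorting the whole person array once with Arrays.sort before generating pairs.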

import java.io.IOException;
import java.util.Arrays;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class SharedFriendsStepTwo {

    static class SharedFriendsStepTwoMapper extends Mapper<LongWritable, Text, Text, Text> {

        // The input is the output of the previous stage, e.g.:
        // A    I,K,C,B,G,F,H,O,D,
        // i.e. friend <TAB> person,person,person,...
        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {

            String line = value.toString();
            String[] friend_persons = line.split("\t");

            String friend = friend_persons[0];
            String[] persons = friend_persons[1].split(",");

            // Sort so each pair is always emitted in the same order
            // (B-E, never E-B)
            Arrays.sort(persons);

            for (int i = 0; i < persons.length - 1; i++) {
                for (int j = i + 1; j < persons.length; j++) {
                    // Emit <person-person, friend> so that all common friends
                    // of the same pair reach the same reducer
                    context.write(new Text(persons[i] + "-" + persons[j]), new Text(friend));
                }
            }
        }
    }

    static class SharedFriendsStepTwoReducer extends Reducer<Text, Text, Text, Text> {

        @Override
        protected void reduce(Text person_person, Iterable<Text> friends, Context context)
                throws IOException, InterruptedException {

            StringBuilder sb = new StringBuilder();

            for (Text friend : friends) {
                sb.append(friend).append(" ");
            }
            // Emit (person-person, friend friend friend ...)
            context.write(person_person, new Text(sb.toString()));
        }
    }

    public static void main(String[] args) throws Exception {

        Configuration conf = new Configuration();
        if (args.length != 2) {
            System.err.println("Usage: SharedFriendsStepTwo <input dir> <output dir>");
            System.exit(2);
        }
        String path = args[0];  // input directory (the Stage 1 output)
        String path1 = args[1]; // output directory for results

        Job job = Job.getInstance(conf);
        job.setJarByClass(SharedFriendsStepTwo.class);

        job.setMapperClass(SharedFriendsStepTwoMapper.class);
        job.setReducerClass(SharedFriendsStepTwoReducer.class);

        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        FileInputFormat.setInputPaths(job, new Path(path));
        FileOutputFormat.setOutputPath(job, new Path(path1));

        // Submit the job to the YARN cluster and wait for completion
        boolean res = job.waitForCompletion(true);
        System.exit(res ? 0 : 1);
    }
}
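The two stages have to run in order, with Stage 1's output directory fed to Stage 2 as input. Rather than submitting each job by hand, they can be chained in one driver. A minimal sketch, assuming both classes above are compiled into the same package; the class name SharedFriendsDriver and the three-argument convention (input, intermediate dir, final output) are illustrative:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

// Illustrative driver: args = <input dir> <intermediate dir> <final output dir>
public class SharedFriendsDriver {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();

        // Stage 1: invert the friend lists
        Job stepOne = Job.getInstance(conf, "shared-friends-step-one");
        stepOne.setJarByClass(ShareFriendsStepOne.class);
        stepOne.setMapperClass(ShareFriendsStepOne.ShareFriendsMapper.class);
        stepOne.setReducerClass(ShareFriendsStepOne.ShareFriendsReduce.class);
        stepOne.setOutputKeyClass(Text.class);
        stepOne.setOutputValueClass(Text.class);
        FileInputFormat.setInputPaths(stepOne, new Path(args[0]));
        FileOutputFormat.setOutputPath(stepOne, new Path(args[1]));
        if (!stepOne.waitForCompletion(true)) {
            System.exit(1); // abort if Stage 1 fails
        }

        // Stage 2: pair up the persons sharing each friend
        Job stepTwo = Job.getInstance(conf, "shared-friends-step-two");
        stepTwo.setJarByClass(SharedFriendsStepTwo.class);
        stepTwo.setMapperClass(SharedFriendsStepTwo.SharedFriendsStepTwoMapper.class);
        stepTwo.setReducerClass(SharedFriendsStepTwo.SharedFriendsStepTwoReducer.class);
        stepTwo.setOutputKeyClass(Text.class);
        stepTwo.setOutputValueClass(Text.class);
        FileInputFormat.setInputPaths(stepTwo, new Path(args[1]));
        FileOutputFormat.setOutputPath(stepTwo, new Path(args[2]));
        System.exit(stepTwo.waitForCompletion(true) ? 0 : 1);
    }
}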


Stage 2 output:
A-B E C 

A-C D F 

A-D E F 

A-E D B C 

A-F O B C D E 

A-G F E C D 

A-H E C D O 

A-I O 

A-J O B 

A-K D C 

A-L F E D 

A-M E F 

B-C A 

B-D A E 

B-E C 

B-F E A C 

B-G C E A 

B-H A E C 

B-I A 

B-K C A 

B-L E 

B-M E 

B-O A 

C-D A F 

C-E D 

C-F D A 

C-G D F A 

C-H D A 

C-I A 

C-K A D 

C-L D F 

C-M F 

C-O I A 

D-E L 

D-F A E 

D-G E A F 

D-H A E 

D-I A 

D-K A 

D-L E F 

D-M F E 

D-O A 

E-F D M C B 

E-G C D 

E-H C D 

E-J B 

E-K C D 

E-L D 

F-G D C A E 

F-H A D O E C 

F-I O A 

F-J B O 

F-K D C A 

F-L E D 

F-M E 

F-O A 

G-H D C E A 

G-I A 

G-K D A C 

G-L D F E 

G-M E F 

G-O A 

H-I O A 

H-J O 

H-K A C D 

H-L D E 

H-M E 

H-O A 

I-J O 

I-K A 

I-O A 

K-L D 

K-O A 

L-M E F 
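Finally, to sanity-check the pipeline's logic without a cluster, here is a brute-force plain-Java sketch (the class name BruteForceCheck and the hard-coded sample data are illustrative) that computes the same pairs directly from the original adjacency lists:

import java.util.*;

// Illustrative brute-force check: for every pair of users, intersect their
// friend lists and print the pairs with at least one common friend.
public class BruteForceCheck {
    public static void main(String[] args) {
        String[] lines = {
            "A:B,C,D,F,E,O", "B:A,C,E,K", "C:F,A,D,I", "D:A,E,F,L",
            "E:B,C,D,M,L", "F:A,B,C,D,E,O,M", "G:A,C,D,E,F", "H:A,C,D,E,O",
            "I:A,O", "J:B,O", "K:A,C,D", "L:D,E,F", "M:E,F,G", "O:A,H,I,J"
        };
        Map<String, Set<String>> friends = new TreeMap<>();
        for (String line : lines) {
            String[] parts = line.split(":");
            friends.put(parts[0], new TreeSet<>(Arrays.asList(parts[1].split(","))));
        }
        List<String> users = new ArrayList<>(friends.keySet());
        for (int i = 0; i < users.size() - 1; i++) {
            for (int j = i + 1; j < users.size(); j++) {
                Set<String> common = new TreeSet<>(friends.get(users.get(i)));
                common.retainAll(friends.get(users.get(j))); // set intersection
                if (!common.isEmpty()) {
                    System.out.println(users.get(i) + "-" + users.get(j) + " " + common);
                }
            }
        }
    }
}

Its pair set agrees with the Stage 2 output above (the friends within each pair may print in a different order), which makes it a handy regression check when modifying either job.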