您的位置:首页 > 运维架构 > 网站架构

036 关于网站的UV分析

2016-11-01 19:51 309 查看
一:准备

1.统计的维度

  guid

  tracktime

  province

2.key与value的设定

  key:date+"\t"+province+"_"+guid

  value:NullWritable

3.案例分析

  表示某天某个省份的某个人无论访问网站多少次,仅仅记做一次访问统计

  UV(Unique Visitor):统计访问页面的独立用户数,即按用户标识(这里使用guid)去重之后的人数

二:程序

1.map程序

  


  

2.reduce程序

  


3.结果

  


4.理解点

  1)怎么去重

    数据key的形式:date+"\t"+province+"_"+guid。

    当日期、省份和guid都相同时,map输出的key完全相同;在shuffle的分组阶段,相同的key会被归为一组,只触发一次reduce调用。value是NullWritable,不携带任何数据,程序中也没有使用它。

    所以到达reduce的时候,数据已经被去重了。

  2)NullWritable.get()

    NullWritable是单例,NullWritable.get()直接返回这个唯一的实例,并不是通过反射创建对象。

5.完整程序

1 package com.senior.network;
2
3 import java.io.IOException;
4 import java.util.HashMap;
5 import java.util.Map;
6 import java.util.Set;
7
8 import org.apache.commons.lang.StringUtils;
9 import org.apache.hadoop.conf.Configuration;
10 import org.apache.hadoop.conf.Configured;
11 import org.apache.hadoop.fs.Path;
12 import org.apache.hadoop.io.IntWritable;
13 import org.apache.hadoop.io.LongWritable;
14 import org.apache.hadoop.io.NullWritable;
15 import org.apache.hadoop.io.Text;
16 import org.apache.hadoop.mapreduce.Job;
17 import org.apache.hadoop.mapreduce.Mapper;
18 import org.apache.hadoop.mapreduce.Mapper.Context;
19 import org.apache.hadoop.mapreduce.Reducer;
20 import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
21 import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
22 import org.apache.hadoop.util.Tool;
23 import org.apache.hadoop.util.ToolRunner;
24
25 public class WebUvCount extends Configured implements Tool{
26     //Mapper
27     public static class WebUvCountMapper extends Mapper<LongWritable,Text,Text,NullWritable>{
28         private Text mapoutputkey=new Text();
29         @Override
30         protected void cleanup(Context context) throws IOException,InterruptedException {
31
32         }
33         @Override
34         protected void setup(Context context) throws IOException,InterruptedException {
35
36         }
37
38         @Override
39         protected void map(LongWritable key, Text value, Context context)throws IOException, InterruptedException {
40             String lineValue=value.toString();
41             String[] strs=lineValue.split("\t");
42             if(30>strs.length){
43                 context.getCounter("webPvMapper_counter", "length_LT_30").increment(1L);
44                 return;
45             }
46             String guidValue=strs[5];    //
47             if(StringUtils.isEmpty(guidValue)){
48                 return;
49             }
50             String trackTimeValue=strs[17];
51             if(StringUtils.isEmpty(trackTimeValue)){
52                 return;
53             }
54             String dateVAlue=trackTimeValue.substring(0,13);//
55             String priviceIdValue=strs[23];
56
57             Integer priviceId=Integer.MAX_VALUE;
58             try{
59                 priviceId = Integer.valueOf(priviceIdValue);  //
60             }catch(Exception e){
61                 return;
62             }
63
64             mapoutputkey.set(dateVAlue+"\t"+priviceIdValue+"_"+guidValue);
65             context.write(mapoutputkey,NullWritable.get());
66         }
67
68     }
69
70
71
72     //Reducer
73     public static class WebUvCountReducer extends Reducer<Text,NullWritable,Text,IntWritable>{
74         private Text outputkey=new Text();
75         private Map<String,Integer> dateMap;
76         private IntWritable outputvalue=new IntWritable();
77
78         @Override
79         protected void setup(Context context)throws IOException, InterruptedException {
80             dateMap=new HashMap<String,Integer>();
81         }
82
83         @Override
84         protected void reduce(Text key, Iterable<NullWritable> values,Context context)throws IOException, InterruptedException {
85             String date=key.toString().split("_")[0];
86             if(dateMap.containsKey(date)){
87                 Integer previousUV=dateMap.get(date);
88                 Integer uv=previousUV+1;
89                 dateMap.put(date, uv);
90             }else{
91                 dateMap.put(date, 1);
92             }
93         }
94
95         @Override
96         protected void cleanup(Context context)throws IOException, InterruptedException {
97             Set<String> dateSet=dateMap.keySet();
98             for(String date:dateSet){
99                 Integer uv=dateMap.get(date);
100                 outputkey.set(date);
101                 outputvalue.set(uv);
102                 context.write(outputkey, outputvalue);
103             }
104         }
105
106
107     }
108
109     //Driver
110     public int run(String[] args)throws Exception{
111         Configuration conf=this.getConf();
112         Job job=Job.getInstance(conf,this.getClass().getSimpleName());
113         job.setJarByClass(WebUvCount.class);
114         //input
115         Path inpath=new Path(args[0]);
116         FileInputFormat.addInputPath(job, inpath);
117
118         //output
119         Path outpath=new Path(args[1]);
120         FileOutputFormat.setOutputPath(job, outpath);
121
122         //map
123         job.setMapperClass(WebUvCountMapper.class);
124         job.setMapOutputKeyClass(Text.class);
125         job.setMapOutputValueClass(NullWritable.class);
126
127         //shuffle
128
129         //reduce
130         job.setReducerClass(WebUvCountReducer.class);
131         job.setOutputKeyClass(Text.class);
132         job.setOutputValueClass(IntWritable.class);
133
134         //submit
135         boolean isSucess=job.waitForCompletion(true);
136         return isSucess?0:1;
137     }
138
139     //main
140     public static void main(String[] args)throws Exception{
141         Configuration conf=new Configuration();
142         //compress
143         conf.set("mapreduce.map.output.compress", "true");
144         conf.set("mapreduce.map.output.compress.codec", "org.apache.hadoop.io.compress.SnappyCodec");
145         args=new String[]{
146                 "hdfs://linux-hadoop01.ibeifeng.com:8020/user/beifeng/mapreduce/wordcount/inputWebData",
147                 "hdfs://linux-hadoop01.ibeifeng.com:8020/user/beifeng/mapreduce/wordcount/outputWebData6"
148         };
149         int status=ToolRunner.run(new WebUvCount(), args);
150         System.exit(status);
151     }
152
153 }
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: