您的位置:首页 > 运维架构 > Apache

Hadoop 分析Apache日志记录 URLlog日志分析

2016-02-16 20:27 309 查看
Hadoop 分析Apache日志记录 URLlog日志分析:

1、GET方式的URL出现的次数

2、PUT方式的URL出现的次数

数据文件

[root@master IMFdatatest]#hadoop dfs -cat /library/URLLog.txt

DEPRECATED: Use of this script to execute hdfs command is deprecated.

Instead use the hdfs command for it.

16/02/16 07:23:53 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable

127.0.0.1 - - [03/Jul/2015:23:36:38 +0800] "GET /course/detail/3.htm HTTP/1.0" 200 38435 0.038

182.131.89.195 - - [03/Jul/2015:23:37:43 +0800] "GET / HTTP/1.0" 301 - 0.000

127.0.0.1 - - [03/Jul/2015:23:38:27 +0800] "POST /service/notes/addViewTimes_23.htm HTTP/1.0" 200 2 0.003

127.0.0.1 - - [03/Jul/2015:23:39:03 +0800] "GET /html/notes/20140617/779.html HTTP/1.0" 200 69539 0.046

127.0.0.1 - - [03/Jul/2015:23:43:00 +0800] "GET /html/notes/20140318/24.html HTTP/1.0" 200 67171 0.049

127.0.0.1 - - [03/Jul/2015:23:43:59 +0800] "POST /service/notes/addViewTimes_779.htm HTTP/1.0" 200 1 0.003

127.0.0.1 - - [03/Jul/2015:23:45:51 +0800] "GET / HTTP/1.0" 200 70044 0.060

127.0.0.1 - - [03/Jul/2015:23:46:17 +0800] "GET /course/list/73.htm HTTP/1.0" 200 12125 0.010

127.0.0.1 - - [03/Jul/2015:23:46:58 +0800] "GET /html/notes/20140609/542.html HTTP/1.0" 200 94971 0.077

127.0.0.1 - - [03/Jul/2015:23:48:31 +0800] "POST /service/notes/addViewTimes_24.htm HTTP/1.0" 200 2 0.003

127.0.0.1 - - [03/Jul/2015:23:48:34 +0800] "POST /service/notes/addViewTimes_542.htm HTTP/1.0" 200 2 0.003

127.0.0.1 - - [03/Jul/2015:23:49:31 +0800] "GET /notes/index-top-3.htm HTTP/1.0" 200 53494 0.041

127.0.0.1 - - [03/Jul/2015:23:50:55 +0800] "GET /html/notes/20140609/544.html HTTP/1.0" 200 183694 0.076

127.0.0.1 - - [03/Jul/2015:23:53:32 +0800] "POST /service/notes/addViewTimes_544.htm HTTP/1.0" 200 2 0.004

127.0.0.1 - - [03/Jul/2015:23:54:53 +0800] "GET /html/notes/20140620/900.html HTTP/1.0" 200 151770 0.054

127.0.0.1 - - [03/Jul/2015:23:57:42 +0800] "GET /html/notes/20140620/872.html HTTP/1.0" 200 52373 0.034

127.0.0.1 - - [03/Jul/2015:23:58:17 +0800] "POST /service/notes/addViewTimes_900.htm HTTP/1.0" 200 2 0.003

127.0.0.1 - - [03/Jul/2015:23:58:51 +0800] "GET / HTTP/1.0" 200 70044 0.057[root@master IMFdatatest]#

运行结果

[root@master IMFdatatest]#hadoop dfs -cat /library/outputURLLog/part-r-00000

DEPRECATED: Use of this script to execute hdfs command is deprecated.

Instead use the hdfs command for it.

16/02/16 07:13:48 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable

GET / 3

GET /course/detail/3.htm 1

GET /course/list/73.htm 1

GET /html/notes/20140318/24.html 1

GET /html/notes/20140609/542.html 1

GET /html/notes/20140609/544.html 1

GET /html/notes/20140617/779.html 1

GET /html/notes/20140620/872.html 1

GET /html/notes/20140620/900.html 1

GET /notes/index-top-3.htm 1

POST /service/notes/addViewTimes_23.htm 1

POST /service/notes/addViewTimes_24.htm 1

POST /service/notes/addViewTimes_542.htm 1

POST /service/notes/addViewTimes_544.htm 1

POST /service/notes/addViewTimes_779.htm 1

POST /service/notes/addViewTimes_900.htm 1

源代码

package com.dtspark.hadoop.hellomapreduce;

import org.apache.hadoop.conf.Configuration;

import org.apache.hadoop.fs.Path;

import org.apache.hadoop.io.FloatWritable;

import org.apache.hadoop.io.IntWritable;

import org.apache.hadoop.io.LongWritable;

import org.apache.hadoop.io.Text;

import org.apache.hadoop.mapreduce.Job;

import org.apache.hadoop.mapreduce.Mapper;

import org.apache.hadoop.mapreduce.Reducer;

import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import org.apache.hadoop.util.GenericOptionsParser;

import java.io.IOException;

import java.util.Arrays;

import java.util.Iterator;

import java.util.StringTokenizer;

public class URLLog {

public static class DataMapper

extends Mapper<LongWritable, Text, Text, LongWritable>{

private LongWritable resultValue = new LongWritable(1);

private Text text =new Text();

public void map(LongWritable key, Text value, Context context

) throws IOException, InterruptedException {

System.out.println("Map Methond Invoked!!!");

String line =value.toString();

String result = handleLine(line);

if (result != null && result.length() > 0 ){

text.set(result);

context.write(text, resultValue);

}

}

private String handleLine(String line) {

StringBuffer buffer = new StringBuffer();

if(line.length()>0){

if(line.contains("GET")){

buffer.append(line.substring(line.indexOf("GET"),line.indexOf("HTTP/1.0")).trim());

}else if ( line.contains("POST")) {

buffer.append(line.substring(line.indexOf("POST"),line.indexOf("HTTP/1.0")).trim());

}

}

return buffer.toString();

}

}

public static class DataReducer

extends Reducer<Text,LongWritable,Text, LongWritable> {

private LongWritable totalresultValue = new LongWritable(1);

public void reduce(Text key, Iterable<LongWritable> values,

Context context

) throws IOException, InterruptedException {

System.out.println("Reduce Methond Invoked!!!" );

int total =0;

for (LongWritable item : values){

total += item.get();

}

totalresultValue.set(total);

context.write(key, totalresultValue);

}

}

public static void main(String[] args) throws Exception {

Configuration conf = new Configuration();

String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();

if (otherArgs.length < 2) {

System.err.println("Usage: URLLog <in> [<in>...] <out>");

System.exit(2);

}

Job job = Job.getInstance(conf, "URLLog");

job.setJarByClass(URLLog.class);

job.setMapperClass(DataMapper.class);

job.setReducerClass(DataReducer.class);

job.setOutputKeyClass(Text.class);

job.setOutputValueClass(LongWritable.class);

for (int i = 0; i < otherArgs.length - 1; ++i) {

FileInputFormat.addInputPath(job, new Path(otherArgs[i]));

}

FileOutputFormat.setOutputPath(job,

new Path(otherArgs[otherArgs.length - 1]));

System.exit(job.waitForCompletion(true) ? 0 : 1);

}

}

内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: