
Hadoop WordCount Source Code

Prerequisite: the TokenizerMapper, IntSumReducer, Text, and IntWritable classes must be available in the same package/directory (a sketch of the two custom classes follows the driver code below).

package org.apache.hadoop.examples;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

public class WordCount {

  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // Separate generic Hadoop options (-D, -fs, ...) from the job's own arguments.
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length < 2) {
      System.err.println("Usage: wordcount <in> [<in>...] <out>");
      System.exit(2);
    }

    // On Hadoop 2.x and later, Job.getInstance(conf, "word count") is the
    // preferred replacement for this (now deprecated) constructor.
    Job job = new Job(conf, "word count");
    job.setJarByClass(WordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    // The reducer doubles as a combiner: partial word counts can be
    // pre-aggregated on the map side because addition is associative.
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    // All arguments except the last are input paths; the last is the output directory.
    for (int i = 0; i < otherArgs.length - 1; ++i) {
      FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
    }
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[otherArgs.length - 1]));

    // Submit the job and block until it finishes; exit 0 on success, 1 on failure.
    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
}
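
The driver above only wires the job together; TokenizerMapper and IntSumReducer are expected to exist alongside it. For reference, here is a minimal sketch of those two classes consistent with the standard Hadoop WordCount example. Treat it as an illustration rather than the exact code the author compiled; in practice each class would sit in its own .java file in the same package.

package org.apache.hadoop.examples;

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;

// Splits each input line into whitespace-delimited tokens and emits (word, 1).
class TokenizerMapper extends Mapper<Object, Text, Text, IntWritable> {
  private static final IntWritable one = new IntWritable(1);
  private final Text word = new Text();

  @Override
  public void map(Object key, Text value, Context context)
      throws IOException, InterruptedException {
    StringTokenizer itr = new StringTokenizer(value.toString());
    while (itr.hasMoreTokens()) {
      word.set(itr.nextToken());
      context.write(word, one);
    }
  }
}

// Sums all counts for a word; reused as the combiner in the driver above.
class IntSumReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
  private final IntWritable result = new IntWritable();

  @Override
  public void reduce(Text key, Iterable<IntWritable> values, Context context)
      throws IOException, InterruptedException {
    int sum = 0;
    for (IntWritable val : values) {
      sum += val.get();
    }
    result.set(sum);
    context.write(key, result);
  }
}

Once packaged into a jar, the job is typically launched with something like: hadoop jar wordcount.jar org.apache.hadoop.examples.WordCount /input /output (the jar name and HDFS paths here are illustrative).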