wordcount的mr java代码
2017-04-10 14:11
411 查看
package wordcount;

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

/**
 * Classic Hadoop MapReduce word-count job: reads text files, tokenizes each
 * line on whitespace, and emits the total occurrence count of every word.
 */
public class WordCount {

    /**
     * Mapper: splits each input line into whitespace-delimited tokens and
     * emits {@code (word, 1)} for every token.
     *
     * <p>INPUT:  key = byte offset (unused), value = one line of text.
     * <p>OUTPUT: key = word, value = the constant 1.
     */
    public static class TokenizerMapper
            extends Mapper<Object, Text, Text, IntWritable> {

        // Constant "1"; reused across calls to avoid per-token allocation.
        private static final IntWritable ONE = new IntWritable(1);
        // Reusable output key, repopulated for each token.
        private final Text word = new Text();

        @Override
        public void map(Object key, Text value, Context context)
                throws IOException, InterruptedException {
            // StringTokenizer's default delimiters are whitespace characters.
            StringTokenizer itr = new StringTokenizer(value.toString());
            while (itr.hasMoreTokens()) {
                word.set(itr.nextToken());
                context.write(word, ONE);
            }
        }
    }

    /**
     * Reducer (also used as the combiner): sums the partial counts of each
     * word into its final total.
     *
     * <p>INPUT:  key = word, values = partial counts.
     * <p>OUTPUT: key = word, value = summed count.
     */
    public static class IntSumReducer
            extends Reducer<Text, IntWritable, Text, IntWritable> {

        // Reusable output value, repopulated for each key.
        private final IntWritable result = new IntWritable();

        @Override
        public void reduce(Text key, Iterable<IntWritable> values,
                Context context) throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable val : values) {
                sum += val.get();
            }
            result.set(sum);
            context.write(key, result);
        }
    }

    /**
     * Job driver.
     *
     * @param args after generic Hadoop options are stripped, exactly two
     *             arguments are required: the input path and the output path
     * @throws Exception if job setup or execution fails
     */
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // GenericOptionsParser consumes standard Hadoop flags (-D, -files, ...)
        // and leaves only the application-specific arguments.
        String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
        if (otherArgs.length != 2) {
            System.err.println("Usage: WordCount <in> <out>");
            System.exit(2);
        }

        Job job = Job.getInstance(conf, "Word count");
        job.setJarByClass(WordCount.class);
        job.setMapperClass(TokenizerMapper.class);
        // The combiner runs the reducer logic locally on each mapper's output,
        // shrinking the data shuffled across the network. Safe here because
        // summation is associative and commutative.
        job.setCombinerClass(IntSumReducer.class);
        job.setReducerClass(IntSumReducer.class);
        // Output key/value types apply to both map and reduce output here,
        // since the two stages use identical types.
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
        FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
        // Block until the job finishes; exit 0 on success, 1 on failure.
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
相关文章推荐
- hadoop学习之WordCount.java代码解读
- hadoop学习之WordCount.java代码解读
- spark streaming 接收 kafka 数据java代码WordCount示例
- 010-spark standalone模式JAVA版本WordCount代码
- spark streaming 接收 kafka 数据java代码WordCount示例
- spark streaming 接收 kafka 数据java代码WordCount示例
- java编写的hadoop wordcount,单MR任务实现按照词频排序输出结果
- hadoop学习之WordCount.java代码解读
- hadoop学习之WordCount.java代码解读
- 分别用Java、Scala、spark-shell开发wordcount程序及测试代码
- Java根据word模板生成word文档之后台解析和实现及部分代码(三)F
- Java根据word模板生成word文档之后台解析和实现及部分代码(一)
- java在线预览txt、word、ppt、excel,pdf代码
- wordcount.java
- Java根据word模板生成word文档之后台解析和实现及部分代码(三)G
- java在线预览txt、word、ppt、excel,pdf代码
- Hadoop集群环境测试-WordCount.java-上篇
- 解决Eclipse中运行WordCount出现 java.lang.ClassNotFoundException: org.apache.hadoop.examples.WordCount$TokenizerMapper问题【转】
- wordcount.java
- Java创建RTF格式的WORD文件代码示例