hadoop WordCount源码
2016-07-13 17:20
232 查看
前提为:TokenizerMapper、IntSumReducer、Text、IntWritable 类在同一个目录下
package org.apache.hadoop.examples;
import java.io.PrintStream;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
public class WordCount
{
  /**
   * Driver for the classic MapReduce word-count example.
   *
   * <p>Usage: {@code wordcount <in> [<in>...] <out>} — one or more input
   * paths followed by a single output directory (must not already exist).
   * Exits with 0 on job success, 1 on job failure, 2 on bad arguments.
   *
   * @param args generic Hadoop options followed by input path(s) and output path
   * @throws Exception if job setup or submission fails
   */
  public static void main(String[] args)
    throws Exception
  {
    Configuration conf = new Configuration();
    // Strip generic Hadoop options (-D, -fs, ...) so only the job's own
    // positional arguments remain.
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length < 2) {
      System.err.println("Usage: wordcount <in> [<in>...] <out>");
      System.exit(2);
    }
    // Job.getInstance replaces the deprecated Job(Configuration, String)
    // constructor (deprecated since Hadoop 2.x).
    Job job = Job.getInstance(conf, "word count");
    job.setJarByClass(WordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    // The reducer doubles as the combiner: summing counts is associative
    // and commutative, so partial aggregation on the map side is safe.
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    // Every argument except the last is an input path.
    for (int i = 0; i < otherArgs.length - 1; ++i) {
      FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
    }
    // The last argument is the output directory.
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[otherArgs.length - 1]));
    // Block until completion; map job status to the process exit code.
    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
}
package org.apache.hadoop.examples;
import java.io.PrintStream;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
public class WordCount
{
  /**
   * Driver for the classic MapReduce word-count example.
   *
   * <p>Usage: {@code wordcount <in> [<in>...] <out>} — one or more input
   * paths followed by a single output directory (must not already exist).
   * Exits with 0 on job success, 1 on job failure, 2 on bad arguments.
   *
   * @param args generic Hadoop options followed by input path(s) and output path
   * @throws Exception if job setup or submission fails
   */
  public static void main(String[] args)
    throws Exception
  {
    Configuration conf = new Configuration();
    // Strip generic Hadoop options (-D, -fs, ...) so only the job's own
    // positional arguments remain.
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length < 2) {
      System.err.println("Usage: wordcount <in> [<in>...] <out>");
      System.exit(2);
    }
    // Job.getInstance replaces the deprecated Job(Configuration, String)
    // constructor (deprecated since Hadoop 2.x).
    Job job = Job.getInstance(conf, "word count");
    job.setJarByClass(WordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    // The reducer doubles as the combiner: summing counts is associative
    // and commutative, so partial aggregation on the map side is safe.
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    // Every argument except the last is an input path.
    for (int i = 0; i < otherArgs.length - 1; ++i) {
      FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
    }
    // The last argument is the output directory.
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[otherArgs.length - 1]));
    // Block until completion; map job status to the process exit code.
    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
}
相关文章推荐
- 通过DeveloperApi获取spark程序执行进度及异常
- hadoop format后启动不了
- grep设置查找颜色高亮
- Xshell use
- file_operation
- 服务架构演进
- 我与即时通讯 - 重构之路
- [笔记]linux下和windows下的 创建线程函数
- Hadoop家族学习路线图
- Java——调用Linux和Windows下命令行执行程序
- epoll系列系统调用
- CentOS7 卡在开机界面进不去登录界面
- apache commons fileupload 依赖信息(2016-07-01更新)
- Nginx源码分析 - 主流程篇 - 模块的初始化
- J2EE 1.3 Tomcat的安装
- TypeError: invalid 'in' operand obj
- PopUpWindow使用详解(二)——进阶及答疑
- Android 蓝牙架构学习 博客链接
- Perf -- Linux下的系统性能调优神器
- 架构设计:前后端分离之Web前端架构设计