eclipse 编写mapreduce程序(wordCount)
2017-09-19 13:24
381 查看
package com.hadoop.senior.mapreduce; import java.io.IOException; import java.util.StringTokenizer; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.Mapper; import org.apache.hadoop.mapreduce.Reducer; import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; public class wordCount { //step 1: Map class public static class WordCountMapper extends Mapper<LongWritable,Text,Text,IntWritable>{ private Text mapOutputkey = new Text(); private final static IntWritable mapOutputvalue= new IntWritable(1); @Override public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { //line value String lineValue= value.toString(); //split //String[] strs =lineValue.split(" "); StringTokenizer stringTokenizer= new StringTokenizer(lineValue); //Iterator while(stringTokenizer.hasMoreTokens()){ //get wordvalue String wordValue=stringTokenizer.nextToken(); //set value mapOutputkey.set(wordValue); context.write(mapOutputkey,mapOutputvalue); } } } //step 2: Reduce class public static class WordCountReducer extends Reducer<Text,IntWritable,Text,IntWritable>{ private IntWritable outputValue = new IntWritable(); @Override public void reduce(Text key, Iterable<IntWritable> values,Context context) throws IOException, InterruptedException { //sum tmp int sum =0; for(IntWritable value:values){ sum+=value.get(); } //set value outputValue.set(sum); //output context.write(key, outputValue); } } //step 3: Driver component job public int run(String[] args) throws IOException, ClassNotFoundException, InterruptedException{ //1 get Configuration Configuration configuration= new Configuration(); //create job Job job = Job.getInstance(configuration, 
this.getClass().getSimpleName()); //run jar job.setJarByClass(this.getClass()); //3 set Job //input ->map ->reduce ->output //3.1 input Path inpath = new Path(args[1]); FileInputFormat.addInputPath(job, inpath); //3.2 map job.setMapperClass(WordCountMapper.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(IntWritable.class); //3.3 reduce job.setReducerClass(WordCountReducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); //3.4 output Path outPath = new Path(args[2]); FileOutputFormat.setOutputPath(job, outPath); //3.5 submit job boolean isSuccess =job.waitForCompletion(true); return isSuccess ? 0 : 1; } //run program public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException { //String[] args2 = new String[3]; //String[0]="wordCount";//类名 //args2[1]="/usr/css/mapreduce/wordcount/input"; //args2[2]="/usr/css/mapreduce/wordcount/output3"; int status= new wordCount().run(args); System.exit(status); } }
程序可以在本地模式或 YARN 集群上运行
打包后在 YARN 上运行
将程序打包参考链接:
http://jingyan.baidu.com/article/5bbb5a1b280d0113eba179ce.html
在hadoop上运行
1.jar包打包位置/opt/modules/hadoop-2.5.0-cdh5.3.6/jars(jar包命名example-stududy.jar)
2.运行这个wordCount程序要启动hadoop的集群
3.[root@VTU-01 hadoop-2.5.0-cdh5.3.6]# bin/yarn jar jars/example-stududy.jar wordCount /usr/css/mapreduce/wordcount/input /usr/css/mapreduce/wordcount/output
wordCount 是类的名字
/usr/css/mapreduce/wordcount/input 输入路径
/usr/css/mapreduce/wordcount/output 输出路径(运行前不能已存在,否则作业会直接报错退出)
上述程序运行在yarn上,路径也是hdfs文件系统路径
/usr/css/mapreduce/wordcount/input 路径下要有写有单词的文件
相关文章推荐
- Ubuntu14.04中eclipse下编写mapreduce例子程序WordCount
- 用Python编写MapReduce的WordCount实例程序
- 第一个MapReduce程序----wordcount(编写并运行)
- win7下安装hadoop 2.6.0 的eclipse插件并编写运行WordCount程序
- scala-eclipse 编写spark简单程序 WordCount
- MapReduce编写wordcount程序代码实现
- 在eclipse上运行MapReduce的wordcount程序所遇到的问题
- Hadoop之Mapreduce------>入门级程序WordCount代码编写
- Eclipse下运行hadoop自带的mapreduce程序--wordcount
- hadoop学习之HDFS(2.1):linux下eclipse中配置hadoop-mapreduce开发环境并运行WordCount.java程序
- 如何编写最简单的MapReduce之WordCount程序
- mapreduce程序编写(WordCount)
- 用eclipse编写MapReduce程序的基本要点
- Linux下使用Eclipse编写MapReduce程序的配置
- Hadoop(4-2)-MapReduce程序案例-WordCount(Intellij Idea环境)
- eclipse添加hadoop插件+WordCount程序
- Eclipse+Maven+Scala Project+Spark | 编译并打包wordcount程序
- Hadoop 用Eclipse来Mapreduce WordCount实战(1)