您的位置:首页 > 编程语言 > Java开发

eclipse 编写mapreduce程序(wordCount)

2017-09-19 13:24 381 查看
package com.hadoop.senior.mapreduce;

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class wordCount {

//step 1: Map class
public static class WordCountMapper extends
Mapper<LongWritable,Text,Text,IntWritable>{

private Text mapOutputkey = new Text();
private final static IntWritable mapOutputvalue= new IntWritable(1);

@Override
public void map(LongWritable key, Text value, Context context)
throws IOException, InterruptedException {
//line value
String lineValue= value.toString();

//split
//String[] strs =lineValue.split(" ");
StringTokenizer stringTokenizer= new StringTokenizer(lineValue);

//Iterator
while(stringTokenizer.hasMoreTokens()){
//get wordvalue
String wordValue=stringTokenizer.nextToken();
//set value
mapOutputkey.set(wordValue);
context.write(mapOutputkey,mapOutputvalue);
}
}
}

//step 2: Reduce class
public static class WordCountReducer extends
Reducer<Text,IntWritable,Text,IntWritable>{

private IntWritable outputValue = new IntWritable();

@Override
public void reduce(Text key, Iterable<IntWritable> values,Context context)
throws IOException, InterruptedException {
//sum tmp
int sum =0;
for(IntWritable value:values){
sum+=value.get();
}
//set value
outputValue.set(sum);
//output
context.write(key, outputValue);
}
}

//step 3: Driver component job
public  int run(String[] args) throws IOException, ClassNotFoundException, InterruptedException{
//1 get Configuration
Configuration configuration= new Configuration();
//create job
Job job = Job.getInstance(configuration, this.getClass().getSimpleName());

//run jar
job.setJarByClass(this.getClass());

//3 set Job
//input	->map 	->reduce	->output
//3.1 input
Path inpath = new Path(args[1]);
FileInputFormat.addInputPath(job, inpath);
//3.2 map
job.setMapperClass(WordCountMapper.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(IntWritable.class);
//3.3 reduce
job.setReducerClass(WordCountReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);
//3.4 output
Path outPath = new Path(args[2]);
FileOutputFormat.setOutputPath(job, outPath);
//3.5 submit job
boolean isSuccess =job.waitForCompletion(true);
return isSuccess ? 0 : 1;
}

//run program
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
//String[] args2 = new String[3];
//String[0]="wordCount";//类名
        //args2[1]="/usr/css/mapreduce/wordcount/input";
        //args2[2]="/usr/css/mapreduce/wordcount/output3";
              int status= new wordCount().run(args);
System.exit(status);
}

}


程序可以在本地或 yarn 上运行

打包运行在yarn上运行

将程序打包参考链接:
http://jingyan.baidu.com/article/5bbb5a1b280d0113eba179ce.html
在hadoop上运行

1.jar包打包位置/opt/modules/hadoop-2.5.0-cdh5.3.6/jars(jar包命名example-stududy.jar)

2.运行这个wordCount程序要启动hadoop的集群

3.[root@VTU-01 hadoop-2.5.0-cdh5.3.6]# bin/yarn jar jars/example-stududy.jar wordCount /usr/css/mapreduce/wordcount/input /usr/css/mapreduce/wordcount/output

wordCount 是类的名字

/usr/css/mapreduce/wordcount/input 输入路径

/usr/css/mapreduce/wordcount/output 输出路径 不能存在

上述程序运行在yarn上,路径也是hdfs文件系统路径

/usr/css/mapreduce/wordcount/input 路径下要有写有单词的文件
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: