简单 实现CombineFileInputFormat
2015-11-06 22:24
381 查看
import java.io.DataOutput; import java.io.IOException; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configured; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.Text; import org.apache.hadoop.io.Writable; import org.apache.hadoop.mapreduce.InputSplit; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.Mapper; import org.apache.hadoop.mapreduce.RecordReader; import org.apache.hadoop.mapreduce.Reducer; import org.apache.hadoop.mapreduce.Reducer.Context; import org.apache.hadoop.mapreduce.TaskAttemptContext; import org.apache.hadoop.mapreduce.lib.input.CombineFileInputFormat; import org.apache.hadoop.mapreduce.lib.input.CombineFileRecordReader; import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit; import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; import org.apache.hadoop.mapreduce.lib.input.FileSplit; import org.apache.hadoop.mapreduce.lib.input.LineRecordReader; import org.apache.hadoop.mapreduce.lib.input.SequenceFileRecordReader; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; import org.apache.hadoop.util.ReflectionUtils; import org.apache.hadoop.util.Tool; import org.apache.hadoop.util.ToolRunner; public class TestCombine extends Configured implements Tool { private static class ProvinceMapper extends Mapper<Object, Text, Text, Text> { @Override protected void map(Object key, Text value, Context context) throws IOException, InterruptedException { System.out.println("value : " + value + " Context " + context); context.write(value, value); } } private static class ProvinceReducer extends Reducer<Text, Text, Text, Text> { @Override protected void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException { for (Text va : values) { System.out.println("reduce " + key); context.write(key, key); } } } public static class CombineSequenceFileInputFormat<K, V> extends CombineFileInputFormat<K, 
V> { @SuppressWarnings({ "unchecked", "rawtypes" }) @Override public RecordReader<K, V> createRecordReader(InputSplit split, TaskAttemptContext context) throws IOException { return new CombineFileRecordReader((CombineFileSplit)split, context, CombineLineRecordReader.class); } } public static class CombineLineRecordReader<K, V> extends RecordReader<K, V> { private CombineFileSplit split; private TaskAttemptContext context; private int index; private RecordReader<K, V> rr; @SuppressWarnings("unchecked") public CombineLineRecordReader(CombineFileSplit split, TaskAttemptContext context, Integer index) throws IOException, InterruptedException { this.index = index; this.split = (CombineFileSplit) split; this.context = context; this.rr = (RecordReader<K, V>) ReflectionUtils.newInstance(LineRecordReader.class, context.getConfiguration()); } @SuppressWarnings("unchecked") @Override public void initialize(InputSplit curSplit, TaskAttemptContext curContext) throws IOException, InterruptedException { this.split = (CombineFileSplit) curSplit; this.context = curContext; if (null == rr) { rr = ReflectionUtils.newInstance(SequenceFileRecordReader.class, context.getConfiguration()); } FileSplit fileSplit = new FileSplit(this.split.getPath(index), this.split.getOffset(index), this.split.getLength(index), this.split.getLocations()); this.rr.initialize(fileSplit, this.context); } @Override public float getProgress() throws IOException, InterruptedException { return rr.getProgress(); } @Override public void close() throws IOException { if (null != rr) { rr.close(); rr = null; } } @Override public K getCurrentKey() throws IOException, InterruptedException { return rr.getCurrentKey(); } @Override public V getCurrentValue() throws IOException, InterruptedException { return rr.getCurrentValue(); } @Override public boolean nextKeyValue() throws IOException, InterruptedException { return rr.nextKeyValue(); } } public int run(String[] args) throws Exception { Configuration conf = new 
Configuration(); Job job = new Job(conf); job.setJobName("TestCombine"); job.setJarByClass(TestCombine.class); job.setMapperClass(ProvinceMapper.class); job.setReducerClass(ProvinceReducer.class); job.setInputFormatClass(CombineSequenceFileInputFormat.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); String inpath = "/home/hadoop/tmp/combine"; String outpath = "/home/hadoop/tmp/combineout"; Path p = new Path(outpath); FileSystem fs = FileSystem.get(conf); if (fs.exists(p)){ fs.delete(p); } FileInputFormat.addInputPaths(job, inpath); FileOutputFormat.setOutputPath(job, p); return job.waitForCompletion(true) ? 0 : 1; } public static void main(String[] args) throws Exception { int ret = ToolRunner.run(new TestCombine(), args); System.exit(ret); } }
View Code
相关文章推荐
- 深度理解依赖注入(Dependence Injection)
- [Android基础]BroadcastReceiver
- Unity3D ref的用法 out的介绍
- OPENCV学习杂录
- (转)一个例子让你了解Java反射机制
- Android之startService()和bindService()区别
- P2P(点到点-点对点)通信实现实例
- 进程控制理论
- Dreamweaver常用代码(转)
- Flume入门
- C#判断操作系统类型总结
- Android在处理图片减少出现OOM的方式
- 查看Android Developer文档
- mysql触发器
- lintcode 中等题：Segment Tree Build II
- 传智播客 Java基础 day07笔记
- hdoj GCD 2588 (欧拉函数)
- Android之路——第一步:Activity之间切换(Intent、点击事件)
- SDWebImage中的- (void)sd_setImageWithURL:(NSURL *)url方法分析
- war文件