您的位置:首页 > 产品设计 > UI/UE

SequenceFileInputFormat SequenceFileOutputFormat例子

2013-11-30 21:00 387 查看
1)输出格式为SequenceFileOutputFormat

public class SequenceFileOutputFormatDemo extends Configured implements Tool {

    public static class SequenceFileOutputFormatDemoMapper extends

            Mapper<LongWritable, Text, LongWritable, Text> {

        public void map(LongWritable key, Text value, Context context)

                throws IOException, InterruptedException {

            context.write(key, value);

        }

    }

    public static void main(String[] args) throws Exception {

        int nRet = ToolRunner.run(new Configuration(),

                new SequenceFileOutputFormatDemo(), args);

        System.out.println(nRet);

    }

    @Override

    public int run(String[] args) throws Exception {

        // TODO Auto-generated method stub

        Configuration conf = getConf();

        Job job = new Job(conf, "sequence file output demo ");

        job.setJarByClass(SequenceFileOutputFormatDemo.class);

        FileInputFormat.addInputPaths(job, args[0]);

        HdfsUtil.deleteDir(args[1]);

        job.setMapperClass(SequenceFileOutputFormatDemoMapper.class);

        // 因为没有reducer,所以map的输出为job的最后输出,所以需要把outputkeyclass

        // outputvalueclass设置为与map的输出一致

        job.setOutputKeyClass(LongWritable.class);

        job.setOutputValueClass(Text.class);

        // 如果不希望有reducer,设置为0

        job.setNumReduceTasks(0);

        // 设置输出类

        job.setOutputFormatClass(SequenceFileOutputFormat.class);

        // 设置sequecnfile的格式,对于sequencefile的输出格式,有多种组合方式,

        //从下面的模式中选择一种,并将其余的注释掉

        // 组合方式1:不压缩模式

        SequenceFileOutputFormat.setOutputCompressionType(job,

                CompressionType.NONE);

        // 组合方式2:record压缩模式,并指定采用的压缩方式 :默认、gzip压缩等

//        SequenceFileOutputFormat.setOutputCompressionType(job,

//                CompressionType.RECORD);

//        SequenceFileOutputFormat.setOutputCompressorClass(job,

//                DefaultCodec.class);

        // 组合方式3:block压缩模式,并指定采用的压缩方式 :默认、gzip压缩等

//        SequenceFileOutputFormat.setOutputCompressionType(job,

//                CompressionType.BLOCK);

//        SequenceFileOutputFormat.setOutputCompressorClass(job,

//                DefaultCodec.class);

        SequenceFileOutputFormat.setOutputPath(job, new Path(args[1]));

        int result = job.waitForCompletion(true) ? 0 : 1;

        return result;

    }

}

2)输入格式为SequenceFileInputFormat

public class SequenceFileInputFormatDemo extends Configured implements Tool {

    public static class SequenceFileInputFormatDemoMapper extends

            Mapper<LongWritable, Text, Text, NullWritable> {

        public void map(LongWritable key, Text value, Context context)

                throws IOException, InterruptedException {

            System.out.println("key:   " + key.toString() + "  ;  value: "

                    + value.toString());

        }

    }

    public static void main(String[] args) throws Exception {

        int nRet = ToolRunner.run(new Configuration(),

                new SequenceFileInputFormatDemo(), args);

        System.out.println(nRet);

    }

    @Override

    public int run(String[] args) throws Exception { 

        Configuration conf = getConf();

        Job job = new Job(conf, "sequence file input demo");

        job.setJarByClass(SequenceFileInputFormatDemo.class);

        FileInputFormat.addInputPaths(job, args[0]);

        HdfsUtil.deleteDir(args[1]);

        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        job.setMapperClass(SequenceFileInputFormatDemoMapper.class);

        job.setNumReduceTasks(1);

        job.setOutputKeyClass(Text.class);

        job.setOutputValueClass(NullWritable.class);

        job.setMapOutputKeyClass(Text.class);

        job.setMapOutputValueClass(Text.class);

        job.setInputFormatClass(SequenceFileInputFormat.class);

        int result = job.waitForCompletion(true) ? 0 : 1;

        return result;

    }

}
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: