
Hadoop: A Second Program Operating on HDFS -> [Get DataNode Names] [Write a File] [WordCount]

2011-07-21 15:57
This program does three things: it copies a local file to hdfs:///copyOftest.c, retrieves the DataNode host names and writes them to a file on HDFS (hdfs:///output/listOfDatanode), and then runs a WordCount over hdfs:///copyOftest.c.
Unlike the WordCount in Hadoop's examples, which reads a file from the local file system, this job reads its input from HDFS.
package com.fora;

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class FileOperate {

    public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
        /* prepare the input file on HDFS and dump the DataNode list */
        init();

        Configuration conf = new Configuration();
        Job job = new Job(conf, "word count");
        job.setJarByClass(FileOperate.class);
        job.setMapperClass(TokenizerMapper.class);
        job.setCombinerClass(IntSumReducer.class);
        job.setReducerClass(IntSumReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        /* set the input and output paths: both live on HDFS, not the local file system */
        FileInputFormat.addInputPath(job, new Path("hdfs:///copyOftest.c"));
        FileOutputFormat.setOutputPath(job, new Path("hdfs:///wordcount"));

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }

    public static class TokenizerMapper extends Mapper<Object, Text, Text, IntWritable> {

        private final static IntWritable one = new IntWritable(1);
        private Text word = new Text();

        public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
            /* split each input line into tokens and emit <word, 1> */
            StringTokenizer itr = new StringTokenizer(value.toString());
            while (itr.hasMoreTokens()) {
                word.set(itr.nextToken());
                context.write(word, one);
            }
        }
    }

    public static class IntSumReducer extends Reducer<Text, IntWritable, Text, IntWritable> {

        private IntWritable result = new IntWritable();

        public void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            /* sum all counts emitted for the same word */
            int sum = 0;
            for (IntWritable val : values) {
                sum += val.get();
            }
            result.set(sum);
            context.write(key, result);
        }
    }

    public static void init() throws IOException {
        Configuration config = new Configuration();

        /* copy the local file /test.c to hdfs:///copyOftest.c */
        String srcFile = "/test.c";
        String dstFile = "hdfs:///copyOftest.c";
        FileSystem hdfs = FileSystem.get(config);
        hdfs.copyFromLocalFile(new Path(srcFile), new Path(dstFile));
        System.out.print("copy success!\n");

        /* print the block size of the copied file */
        FileStatus fileStatus = hdfs.getFileStatus(new Path(dstFile));
        System.out.println(fileStatus.getBlockSize());

        /* query the cluster for the list of DataNodes */
        DistributedFileSystem dfs = (DistributedFileSystem) FileSystem.get(config);
        DatanodeInfo[] dataNodeStats = dfs.getDataNodeStats();

        /* create a file on HDFS and write the DataNode host names into it
           (the names are written back to back, with no separator) */
        Path outputPath = new Path("hdfs:///output/listOfDatanode");
        FSDataOutputStream outputStream = hdfs.create(outputPath);
        for (int i = 0; i < dataNodeStats.length; i++) {
            String name = dataNodeStats[i].getHostName();
            System.out.println(name);
            outputStream.write(name.getBytes(), 0, name.length());
        }
        outputStream.close();
    }
}
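The program above only ever writes to HDFS through the Java API. As a complement, here is a minimal sketch (not part of the original program) of reading the DataNode list back from HDFS with FSDataInputStream, the read-side counterpart of the FSDataOutputStream used in init(). The path hdfs:///output/listOfDatanode is assumed to match the one written above.

package com.fora;

import java.io.BufferedReader;
import java.io.InputStreamReader;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ReadDatanodeList {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem hdfs = FileSystem.get(conf);

        /* open the file that init() wrote (assumed path) and read it as text */
        Path path = new Path("hdfs:///output/listOfDatanode");
        FSDataInputStream in = hdfs.open(path);
        BufferedReader reader = new BufferedReader(new InputStreamReader(in));
        String line;
        while ((line = reader.readLine()) != null) {
            /* init() writes the host names with no separator, so they come back
               concatenated on a single line */
            System.out.println(line);
        }
        reader.close();
    }
}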
Run output:
[root@master bin]# hadoop jar HDFS.jar com.fora.FileOperate
copy success!
67108864
master
slave1
11/07/21 15:45:23 WARN mapred.JobClient: Use GenericOptionsParser for parsing the arguments. Applications should implement Tool for the same.
11/07/21 15:45:23 INFO input.FileInputFormat: Total input paths to process : 1
11/07/21 15:45:23 INFO mapred.JobClient: Running job: job_201107210917_0003
11/07/21 15:45:24 INFO mapred.JobClient:  map 0% reduce 0%
11/07/21 15:45:31 INFO mapred.JobClient:  map 100% reduce 0%
11/07/21 15:45:43 INFO mapred.JobClient:  map 100% reduce 100%
11/07/21 15:45:45 INFO mapred.JobClient: Job complete: job_201107210917_0003
11/07/21 15:45:45 INFO mapred.JobClient: Counters: 17
11/07/21 15:45:45 INFO mapred.JobClient:   Job Counters
11/07/21 15:45:45 INFO mapred.JobClient:     Launched reduce tasks=1
11/07/21 15:45:45 INFO mapred.JobClient:     Rack-local map tasks=1
11/07/21 15:45:45 INFO mapred.JobClient:     Launched map tasks=1
11/07/21 15:45:45 INFO mapred.JobClient:   FileSystemCounters
11/07/21 15:45:45 INFO mapred.JobClient:     FILE_BYTES_READ=228
11/07/21 15:45:45 INFO mapred.JobClient:     HDFS_BYTES_READ=126
11/07/21 15:45:45 INFO mapred.JobClient:     FILE_BYTES_WRITTEN=488
11/07/21 15:45:45 INFO mapred.JobClient:     HDFS_BYTES_WRITTEN=146
11/07/21 15:45:45 INFO mapred.JobClient:   Map-Reduce Framework
11/07/21 15:45:45 INFO mapred.JobClient:     Reduce input groups=19
11/07/21 15:45:45 INFO mapred.JobClient:     Combine output records=19
11/07/21 15:45:45 INFO mapred.JobClient:     Map input records=8
11/07/21 15:45:45 INFO mapred.JobClient:     Reduce shuffle bytes=228
11/07/21 15:45:45 INFO mapred.JobClient:     Reduce output records=19
11/07/21 15:45:45 INFO mapred.JobClient:     Spilled Records=38
11/07/21 15:45:45 INFO mapred.JobClient:     Map output bytes=211
11/07/21 15:45:45 INFO mapred.JobClient:     Combine input records=22
11/07/21 15:45:45 INFO mapred.JobClient:     Map output records=22
11/07/21 15:45:45 INFO mapred.JobClient:     Reduce input records=19
[root@master bin]# hadoop dfs -ls /
Found 6 items
-rw-r--r--   1 root supergroup        126 2011-07-21 15:45 /copyOftest.c
-rw-r--r--   1 root supergroup         26 2011-07-21 15:16 /listOfDatanode
drwxr-xr-x   - root supergroup          0 2011-07-21 15:45 /output
-rw-r--r--   1 root supergroup      10400 2011-07-20 16:51 /test.txt
drwxr-xr-x   - root supergroup          0 2011-07-20 16:09 /tmp
drwxr-xr-x   - root supergroup          0 2011-07-21 15:45 /wordcount
[root@master bin]# hadoop dfs -ls /wordcount
Found 2 items
drwxr-xr-x   - root supergroup          0 2011-07-21 15:45 /wordcount/_logs
-rw-r--r--   1 root supergroup        146 2011-07-21 15:45 /wordcount/part-r-00000
[root@master bin]# hadoop dfs -cat /wordcount/part-r-00000
2011-07-21      1
File    1
Hadoop  1
System! 1
a       1
aimed   1
at      1
coping  1
file    3
from    1
from:fora       1
is      1
local   1
system  1
thank   1
the     1
this    2
to      1
you!    1
[root@master bin]#
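The DataNode list that init() writes can be inspected the same way as the WordCount output, assuming the path used in the code above:

[root@master bin]# hadoop dfs -cat /output/listOfDatanode

Because the host names are written without separators, they print as a single concatenated string.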
  
                                            