windows下idea中搭建hadoop开发环境,向远程hadoop集群提交mapreduce任务
2017-11-28 15:28
836 查看
1.下载hadoop2.6.0-cdh5.6.1,解压并安装
公司用的hadoop集群版本是hadoop2.6.0-cdh5.6.1,防止版本冲突,所有的hadoop版本号都用了这个。下载地址:http://archive.cloudera.com/cdh5/cdh/5/hadoop-2.6.0-cdh5.6.1.tar.gz
解压,放在D:\software\hadoop-2.6.0
配置环境变量:
HADOOP_HOME=D:\software\hadoop-2.6.0
HADOOP_BIN_PATH=%HADOOP_HOME%\bin
HADOOP_PREFIX=D:\software\hadoop-2.6.0
在Path环境变量后追加 ;%HADOOP_HOME%\bin
下载winutils.exe 文件,放在D:\software\hadoop-2.6.0\bin目录下,否则运行程序时会报 "Could not locate executable null\bin\winutils.exe in the Hadoop binaries" 错误
2.编写代码
代码仿照《Hadoop权威指南》一书第二章的示例代码编写。下面是 pom.xml 文件:
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>com.cyq</groupId>
    <artifactId>HadoopDemo</artifactId>
    <version>1.0-SNAPSHOT</version>

    <!-- Pin all Hadoop artifacts to the cluster's CDH version to avoid version conflicts. -->
    <dependencyManagement>
        <dependencies>
            <dependency>
                <groupId>jdk.tools</groupId>
                <artifactId>jdk.tools</artifactId>
                <version>1.8</version>
            </dependency>
            <dependency>
                <groupId>org.apache.hadoop</groupId>
                <artifactId>hadoop-hdfs</artifactId>
                <version>2.6.0-cdh5.6.1</version>
            </dependency>
            <dependency>
                <groupId>org.apache.hadoop</groupId>
                <artifactId>hadoop-auth</artifactId>
                <version>2.6.0-cdh5.6.1</version>
            </dependency>
            <dependency>
                <groupId>org.apache.hadoop</groupId>
                <artifactId>hadoop-common</artifactId>
                <version>2.6.0-cdh5.6.1</version>
            </dependency>
            <dependency>
                <groupId>org.apache.hadoop</groupId>
                <artifactId>hadoop-core</artifactId>
                <version>2.6.0-mr1-cdh5.6.1</version>
            </dependency>
            <dependency>
                <groupId>junit</groupId>
                <artifactId>junit-dep</artifactId>
                <version>4.8.2</version>
            </dependency>
        </dependencies>
    </dependencyManagement>

    <!-- Versions for the Hadoop artifacts below come from dependencyManagement above. -->
    <dependencies>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-hdfs</artifactId>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-auth</artifactId>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-common</artifactId>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-core</artifactId>
        </dependency>
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>4.10</version>
            <scope>test</scope>
        </dependency>
    </dependencies>

    <build>
        <plugins>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <version>2.3.2</version>
                <configuration>
                    <source>1.6</source>
                    <target>1.6</target>
                </configuration>
            </plugin>
        </plugins>
    </build>
</project>
map任务
package com.cyq; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Mapper; /** * @author Administrator * @date 2017/11/27 **/ public class MapTemp extends Mapper<LongWritable,Text,Text,IntWritable> { @Override public void map(LongWritable longWritable, Text text, Context context) { // 读取文件内容 String line = text.toString(); //读取年份 String year =line.substring(0,4); int quality=Integer.parseInt(line.substring(5,7)); try { context.write(new Text(year),new IntWritable(quality)); }catch (Exception e){ } } }
reduce任务
package com.cyq;/** * Created by Administrator on 2017/11/27. */ import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Reducer; import java.io.IOException; import java.util.Iterator; /** * @author Administrator * @date 2017/11/27 **/ public class Reduce extends Reducer<Text,IntWritable,Text,IntWritable> { @Override public void reduce(Text text, Iterable<IntWritable> iterable,Context context){ int maxValue=Integer.MIN_VALUE; while (iterable.iterator().hasNext()){ maxValue=Math.max(maxValue,iterable.iterator().next().get()); } try { context.write(text,new IntWritable(maxValue)); } catch (IOException e) { e.printStackTrace(); } catch (InterruptedException e) { e.printStackTrace(); } } }
main程序
package com.cyq; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configured; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; import org.apache.hadoop.util.Tool; import org.apache.hadoop.util.ToolRunner; /** * @author Administrator * @date 2017/11/27 **/ public class MaxTemperature extends Configured implements Tool{ @Override public int run(String[] args) throws Exception{ Configuration conf = new Configuration(); conf.addResource("core-site.xml"); conf.addResource("hdfs-site.xml"); conf.addResource("mapred-site.xml"); conf.addResource("yarn-site.xml"); conf.set("mapreduce.job.jar", "D:\\chen_demo\\HadoopDemo\\target\\HadoopDemo-1.0-SNAPSHOT.jar"); conf.set("mapreduce.framework.name", "yarn"); conf.set("yarn.resourcemanager.hostname", "172.16.50.80"); conf.set("mapreduce.app-submission.cross-platform", "true"); Job job = Job.getInstance(conf); job.setJarByClass(MaxTemperature.class); job.setJobName("max temperature"); // 设置输入输出路径 FileInputFormat.addInputPath(job,new Path(args[0])); FileOutputFormat.setOutputPath(job,new Path(args[1])); job.setMapperClass(MapTemp.class); job.setReducerClass(Reduce.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); System.exit(job.waitForCompletion(true) ? 0:1); return 1; } public static void main(String[] args) throws Exception { ToolRunner.run(new MaxTemperature(), args); } }
3.运行程序
虽然博客写得很简单,但是新手入门,在实际搭建的时候还是遇到很多问题,没有详细写出来,多数是因为版本不一致导致的问题。和我一样在搭建该环境的小伙伴们如果遇到问题,希望能在评论区互相交流经验,总结错误。
相关文章推荐
- windows Hadoop开发环境搭建及远程提交
- 手动搭建搭建Hadoop虚拟机集群与windows远程开发环境
- 在windows远程提交任务给Hadoop集群(Hadoop 2.6)
- windows下搭建hadoop-2.6.0本地idea开发环境
- hadoop的windows远程linux服务进行本地开发环境搭建
- windows eclipse hadoop 集群开发环境搭建(分布式模式)
- windows eclipse远程连接hadoop集群并提交任务运行
- Hadoop-Windows下的Eclipse开发环境搭建,远程虚拟机Hadoop服务器
- windows 32 eclipse 远程hadoop开发环境搭建
- hadoop spark环境搭建及idea scala maven集成开发spark任务
- windows下搭建eclipse远程开发hadoop应用环境
- Intellij IDEA远程向hadoop集群提交mapreduce作业
- windows下eclipse远程提交hadoop 2.5.2 Mapreduce任务
- Windows下的eclipse(with Hadoop Plug-in)向Linux集群提交MapReduce任务
- windows下搭建hadoop-2.6.0本地idea开发环境
- Eclipse远程提交MapReduce任务到Hadoop集群
- windows 32位eclipse远程hadoop开发环境搭建
- IDEA向Hadoop集群提交作业环境搭建
- Hadoop实践(二)---集群和开发环境搭建(Intellij IDEA & Maven 开发Hadoop)
- Windows平台开发Mapreduce程序远程调用运行在Hadoop集群—Yarn调度引擎异常