
Operating a Remote HDFS from Java (Create Directory, Upload, Download, Delete)

2017-06-05 09:27
The utility class below wraps four basic operations against a remote HDFS: creating a directory, uploading a file, downloading a file and deleting a path.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class HDFSTest01 {

    // Create a directory on HDFS.
    public static boolean createDir(String dst, Configuration conf) {
        Path dstPath = new Path(dst);
        try {
            FileSystem dhfs = FileSystem.get(conf);
            dhfs.mkdirs(dstPath);
        } catch (IOException ie) {
            ie.printStackTrace();
            return false;
        }
        return true;
    }

    // Upload a local file to HDFS.
    public static boolean putToHDFS(String src, String dst, Configuration conf) {
        Path dstPath = new Path(dst);
        try {
            FileSystem hdfs = dstPath.getFileSystem(conf);
            hdfs.copyFromLocalFile(false, new Path(src), dstPath);
        } catch (IOException ie) {
            ie.printStackTrace();
            return false;
        }
        return true;
    }

    // Download a file from HDFS to the local file system.
    public static boolean getFromHDFS(String src, String dst, Configuration conf) {
        Path dstPath = new Path(dst);
        try {
            FileSystem dhfs = dstPath.getFileSystem(conf);
            dhfs.copyToLocalFile(false, new Path(src), dstPath);
        } catch (IOException ie) {
            ie.printStackTrace();
            return false;
        }
        return true;
    }

    // Delete a path on HDFS (recursively) if it exists.
    public static boolean checkAndDel(final String path, Configuration conf) {
        Path dstPath = new Path(path);
        try {
            FileSystem dhfs = dstPath.getFileSystem(conf);
            if (dhfs.exists(dstPath)) {
                dhfs.delete(dstPath, true);
            } else {
                return false;
            }
        } catch (IOException ie) {
            ie.printStackTrace();
            return false;
        }
        return true;
    }

    public static void main(String[] args) {
        boolean status = false;
        String dst1 = "hdfs://192.168.97.135:9000/test/new";
        Configuration conf = new Configuration();

        // Without the setting below you may get:
        // java.lang.IllegalArgumentException: Wrong FS: hdfs://192.168.1.225:9000/EBLearn_data/hello.txt, expected: file:///
        //
        // Two ways to fix this error:
        // Option 1: set the default file system explicitly (fs.default.name is the
        // deprecated property name; on Hadoop 2.x the preferred name is fs.defaultFS).
        conf.set("fs.default.name", "hdfs://192.168.97.135:9000"); // or "hdfs://master:9000"
        // Option 2: put core-site.xml and hdfs-site.xml on the project classpath.

        status = createDir(dst1, conf);
        System.out.println("status=" + status);

        String dst = "hdfs://192.168.97.135:9000/EBLearn_data";
        String src = "I:/hello.txt";
        status = putToHDFS(src, dst, conf);
        System.out.println("status=" + status);

        src = "hdfs://192.168.97.135:9000/EBLearn_data/hello.txt";
        dst = "I:/hadoop_need/";
        status = getFromHDFS(src, dst, conf);
        System.out.println("status=" + status);

        dst = "hdfs://192.168.97.135:9000/EBLearn_data/hello.txt";
        status = checkAndDel(dst, conf);
        System.out.println("status=" + status);
    }
}
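
If you prefer Option 2 above, the cluster's own configuration files can also be loaded explicitly instead of hard-coding the NameNode address. A minimal sketch, assuming core-site.xml and hdfs-site.xml have been copied from the cluster into the project (the paths and class name below are placeholders, not part of the original code):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;

public class HDFSConfExample {

    // Build a Configuration from the cluster's own XML files.
    public static Configuration loadClusterConf() {
        Configuration conf = new Configuration();
        // Hypothetical locations; point these at the copied files, or simply place
        // the XML files on the classpath and skip addResource entirely.
        conf.addResource(new Path("conf/core-site.xml"));
        conf.addResource(new Path("conf/hdfs-site.xml"));
        return conf;
    }
}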



Project setup

This is only a very simple demo: the web page offers an upload button so that a user can push a local file to the Hadoop cluster.


pom.xml

First, the dependencies in the pom file:
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>

<groupId>com.infosys.hadoop</groupId>
<artifactId>upload</artifactId>
<version>1.0-SNAPSHOT</version>

<name>upload</name>

<packaging>jar</packaging>
<parent>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-parent</artifactId>
<version>1.5.1.RELEASE</version>
<relativePath/> <!-- lookup parent from repository -->
</parent>

<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
<hadoop.version>2.6.5</hadoop.version>

</properties>

<dependencies>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-web</artifactId>
</dependency>

<dependency>
<groupId>javax.servlet</groupId>
<artifactId>javax.servlet-api</artifactId>
<version>3.1.0</version>
</dependency>

<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
<version>${hadoop.version}</version>
<exclusions>
<exclusion>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-log4j12</artifactId>
</exclusion>
</exclusions>
</dependency>

<!-- Test -->
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.12</version>
<scope>test</scope>
</dependency>

<dependency>
<groupId>org.apache.mrunit</groupId>
<artifactId>mrunit</artifactId>
<version>1.1.0</version>
<classifier>hadoop2</classifier>
<scope>test</scope>
</dependency>

<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-minicluster</artifactId>
<version>${hadoop.version}</version>
<scope>test</scope>
</dependency>
</dependencies>

<build>
<finalName>${project.artifactId}</finalName>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-archetype-plugin</artifactId>
<version>2.2</version>
</plugin>

<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-resources-plugin</artifactId>
<configuration>
<encoding>UTF-8</encoding>
</configuration>
</plugin>

<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<version>3.1</version>

<configuration>
<source>1.8</source>
<target>1.8</target>
</configuration>
</plugin>

<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-jar-plugin</artifactId>
<version>2.5</version>
<configuration>
<outputDirectory>${basedir}</outputDirectory>
</configuration>
</plugin>

<plugin>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-maven-plugin</artifactId>
</plugin>
</plugins>
</build>
</project>
We are really just adding the Spring Boot and Hadoop client dependencies; the rest are test-related. The hadoop-client artifact pulls in everything needed to develop Hadoop applications; see the earlier post on setting up a Hadoop 2.x Maven development environment for details.


Home page

The home page simply provides an upload form with a submit button:

index.html
<!doctype html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport"
          content="width=device-width, user-scalable=no, initial-scale=1.0, maximum-scale=1.0, minimum-scale=1.0">
    <meta http-equiv="X-UA-Compatible" content="ie=edge">
    <title>Upload</title>
</head>
<body>
<form action="/upload" method="post" enctype="multipart/form-data">
    <p>
        File: <input type="file" name="file">
    </p>
    <p>
        <input type="submit" value="Upload">
    </p>
</form>
</body>
</html>
Then expose an endpoint in a controller to serve the home page:

HomeController.java
import org.springframework.stereotype.Controller;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.servlet.ModelAndView;

@Controller
@RequestMapping(value = "/")
public class HomeController {

    // Serve the upload form at the application root.
    @GetMapping
    public ModelAndView home() {
        return new ModelAndView("index");
    }
}
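
The original post does not show the Spring Boot entry class, so here is a minimal sketch; the class name UploadApplication is hypothetical:

import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.SpringBootApplication;

@SpringBootApplication
public class UploadApplication {
    public static void main(String[] args) {
        SpringApplication.run(UploadApplication.class, args);
    }
}

Note that resolving the "index" view depends on where index.html lives: a static file under src/main/resources/static is served directly, while a template under templates needs a template engine on the classpath.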

Upload

The upload logic is also straightforward: the file is first received on the back end through Spring Boot's multipart upload support, and the Hadoop API is then called to copy it into HDFS.

Upload endpoint: UploadController.java
import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;

import org.springframework.stereotype.Controller;
import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.bind.annotation.ResponseBody;
import org.springframework.web.multipart.MultipartFile;

@Controller
public class UploadController {

    @PostMapping("/upload")
    @ResponseBody
    public String handleFileUpload(@RequestParam("file") MultipartFile file) {
        if (file.isEmpty()) {
            return "Upload failed: the file is empty.";
        }
        try {
            String originalFilename = file.getOriginalFilename();

            // Save the uploaded file to the local working directory first.
            BufferedOutputStream out = new BufferedOutputStream(
                    new FileOutputStream(new File(originalFilename)));
            out.write(file.getBytes());
            out.flush();
            out.close();

            // Then copy the local file into HDFS.
            String destFileName = "/user/hadoop/" + originalFilename;
            Upload.main(new String[]{originalFilename, destFileName});

        } catch (FileNotFoundException e) {
            e.printStackTrace();
            return "Upload failed: " + e.getMessage();
        } catch (IOException e) {
            e.printStackTrace();
            return "Upload failed: " + e.getMessage();
        }
        return "Upload succeeded";
    }
}
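
Writing the upload to a local file and then calling Upload.main works, but the MultipartFile could also be streamed straight into HDFS without touching the local disk. A rough sketch of that alternative (class, method and address below are hypothetical, not part of the original project):

import java.io.IOException;
import java.io.InputStream;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.springframework.web.multipart.MultipartFile;

public class HdfsStreamingUpload {

    // Copy the uploaded file directly into HDFS, without an intermediate local file.
    public static void saveToHdfs(MultipartFile file, String dest) throws IOException {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://192.168.1.2:9000"); // same address as in Upload.java

        try (FileSystem fs = FileSystem.get(conf);
             InputStream in = file.getInputStream();
             FSDataOutputStream out = fs.create(new Path(dest))) {
            IOUtils.copyBytes(in, out, 4096, false); // streams are closed by try-with-resources
        }
    }
}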

Finally, we provide one more class that talks to the Hadoop API.

Upload.java
import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.util.GenericOptionsParser;

public class Upload {

    public static final String FS_DEFAULT_FS = "fs.defaultFS";
    public static final String HDFS_HOST = "hdfs://192.168.1.2:9000";
    public static final String CROSS_PLATFORM = "mapreduce.app-submission.cross-platform";

    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();

        conf.setBoolean(CROSS_PLATFORM, true);
        conf.set(FS_DEFAULT_FS, HDFS_HOST);

        // Strip any generic Hadoop options (-D, -fs, ...) before reading our own arguments.
        GenericOptionsParser optionsParser = new GenericOptionsParser(conf, args);
        String[] remainingArgs = optionsParser.getRemainingArgs();
        if (remainingArgs.length < 2) {
            System.err.println("Usage: upload <source> <dest>");
            System.exit(2);
        }

        Path source = new Path(remainingArgs[0]);
        Path dest = new Path(remainingArgs[1]);

        FileSystem fs = FileSystem.get(conf);

        // delSrc = true: delete the local source after a successful copy;
        // overwrite = false: fail if the destination already exists.
        fs.copyFromLocalFile(true, false, source, dest);
    }
}
The fs.defaultFS property must match the value configured on the cluster's master NameNode; it is normally defined in etc/hadoop/core-site.xml.

As you can see, the actual operation is very simple: we just call the copyFromLocalFile method of Hadoop's FileSystem API. Its parameters are:
First parameter (delSrc): whether to delete the local source file, i.e. whether the original is kept after the upload. To keep local copies from piling up, we delete the source as soon as the upload succeeds.
Second parameter (overwrite): whether to overwrite an existing file. Here false means do not overwrite; if the file already exists on the HDFS cluster, the upload fails and the error is reported.
Third parameter: the source file path.
Fourth parameter: the destination path on HDFS.
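
For reference, a small sketch showing the two copyFromLocalFile overloads used in this article side by side; the paths and address are placeholders, purely for illustration:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class CopyFromLocalFileOverloads {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://192.168.1.2:9000"); // placeholder address
        FileSystem fs = FileSystem.get(conf);

        // 3-argument overload (used in HDFSTest01): delSrc, src, dst; existing files are overwritten.
        fs.copyFromLocalFile(false, new Path("I:/hello.txt"), new Path("/EBLearn_data"));

        // 4-argument overload (used in Upload): delSrc, overwrite, src, dst.
        fs.copyFromLocalFile(true, false, new Path("I:/hello.txt"), new Path("/user/hadoop/hello.txt"));
    }
}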