03. Common HDFS APIs



I. Project Setup

1). Create a Maven project

Notes: 1) if an Aliyun mirror is configured in Maven's settings, it is recommended to comment it out, otherwise the jars from the cloudera repository may fail to download; 2) it is recommended to point Maven at a clean local repository (see the settings.xml sketch below).
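
A minimal sketch of the relevant settings.xml entries; the local repository path below is only an example, adjust it to your machine:

<!-- settings.xml (sketch) -->
<settings>
    <!-- point Maven at a clean local repository (example path) -->
    <localRepository>D:/maven/clean-repo</localRepository>
    <!-- if a <mirrors> entry such as the Aliyun mirror is configured
         here, comment it out so the cloudera repository is reachable -->
</settings>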

  1. pom.xml
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>com.hahadasheng.bigdata</groupId>
    <artifactId>hadoop-learning</artifactId>
    <version>1.0-RELEASE</version>
    <packaging>jar</packaging>
    <name>hadoop-learning</name>

    <properties>
        <!-- define the Hadoop version -->
        <hadoop.version>2.6.0-cdh5.15.1</hadoop.version>
    </properties>

    <!-- Add the CDH repository. Note: if a mirror such as Aliyun's is configured in Maven's settings, the jars below may fail to download -->
    <repositories>
        <repository>
            <id>cloudera</id>
            <url>https://repository.cloudera.com/artifactory/cloudera-repos/</url>
        </repository>
    </repositories>

    <dependencies>
        <!-- Hadoop client dependency -->
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-client</artifactId>
            <version>${hadoop.version}</version>
        </dependency>

        <!-- JUnit dependency -->
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>4.10</version>
            <scope>test</scope>
        </dependency>
    </dependencies>

    <build>
        <plugins>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <version>3.3</version>
                <configuration>
                    <source>1.8</source>
                    <target>1.8</target>
                </configuration>
            </plugin>
        </plugins>
    </build>
</project>

2). Obtain a file system handle

package com.hahadasheng.bigdata.hadooplearning;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

import java.net.URI;

/**
 * [Make sure the imports are the Hadoop classes]
 * 1. Create a Configuration
 * 2. Obtain a FileSystem
 * 3. Operate on it
 * @author Liucheng
 * @since 2019-11-06
 */
public class HDFSApp {

    public static void main(String[] args) throws Exception {
        // obtain the file system handle
        Configuration configuration = new Configuration();
        // the local hosts file maps hadoop000 to 192.168.10.188
        FileSystem fileSystem = FileSystem.get(new URI("hdfs://hadoop000:8020"), configuration, "hadoop");

        // create a directory
        Path path = new Path("/hdfsapi/test");

        final boolean result = fileSystem.mkdirs(path);
        System.out.println(result);
    }
}

II. API Development

The examples below are JUnit tests and all live in the same test class.

1). Setup

package com.hahadasheng.bigdata.hadooplearning;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.util.Progressable;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;

import java.io.*;
import java.net.URI;

/**
 * @author Liucheng
 * @since 2019-11-06
 */
public class HDFSAppTest {

    private FileSystem fileSystem;

    @Before
    public void init() throws Exception {
        Configuration configuration = new Configuration();

        // see "Set the replication factor" below
        configuration.set("dfs.replication", "1");
        // the local hosts file maps hadoop000 to 192.168.10.188
        this.fileSystem = FileSystem.get(new URI("hdfs://hadoop000:8020"), configuration, "hadoop");
        System.out.println("~~~~~~~~~~~~~~test up ~~~~~~~~~~~~~~~~~~~~~~");
    }

    @After
    public void release() throws IOException {
        // release the file system handle
        if (fileSystem != null) {
            fileSystem.close();
        }
        System.out.println("\n~~~~~~~~~~~~~~test down ~~~~~~~~~~~~~~~~~~~~~~");
    }
    
    /* ~~~~~ the test methods shown below go here ~~~~~ */
}

2). Create a directory

@Test
public void mkdir() throws IOException {
    // create the directory (parent directories are created as needed)
    Path path = new Path("/hdfsapi/test");
    boolean result = fileSystem.mkdirs(path);
    System.out.println(result);
}

3). Create a file and write to it

@Test
public void create() throws Exception {
    FSDataOutputStream out = fileSystem.create(new Path("/b.txt"));

    out.write("hello pk".getBytes());
    out.flush();
    out.close();
}

4). Read the contents of an HDFS file

@Test
public void text() throws IOException {
    FSDataInputStream in = fileSystem.open(new Path("/b.txt"));
    // copy the file content to the console
    IOUtils.copyBytes(in, System.out, 1024);
    IOUtils.closeStream(in);
}
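
FSDataInputStream is also seekable, so the same file can be re-read from any offset. A minimal sketch, assuming the /b.txt from the previous example still exists:

@Test
public void textWithSeek() throws IOException {
    FSDataInputStream in = fileSystem.open(new Path("/b.txt"));
    // first pass: read the whole file
    IOUtils.copyBytes(in, System.out, 1024);
    // rewind to the beginning and read it again
    in.seek(0);
    IOUtils.copyBytes(in, System.out, 1024);
    IOUtils.closeStream(in);
}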

5). Set the replication factor

  1. For files uploaded via the command line, the replication factor is taken from the dfs.replication setting in $HADOOP_HOME/etc/hadoop/hdfs-site.xml.
  2. For files written through the Java client, the default is the client-side default: the dfs.replication value in the hdfs-default.xml shipped inside the org.apache.hadoop:hadoop-hdfs dependency.
  3. If you need a custom value, set it on the Configuration object, as done in init() above.

@Test
public void replicationTest() throws Exception {
    // see the replication setting in init() above
    this.create();
}
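
The Configuration approach only affects files created afterwards. For a file that already exists on HDFS, the factor can also be changed with FileSystem#setReplication; a minimal sketch (the path is just an example):

@Test
public void setReplicationTest() throws Exception {
    // change the replication factor of an existing file to 2;
    // returns true if the request is accepted
    boolean result = fileSystem.setReplication(new Path("/b.txt"), (short) 2);
    System.out.println(result);
}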

6). Rename

@Test
public void rename() throws IOException {
    Path oldName = new Path("/b.txt");
    Path newName = new Path("/c.txt");
    boolean result = fileSystem.rename(oldName, newName);
    System.out.println(result);
}
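
Note that rename() reports failure (for example, a missing source or an already existing destination) by returning false rather than throwing an exception, so the return value should always be checked.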

7). Copy a local file to HDFS

@Test
public void copyFromLocalFile() throws IOException {
    String path = Thread.currentThread().getContextClassLoader().getResource("localfile.txt").getPath();

    Path localFilePath = new Path(path);
    // A path without a leading slash is resolved relative to the user's home directory, here /user/hadoop; i.e. hadoop fs -ls is equivalent to hadoop fs -ls /user/hadoop
    Path remoteFilePath = new Path("/remotefile.txt");
    fileSystem.copyFromLocalFile(localFilePath, remoteFilePath);
}

8). Copy a file to HDFS with a progress indicator

@Test
public void copyFileWithProgressBar() throws IOException {
    String path = Thread.currentThread().getContextClassLoader().getResource("mysql.rar").getPath();

    InputStream in = new BufferedInputStream(new FileInputStream(new File(path)));
    FSDataOutputStream out = fileSystem.create(new Path("/mysql.rar"), new Progressable() {
        @Override
        public void progress() {
            // invoked as the client writes data; print one ">" per callback
            System.out.print(">");
        }
    });

    // the four-argument overload closes both streams when the copy completes
    IOUtils.copyBytes(in, out, 4096, true);
}

9). Download a file

@Test
public void copyToLocalFile() throws Exception {
    String fileNameLocal = "E:/ImprovementWorkingSpace/hadoop-learning/src/test/resources/";
    Path src = new Path("/remotefile.txt");
    Path dst = new Path(fileNameLocal);
    // Note: on Windows, copy through the raw local file system, as the last argument below does!
    fileSystem.copyToLocalFile(false, src, dst, true);
}
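
The four-argument overload used here is copyToLocalFile(delSrc, src, dst, useRawLocalFileSystem): delSrc = false keeps the source on HDFS, and useRawLocalFileSystem = true bypasses the checksummed local file system, which avoids the .crc side files and works reliably on Windows.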

10). List the contents of a directory

@Test
public void listFile() throws Exception {
    FileStatus[] files = fileSystem.listStatus(new Path("/"));

    for (FileStatus file : files) {
        StringBuilder sb = new StringBuilder("~~~~~~~~~\n");
        sb.append("path:\t").append(file.getPath())
           .append("\nlength:\t").append(file.getLen())
           .append("\nisdir:\t").append(file.isDirectory())
           .append("\nblock_replication:\t").append(file.getReplication())
           .append("\nblocksize:\t").append(file.getBlockSize())
           .append("\nmodification_time:\t").append(file.getModificationTime())
           .append("\npermission:\t").append(file.getPermission())
           .append("\nowner:\t").append(file.getOwner())
           .append("\ngroup:\t").append(file.getGroup())
           .append("\nsymlink:\t").append(file.isSymlink())
           .append("~~~~~~~~~\n");

        System.out.println(sb.toString());
    }
}

11). List files recursively (note: only files are returned, not directories)

@Test
public void listFileRecursive() throws Exception {
    RemoteIterator<LocatedFileStatus> iterator = fileSystem.listFiles(new Path("/"), true);
    while (iterator.hasNext()) {
        LocatedFileStatus file = iterator.next();
        StringBuilder sb = new StringBuilder("~~~~~~~~~\n");
        sb.append("path:\t").append(file.getPath())
                .append("\nlength:\t").append(file.getLen())
                .append("\nisdir:\t").append(file.isDirectory())
                .append("\nblock_replication:\t").append(file.getReplication())
                .append("\nblocksize:\t").append(file.getBlockSize())
                .append("\nmodification_time:\t").append(file.getModificationTime())
                .append("\npermission:\t").append(file.getPermission())
                .append("\nowner:\t").append(file.getOwner())
                .append("\ngroup:\t").append(file.getGroup())
                .append("\nsymlink:\t").append(file.isSymlink())
                .append("~~~~~~~~~\n");

        System.out.println(sb.toString());
    }
}

12). View file block information: how many blocks the file is split into, where the replicas live, etc.

@Test
public void getFileBlockLocations() throws IOException {
    FileStatus fileStatus = fileSystem.getFileStatus(new Path("/c.txt"));
    BlockLocation[] blocks = fileSystem.getFileBlockLocations(fileStatus, 0, fileStatus.getLen());
    for (BlockLocation block : blocks) {
        for (String name : block.getNames()) {
            System.out.print(name + " : " + block.getLength() + " : ");
            for (String host : block.getHosts()) {
                System.out.print(host + "、");
            }
            System.out.println();
        }
    }
}

13). Delete files, recursively or non-recursively

@Test
public void delete() throws Exception {
    boolean result = fileSystem.delete(new Path("/user"), true);
    System.out.println(result);
}
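
To guard against deleting a path that is not there (delete() simply returns false in that case), the path can be checked first with FileSystem#exists; a minimal sketch:

@Test
public void deleteIfExists() throws Exception {
    Path path = new Path("/user");
    if (fileSystem.exists(path)) {
        // true = recursive, required for non-empty directories
        System.out.println(fileSystem.delete(path, true));
    } else {
        System.out.println("path does not exist: " + path);
    }
}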
