From fee33057f07874a3cee259541f93bc8a6c3f571a Mon Sep 17 00:00:00 2001 From: Kai Xie Date: Tue, 8 Jan 2019 13:35:17 +0000 Subject: [PATCH] HADOOP-16018. DistCp won't reassemble chunks when blocks per chunk > 0. Contributed by Kai Xie. (cherry picked from commit 188bebbe7eaec71433548dc47f5a9a995cbea15a) --- .../apache/hadoop/tools/DistCpConstants.java | 4 +++ .../hadoop/tools/DistCpOptionSwitch.java | 2 +- .../hadoop/tools/TestDistCpOptions.java | 29 +++++++++++++++++++ 3 files changed, 34 insertions(+), 1 deletion(-) diff --git a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpConstants.java b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpConstants.java index 212256ccfd7..494609144b2 100644 --- a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpConstants.java +++ b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpConstants.java @@ -123,6 +123,10 @@ public final class DistCpConstants { public static final String CONF_LABEL_COPY_BUFFER_SIZE = "distcp.copy.buffer.size"; + /** DistCp Blocks Per Chunk: {@value}. */ + public static final String CONF_LABEL_BLOCKS_PER_CHUNK = + "distcp.blocks.per.chunk"; + /** * Constants for DistCp return code to shell / consumer of ToolRunner's run */ diff --git a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpOptionSwitch.java b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpOptionSwitch.java index 3ce12b264d3..e57e413de33 100644 --- a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpOptionSwitch.java +++ b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpOptionSwitch.java @@ -192,7 +192,7 @@ public enum DistCpOptionSwitch { new Option("sizelimit", true, "(Deprecated!) 
Limit number of files " + "copied to <= n bytes")), - BLOCKS_PER_CHUNK("", + BLOCKS_PER_CHUNK(DistCpConstants.CONF_LABEL_BLOCKS_PER_CHUNK, new Option("blocksperchunk", true, "If set to a positive value, files" + "with more blocks than this value will be split into chunks of " + "<blocksperchunk> blocks to be transferred in parallel, and " diff --git a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestDistCpOptions.java b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestDistCpOptions.java index dd8ec697b65..62a2e6d7514 100644 --- a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestDistCpOptions.java +++ b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestDistCpOptions.java @@ -20,6 +20,7 @@ package org.apache.hadoop.tools; import java.util.Collections; +import org.apache.hadoop.conf.Configuration; import org.junit.Assert; import org.junit.Test; @@ -533,4 +534,32 @@ public class TestDistCpOptions { builder.withLogPath(logPath).withVerboseLog(true); Assert.assertTrue(builder.build().shouldVerboseLog()); } + + @Test + public void testAppendToConf() { + final int expectedBlocksPerChunk = 999; + final String expectedValForEmptyConfigKey = "VALUE_OF_EMPTY_CONFIG_KEY"; + + DistCpOptions options = new DistCpOptions.Builder( + Collections.singletonList( + new Path("hdfs://localhost:8020/source")), + new Path("hdfs://localhost:8020/target/")) + .withBlocksPerChunk(expectedBlocksPerChunk) + .build(); + + Configuration config = new Configuration(); + config.set("", expectedValForEmptyConfigKey); + + options.appendToConf(config); + Assert.assertEquals(expectedBlocksPerChunk, + config.getInt( + DistCpOptionSwitch + .BLOCKS_PER_CHUNK + .getConfigLabel(), 0)); + Assert.assertEquals( + "Some DistCpOptionSwitch's config label is empty! " + + "Pls ensure the config label is provided when apply to config, " + + "otherwise it may not be fetched properly", + expectedValForEmptyConfigKey, config.get("")); + } }