From ad69baf6a9139c0af81e9f72e41c1e3aeb119ebc Mon Sep 17 00:00:00 2001 From: Jing Zhao Date: Thu, 13 Oct 2016 13:24:37 -0700 Subject: [PATCH] HADOOP-13024. Distcp with -delete feature on raw data not implemented. Contributed by Mavin Martin. (cherry picked from commit 0a85d079838f532a13ca237300386d1b3bc1b178) --- .../apache/hadoop/tools/DistCpConstants.java | 12 ++++- .../hadoop/tools/mapred/CopyCommitter.java | 5 ++- .../hadoop/tools/TestDistCpWithRawXAttrs.java | 45 +++++++++---------- .../hadoop/tools/util/DistCpTestUtils.java | 32 ++++++++----- 4 files changed, 56 insertions(+), 38 deletions(-) diff --git a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpConstants.java b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpConstants.java index 525f4cec400..a6eda53439d 100644 --- a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpConstants.java +++ b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpConstants.java @@ -18,6 +18,8 @@ package org.apache.hadoop.tools; * limitations under the License. */ +import org.apache.hadoop.fs.Path; + /** * Utility class to hold commonly used constants. */ @@ -140,10 +142,18 @@ public class DistCpConstants { public static final int MIN_RECORDS_PER_CHUNK_DEFAULT = 5; public static final int SPLIT_RATIO_DEFAULT = 2; + /** + * Constants for NONE file deletion + */ + public static final String NONE_PATH_NAME = "/NONE"; + public static final Path NONE_PATH = new Path(NONE_PATH_NAME); + public static final Path RAW_NONE_PATH = new Path( + DistCpConstants.HDFS_RESERVED_RAW_DIRECTORY_NAME + NONE_PATH_NAME); + /** * Value of reserved raw HDFS directory when copying raw.* xattrs. */ - static final String HDFS_RESERVED_RAW_DIRECTORY_NAME = "/.reserved/raw"; + public static final String HDFS_RESERVED_RAW_DIRECTORY_NAME = "/.reserved/raw"; static final String HDFS_DISTCP_DIFF_DIRECTORY_NAME = ".distcp.diff.tmp"; } diff --git a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/CopyCommitter.java b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/CopyCommitter.java index 6d2fef5f907..dd653b297df 100644 --- a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/CopyCommitter.java +++ b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/CopyCommitter.java @@ -238,7 +238,10 @@ public class CopyCommitter extends FileOutputCommitter { List targets = new ArrayList(1); Path targetFinalPath = new Path(conf.get(DistCpConstants.CONF_LABEL_TARGET_FINAL_PATH)); targets.add(targetFinalPath); - DistCpOptions options = new DistCpOptions(targets, new Path("/NONE")); + Path resultNonePath = Path.getPathWithoutSchemeAndAuthority(targetFinalPath) + .toString().startsWith(DistCpConstants.HDFS_RESERVED_RAW_DIRECTORY_NAME) + ? DistCpConstants.RAW_NONE_PATH : DistCpConstants.NONE_PATH; + DistCpOptions options = new DistCpOptions(targets, resultNonePath); // // Set up options to be the same from the CopyListing.buildListing's perspective, // so to collect similar listings as when doing the copy diff --git a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestDistCpWithRawXAttrs.java b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestDistCpWithRawXAttrs.java index 5aef51a830e..8adc2cfb867 100644 --- a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestDistCpWithRawXAttrs.java +++ b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestDistCpWithRawXAttrs.java @@ -82,14 +82,7 @@ public class TestDistCpWithRawXAttrs { final String relDst = "/./.reserved/../.reserved/raw/../raw/dest/../dest"; doTestPreserveRawXAttrs(relSrc, relDst, "-px", true, true, DistCpConstants.SUCCESS); - doTestPreserveRawXAttrs(rootedSrcName, rootedDestName, "-px", - false, true, DistCpConstants.SUCCESS); - doTestPreserveRawXAttrs(rootedSrcName, rawDestName, "-px", - false, true, DistCpConstants.INVALID_ARGUMENT); - doTestPreserveRawXAttrs(rawSrcName, rootedDestName, "-px", - false, true, DistCpConstants.INVALID_ARGUMENT); - doTestPreserveRawXAttrs(rawSrcName, rawDestName, "-px", - true, true, DistCpConstants.SUCCESS); + doTestStandardPreserveRawXAttrs("-px", true); final Path savedWd = fs.getWorkingDirectory(); try { fs.setWorkingDirectory(new Path("/.reserved/raw")); @@ -103,27 +96,18 @@ public class TestDistCpWithRawXAttrs { /* Test that XAttrs are not preserved and raw.* are when appropriate. */ @Test public void testPreserveRawXAttrs2() throws Exception { - doTestPreserveRawXAttrs(rootedSrcName, rootedDestName, "-p", - false, false, DistCpConstants.SUCCESS); - doTestPreserveRawXAttrs(rootedSrcName, rawDestName, "-p", - false, false, DistCpConstants.INVALID_ARGUMENT); - doTestPreserveRawXAttrs(rawSrcName, rootedDestName, "-p", - false, false, DistCpConstants.INVALID_ARGUMENT); - doTestPreserveRawXAttrs(rawSrcName, rawDestName, "-p", - true, false, DistCpConstants.SUCCESS); + doTestStandardPreserveRawXAttrs("-p", false); } /* Test that XAttrs are not preserved and raw.* are when appropriate. */ @Test public void testPreserveRawXAttrs3() throws Exception { - doTestPreserveRawXAttrs(rootedSrcName, rootedDestName, null, - false, false, DistCpConstants.SUCCESS); - doTestPreserveRawXAttrs(rootedSrcName, rawDestName, null, - false, false, DistCpConstants.INVALID_ARGUMENT); - doTestPreserveRawXAttrs(rawSrcName, rootedDestName, null, - false, false, DistCpConstants.INVALID_ARGUMENT); - doTestPreserveRawXAttrs(rawSrcName, rawDestName, null, - true, false, DistCpConstants.SUCCESS); + doTestStandardPreserveRawXAttrs(null, false); + } + + @Test + public void testPreserveRawXAttrs4() throws Exception { + doTestStandardPreserveRawXAttrs("-update -delete", false); } private static Path[] pathnames = { new Path("dir1"), @@ -145,6 +129,19 @@ public class TestDistCpWithRawXAttrs { } } + private void doTestStandardPreserveRawXAttrs(String options, + boolean expectUser) + throws Exception { + doTestPreserveRawXAttrs(rootedSrcName, rootedDestName, options, + false, expectUser, DistCpConstants.SUCCESS); + doTestPreserveRawXAttrs(rootedSrcName, rawDestName, options, + false, expectUser, DistCpConstants.INVALID_ARGUMENT); + doTestPreserveRawXAttrs(rawSrcName, rootedDestName, options, + false, expectUser, DistCpConstants.INVALID_ARGUMENT); + doTestPreserveRawXAttrs(rawSrcName, rawDestName, options, + true, expectUser, DistCpConstants.SUCCESS); + } + private void doTestPreserveRawXAttrs(String src, String dest, String preserveOpts, boolean expectRaw, boolean expectUser, int expectedExitCode) throws Exception { diff --git a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/util/DistCpTestUtils.java b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/util/DistCpTestUtils.java index 27216381d1b..624f7d5a0ef 100644 --- a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/util/DistCpTestUtils.java +++ b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/util/DistCpTestUtils.java @@ -18,20 +18,19 @@ package org.apache.hadoop.tools.util; -import static org.junit.Assert.assertArrayEquals; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.tools.DistCp; +import org.apache.hadoop.util.ToolRunner; import java.util.Iterator; import java.util.Map; import java.util.Map.Entry; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; - -import org.apache.hadoop.tools.DistCp; -import org.apache.hadoop.util.ToolRunner; +import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; /** * Utility class for DistCpTests @@ -79,10 +78,19 @@ public class DistCpTestUtils { public static void assertRunDistCp(int exitCode, String src, String dst, String options, Configuration conf) throws Exception { + assertRunDistCp(exitCode, src, dst, + options == null ? new String[0] : options.trim().split(" "), conf); + } + + private static void assertRunDistCp(int exitCode, String src, String dst, + String[] options, Configuration conf) + throws Exception { DistCp distCp = new DistCp(conf, null); - String[] optsArr = options == null ? - new String[] { src, dst } : - new String[] { options, src, dst }; + String[] optsArr = new String[options.length + 2]; + System.arraycopy(options, 0, optsArr, 0, options.length); + optsArr[optsArr.length - 2] = src; + optsArr[optsArr.length - 1] = dst; + assertEquals(exitCode, ToolRunner.run(conf, distCp, optsArr)); }