diff --git a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/CopyListingFileStatus.java b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/CopyListingFileStatus.java
index 2b1e7e4ce47..00d4b325053 100644
--- a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/CopyListingFileStatus.java
+++ b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/CopyListingFileStatus.java
@@ -159,6 +159,16 @@ public final class CopyListingFileStatus implements Writable {
     return permission;
   }
 
+  /**
+   * Returns whether the erasure coded bit is set on the permission of this
+   * file status.
+   *
+   * @return the erasure coded bit of {@code getPermission()}
+   */
+  public boolean isErasureCoded() {
+    return getPermission().getErasureCodedBit();
+  }
+
   /**
    * Returns the full logical ACL.
    *
diff --git a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/RetriableFileCopyCommand.java b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/RetriableFileCopyCommand.java
index d1cdfdd5485..06acd78a8a1 100644
--- a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/RetriableFileCopyCommand.java
+++ b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/RetriableFileCopyCommand.java
@@ -167,6 +167,9 @@ public class RetriableFileCopyCommand extends RetriableCommand {
         FsPermission.getUMask(targetFS.getConf()));
     final OutputStream outStream;
     if (action == FileAction.OVERWRITE) {
+      // If there is an erasure coding policy set on the target directory,
+      // files will be written to the target directory using the same EC policy.
+      // The replication factor of the source file is ignored and not preserved.
       final short repl = getReplicationFactor(fileAttributes, source,
           targetFS, targetPath);
       final long blockSize = getBlockSize(fileAttributes, source,
diff --git a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/util/DistCpUtils.java b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/util/DistCpUtils.java
index c308e6f1f90..76bc4c56268 100644
--- a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/util/DistCpUtils.java
+++ b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/util/DistCpUtils.java
@@ -236,8 +236,13 @@ public class DistCpUtils {
       }
     }
 
-    if (attributes.contains(FileAttribute.REPLICATION) && !targetFileStatus.isDirectory() &&
-        (srcFileStatus.getReplication() != targetFileStatus.getReplication())) {
+    // The replication factor can only be preserved for replicated files.
+    // It is ignored when either the source or target file is erasure coded.
+    if (attributes.contains(FileAttribute.REPLICATION) &&
+        !targetFileStatus.isDirectory() &&
+        !targetFileStatus.isErasureCoded() &&
+        !srcFileStatus.isErasureCoded() &&
+        srcFileStatus.getReplication() != targetFileStatus.getReplication()) {
       targetFS.setReplication(path, srcFileStatus.getReplication());
     }
 
diff --git a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestCopyListingFileStatus.java b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestCopyListingFileStatus.java
index f512ef6d9d8..8efc5cf9942 100644
--- a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestCopyListingFileStatus.java
+++ b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestCopyListingFileStatus.java
@@ -62,6 +62,7 @@ public class TestCopyListingFileStatus {
       assertEquals(stat.getOwner(), clfs.getOwner());
       assertEquals(stat.getGroup(), clfs.getGroup());
       assertEquals(stat.getPath(), clfs.getPath());
+      assertEquals(stat.isErasureCoded(), clfs.isErasureCoded());
     }
   }
 
diff --git a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/util/TestDistCpUtils.java b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/util/TestDistCpUtils.java
index 8c79becfa0b..c42e5465680 100644
--- a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/util/TestDistCpUtils.java
+++ b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/util/TestDistCpUtils.java
@@ -18,6 +18,10 @@
 
 package org.apache.hadoop.tools.util;
 
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.assertFalse;
+
 import java.io.IOException;
 import java.io.OutputStream;
 import java.util.EnumSet;
@@ -31,11 +35,15 @@ import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.permission.FsPermission;
+import org.apache.hadoop.hdfs.DFSConfigKeys;
 import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.hdfs.server.namenode.INodeFile;
+import org.apache.hadoop.hdfs.tools.ECAdmin;
 import org.apache.hadoop.io.IOUtils;
 import org.apache.hadoop.tools.CopyListingFileStatus;
 import org.apache.hadoop.tools.DistCpOptionSwitch;
 import org.apache.hadoop.tools.DistCpOptions.FileAttribute;
+import org.apache.hadoop.util.ToolRunner;
 import org.junit.AfterClass;
 import org.junit.Assert;
 import org.junit.BeforeClass;
@@ -52,8 +60,10 @@ public class TestDistCpUtils {
 
   @BeforeClass
   public static void create() throws IOException {
+    config.set(DFSConfigKeys.DFS_NAMENODE_EC_POLICIES_ENABLED_KEY,
+        "XOR-2-1-64k");
     cluster = new MiniDFSCluster.Builder(config)
-        .numDataNodes(1)
+        .numDataNodes(2)
         .format(true)
         .build();
   }
@@ -539,6 +549,133 @@ public class TestDistCpUtils {
     Assert.assertTrue(srcStatus.getReplication() == dstStatus.getReplication());
   }
 
+  /**
+   * Verifies that preserving the replication attribute leaves the target
+   * replication factor untouched when the source file, the target file, or
+   * both are erasure coded.
+   */
+  @Test (timeout = 60000)
+  public void testReplFactorNotPreservedOnErasureCodedFile() throws Exception {
+    FileSystem fs = FileSystem.get(config);
+
+    // Case 1: Verify replication attribute not preserved when the source
+    // file is erasure coded and the target file is replicated.
+    Path srcECDir = new Path("/tmp/srcECDir");
+    Path srcECFile = new Path(srcECDir, "srcECFile");
+    Path dstReplDir = new Path("/tmp/dstReplDir");
+    Path dstReplFile = new Path(dstReplDir, "destReplFile");
+    fs.mkdirs(srcECDir);
+    fs.mkdirs(dstReplDir);
+    String[] args = {"-setPolicy", "-path", "/tmp/srcECDir",
+        "-policy", "XOR-2-1-64k"};
+    int res = ToolRunner.run(config, new ECAdmin(config), args);
+    assertEquals("Setting EC policy should succeed!", 0, res);
+    verifyReplFactorNotPreservedOnErasureCodedFile(srcECFile, true,
+        dstReplFile, false);
+
+    // Case 2: Verify replication attribute not preserved when the source
+    // file is replicated and the target file is erasure coded.
+    Path srcReplDir = new Path("/tmp/srcReplDir");
+    Path srcReplFile = new Path(srcReplDir, "srcReplFile");
+    Path dstECDir = new Path("/tmp/dstECDir");
+    Path dstECFile = new Path(dstECDir, "destECFile");
+    fs.mkdirs(srcReplDir);
+    fs.mkdirs(dstECDir);
+    args = new String[]{"-setPolicy", "-path", "/tmp/dstECDir",
+        "-policy", "XOR-2-1-64k"};
+    res = ToolRunner.run(config, new ECAdmin(config), args);
+    assertEquals("Setting EC policy should succeed!", 0, res);
+    verifyReplFactorNotPreservedOnErasureCodedFile(srcReplFile,
+        false, dstECFile, true);
+
+    // Case 3: Verify replication attribute not altered from the default
+    // INodeFile.DEFAULT_REPL_FOR_STRIPED_BLOCKS when both source and
+    // target files are erasure coded.
+    verifyReplFactorNotPreservedOnErasureCodedFile(srcECFile,
+        true, dstECFile, true);
+  }
+
+  /**
+   * Creates {@code srcFile} and {@code dstFile} through {@code createFile},
+   * runs {@code DistCpUtils.preserve} for the replication attribute only and
+   * asserts that the replication factor of {@code dstFile} is not changed.
+   *
+   * @param srcFile path of the source file
+   * @param isSrcEC whether the source file is expected to be erasure coded
+   * @param dstFile path of the target file
+   * @param isDstEC whether the target file is expected to be erasure coded
+   * @throws Exception if a file system operation fails
+   */
+  private void verifyReplFactorNotPreservedOnErasureCodedFile(Path srcFile,
+      boolean isSrcEC, Path dstFile, boolean isDstEC) throws Exception {
+    FileSystem fs = FileSystem.get(config);
+    createFile(fs, srcFile);
+    CopyListingFileStatus srcStatus = new CopyListingFileStatus(
+        fs.getFileStatus(srcFile));
+    if (isSrcEC) {
+      assertTrue(srcFile + " should be erasure coded!",
+          srcStatus.isErasureCoded());
+      assertEquals(INodeFile.DEFAULT_REPL_FOR_STRIPED_BLOCKS,
+          srcStatus.getReplication());
+    } else {
+      assertEquals("Unexpected replication factor for " + srcFile,
+          fs.getDefaultReplication(srcFile), srcStatus.getReplication());
+    }
+
+    createFile(fs, dstFile);
+    CopyListingFileStatus dstStatus = new CopyListingFileStatus(
+        fs.getFileStatus(dstFile));
+    if (isDstEC) {
+      assertTrue(dstFile + " should be erasure coded!",
+          dstStatus.isErasureCoded());
+      assertEquals("Unexpected replication factor for erasure coded file!",
+          INodeFile.DEFAULT_REPL_FOR_STRIPED_BLOCKS,
+          dstStatus.getReplication());
+    } else {
+      assertEquals("Unexpected replication factor for " + dstFile,
+          fs.getDefaultReplication(dstFile), dstStatus.getReplication());
+    }
+
+    // Let srcFile and dstFile differ on their FileAttribute
+    fs.setPermission(srcFile, fullPerm);
+    fs.setOwner(srcFile, "ec", "ec-group");
+    fs.setTimes(srcFile, 0, 0);
+
+    fs.setPermission(dstFile, noPerm);
+    fs.setOwner(dstFile, "normal", "normal-group");
+    fs.setTimes(dstFile, 100, 100);
+
+    // Running preserve operations only for replication attribute
+    srcStatus = new CopyListingFileStatus(fs.getFileStatus(srcFile));
+    EnumSet<FileAttribute> attributes = EnumSet.of(FileAttribute.REPLICATION);
+    DistCpUtils.preserve(fs, dstFile, srcStatus, attributes, false);
+    dstStatus = new CopyListingFileStatus(fs.getFileStatus(dstFile));
+
+    assertFalse("Permission for " + srcFile + " and " + dstFile +
+        " should not be same after preserve only for replication attr!",
+        srcStatus.getPermission().equals(dstStatus.getPermission()));
+    assertFalse("File ownership should not match!",
+        srcStatus.getOwner().equals(dstStatus.getOwner()));
+    assertFalse(srcStatus.getGroup().equals(dstStatus.getGroup()));
+    assertFalse(srcStatus.getAccessTime() == dstStatus.getAccessTime());
+    assertFalse(
+        srcStatus.getModificationTime() == dstStatus.getModificationTime());
+    if (isDstEC) {
+      assertEquals("Unexpected replication factor for erasure coded file!",
+          INodeFile.DEFAULT_REPL_FOR_STRIPED_BLOCKS,
+          dstStatus.getReplication());
+    } else {
+      assertEquals(dstFile + " replication factor should be same as dst " +
+          "filesystem!", fs.getDefaultReplication(dstFile),
+          dstStatus.getReplication());
+    }
+    if (!isSrcEC || !isDstEC) {
+      assertFalse(dstFile + " replication factor should not be " +
+          "same as " + srcFile,
+          srcStatus.getReplication() == dstStatus.getReplication());
+    }
+  }
+
   @Test
   public void testPreserveTimestampOnFile() throws IOException {
     FileSystem fs = FileSystem.get(config);