HDFS-10971. Distcp should not copy replication factor if source file is erasure coded. Contributed by Manoj Govindassamy.

This commit is contained in:
Andrew Wang 2017-03-28 22:14:03 -07:00
parent 82fb9ce8df
commit 0e6f8e4bc6
5 changed files with 137 additions and 3 deletions

View File

@ -159,6 +159,10 @@ public final class CopyListingFileStatus implements Writable {
return permission;
}
/**
 * Tells whether this file status is marked as erasure coded.
 *
 * @return the erasure-coded bit carried by the permission obtained from
 *         {@code getPermission()}
 */
public boolean isErasureCoded() {
  return this.getPermission().getErasureCodedBit();
}
/**
* Returns the full logical ACL.
*

View File

@ -167,6 +167,9 @@ public class RetriableFileCopyCommand extends RetriableCommand {
FsPermission.getUMask(targetFS.getConf()));
final OutputStream outStream;
if (action == FileAction.OVERWRITE) {
// If there is an erasure coding policy set on the target directory,
// files will be written to the target directory using the same EC policy.
// The replication factor of the source file is ignored and not preserved.
final short repl = getReplicationFactor(fileAttributes, source,
targetFS, targetPath);
final long blockSize = getBlockSize(fileAttributes, source,

View File

@ -236,8 +236,13 @@ public class DistCpUtils {
}
}
if (attributes.contains(FileAttribute.REPLICATION) && !targetFileStatus.isDirectory() &&
(srcFileStatus.getReplication() != targetFileStatus.getReplication())) {
// The replication factor can only be preserved for replicated files.
// It is ignored when either the source or the target file is erasure coded.
if (attributes.contains(FileAttribute.REPLICATION) &&
!targetFileStatus.isDirectory() &&
!targetFileStatus.isErasureCoded() &&
!srcFileStatus.isErasureCoded() &&
srcFileStatus.getReplication() != targetFileStatus.getReplication()) {
targetFS.setReplication(path, srcFileStatus.getReplication());
}

View File

@ -62,6 +62,7 @@ public class TestCopyListingFileStatus {
assertEquals(stat.getOwner(), clfs.getOwner());
assertEquals(stat.getGroup(), clfs.getGroup());
assertEquals(stat.getPath(), clfs.getPath());
assertEquals(stat.isErasureCoded(), clfs.isErasureCoded());
}
}

View File

@ -18,6 +18,10 @@
package org.apache.hadoop.tools.util;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.assertFalse;
import java.io.IOException;
import java.io.OutputStream;
import java.util.EnumSet;
@ -31,11 +35,15 @@ import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.hdfs.DFSConfigKeys;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hdfs.server.namenode.INodeFile;
import org.apache.hadoop.hdfs.tools.ECAdmin;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.tools.CopyListingFileStatus;
import org.apache.hadoop.tools.DistCpOptionSwitch;
import org.apache.hadoop.tools.DistCpOptions.FileAttribute;
import org.apache.hadoop.util.ToolRunner;
import org.junit.AfterClass;
import org.junit.Assert;
import org.junit.BeforeClass;
@ -52,8 +60,10 @@ public class TestDistCpUtils {
/**
 * One-time class setup: sets the enabled-EC-policies key to "XOR-2-1-64k" in
 * the shared configuration, then builds the MiniDFSCluster used by the tests
 * with {@code format(true)}.
 *
 * @throws IOException declared by the method; presumably raised when the
 *         cluster cannot be built (confirm against MiniDFSCluster)
 */
@BeforeClass
public static void create() throws IOException {
// The erasure coding tests in this class set this same policy name on
// directories through ECAdmin.
config.set(DFSConfigKeys.DFS_NAMENODE_EC_POLICIES_ENABLED_KEY,
"XOR-2-1-64k");
cluster = new MiniDFSCluster.Builder(config)
// NOTE(review): numDataNodes is invoked twice in a row here; this looks
// like the removed (1) and added (2) lines of the diff shown together.
// Presumably the later call is the one that takes effect - confirm.
.numDataNodes(1)
.numDataNodes(2)
.format(true)
.build();
}
@ -539,6 +549,117 @@ public class TestDistCpUtils {
Assert.assertTrue(srcStatus.getReplication() == dstStatus.getReplication());
}
/**
 * Exercises the replication-preserve path for the three combinations in
 * which at least one of the source and target files is erasure coded:
 * EC source with replicated target, replicated source with EC target, and
 * EC source with EC target. Each combination is delegated to
 * {@code verifyReplFactorNotPreservedOnErasureCodedFile}.
 */
@Test (timeout = 60000)
public void testReplFactorNotPreservedOnErasureCodedFile() throws Exception {
  final FileSystem dfs = FileSystem.get(config);
  final String ecPolicyName = "XOR-2-1-64k";

  // Case 1: erasure coded source, replicated target. The replication
  // attribute must not be carried over.
  final Path ecSourceDir = new Path("/tmp/srcECDir");
  final Path ecSourceFile = new Path(ecSourceDir, "srcECFile");
  final Path replTargetDir = new Path("/tmp/dstReplDir");
  final Path replTargetFile = new Path(replTargetDir, "destReplFile");
  dfs.mkdirs(ecSourceDir);
  dfs.mkdirs(replTargetDir);
  final String[] sourcePolicyArgs = {
      "-setPolicy", "-path", "/tmp/srcECDir", "-policy", ecPolicyName};
  final int sourcePolicyExit =
      ToolRunner.run(config, new ECAdmin(config), sourcePolicyArgs);
  assertEquals("Setting EC policy should succeed!", 0, sourcePolicyExit);
  verifyReplFactorNotPreservedOnErasureCodedFile(
      ecSourceFile, true, replTargetFile, false);

  // Case 2: replicated source, erasure coded target. The replication
  // attribute must not be carried over.
  final Path replSourceDir = new Path("/tmp/srcReplDir");
  final Path replSourceFile = new Path(replSourceDir, "srcReplFile");
  final Path ecTargetDir = new Path("/tmp/dstECDir");
  final Path ecTargetFile = new Path(ecTargetDir, "destECFile");
  dfs.mkdirs(replSourceDir);
  dfs.mkdirs(ecTargetDir);
  final String[] targetPolicyArgs = {
      "-setPolicy", "-path", "/tmp/dstECDir", "-policy", ecPolicyName};
  final int targetPolicyExit =
      ToolRunner.run(config, new ECAdmin(config), targetPolicyArgs);
  assertEquals("Setting EC policy should succeed!", 0, targetPolicyExit);
  verifyReplFactorNotPreservedOnErasureCodedFile(
      replSourceFile, false, ecTargetFile, true);

  // Case 3: both source and target erasure coded. The replication
  // attribute must stay at INodeFile.DEFAULT_REPL_FOR_STRIPED_BLOCKS.
  verifyReplFactorNotPreservedOnErasureCodedFile(
      ecSourceFile, true, ecTargetFile, true);
}
/**
 * Creates {@code srcFile} and {@code dstFile}, makes their permission,
 * ownership and timestamps differ, runs {@code DistCpUtils.preserve} with
 * only {@code FileAttribute.REPLICATION} requested, and then asserts that
 * none of the other attributes were copied and that the target's
 * replication factor is unchanged.
 *
 * @param srcFile path of the source file to create
 * @param isSrcEC whether {@code srcFile} is expected to be erasure coded
 * @param dstFile path of the target file to create
 * @param isDstEC whether {@code dstFile} is expected to be erasure coded
 * @throws Exception if any file system operation or the preserve call fails
 */
private void verifyReplFactorNotPreservedOnErasureCodedFile(Path srcFile,
    boolean isSrcEC, Path dstFile, boolean isDstEC) throws Exception {
  FileSystem fs = FileSystem.get(config);

  // Create the source and check its initial EC state / replication factor.
  createFile(fs, srcFile);
  CopyListingFileStatus srcStatus = new CopyListingFileStatus(
      fs.getFileStatus(srcFile));
  if (isSrcEC) {
    assertTrue(srcFile + " should be erasure coded!",
        srcStatus.isErasureCoded());
    assertEquals("Unexpected replication factor for erasure coded file!",
        INodeFile.DEFAULT_REPL_FOR_STRIPED_BLOCKS,
        srcStatus.getReplication());
  } else {
    assertEquals("Unexpected replication factor for " + srcFile,
        fs.getDefaultReplication(srcFile), srcStatus.getReplication());
  }

  // Create the target and check its initial EC state / replication factor.
  createFile(fs, dstFile);
  CopyListingFileStatus dstStatus = new CopyListingFileStatus(
      fs.getFileStatus(dstFile));
  if (isDstEC) {
    assertTrue(dstFile + " should be erasure coded!",
        dstStatus.isErasureCoded());
    assertEquals("Unexpected replication factor for erasure coded file!",
        INodeFile.DEFAULT_REPL_FOR_STRIPED_BLOCKS,
        dstStatus.getReplication());
  } else {
    assertEquals("Unexpected replication factor for " + dstFile,
        fs.getDefaultReplication(dstFile), dstStatus.getReplication());
  }

  // Let srcFile and dstFile differ on their FileAttribute
  fs.setPermission(srcFile, fullPerm);
  fs.setOwner(srcFile, "ec", "ec-group");
  fs.setTimes(srcFile, 0, 0);

  fs.setPermission(dstFile, noPerm);
  fs.setOwner(dstFile, "normal", "normal-group");
  fs.setTimes(dstFile, 100, 100);

  // Running preserve operations only for replication attribute
  srcStatus = new CopyListingFileStatus(fs.getFileStatus(srcFile));
  EnumSet<FileAttribute> attributes = EnumSet.of(FileAttribute.REPLICATION);
  DistCpUtils.preserve(fs, dstFile, srcStatus, attributes, false);
  dstStatus = new CopyListingFileStatus(fs.getFileStatus(dstFile));

  // Attributes that were not requested must still differ.
  assertFalse("Permission for " + srcFile + " and " + dstFile +
      " should not be same after preserve only for replication attr!",
      srcStatus.getPermission().equals(dstStatus.getPermission()));
  assertFalse("File ownership should not match!",
      srcStatus.getOwner().equals(dstStatus.getOwner()));
  assertFalse("File group should not match!",
      srcStatus.getGroup().equals(dstStatus.getGroup()));
  assertFalse("Access time should not match!",
      srcStatus.getAccessTime() == dstStatus.getAccessTime());
  assertFalse("Modification time should not match!",
      srcStatus.getModificationTime() == dstStatus.getModificationTime());

  // The target's replication factor must be what it was before preserve.
  if (isDstEC) {
    assertEquals("Unexpected replication factor for erasure coded file!",
        INodeFile.DEFAULT_REPL_FOR_STRIPED_BLOCKS,
        dstStatus.getReplication());
  } else {
    assertEquals(dstFile + " replication factor should be same as dst " +
        "filesystem!", fs.getDefaultReplication(dstFile),
        dstStatus.getReplication());
  }
  // Only when exactly one side (or neither) is erasure coded are the two
  // replication factors expected to differ; two EC files share the same one.
  if (!isSrcEC || !isDstEC) {
    assertFalse(dstFile + " replication factor should not be " +
        "same as " + srcFile,
        srcStatus.getReplication() == dstStatus.getReplication());
  }
}
@Test
public void testPreserveTimestampOnFile() throws IOException {
FileSystem fs = FileSystem.get(config);