HADOOP-17122: Preserving Directory Attributes in DistCp with Atomic Copy (#2133)

Contributed by Swaminathan Balachandran
This commit is contained in:
swamirishi 2020-08-22 23:18:21 +05:30 committed by GitHub
parent f734455e5d
commit 872c2909bd
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 59 additions and 16 deletions

View File

@ -318,8 +318,10 @@ public class CopyCommitter extends FileOutputCommitter {
SequenceFile.Reader sourceReader = new SequenceFile.Reader(conf, SequenceFile.Reader sourceReader = new SequenceFile.Reader(conf,
SequenceFile.Reader.file(sourceListing)); SequenceFile.Reader.file(sourceListing));
long totalLen = clusterFS.getFileStatus(sourceListing).getLen(); long totalLen = clusterFS.getFileStatus(sourceListing).getLen();
// For Atomic Copy the Final & Work Path are different & atomic copy has
Path targetRoot = new Path(conf.get(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH)); // already moved it to final path.
Path targetRoot =
new Path(conf.get(DistCpConstants.CONF_LABEL_TARGET_FINAL_PATH));
long preservedEntries = 0; long preservedEntries = 0;
try { try {

View File

@ -53,6 +53,8 @@ import java.io.IOException;
import java.util.*; import java.util.*;
import static org.apache.hadoop.fs.contract.ContractTestUtils.*; import static org.apache.hadoop.fs.contract.ContractTestUtils.*;
import static org.apache.hadoop.tools.DistCpConstants.CONF_LABEL_TARGET_FINAL_PATH;
import static org.apache.hadoop.tools.DistCpConstants.CONF_LABEL_TARGET_WORK_PATH;
import static org.apache.hadoop.tools.util.TestDistCpUtils.*; import static org.apache.hadoop.tools.util.TestDistCpUtils.*;
public class TestCopyCommitter { public class TestCopyCommitter {
@ -160,10 +162,10 @@ public class TestCopyCommitter {
context.setTargetPathExists(false); context.setTargetPathExists(false);
CopyListing listing = new GlobbedCopyListing(conf, CREDENTIALS); CopyListing listing = new GlobbedCopyListing(conf, CREDENTIALS);
Path listingFile = new Path("/tmp1/" + String.valueOf(rand.nextLong())); Path listingFile = new Path("/tmp1/" + rand.nextLong());
listing.buildListing(listingFile, context); listing.buildListing(listingFile, context);
conf.set(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH, targetBase); conf.set(CONF_LABEL_TARGET_FINAL_PATH, targetBase);
committer.commitJob(jobContext); committer.commitJob(jobContext);
checkDirectoryPermissions(fs, targetBase, sourcePerm); checkDirectoryPermissions(fs, targetBase, sourcePerm);
@ -179,6 +181,45 @@ public class TestCopyCommitter {
} }
/**
 * Checks that directory permissions are preserved on the final target path
 * when the job is committed with atomic copy enabled.
 *
 * The source tree is created with permission 0777 and the work tree with
 * 0700; the work and final paths are configured as different locations.
 * After {@code commitJob} the directories under the final path are expected
 * to carry the source permission.
 *
 * @throws IOException if test setup, the commit, or the permission check
 *                     fails with an I/O error
 */
@Test
public void testPreserveStatusWithAtomicCommit() throws IOException {
  TaskAttemptContext taskAttemptContext = getTaskAttemptContext(config);
  JobContext jobContext = new JobContextImpl(
      taskAttemptContext.getConfiguration(),
      taskAttemptContext.getTaskAttemptID().getJobID());
  Configuration conf = jobContext.getConfiguration();
  FileSystem fs = null;
  try {
    OutputCommitter committer = new CopyCommitter(null, taskAttemptContext);
    fs = FileSystem.get(conf);

    // Octal literals keep the permission bits readable:
    // 0777 == 511 (rwxrwxrwx), 0700 == 448 (rwx------).
    FsPermission sourcePerm = new FsPermission((short) 0777);
    FsPermission initialPerm = new FsPermission((short) 0700);

    String sourceBase = TestDistCpUtils.createTestSetup(fs, sourcePerm);
    String workBase = TestDistCpUtils.createTestSetup(fs, initialPerm);
    // The final path is distinct from the work path and lives under /tmp1
    // so that the cleanup in the finally block removes it.
    String targetBase = "/tmp1/" + rand.nextLong();

    final DistCpOptions options = new DistCpOptions.Builder(
        Collections.singletonList(new Path(sourceBase)), new Path("/out"))
        .preserve(FileAttribute.PERMISSION).build();
    options.appendToConf(conf);
    final DistCpContext context = new DistCpContext(options);
    context.setTargetPathExists(false);

    CopyListing listing = new GlobbedCopyListing(conf, CREDENTIALS);
    Path listingFile = new Path("/tmp1/" + rand.nextLong());
    listing.buildListing(listingFile, context);

    conf.set(CONF_LABEL_TARGET_FINAL_PATH, targetBase);
    conf.set(CONF_LABEL_TARGET_WORK_PATH, workBase);
    conf.setBoolean(DistCpConstants.CONF_LABEL_ATOMIC_COPY, true);

    committer.commitJob(jobContext);
    // Permissions must be checked on the final path, not the work path.
    checkDirectoryPermissions(fs, targetBase, sourcePerm);
  } finally {
    TestDistCpUtils.delete(fs, "/tmp1");
    conf.unset(DistCpConstants.CONF_LABEL_PRESERVE_STATUS);
    // Clear the atomic-copy flag as well so it cannot leak into other tests
    // if this Configuration instance is shared between them.
    conf.unset(DistCpConstants.CONF_LABEL_ATOMIC_COPY);
  }
}
@Test @Test
public void testDeleteMissing() throws IOException { public void testDeleteMissing() throws IOException {
TaskAttemptContext taskAttemptContext = getTaskAttemptContext(config); TaskAttemptContext taskAttemptContext = getTaskAttemptContext(config);
@ -207,8 +248,8 @@ public class TestCopyCommitter {
Path listingFile = new Path("/tmp1/" + String.valueOf(rand.nextLong())); Path listingFile = new Path("/tmp1/" + String.valueOf(rand.nextLong()));
listing.buildListing(listingFile, context); listing.buildListing(listingFile, context);
conf.set(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH, targetBase); conf.set(CONF_LABEL_TARGET_WORK_PATH, targetBase);
conf.set(DistCpConstants.CONF_LABEL_TARGET_FINAL_PATH, targetBase); conf.set(CONF_LABEL_TARGET_FINAL_PATH, targetBase);
committer.commitJob(jobContext); committer.commitJob(jobContext);
verifyFoldersAreInSync(fs, targetBase, sourceBase); verifyFoldersAreInSync(fs, targetBase, sourceBase);
@ -256,8 +297,8 @@ public class TestCopyCommitter {
Path listingFile = new Path("/tmp1/" + String.valueOf(rand.nextLong())); Path listingFile = new Path("/tmp1/" + String.valueOf(rand.nextLong()));
listing.buildListing(listingFile, context); listing.buildListing(listingFile, context);
conf.set(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH, targetBase); conf.set(CONF_LABEL_TARGET_WORK_PATH, targetBase);
conf.set(DistCpConstants.CONF_LABEL_TARGET_FINAL_PATH, targetBase); conf.set(CONF_LABEL_TARGET_FINAL_PATH, targetBase);
Path sourceListing = new Path( Path sourceListing = new Path(
conf.get(DistCpConstants.CONF_LABEL_LISTING_FILE_PATH)); conf.get(DistCpConstants.CONF_LABEL_LISTING_FILE_PATH));
@ -320,8 +361,8 @@ public class TestCopyCommitter {
Path listingFile = new Path("/tmp1/" + String.valueOf(rand.nextLong())); Path listingFile = new Path("/tmp1/" + String.valueOf(rand.nextLong()));
listing.buildListing(listingFile, context); listing.buildListing(listingFile, context);
conf.set(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH, targetBase); conf.set(CONF_LABEL_TARGET_WORK_PATH, targetBase);
conf.set(DistCpConstants.CONF_LABEL_TARGET_FINAL_PATH, targetBase); conf.set(CONF_LABEL_TARGET_FINAL_PATH, targetBase);
committer.commitJob(jobContext); committer.commitJob(jobContext);
verifyFoldersAreInSync(fs, targetBase, sourceBase); verifyFoldersAreInSync(fs, targetBase, sourceBase);
@ -353,8 +394,8 @@ public class TestCopyCommitter {
fs = FileSystem.get(conf); fs = FileSystem.get(conf);
fs.mkdirs(new Path(workPath)); fs.mkdirs(new Path(workPath));
conf.set(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH, workPath); conf.set(CONF_LABEL_TARGET_WORK_PATH, workPath);
conf.set(DistCpConstants.CONF_LABEL_TARGET_FINAL_PATH, finalPath); conf.set(CONF_LABEL_TARGET_FINAL_PATH, finalPath);
conf.setBoolean(DistCpConstants.CONF_LABEL_ATOMIC_COPY, true); conf.setBoolean(DistCpConstants.CONF_LABEL_ATOMIC_COPY, true);
assertPathExists(fs, "Work path", new Path(workPath)); assertPathExists(fs, "Work path", new Path(workPath));
@ -391,8 +432,8 @@ public class TestCopyCommitter {
fs.mkdirs(new Path(workPath)); fs.mkdirs(new Path(workPath));
fs.mkdirs(new Path(finalPath)); fs.mkdirs(new Path(finalPath));
conf.set(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH, workPath); conf.set(CONF_LABEL_TARGET_WORK_PATH, workPath);
conf.set(DistCpConstants.CONF_LABEL_TARGET_FINAL_PATH, finalPath); conf.set(CONF_LABEL_TARGET_FINAL_PATH, finalPath);
conf.setBoolean(DistCpConstants.CONF_LABEL_ATOMIC_COPY, true); conf.setBoolean(DistCpConstants.CONF_LABEL_ATOMIC_COPY, true);
assertPathExists(fs, "Work path", new Path(workPath)); assertPathExists(fs, "Work path", new Path(workPath));
@ -463,8 +504,8 @@ public class TestCopyCommitter {
+ String.valueOf(rand.nextLong())); + String.valueOf(rand.nextLong()));
listing.buildListing(listingFile, context); listing.buildListing(listingFile, context);
conf.set(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH, targetBase); conf.set(CONF_LABEL_TARGET_WORK_PATH, targetBase);
conf.set(DistCpConstants.CONF_LABEL_TARGET_FINAL_PATH, targetBase); conf.set(CONF_LABEL_TARGET_FINAL_PATH, targetBase);
OutputCommitter committer = new CopyCommitter( OutputCommitter committer = new CopyCommitter(
null, taskAttemptContext); null, taskAttemptContext);