HADOOP-16932. distcp copy calls getFileStatus() needlessly and can fail against S3 (#1936)
Contributed by Steve Loughran. This strips out all the -p preservation options which have already been processed when uploading a file before deciding whether or not to query the far end for the status of the (existing/uploaded) file to see if any other attributes need changing. This will avoid 404 caching-related issues in S3, wherein a newly created file can have a 404 entry in the S3 load balancer's cache from the probes for the file's existence prior to the upload. It partially addresses a regression caused by HADOOP-8143, "Change distcp to have -pb on by default" that causes a resurfacing of HADOOP-13145, "In DistCp, prevent unnecessary getFileStatus call when not preserving metadata" Change-Id: Ibc25d19e92548e6165eb8397157ebf89446333f7
This commit is contained in:
parent
749a5b81da
commit
e4331a73c9
|
@ -199,6 +199,9 @@ public class DistCpUtils {
|
||||||
EnumSet<FileAttribute> attributes,
|
EnumSet<FileAttribute> attributes,
|
||||||
boolean preserveRawXattrs) throws IOException {
|
boolean preserveRawXattrs) throws IOException {
|
||||||
|
|
||||||
|
// strip out those attributes we don't need any more
|
||||||
|
attributes.remove(FileAttribute.BLOCKSIZE);
|
||||||
|
attributes.remove(FileAttribute.CHECKSUMTYPE);
|
||||||
// If not preserving anything from FileStatus, don't bother fetching it.
|
// If not preserving anything from FileStatus, don't bother fetching it.
|
||||||
FileStatus targetFileStatus = attributes.isEmpty() ? null :
|
FileStatus targetFileStatus = attributes.isEmpty() ? null :
|
||||||
targetFS.getFileStatus(path);
|
targetFS.getFileStatus(path);
|
||||||
|
|
|
@ -45,6 +45,7 @@ import org.junit.Test;
|
||||||
|
|
||||||
import com.google.common.collect.Lists;
|
import com.google.common.collect.Lists;
|
||||||
|
|
||||||
|
import java.io.FileNotFoundException;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.OutputStream;
|
import java.io.OutputStream;
|
||||||
import java.util.EnumSet;
|
import java.util.EnumSet;
|
||||||
|
@ -66,6 +67,7 @@ import static org.apache.hadoop.hdfs.server.namenode.AclTestHelpers.aclEntry;
|
||||||
import static org.apache.hadoop.test.LambdaTestUtils.intercept;
|
import static org.apache.hadoop.test.LambdaTestUtils.intercept;
|
||||||
import static org.junit.Assert.assertEquals;
|
import static org.junit.Assert.assertEquals;
|
||||||
import static org.junit.Assert.assertFalse;
|
import static org.junit.Assert.assertFalse;
|
||||||
|
import static org.junit.Assert.assertNotEquals;
|
||||||
import static org.junit.Assert.assertTrue;
|
import static org.junit.Assert.assertTrue;
|
||||||
import static org.assertj.core.api.Assertions.assertThat;
|
import static org.assertj.core.api.Assertions.assertThat;
|
||||||
|
|
||||||
|
@ -191,15 +193,95 @@ public class TestDistCpUtils {
|
||||||
|
|
||||||
DistCpUtils.preserve(fs, dst, srcStatus, attributes, false);
|
DistCpUtils.preserve(fs, dst, srcStatus, attributes, false);
|
||||||
|
|
||||||
CopyListingFileStatus dstStatus = new CopyListingFileStatus(fs.getFileStatus(dst));
|
assertStatusEqual(fs, dst, srcStatus);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void assertStatusEqual(final FileSystem fs,
|
||||||
|
final Path dst,
|
||||||
|
final CopyListingFileStatus srcStatus) throws IOException {
|
||||||
|
FileStatus destStatus = fs.getFileStatus(dst);
|
||||||
|
CopyListingFileStatus dstStatus = new CopyListingFileStatus(
|
||||||
|
destStatus);
|
||||||
|
|
||||||
|
String text = String.format("Source %s; dest %s: wrong ", srcStatus,
|
||||||
|
destStatus);
|
||||||
|
|
||||||
// FileStatus.equals only compares path field, must explicitly compare all fields
|
// FileStatus.equals only compares path field, must explicitly compare all fields
|
||||||
Assert.assertTrue(srcStatus.getPermission().equals(dstStatus.getPermission()));
|
assertEquals(text + "permission",
|
||||||
Assert.assertTrue(srcStatus.getOwner().equals(dstStatus.getOwner()));
|
srcStatus.getPermission(), dstStatus.getPermission());
|
||||||
Assert.assertTrue(srcStatus.getGroup().equals(dstStatus.getGroup()));
|
assertEquals(text + "owner",
|
||||||
Assert.assertTrue(srcStatus.getAccessTime() == dstStatus.getAccessTime());
|
srcStatus.getOwner(), dstStatus.getOwner());
|
||||||
Assert.assertTrue(srcStatus.getModificationTime() == dstStatus.getModificationTime());
|
assertEquals(text + "group",
|
||||||
Assert.assertTrue(srcStatus.getReplication() == dstStatus.getReplication());
|
srcStatus.getGroup(), dstStatus.getGroup());
|
||||||
|
assertEquals(text + "accessTime",
|
||||||
|
srcStatus.getAccessTime(), dstStatus.getAccessTime());
|
||||||
|
assertEquals(text + "modificationTime",
|
||||||
|
srcStatus.getModificationTime(), dstStatus.getModificationTime());
|
||||||
|
assertEquals(text + "replication",
|
||||||
|
srcStatus.getReplication(), dstStatus.getReplication());
|
||||||
|
}
|
||||||
|
|
||||||
|
private void assertStatusNotEqual(final FileSystem fs,
|
||||||
|
final Path dst,
|
||||||
|
final CopyListingFileStatus srcStatus) throws IOException {
|
||||||
|
FileStatus destStatus = fs.getFileStatus(dst);
|
||||||
|
CopyListingFileStatus dstStatus = new CopyListingFileStatus(
|
||||||
|
destStatus);
|
||||||
|
|
||||||
|
String text = String.format("Source %s; dest %s: wrong ",
|
||||||
|
srcStatus, destStatus);
|
||||||
|
// FileStatus.equals only compares path field,
|
||||||
|
// must explicitly compare all fields
|
||||||
|
assertNotEquals(text + "permission",
|
||||||
|
srcStatus.getPermission(), dstStatus.getPermission());
|
||||||
|
assertNotEquals(text + "owner",
|
||||||
|
srcStatus.getOwner(), dstStatus.getOwner());
|
||||||
|
assertNotEquals(text + "group",
|
||||||
|
srcStatus.getGroup(), dstStatus.getGroup());
|
||||||
|
assertNotEquals(text + "accessTime",
|
||||||
|
srcStatus.getAccessTime(), dstStatus.getAccessTime());
|
||||||
|
assertNotEquals(text + "modificationTime",
|
||||||
|
srcStatus.getModificationTime(), dstStatus.getModificationTime());
|
||||||
|
assertNotEquals(text + "replication",
|
||||||
|
srcStatus.getReplication(), dstStatus.getReplication());
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testSkipsNeedlessAttributes() throws Exception {
|
||||||
|
FileSystem fs = FileSystem.get(config);
|
||||||
|
|
||||||
|
// preserve replication, block size, user, group, permission,
|
||||||
|
// checksum type and timestamps
|
||||||
|
|
||||||
|
Path src = new Path("/tmp/testSkipsNeedlessAttributes/source");
|
||||||
|
Path dst = new Path("/tmp/testSkipsNeedlessAttributes/dest");
|
||||||
|
|
||||||
|
// there is no need to actually create a source file, just a file
|
||||||
|
// status of one
|
||||||
|
CopyListingFileStatus srcStatus = new CopyListingFileStatus(
|
||||||
|
new FileStatus(0, false, 1, 32, 0, src));
|
||||||
|
|
||||||
|
// if an attribute is needed, preserve will fail to find the file
|
||||||
|
EnumSet<FileAttribute> attrs = EnumSet.of(FileAttribute.ACL,
|
||||||
|
FileAttribute.GROUP,
|
||||||
|
FileAttribute.PERMISSION,
|
||||||
|
FileAttribute.TIMES,
|
||||||
|
FileAttribute.XATTR);
|
||||||
|
for (FileAttribute attr : attrs) {
|
||||||
|
intercept(FileNotFoundException.class, () ->
|
||||||
|
DistCpUtils.preserve(fs, dst, srcStatus,
|
||||||
|
EnumSet.of(attr),
|
||||||
|
false));
|
||||||
|
}
|
||||||
|
|
||||||
|
// but with the preservation flags only used
|
||||||
|
// in file creation, this does not happen
|
||||||
|
DistCpUtils.preserve(fs, dst, srcStatus,
|
||||||
|
EnumSet.of(
|
||||||
|
FileAttribute.BLOCKSIZE,
|
||||||
|
FileAttribute.CHECKSUMTYPE),
|
||||||
|
false);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
@ -258,16 +340,8 @@ public class TestDistCpUtils {
|
||||||
|
|
||||||
// FileStatus.equals only compares path field, must explicitly compare all
|
// FileStatus.equals only compares path field, must explicitly compare all
|
||||||
// fields
|
// fields
|
||||||
Assert.assertEquals("getPermission", srcStatus.getPermission(),
|
assertStatusEqual(fs, dest, srcStatus);
|
||||||
dstStatus.getPermission());
|
|
||||||
Assert.assertEquals("Owner", srcStatus.getOwner(), dstStatus.getOwner());
|
|
||||||
Assert.assertEquals("Group", srcStatus.getGroup(), dstStatus.getGroup());
|
|
||||||
Assert.assertEquals("AccessTime", srcStatus.getAccessTime(),
|
|
||||||
dstStatus.getAccessTime());
|
|
||||||
Assert.assertEquals("ModificationTime", srcStatus.getModificationTime(),
|
|
||||||
dstStatus.getModificationTime());
|
|
||||||
Assert.assertEquals("Replication", srcStatus.getReplication(),
|
|
||||||
dstStatus.getReplication());
|
|
||||||
Assert.assertArrayEquals(en1.toArray(), dd2.toArray());
|
Assert.assertArrayEquals(en1.toArray(), dd2.toArray());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -486,12 +560,7 @@ public class TestDistCpUtils {
|
||||||
CopyListingFileStatus dstStatus = new CopyListingFileStatus(fs.getFileStatus(dst));
|
CopyListingFileStatus dstStatus = new CopyListingFileStatus(fs.getFileStatus(dst));
|
||||||
|
|
||||||
// FileStatus.equals only compares path field, must explicitly compare all fields
|
// FileStatus.equals only compares path field, must explicitly compare all fields
|
||||||
Assert.assertFalse(srcStatus.getPermission().equals(dstStatus.getPermission()));
|
assertStatusNotEqual(fs, dst, srcStatus);
|
||||||
Assert.assertFalse(srcStatus.getOwner().equals(dstStatus.getOwner()));
|
|
||||||
Assert.assertFalse(srcStatus.getGroup().equals(dstStatus.getGroup()));
|
|
||||||
Assert.assertFalse(srcStatus.getAccessTime() == dstStatus.getAccessTime());
|
|
||||||
Assert.assertFalse(srcStatus.getModificationTime() == dstStatus.getModificationTime());
|
|
||||||
Assert.assertFalse(srcStatus.getReplication() == dstStatus.getReplication());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
@ -842,13 +911,7 @@ public class TestDistCpUtils {
|
||||||
|
|
||||||
// FileStatus.equals only compares path field, must explicitly compare all fields
|
// FileStatus.equals only compares path field, must explicitly compare all fields
|
||||||
// attributes of src -> f2 ? should be yes
|
// attributes of src -> f2 ? should be yes
|
||||||
CopyListingFileStatus f2Status = new CopyListingFileStatus(fs.getFileStatus(f2));
|
assertStatusEqual(fs, f2, srcStatus);
|
||||||
Assert.assertTrue(srcStatus.getPermission().equals(f2Status.getPermission()));
|
|
||||||
Assert.assertTrue(srcStatus.getOwner().equals(f2Status.getOwner()));
|
|
||||||
Assert.assertTrue(srcStatus.getGroup().equals(f2Status.getGroup()));
|
|
||||||
Assert.assertTrue(srcStatus.getAccessTime() == f2Status.getAccessTime());
|
|
||||||
Assert.assertTrue(srcStatus.getModificationTime() == f2Status.getModificationTime());
|
|
||||||
Assert.assertTrue(srcStatus.getReplication() == f2Status.getReplication());
|
|
||||||
|
|
||||||
// attributes of src -> f1 ? should be no
|
// attributes of src -> f1 ? should be no
|
||||||
CopyListingFileStatus f1Status = new CopyListingFileStatus(fs.getFileStatus(f1));
|
CopyListingFileStatus f1Status = new CopyListingFileStatus(fs.getFileStatus(f1));
|
||||||
|
@ -1047,13 +1110,7 @@ public class TestDistCpUtils {
|
||||||
|
|
||||||
// FileStatus.equals only compares path field, must explicitly compare all fields
|
// FileStatus.equals only compares path field, must explicitly compare all fields
|
||||||
// attributes of src -> f0 ? should be yes
|
// attributes of src -> f0 ? should be yes
|
||||||
CopyListingFileStatus f0Status = new CopyListingFileStatus(fs.getFileStatus(f0));
|
assertStatusEqual(fs, f0, srcStatus);
|
||||||
Assert.assertTrue(srcStatus.getPermission().equals(f0Status.getPermission()));
|
|
||||||
Assert.assertTrue(srcStatus.getOwner().equals(f0Status.getOwner()));
|
|
||||||
Assert.assertTrue(srcStatus.getGroup().equals(f0Status.getGroup()));
|
|
||||||
Assert.assertTrue(srcStatus.getAccessTime() == f0Status.getAccessTime());
|
|
||||||
Assert.assertTrue(srcStatus.getModificationTime() == f0Status.getModificationTime());
|
|
||||||
Assert.assertTrue(srcStatus.getReplication() == f0Status.getReplication());
|
|
||||||
|
|
||||||
// attributes of src -> f1 ? should be no
|
// attributes of src -> f1 ? should be no
|
||||||
CopyListingFileStatus f1Status = new CopyListingFileStatus(fs.getFileStatus(f1));
|
CopyListingFileStatus f1Status = new CopyListingFileStatus(fs.getFileStatus(f1));
|
||||||
|
|
Loading…
Reference in New Issue