HADOOP-16635. S3A "directories only" scan still does a HEAD.
Contributed by Steve Loughran. Change-Id: I5e41d7f721364c392e1f4344db83dfa8c5aa06ce
This commit is contained in:
parent
dee9e97075
commit
74e5018d87
@ -2610,7 +2610,7 @@ public FileStatus getFileStatus(final Path f) throws IOException {
|
||||
* @param f The path we want information from
|
||||
* @param needEmptyDirectoryFlag if true, implementation will calculate
|
||||
* a TRUE or FALSE value for {@link S3AFileStatus#isEmptyDirectory()}
|
||||
* @param probes probes to make
|
||||
* @param probes probes to make.
|
||||
* @return a S3AFileStatus object
|
||||
* @throws FileNotFoundException when the path does not exist
|
||||
* @throws IOException on other problems.
|
||||
@ -2711,7 +2711,7 @@ S3AFileStatus innerGetFileStatus(final Path f,
|
||||
// there was no entry in S3Guard
|
||||
// retrieve the data and update the metadata store in the process.
|
||||
return S3Guard.putAndReturn(metadataStore,
|
||||
s3GetFileStatus(path, key, StatusProbeEnum.ALL, tombstones),
|
||||
s3GetFileStatus(path, key, probes, tombstones),
|
||||
instrumentation,
|
||||
ttlTimeProvider);
|
||||
}
|
||||
@ -2719,31 +2719,70 @@ S3AFileStatus innerGetFileStatus(final Path f,
|
||||
|
||||
/**
|
||||
* Raw {@code getFileStatus} that talks direct to S3.
|
||||
* Used to implement {@link #innerGetFileStatus(Path, boolean)},
|
||||
* Used to implement {@link #innerGetFileStatus(Path, boolean, Set)},
|
||||
* and for direct management of empty directory blobs.
|
||||
*
|
||||
* Checks made, in order:
|
||||
* <ol>
|
||||
* <li>
|
||||
* Head: look for an object at the given key, provided that
|
||||
* the key doesn't end in "/"
|
||||
* </li>
|
||||
* <li>
|
||||
* DirMarker: look for the directory marker -the key with a trailing /
|
||||
* if not passed in.
|
||||
* If an object was found with size 0 bytes, a directory status entry
|
||||
* is returned which declares that the directory is empty.
|
||||
* </li>
|
||||
* <li>
|
||||
* List: issue a LIST on the key (with / if needed), require one
|
||||
* entry to be found for the path to be considered a non-empty directory.
|
||||
* </li>
|
||||
* </ol>
|
||||
*
|
||||
* Notes:
|
||||
* <ul>
|
||||
* <li>
|
||||
* Objects ending in / which are not 0-bytes long are not treated as
|
||||
* directory markers, but instead as files.
|
||||
* </li>
|
||||
* <li>
|
||||
* There's ongoing discussions about whether a dir marker
|
||||
* should be interpreted as an empty dir.
|
||||
* </li>
|
||||
* <li>
|
||||
* The HEAD requests require the permissions to read an object,
|
||||
* including (we believe) the ability to decrypt the file.
|
||||
* At the very least, for SSE-C markers, you need the same key on
|
||||
* the client for the HEAD to work.
|
||||
* </li>
|
||||
* <li>
|
||||
* The List probe needs list permission; it is also more prone to
|
||||
* inconsistency, even on newly created files.
|
||||
* </li>
|
||||
* </ul>
|
||||
*
|
||||
* Retry policy: retry translated.
|
||||
* @param path Qualified path
|
||||
* @param key Key string for the path
|
||||
* @param probes probes to make
|
||||
* @param tombstones tombstones to filter
|
||||
* @return Status
|
||||
* @throws FileNotFoundException when the path does not exist
|
||||
* @throws FileNotFoundException the supplied probes failed.
|
||||
* @throws IOException on other problems.
|
||||
*/
|
||||
@VisibleForTesting
|
||||
@Retries.RetryTranslated
|
||||
private S3AFileStatus s3GetFileStatus(final Path path,
|
||||
String key,
|
||||
S3AFileStatus s3GetFileStatus(final Path path,
|
||||
final String key,
|
||||
final Set<StatusProbeEnum> probes,
|
||||
final Set<Path> tombstones) throws IOException {
|
||||
if (!key.isEmpty() && probes.contains(StatusProbeEnum.Head)) {
|
||||
try {
|
||||
ObjectMetadata meta = getObjectMetadata(key);
|
||||
|
||||
if (objectRepresentsDirectory(key, meta.getContentLength())) {
|
||||
LOG.debug("Found exact file: fake directory");
|
||||
return new S3AFileStatus(Tristate.TRUE, path, username);
|
||||
} else {
|
||||
LOG.debug("Found exact file: normal file");
|
||||
if (!key.isEmpty()) {
|
||||
if (probes.contains(StatusProbeEnum.Head) && !key.endsWith("/")) {
|
||||
try {
|
||||
// look for the simple file
|
||||
ObjectMetadata meta = getObjectMetadata(key);
|
||||
LOG.debug("Found exact file: normal file {}", key);
|
||||
return new S3AFileStatus(meta.getContentLength(),
|
||||
dateToLong(meta.getLastModified()),
|
||||
path,
|
||||
@ -2751,18 +2790,22 @@ private S3AFileStatus s3GetFileStatus(final Path path,
|
||||
username,
|
||||
meta.getETag(),
|
||||
meta.getVersionId());
|
||||
}
|
||||
} catch (AmazonServiceException e) {
|
||||
if (e.getStatusCode() != SC_404) {
|
||||
} catch (AmazonServiceException e) {
|
||||
// if the response is a 404 error, it just means that there is
|
||||
// no file at that path...the remaining checks will be needed.
|
||||
if (e.getStatusCode() != SC_404) {
|
||||
throw translateException("getFileStatus", path, e);
|
||||
}
|
||||
} catch (AmazonClientException e) {
|
||||
throw translateException("getFileStatus", path, e);
|
||||
}
|
||||
} catch (AmazonClientException e) {
|
||||
throw translateException("getFileStatus", path, e);
|
||||
}
|
||||
|
||||
// Either a normal file was not found or the probe was skipped.
|
||||
// because the key ended in "/" or it was not in the set of probes.
|
||||
// Look for the dir marker
|
||||
if (!key.endsWith("/") && probes.contains(StatusProbeEnum.DirMarker)) {
|
||||
String newKey = key + "/";
|
||||
if (probes.contains(StatusProbeEnum.DirMarker)) {
|
||||
String newKey = maybeAddTrailingSlash(key);
|
||||
try {
|
||||
ObjectMetadata meta = getObjectMetadata(newKey);
|
||||
|
||||
@ -2794,8 +2837,8 @@ private S3AFileStatus s3GetFileStatus(final Path path,
|
||||
// execute the list
|
||||
if (probes.contains(StatusProbeEnum.List)) {
|
||||
try {
|
||||
key = maybeAddTrailingSlash(key);
|
||||
S3ListRequest request = createListObjectsRequest(key, "/", 1);
|
||||
String dirKey = maybeAddTrailingSlash(key);
|
||||
S3ListRequest request = createListObjectsRequest(dirKey, "/", 1);
|
||||
|
||||
S3ListResult objects = listObjects(request);
|
||||
|
||||
|
@ -41,4 +41,20 @@ public enum StatusProbeEnum {
|
||||
public static final Set<StatusProbeEnum> DIRECTORIES =
|
||||
EnumSet.of(DirMarker, List);
|
||||
|
||||
/** We only want the HEAD or dir marker. */
|
||||
public static final Set<StatusProbeEnum> HEAD_OR_DIR_MARKER =
|
||||
EnumSet.of(Head, DirMarker);
|
||||
|
||||
/** We only want the HEAD. */
|
||||
public static final Set<StatusProbeEnum> HEAD_ONLY =
|
||||
EnumSet.of(Head);
|
||||
|
||||
/** We only want the dir marker. */
|
||||
public static final Set<StatusProbeEnum> DIR_MARKER_ONLY =
|
||||
EnumSet.of(DirMarker);
|
||||
|
||||
/** We only want the dir marker. */
|
||||
public static final Set<StatusProbeEnum> LIST_ONLY =
|
||||
EnumSet.of(List);
|
||||
|
||||
}
|
||||
|
@ -84,14 +84,18 @@ public void setup() throws Exception {
|
||||
|
||||
private void cleanUpFS(S3AFileSystem fs) {
|
||||
// detach from the (shared) metadata store.
|
||||
fs.setMetadataStore(new NullMetadataStore());
|
||||
if (fs != null) {
|
||||
fs.setMetadataStore(new NullMetadataStore());
|
||||
}
|
||||
|
||||
IOUtils.cleanupWithLogger(LOG, fs);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void teardown() throws Exception {
|
||||
fullyAuthFS.delete(testRoot, true);
|
||||
if (fullyAuthFS != null) {
|
||||
fullyAuthFS.delete(testRoot, true);
|
||||
}
|
||||
|
||||
cleanUpFS(fullyAuthFS);
|
||||
cleanUpFS(rawFS);
|
||||
|
@ -18,23 +18,32 @@
|
||||
|
||||
package org.apache.hadoop.fs.s3a;
|
||||
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.FSDataOutputStream;
|
||||
import org.apache.hadoop.fs.FileStatus;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hadoop.fs.contract.ContractTestUtils;
|
||||
import org.apache.hadoop.fs.s3a.impl.StatusProbeEnum;
|
||||
|
||||
import org.assertj.core.api.Assertions;
|
||||
import org.junit.Test;
|
||||
import org.junit.runner.RunWith;
|
||||
import org.junit.runners.Parameterized;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.net.URI;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collection;
|
||||
import java.util.EnumSet;
|
||||
import java.util.UUID;
|
||||
import java.util.concurrent.Callable;
|
||||
|
||||
import static org.apache.hadoop.fs.contract.ContractTestUtils.*;
|
||||
import static org.apache.hadoop.fs.s3a.Constants.S3_METADATA_STORE_IMPL;
|
||||
import static org.apache.hadoop.fs.s3a.Statistic.*;
|
||||
import static org.apache.hadoop.fs.s3a.S3ATestUtils.*;
|
||||
import static org.apache.hadoop.test.GenericTestUtils.getTestDir;
|
||||
@ -43,7 +52,9 @@
|
||||
/**
|
||||
* Use metrics to assert about the cost of file status queries.
|
||||
* {@link S3AFileSystem#getFileStatus(Path)}.
|
||||
* Parameterized on guarded vs raw.
|
||||
*/
|
||||
@RunWith(Parameterized.class)
|
||||
public class ITestS3AFileOperationCost extends AbstractS3ATestBase {
|
||||
|
||||
private MetricDiff metadataRequests;
|
||||
@ -52,9 +63,48 @@ public class ITestS3AFileOperationCost extends AbstractS3ATestBase {
|
||||
private static final Logger LOG =
|
||||
LoggerFactory.getLogger(ITestS3AFileOperationCost.class);
|
||||
|
||||
/**
|
||||
* Parameterization.
|
||||
*/
|
||||
@Parameterized.Parameters(name = "{0}")
|
||||
public static Collection<Object[]> params() {
|
||||
return Arrays.asList(new Object[][]{
|
||||
{"raw", false},
|
||||
{"guarded", true}
|
||||
});
|
||||
}
|
||||
|
||||
private final String name;
|
||||
|
||||
private final boolean s3guard;
|
||||
|
||||
public ITestS3AFileOperationCost(final String name, final boolean s3guard) {
|
||||
this.name = name;
|
||||
this.s3guard = s3guard;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Configuration createConfiguration() {
|
||||
Configuration conf = super.createConfiguration();
|
||||
String bucketName = getTestBucketName(conf);
|
||||
removeBucketOverrides(bucketName, conf,
|
||||
S3_METADATA_STORE_IMPL);
|
||||
if (!s3guard) {
|
||||
// in a raw run remove all s3guard settings
|
||||
removeBaseAndBucketOverrides(bucketName, conf,
|
||||
S3_METADATA_STORE_IMPL);
|
||||
}
|
||||
disableFilesystemCaching(conf);
|
||||
return conf;
|
||||
}
|
||||
@Override
|
||||
public void setup() throws Exception {
|
||||
super.setup();
|
||||
if (s3guard) {
|
||||
// s3guard is required for those test runs where any of the
|
||||
// guard options are set
|
||||
assumeS3GuardState(true, getConfiguration());
|
||||
}
|
||||
S3AFileSystem fs = getFileSystem();
|
||||
metadataRequests = new MetricDiff(fs, OBJECT_METADATA_REQUESTS);
|
||||
listRequests = new MetricDiff(fs, OBJECT_LIST_REQUESTS);
|
||||
@ -80,6 +130,19 @@ private void resetMetricDiffs() {
|
||||
reset(metadataRequests, listRequests);
|
||||
}
|
||||
|
||||
/**
|
||||
* Verify that the head and list calls match expectations,
|
||||
* then reset the counters ready for the next operation.
|
||||
* @param head expected HEAD count
|
||||
* @param list expected LIST count
|
||||
*/
|
||||
private void verifyOperationCount(int head, int list) {
|
||||
metadataRequests.assertDiffEquals(head);
|
||||
listRequests.assertDiffEquals(list);
|
||||
metadataRequests.reset();
|
||||
listRequests.reset();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCostOfGetFileStatusOnEmptyDir() throws Throwable {
|
||||
describe("performing getFileStatus on an empty directory");
|
||||
@ -205,12 +268,6 @@ public void testFakeDirectoryDeletion() throws Throwable {
|
||||
+ "clean up activity");
|
||||
S3AFileSystem fs = getFileSystem();
|
||||
|
||||
// As this test uses the s3 metrics to count the number of fake directory
|
||||
// operations, it depends on side effects happening internally. With
|
||||
// metadata store enabled, it is brittle to change. We disable this test
|
||||
// before the internal behavior w/ or w/o metadata store.
|
||||
// assumeFalse(fs.hasMetadataStore());
|
||||
|
||||
Path srcBaseDir = path("src");
|
||||
mkdirs(srcBaseDir);
|
||||
MetricDiff deleteRequests =
|
||||
@ -389,4 +446,75 @@ public String toString() {
|
||||
fs.delete(dest, false);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testDirProbes() throws Throwable {
|
||||
describe("Test directory probe cost -raw only");
|
||||
S3AFileSystem fs = getFileSystem();
|
||||
assume("Unguarded FS only", !fs.hasMetadataStore());
|
||||
String dir = "testEmptyDirHeadProbe";
|
||||
Path emptydir = path(dir);
|
||||
// Create the empty directory.
|
||||
fs.mkdirs(emptydir);
|
||||
|
||||
// metrics and assertions.
|
||||
resetMetricDiffs();
|
||||
|
||||
intercept(FileNotFoundException.class, () ->
|
||||
fs.innerGetFileStatus(emptydir, false,
|
||||
StatusProbeEnum.HEAD_ONLY));
|
||||
verifyOperationCount(1, 0);
|
||||
|
||||
// a LIST will find it -but it doesn't consider it an empty dir.
|
||||
S3AFileStatus status = fs.innerGetFileStatus(emptydir, true,
|
||||
StatusProbeEnum.LIST_ONLY);
|
||||
verifyOperationCount(0, 1);
|
||||
Assertions.assertThat(status)
|
||||
.describedAs("LIST output is not considered empty")
|
||||
.matches(s -> !s.isEmptyDirectory().equals(Tristate.TRUE), "is empty");
|
||||
|
||||
// finally, skip all probes and expect no operations toThere are
|
||||
// take place
|
||||
intercept(FileNotFoundException.class, () ->
|
||||
fs.innerGetFileStatus(emptydir, false,
|
||||
EnumSet.noneOf(StatusProbeEnum.class)));
|
||||
verifyOperationCount(0, 0);
|
||||
|
||||
// now add a trailing slash to the key and use the
|
||||
// deep internal s3GetFileStatus method call.
|
||||
String emptyDirTrailingSlash = fs.pathToKey(emptydir.getParent())
|
||||
+ "/" + dir + "/";
|
||||
// A HEAD request does not probe for keys with a trailing /
|
||||
intercept(FileNotFoundException.class, () ->
|
||||
fs.s3GetFileStatus(emptydir, emptyDirTrailingSlash,
|
||||
StatusProbeEnum.HEAD_ONLY, null));
|
||||
verifyOperationCount(0, 0);
|
||||
|
||||
// but ask for a directory marker and you get the entry
|
||||
status = fs.s3GetFileStatus(emptydir,
|
||||
emptyDirTrailingSlash,
|
||||
StatusProbeEnum.DIR_MARKER_ONLY, null);
|
||||
verifyOperationCount(1, 0);
|
||||
assertEquals(emptydir, status.getPath());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCreateCost() throws Throwable {
|
||||
describe("Test file creation cost -raw only");
|
||||
S3AFileSystem fs = getFileSystem();
|
||||
assume("Unguarded FS only", !fs.hasMetadataStore());
|
||||
resetMetricDiffs();
|
||||
Path testFile = path("testCreateCost");
|
||||
|
||||
// when overwrite is false, the path is checked for existence.
|
||||
try (FSDataOutputStream out = fs.create(testFile, false)) {
|
||||
verifyOperationCount(2, 1);
|
||||
}
|
||||
|
||||
// but when true: only the directory checks take place.
|
||||
try (FSDataOutputStream out = fs.create(testFile, true)) {
|
||||
verifyOperationCount(1, 1);
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
@ -96,6 +96,12 @@ protected Configuration createConfiguration() {
|
||||
return configuration;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setup() throws Exception {
|
||||
super.setup();
|
||||
Assume.assumeTrue(getFileSystem().hasMetadataStore());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testDirectoryListingAuthoritativeTtl() throws Exception {
|
||||
LOG.info("Authoritative mode: {}", authoritative);
|
||||
|
Loading…
x
Reference in New Issue
Block a user