diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java index 26f16a7b232..ff2ba1474df 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java @@ -2610,7 +2610,7 @@ public FileStatus getFileStatus(final Path f) throws IOException { * @param f The path we want information from * @param needEmptyDirectoryFlag if true, implementation will calculate * a TRUE or FALSE value for {@link S3AFileStatus#isEmptyDirectory()} - * @param probes probes to make + * @param probes probes to make. * @return a S3AFileStatus object * @throws FileNotFoundException when the path does not exist * @throws IOException on other problems. @@ -2711,7 +2711,7 @@ S3AFileStatus innerGetFileStatus(final Path f, // there was no entry in S3Guard // retrieve the data and update the metadata store in the process. return S3Guard.putAndReturn(metadataStore, - s3GetFileStatus(path, key, StatusProbeEnum.ALL, tombstones), + s3GetFileStatus(path, key, probes, tombstones), instrumentation, ttlTimeProvider); } @@ -2719,31 +2719,70 @@ S3AFileStatus innerGetFileStatus(final Path f, /** * Raw {@code getFileStatus} that talks direct to S3. - * Used to implement {@link #innerGetFileStatus(Path, boolean)}, + * Used to implement {@link #innerGetFileStatus(Path, boolean, Set)}, * and for direct management of empty directory blobs. + * + * Checks made, in order: + *
    + *
  1. + * Head: look for an object at the given key, provided that + * the key doesn't end in "/" + *
  2. + *
  3. + * DirMarker: look for the directory marker -the key with a trailing / + * if not passed in. + * If an object was found with size 0 bytes, a directory status entry + * is returned which declares that the directory is empty. + *
  4. + *
  5. + * List: issue a LIST on the key (with / if needed), require one + * entry to be found for the path to be considered a non-empty directory. + *
  6. + *
+ * + * Notes: + * + * * Retry policy: retry translated. * @param path Qualified path * @param key Key string for the path * @param probes probes to make * @param tombstones tombstones to filter * @return Status - * @throws FileNotFoundException when the path does not exist + * @throws FileNotFoundException the supplied probes failed. * @throws IOException on other problems. */ + @VisibleForTesting @Retries.RetryTranslated - private S3AFileStatus s3GetFileStatus(final Path path, - String key, + S3AFileStatus s3GetFileStatus(final Path path, + final String key, final Set probes, final Set tombstones) throws IOException { - if (!key.isEmpty() && probes.contains(StatusProbeEnum.Head)) { - try { - ObjectMetadata meta = getObjectMetadata(key); - - if (objectRepresentsDirectory(key, meta.getContentLength())) { - LOG.debug("Found exact file: fake directory"); - return new S3AFileStatus(Tristate.TRUE, path, username); - } else { - LOG.debug("Found exact file: normal file"); + if (!key.isEmpty()) { + if (probes.contains(StatusProbeEnum.Head) && !key.endsWith("/")) { + try { + // look for the simple file + ObjectMetadata meta = getObjectMetadata(key); + LOG.debug("Found exact file: normal file {}", key); return new S3AFileStatus(meta.getContentLength(), dateToLong(meta.getLastModified()), path, @@ -2751,18 +2790,22 @@ private S3AFileStatus s3GetFileStatus(final Path path, username, meta.getETag(), meta.getVersionId()); - } - } catch (AmazonServiceException e) { - if (e.getStatusCode() != SC_404) { + } catch (AmazonServiceException e) { + // if the response is a 404 error, it just means that there is + // no file at that path...the remaining checks will be needed. + if (e.getStatusCode() != SC_404) { + throw translateException("getFileStatus", path, e); + } + } catch (AmazonClientException e) { throw translateException("getFileStatus", path, e); } - } catch (AmazonClientException e) { - throw translateException("getFileStatus", path, e); } + // Either a normal file was not found or the probe was skipped. + // because the key ended in "/" or it was not in the set of probes. // Look for the dir marker - if (!key.endsWith("/") && probes.contains(StatusProbeEnum.DirMarker)) { - String newKey = key + "/"; + if (probes.contains(StatusProbeEnum.DirMarker)) { + String newKey = maybeAddTrailingSlash(key); try { ObjectMetadata meta = getObjectMetadata(newKey); @@ -2794,8 +2837,8 @@ private S3AFileStatus s3GetFileStatus(final Path path, // execute the list if (probes.contains(StatusProbeEnum.List)) { try { - key = maybeAddTrailingSlash(key); - S3ListRequest request = createListObjectsRequest(key, "/", 1); + String dirKey = maybeAddTrailingSlash(key); + S3ListRequest request = createListObjectsRequest(dirKey, "/", 1); S3ListResult objects = listObjects(request); diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/StatusProbeEnum.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/StatusProbeEnum.java index ca2875c39f8..f843b20ab28 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/StatusProbeEnum.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/StatusProbeEnum.java @@ -41,4 +41,20 @@ public enum StatusProbeEnum { public static final Set DIRECTORIES = EnumSet.of(DirMarker, List); + /** We only want the HEAD or dir marker. */ + public static final Set HEAD_OR_DIR_MARKER = + EnumSet.of(Head, DirMarker); + + /** We only want the HEAD. */ + public static final Set HEAD_ONLY = + EnumSet.of(Head); + + /** We only want the dir marker. */ + public static final Set DIR_MARKER_ONLY = + EnumSet.of(DirMarker); + + /** We only want the dir marker. */ + public static final Set LIST_ONLY = + EnumSet.of(List); + } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestAuthoritativePath.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestAuthoritativePath.java index c35a5855d50..eb54c0ee0e7 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestAuthoritativePath.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestAuthoritativePath.java @@ -84,14 +84,18 @@ public void setup() throws Exception { private void cleanUpFS(S3AFileSystem fs) { // detach from the (shared) metadata store. - fs.setMetadataStore(new NullMetadataStore()); + if (fs != null) { + fs.setMetadataStore(new NullMetadataStore()); + } IOUtils.cleanupWithLogger(LOG, fs); } @Override public void teardown() throws Exception { - fullyAuthFS.delete(testRoot, true); + if (fullyAuthFS != null) { + fullyAuthFS.delete(testRoot, true); + } cleanUpFS(fullyAuthFS); cleanUpFS(rawFS); diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AFileOperationCost.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AFileOperationCost.java index c62176b9b24..e2f7fead466 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AFileOperationCost.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AFileOperationCost.java @@ -18,23 +18,32 @@ package org.apache.hadoop.fs.s3a; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.contract.ContractTestUtils; import org.apache.hadoop.fs.s3a.impl.StatusProbeEnum; +import org.assertj.core.api.Assertions; import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.File; import java.io.FileNotFoundException; import java.net.URI; +import java.util.Arrays; +import java.util.Collection; +import java.util.EnumSet; import java.util.UUID; import java.util.concurrent.Callable; import static org.apache.hadoop.fs.contract.ContractTestUtils.*; +import static org.apache.hadoop.fs.s3a.Constants.S3_METADATA_STORE_IMPL; import static org.apache.hadoop.fs.s3a.Statistic.*; import static org.apache.hadoop.fs.s3a.S3ATestUtils.*; import static org.apache.hadoop.test.GenericTestUtils.getTestDir; @@ -43,7 +52,9 @@ /** * Use metrics to assert about the cost of file status queries. * {@link S3AFileSystem#getFileStatus(Path)}. + * Parameterized on guarded vs raw. */ +@RunWith(Parameterized.class) public class ITestS3AFileOperationCost extends AbstractS3ATestBase { private MetricDiff metadataRequests; @@ -52,9 +63,48 @@ public class ITestS3AFileOperationCost extends AbstractS3ATestBase { private static final Logger LOG = LoggerFactory.getLogger(ITestS3AFileOperationCost.class); + /** + * Parameterization. + */ + @Parameterized.Parameters(name = "{0}") + public static Collection params() { + return Arrays.asList(new Object[][]{ + {"raw", false}, + {"guarded", true} + }); + } + + private final String name; + + private final boolean s3guard; + + public ITestS3AFileOperationCost(final String name, final boolean s3guard) { + this.name = name; + this.s3guard = s3guard; + } + + @Override + public Configuration createConfiguration() { + Configuration conf = super.createConfiguration(); + String bucketName = getTestBucketName(conf); + removeBucketOverrides(bucketName, conf, + S3_METADATA_STORE_IMPL); + if (!s3guard) { + // in a raw run remove all s3guard settings + removeBaseAndBucketOverrides(bucketName, conf, + S3_METADATA_STORE_IMPL); + } + disableFilesystemCaching(conf); + return conf; + } @Override public void setup() throws Exception { super.setup(); + if (s3guard) { + // s3guard is required for those test runs where any of the + // guard options are set + assumeS3GuardState(true, getConfiguration()); + } S3AFileSystem fs = getFileSystem(); metadataRequests = new MetricDiff(fs, OBJECT_METADATA_REQUESTS); listRequests = new MetricDiff(fs, OBJECT_LIST_REQUESTS); @@ -80,6 +130,19 @@ private void resetMetricDiffs() { reset(metadataRequests, listRequests); } + /** + * Verify that the head and list calls match expectations, + * then reset the counters ready for the next operation. + * @param head expected HEAD count + * @param list expected LIST count + */ + private void verifyOperationCount(int head, int list) { + metadataRequests.assertDiffEquals(head); + listRequests.assertDiffEquals(list); + metadataRequests.reset(); + listRequests.reset(); + } + @Test public void testCostOfGetFileStatusOnEmptyDir() throws Throwable { describe("performing getFileStatus on an empty directory"); @@ -205,12 +268,6 @@ public void testFakeDirectoryDeletion() throws Throwable { + "clean up activity"); S3AFileSystem fs = getFileSystem(); - // As this test uses the s3 metrics to count the number of fake directory - // operations, it depends on side effects happening internally. With - // metadata store enabled, it is brittle to change. We disable this test - // before the internal behavior w/ or w/o metadata store. -// assumeFalse(fs.hasMetadataStore()); - Path srcBaseDir = path("src"); mkdirs(srcBaseDir); MetricDiff deleteRequests = @@ -389,4 +446,75 @@ public String toString() { fs.delete(dest, false); } } + + @Test + public void testDirProbes() throws Throwable { + describe("Test directory probe cost -raw only"); + S3AFileSystem fs = getFileSystem(); + assume("Unguarded FS only", !fs.hasMetadataStore()); + String dir = "testEmptyDirHeadProbe"; + Path emptydir = path(dir); + // Create the empty directory. + fs.mkdirs(emptydir); + + // metrics and assertions. + resetMetricDiffs(); + + intercept(FileNotFoundException.class, () -> + fs.innerGetFileStatus(emptydir, false, + StatusProbeEnum.HEAD_ONLY)); + verifyOperationCount(1, 0); + + // a LIST will find it -but it doesn't consider it an empty dir. + S3AFileStatus status = fs.innerGetFileStatus(emptydir, true, + StatusProbeEnum.LIST_ONLY); + verifyOperationCount(0, 1); + Assertions.assertThat(status) + .describedAs("LIST output is not considered empty") + .matches(s -> !s.isEmptyDirectory().equals(Tristate.TRUE), "is empty"); + + // finally, skip all probes and expect no operations toThere are + // take place + intercept(FileNotFoundException.class, () -> + fs.innerGetFileStatus(emptydir, false, + EnumSet.noneOf(StatusProbeEnum.class))); + verifyOperationCount(0, 0); + + // now add a trailing slash to the key and use the + // deep internal s3GetFileStatus method call. + String emptyDirTrailingSlash = fs.pathToKey(emptydir.getParent()) + + "/" + dir + "/"; + // A HEAD request does not probe for keys with a trailing / + intercept(FileNotFoundException.class, () -> + fs.s3GetFileStatus(emptydir, emptyDirTrailingSlash, + StatusProbeEnum.HEAD_ONLY, null)); + verifyOperationCount(0, 0); + + // but ask for a directory marker and you get the entry + status = fs.s3GetFileStatus(emptydir, + emptyDirTrailingSlash, + StatusProbeEnum.DIR_MARKER_ONLY, null); + verifyOperationCount(1, 0); + assertEquals(emptydir, status.getPath()); + } + + @Test + public void testCreateCost() throws Throwable { + describe("Test file creation cost -raw only"); + S3AFileSystem fs = getFileSystem(); + assume("Unguarded FS only", !fs.hasMetadataStore()); + resetMetricDiffs(); + Path testFile = path("testCreateCost"); + + // when overwrite is false, the path is checked for existence. + try (FSDataOutputStream out = fs.create(testFile, false)) { + verifyOperationCount(2, 1); + } + + // but when true: only the directory checks take place. + try (FSDataOutputStream out = fs.create(testFile, true)) { + verifyOperationCount(1, 1); + } + + } } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3GuardTtl.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3GuardTtl.java index fb539a812f5..b068f3db319 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3GuardTtl.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3GuardTtl.java @@ -96,6 +96,12 @@ protected Configuration createConfiguration() { return configuration; } + @Override + public void setup() throws Exception { + super.setup(); + Assume.assumeTrue(getFileSystem().hasMetadataStore()); + } + @Test public void testDirectoryListingAuthoritativeTtl() throws Exception { LOG.info("Authoritative mode: {}", authoritative);