diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java
index 26f16a7b232..ff2ba1474df 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java
@@ -2610,7 +2610,7 @@ public FileStatus getFileStatus(final Path f) throws IOException {
* @param f The path we want information from
* @param needEmptyDirectoryFlag if true, implementation will calculate
* a TRUE or FALSE value for {@link S3AFileStatus#isEmptyDirectory()}
- * @param probes probes to make
+ * @param probes probes to make.
* @return a S3AFileStatus object
* @throws FileNotFoundException when the path does not exist
* @throws IOException on other problems.
@@ -2711,7 +2711,7 @@ S3AFileStatus innerGetFileStatus(final Path f,
// there was no entry in S3Guard
// retrieve the data and update the metadata store in the process.
return S3Guard.putAndReturn(metadataStore,
- s3GetFileStatus(path, key, StatusProbeEnum.ALL, tombstones),
+ s3GetFileStatus(path, key, probes, tombstones),
instrumentation,
ttlTimeProvider);
}
@@ -2719,31 +2719,70 @@ S3AFileStatus innerGetFileStatus(final Path f,
/**
* Raw {@code getFileStatus} that talks direct to S3.
- * Used to implement {@link #innerGetFileStatus(Path, boolean)},
+ * Used to implement {@link #innerGetFileStatus(Path, boolean, Set)},
* and for direct management of empty directory blobs.
+ *
+ * Checks made, in order:
+ *
+ *
+ * Head: look for an object at the given key, provided that
+ * the key doesn't end in "/"
+ *
+ *
+ * DirMarker: look for the directory marker: the key with a trailing /,
+ * which is appended if the key passed in does not already end with one.
+ * If an object was found with size 0 bytes, a directory status entry
+ * is returned which declares that the directory is empty.
+ *
+ *
+ * List: issue a LIST on the key (with / if needed), require one
+ * entry to be found for the path to be considered a non-empty directory.
+ *
+ *
+ *
+ * Notes:
+ *
+ *
+ * Objects ending in / which are not 0-bytes long are not treated as
+ * directory markers, but instead as files.
+ *
+ *
+ * There are ongoing discussions about whether a dir marker
+ * should be interpreted as an empty dir.
+ *
+ *
+ * The HEAD requests require the permissions to read an object,
+ * including (we believe) the ability to decrypt the file.
+ * At the very least, for SSE-C markers, you need the same key on
+ * the client for the HEAD to work.
+ *
+ *
+ * The List probe needs list permission; it is also more prone to
+ * inconsistency, even on newly created files.
+ *
+ *
+ *
* Retry policy: retry translated.
* @param path Qualified path
* @param key Key string for the path
* @param probes probes to make
* @param tombstones tombstones to filter
* @return Status
- * @throws FileNotFoundException when the path does not exist
+ * @throws FileNotFoundException the supplied probes failed.
* @throws IOException on other problems.
*/
+ @VisibleForTesting
@Retries.RetryTranslated
- private S3AFileStatus s3GetFileStatus(final Path path,
- String key,
+ S3AFileStatus s3GetFileStatus(final Path path,
+ final String key,
final Set probes,
final Set tombstones) throws IOException {
- if (!key.isEmpty() && probes.contains(StatusProbeEnum.Head)) {
- try {
- ObjectMetadata meta = getObjectMetadata(key);
-
- if (objectRepresentsDirectory(key, meta.getContentLength())) {
- LOG.debug("Found exact file: fake directory");
- return new S3AFileStatus(Tristate.TRUE, path, username);
- } else {
- LOG.debug("Found exact file: normal file");
+ if (!key.isEmpty()) {
+ if (probes.contains(StatusProbeEnum.Head) && !key.endsWith("/")) {
+ try {
+ // look for the simple file
+ ObjectMetadata meta = getObjectMetadata(key);
+ LOG.debug("Found exact file: normal file {}", key);
return new S3AFileStatus(meta.getContentLength(),
dateToLong(meta.getLastModified()),
path,
@@ -2751,18 +2790,22 @@ private S3AFileStatus s3GetFileStatus(final Path path,
username,
meta.getETag(),
meta.getVersionId());
- }
- } catch (AmazonServiceException e) {
- if (e.getStatusCode() != SC_404) {
+ } catch (AmazonServiceException e) {
+ // if the response is a 404 error, it just means that there is
+ // no file at that path...the remaining checks will be needed.
+ if (e.getStatusCode() != SC_404) {
+ throw translateException("getFileStatus", path, e);
+ }
+ } catch (AmazonClientException e) {
throw translateException("getFileStatus", path, e);
}
- } catch (AmazonClientException e) {
- throw translateException("getFileStatus", path, e);
}
+ // Either a normal file was not found or the probe was skipped,
+ // because the key ended in "/" or it was not in the set of probes.
// Look for the dir marker
- if (!key.endsWith("/") && probes.contains(StatusProbeEnum.DirMarker)) {
- String newKey = key + "/";
+ if (probes.contains(StatusProbeEnum.DirMarker)) {
+ String newKey = maybeAddTrailingSlash(key);
try {
ObjectMetadata meta = getObjectMetadata(newKey);
@@ -2794,8 +2837,8 @@ private S3AFileStatus s3GetFileStatus(final Path path,
// execute the list
if (probes.contains(StatusProbeEnum.List)) {
try {
- key = maybeAddTrailingSlash(key);
- S3ListRequest request = createListObjectsRequest(key, "/", 1);
+ String dirKey = maybeAddTrailingSlash(key);
+ S3ListRequest request = createListObjectsRequest(dirKey, "/", 1);
S3ListResult objects = listObjects(request);
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/StatusProbeEnum.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/StatusProbeEnum.java
index ca2875c39f8..f843b20ab28 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/StatusProbeEnum.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/StatusProbeEnum.java
@@ -41,4 +41,20 @@ public enum StatusProbeEnum {
public static final Set DIRECTORIES =
EnumSet.of(DirMarker, List);
+ /** We only want the HEAD or dir marker. */
+ public static final Set HEAD_OR_DIR_MARKER =
+ EnumSet.of(Head, DirMarker);
+
+ /** We only want the HEAD. */
+ public static final Set HEAD_ONLY =
+ EnumSet.of(Head);
+
+ /** We only want the dir marker. */
+ public static final Set DIR_MARKER_ONLY =
+ EnumSet.of(DirMarker);
+
+ /** We only want the LIST probe. */
+ public static final Set LIST_ONLY =
+ EnumSet.of(List);
+
}
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestAuthoritativePath.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestAuthoritativePath.java
index c35a5855d50..eb54c0ee0e7 100644
--- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestAuthoritativePath.java
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestAuthoritativePath.java
@@ -84,14 +84,18 @@ public void setup() throws Exception {
private void cleanUpFS(S3AFileSystem fs) {
// detach from the (shared) metadata store.
- fs.setMetadataStore(new NullMetadataStore());
+ if (fs != null) {
+ fs.setMetadataStore(new NullMetadataStore());
+ }
IOUtils.cleanupWithLogger(LOG, fs);
}
@Override
public void teardown() throws Exception {
- fullyAuthFS.delete(testRoot, true);
+ if (fullyAuthFS != null) {
+ fullyAuthFS.delete(testRoot, true);
+ }
cleanUpFS(fullyAuthFS);
cleanUpFS(rawFS);
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AFileOperationCost.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AFileOperationCost.java
index c62176b9b24..e2f7fead466 100644
--- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AFileOperationCost.java
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AFileOperationCost.java
@@ -18,23 +18,32 @@
package org.apache.hadoop.fs.s3a;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.contract.ContractTestUtils;
import org.apache.hadoop.fs.s3a.impl.StatusProbeEnum;
+import org.assertj.core.api.Assertions;
import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.File;
import java.io.FileNotFoundException;
import java.net.URI;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.EnumSet;
import java.util.UUID;
import java.util.concurrent.Callable;
import static org.apache.hadoop.fs.contract.ContractTestUtils.*;
+import static org.apache.hadoop.fs.s3a.Constants.S3_METADATA_STORE_IMPL;
import static org.apache.hadoop.fs.s3a.Statistic.*;
import static org.apache.hadoop.fs.s3a.S3ATestUtils.*;
import static org.apache.hadoop.test.GenericTestUtils.getTestDir;
@@ -43,7 +52,9 @@
/**
* Use metrics to assert about the cost of file status queries.
* {@link S3AFileSystem#getFileStatus(Path)}.
+ * Parameterized on guarded vs raw.
*/
+@RunWith(Parameterized.class)
public class ITestS3AFileOperationCost extends AbstractS3ATestBase {
private MetricDiff metadataRequests;
@@ -52,9 +63,48 @@ public class ITestS3AFileOperationCost extends AbstractS3ATestBase {
private static final Logger LOG =
LoggerFactory.getLogger(ITestS3AFileOperationCost.class);
+ /**
+ * Parameterization.
+ */
+ @Parameterized.Parameters(name = "{0}")
+ public static Collection