From 806d84b79c97cd0bbed324f6a324d7c110a6fd87 Mon Sep 17 00:00:00 2001 From: jimmy-zuber-amzn <67486813+jimmy-zuber-amzn@users.noreply.github.com> Date: Mon, 13 Jul 2020 11:07:48 -0700 Subject: [PATCH] HADOOP-17105. S3AFS - Do not attempt to resolve symlinks in globStatus (#2113) Contributed by Jimmy Zuber. --- .../apache/hadoop/fs/s3a/S3AFileSystem.java | 10 +++-- .../fs/s3a/ITestS3AFileOperationCost.java | 44 +++++++++++++++++++ 2 files changed, 50 insertions(+), 4 deletions(-) diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java index e5b08f1bb90..f123f6b3376 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java @@ -3980,6 +3980,8 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities, /** * Increments the statistic {@link Statistic#INVOCATION_GLOB_STATUS}. + * Override superclass so as to disable symlink resolution as symlinks + * are not supported by S3A. * {@inheritDoc} */ @Override @@ -3988,9 +3990,9 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities, } /** - * Override superclass so as to disable symlink resolution and so avoid - * some calls to the FS which may have problems when the store is being - * inconsistent. + * Increments the statistic {@link Statistic#INVOCATION_GLOB_STATUS}. + * Override superclass so as to disable symlink resolution as symlinks + * are not supported by S3A. 
* {@inheritDoc} */ @Override @@ -4002,7 +4004,7 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities, return Globber.createGlobber(this) .withPathPattern(pathPattern) .withPathFiltern(filter) - .withResolveSymlinks(true) + .withResolveSymlinks(false) .build() .glob(); } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AFileOperationCost.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AFileOperationCost.java index b2b983c4d4d..cd8d7d5d53a 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AFileOperationCost.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AFileOperationCost.java @@ -574,4 +574,48 @@ public class ITestS3AFileOperationCost extends AbstractS3ATestBase { } } + + @Test + public void testCostOfGlobStatus() throws Throwable { + describe("Test globStatus has expected cost"); + S3AFileSystem fs = getFileSystem(); + assume("Unguarded FS only", !fs.hasMetadataStore()); + + Path basePath = path("testCostOfGlobStatus/nextFolder/"); + + // create a bunch of files + int filesToCreate = 10; + for (int i = 0; i < filesToCreate; i++) { + try (FSDataOutputStream out = fs.create(basePath.suffix("/" + i))) { + verifyOperationCount(1, 1); + } + } + + fs.globStatus(basePath.suffix("/*")); + // 2 head + 1 list from getFileStatus on path, + // plus 1 list to match the glob pattern + verifyOperationCount(2, 2); + } + + @Test + public void testCostOfGlobStatusNoSymlinkResolution() throws Throwable { + describe("Test globStatus does not attempt to resolve symlinks"); + S3AFileSystem fs = getFileSystem(); + assume("Unguarded FS only", !fs.hasMetadataStore()); + + Path basePath = path("testCostOfGlobStatusNoSymlinkResolution/f/"); + + // create a single file, globStatus returning a single file on a pattern + // triggers attempts at symlink resolution if configured + String fileName = "/notASymlinkDOntResolveMeLikeOne"; + try 
(FSDataOutputStream out = fs.create(basePath.suffix(fileName))) { + verifyOperationCount(1, 1); + } + + fs.globStatus(basePath.suffix("/*")); + // unguarded: 2 head + 1 list from getFileStatus on path, + // plus 1 list to match the glob pattern + // no additional operations from symlink resolution + verifyOperationCount(2, 2); + } }