From d24933ad3aee630e5dee93357882095d1f24d357 Mon Sep 17 00:00:00 2001
From: Steve Loughran
Date: Thu, 29 Sep 2016 16:59:33 +0100
Subject: [PATCH] HADOOP-13164 Optimize S3AFileSystem::deleteUnnecessaryFakeDirectories. Contributed by Rajesh Balamohan.

---
 .../contract/AbstractFSContractTestBase.java  |  2 +-
 .../apache/hadoop/fs/s3a/S3AFileSystem.java   | 63 +++++++-------
 .../hadoop/fs/s3a/S3AInstrumentation.java     | 10 +++
 .../org/apache/hadoop/fs/s3a/Statistic.java   |  4 +
 .../apache/hadoop/fs/s3a/S3ATestUtils.java    | 13 ++-
 .../fs/s3a/TestS3AFileOperationCost.java      | 85 +++++++++++++++++++
 6 files changed, 143 insertions(+), 34 deletions(-)

diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractFSContractTestBase.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractFSContractTestBase.java
index baea968b67a..b2e68f5316e 100644
--- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractFSContractTestBase.java
+++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractFSContractTestBase.java
@@ -359,7 +359,7 @@ public abstract class AbstractFSContractTestBase extends Assert
     assertEquals(text + " wrong read result " + result, -1, result);
   }
 
-  boolean rename(Path src, Path dst) throws IOException {
+  protected boolean rename(Path src, Path dst) throws IOException {
     return getFileSystem().rename(src, dst);
   }
 
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java
index 3fe7eec6d65..aff74e48ca1 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java
@@ -778,7 +778,7 @@ public class S3AFileSystem extends FileSystem {
           copyFile(summary.getKey(), newDstKey, summary.getSize());
 
           if (keysToDelete.size() == MAX_ENTRIES_TO_DELETE) {
-            removeKeys(keysToDelete, true);
+            removeKeys(keysToDelete, true, false);
           }
         }
 
@@ -786,7 +786,7 @@ public class S3AFileSystem extends FileSystem {
           objects = continueListObjects(objects);
         } else {
           if (!keysToDelete.isEmpty()) {
-            removeKeys(keysToDelete, false);
+            removeKeys(keysToDelete, false, false);
           }
           break;
         }
@@ -1034,17 +1034,25 @@ public class S3AFileSystem extends FileSystem {
    * @param keysToDelete collection of keys to delete on the s3-backend
    * @param clearKeys clears the keysToDelete-list after processing the list
    *            when set to true
+   * @param deleteFakeDir indicates whether this is for deleting fake dirs
    */
   private void removeKeys(List<DeleteObjectsRequest.KeyVersion> keysToDelete,
-      boolean clearKeys) throws AmazonClientException {
+      boolean clearKeys, boolean deleteFakeDir) throws AmazonClientException {
+    if (keysToDelete.isEmpty()) {
+      // no keys
+      return;
+    }
     if (enableMultiObjectsDelete) {
       deleteObjects(new DeleteObjectsRequest(bucket).withKeys(keysToDelete));
-      instrumentation.fileDeleted(keysToDelete.size());
     } else {
       for (DeleteObjectsRequest.KeyVersion keyVersion : keysToDelete) {
        deleteObject(keyVersion.getKey());
       }
+    }
+    if (!deleteFakeDir) {
       instrumentation.fileDeleted(keysToDelete.size());
+    } else {
+      instrumentation.fakeDirsDeleted(keysToDelete.size());
     }
     if (clearKeys) {
       keysToDelete.clear();
@@ -1132,7 +1140,7 @@ public class S3AFileSystem extends FileSystem {
           LOG.debug("Got object to delete {}", summary.getKey());
 
           if (keys.size() == MAX_ENTRIES_TO_DELETE) {
-            removeKeys(keys, true);
+            removeKeys(keys, true, false);
           }
         }
 
@@ -1140,7 +1148,7 @@ public class S3AFileSystem extends FileSystem {
           objects = continueListObjects(objects);
         } else {
           if (!keys.isEmpty()) {
-            removeKeys(keys, false);
+            removeKeys(keys, false, false);
           }
           break;
         }
@@ -1630,37 +1638,30 @@ public class S3AFileSystem extends FileSystem {
   /**
    * Delete mock parent directories which are no longer needed.
    * This code swallows IO exceptions encountered
-   * @param f path
+   * @param path path
    */
-  private void deleteUnnecessaryFakeDirectories(Path f) {
-    while (true) {
-      String key = "";
-      try {
-        key = pathToKey(f);
-        if (key.isEmpty()) {
-          break;
+  private void deleteUnnecessaryFakeDirectories(Path path) {
+    List<DeleteObjectsRequest.KeyVersion> keysToRemove = new ArrayList<>();
+    while (!path.isRoot()) {
+      String key = pathToKey(path);
+      key = (key.endsWith("/")) ? key : (key + "/");
+      keysToRemove.add(new DeleteObjectsRequest.KeyVersion(key));
+      path = path.getParent();
+    }
+    try {
+      removeKeys(keysToRemove, false, true);
+    } catch(AmazonClientException e) {
+      instrumentation.errorIgnored();
+      if (LOG.isDebugEnabled()) {
+        StringBuilder sb = new StringBuilder();
+        for(DeleteObjectsRequest.KeyVersion kv : keysToRemove) {
+          sb.append(kv.getKey()).append(",");
         }
-
-        S3AFileStatus status = getFileStatus(f);
-
-        if (status.isDirectory() && status.isEmptyDirectory()) {
-          LOG.debug("Deleting fake directory {}/", key);
-          deleteObject(key + "/");
-        }
-      } catch (IOException | AmazonClientException e) {
-        LOG.debug("While deleting key {} ", key, e);
-        instrumentation.errorIgnored();
+        LOG.debug("While deleting keys {} ", sb.toString(), e);
       }
-
-      if (f.isRoot()) {
-        break;
-      }
-
-      f = f.getParent();
     }
   }
 
-
   private void createFakeDirectory(final String objectName)
       throws AmazonClientException, AmazonServiceException,
       InterruptedIOException {
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AInstrumentation.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AInstrumentation.java
index 514b9743245..20c77c23fc2 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AInstrumentation.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AInstrumentation.java
@@ -75,6 +75,7 @@ public class S3AInstrumentation {
   private final MutableCounterLong numberOfFilesCopied;
   private final MutableCounterLong bytesOfFilesCopied;
   private final MutableCounterLong numberOfFilesDeleted;
+  private final MutableCounterLong numberOfFakeDirectoryDeletes;
   private final MutableCounterLong numberOfDirectoriesCreated;
   private final MutableCounterLong numberOfDirectoriesDeleted;
   private final Map streamMetrics =
@@ -134,6 +135,7 @@ public class S3AInstrumentation {
     numberOfFilesCopied = counter(FILES_COPIED);
     bytesOfFilesCopied = counter(FILES_COPIED_BYTES);
     numberOfFilesDeleted = counter(FILES_DELETED);
+    numberOfFakeDirectoryDeletes = counter(FAKE_DIRECTORIES_DELETED);
     numberOfDirectoriesCreated = counter(DIRECTORIES_CREATED);
     numberOfDirectoriesDeleted = counter(DIRECTORIES_DELETED);
     ignoredErrors = counter(IGNORED_ERRORS);
@@ -294,6 +296,14 @@ public class S3AInstrumentation {
     numberOfFilesDeleted.incr(count);
   }
 
+  /**
+   * Indicate that fake directory request was made.
+   * @param count number of directory entries included in the delete request.
+   */
+  public void fakeDirsDeleted(int count) {
+    numberOfFakeDirectoryDeletes.incr(count);
+  }
+
   /**
    * Indicate that S3A created a directory.
    */
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Statistic.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Statistic.java
index 36d163c5fc2..67b2e80e929 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Statistic.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Statistic.java
@@ -42,6 +42,10 @@ public enum Statistic {
       "Total number of files created through the object store."),
   FILES_DELETED("files_deleted",
       "Total number of files deleted from the object store."),
+  FAKE_DIRECTORIES_CREATED("fake_directories_created",
+      "Total number of fake directory entries created in the object store."),
+  FAKE_DIRECTORIES_DELETED("fake_directories_deleted",
+      "Total number of fake directory deletes submitted to object store."),
   IGNORED_ERRORS("ignored_errors", "Errors caught and ignored"),
   INVOCATION_COPY_FROM_LOCAL_FILE(CommonStatisticNames.OP_COPY_FROM_LOCAL_FILE,
       "Calls of copyFromLocalFile()"),
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3ATestUtils.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3ATestUtils.java
index 39c60287d06..8f484d84bee 100644
--- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3ATestUtils.java
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3ATestUtils.java
@@ -296,13 +296,22 @@ public class S3ATestUtils {
       return sb.toString();
     }
 
+    /**
+     * Assert that the value of {@link #diff()} matches that expected.
+     * @param message message to print; metric name is appended
+     * @param expected expected value.
+     */
+    public void assertDiffEquals(String message, long expected) {
+      Assert.assertEquals(message + ": " + statistic.getSymbol(),
+          expected, diff());
+    }
+
     /**
      * Assert that the value of {@link #diff()} matches that expected.
      * @param expected expected value.
      */
     public void assertDiffEquals(long expected) {
-      Assert.assertEquals("Count of " + this,
-          expected, diff());
+      assertDiffEquals("Count of " + this, expected);
     }
 
     /**
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AFileOperationCost.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AFileOperationCost.java
index 0e9bcfd18cf..179eb88a55e 100644
--- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AFileOperationCost.java
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AFileOperationCost.java
@@ -189,4 +189,89 @@ public class TestS3AFileOperationCost extends AbstractFSContractTestBase {
       tmpFile.delete();
     }
   }
+
+  private void reset(MetricDiff... diffs) {
+    for (MetricDiff diff : diffs) {
+      diff.reset();
+    }
+  }
+
+  @Test
+  public void testFakeDirectoryDeletion() throws Throwable {
+    describe("Verify whether create file works after renaming a file. "
+        + "In S3, rename deletes any fake directories as a part of "
+        + "clean up activity");
+    S3AFileSystem fs = getFileSystem();
+    Path srcBaseDir = path("src");
+    mkdirs(srcBaseDir);
+    MetricDiff deleteRequests =
+        new MetricDiff(fs, Statistic.OBJECT_DELETE_REQUESTS);
+    MetricDiff directoriesDeleted =
+        new MetricDiff(fs, Statistic.DIRECTORIES_DELETED);
+    MetricDiff fakeDirectoriesDeleted =
+        new MetricDiff(fs, Statistic.FAKE_DIRECTORIES_DELETED);
+    MetricDiff directoriesCreated =
+        new MetricDiff(fs, Statistic.DIRECTORIES_CREATED);
+
+    Path srcDir = new Path(srcBaseDir, "1/2/3/4/5/6");
+    Path srcFilePath = new Path(srcDir, "source.txt");
+    int srcDirDepth = directoriesInPath(srcDir);
+    // one dir created, one removed
+    mkdirs(srcDir);
+    String state = "after mkdir(srcDir)";
+    directoriesCreated.assertDiffEquals(state, 1);
+/* TODO: uncomment once HADOOP-13222 is in
+    deleteRequests.assertDiffEquals(state, 1);
+    directoriesDeleted.assertDiffEquals(state, 0);
+    fakeDirectoriesDeleted.assertDiffEquals(state, srcDirDepth);
+*/
+    reset(deleteRequests, directoriesCreated, directoriesDeleted,
+        fakeDirectoriesDeleted);
+
+    // creating a file should trigger demise of the src dir
+    touch(fs, srcFilePath);
+    state = "after touch(fs, srcFilePath)";
+    deleteRequests.assertDiffEquals(state, 1);
+    directoriesCreated.assertDiffEquals(state, 0);
+    directoriesDeleted.assertDiffEquals(state, 0);
+    fakeDirectoriesDeleted.assertDiffEquals(state, srcDirDepth);
+
+    reset(deleteRequests, directoriesCreated, directoriesDeleted,
+        fakeDirectoriesDeleted);
+
+    Path destBaseDir = path("dest");
+    Path destDir = new Path(destBaseDir, "1/2/3/4/5/6");
+    Path destFilePath = new Path(destDir, "dest.txt");
+    mkdirs(destDir);
+    state = "after mkdir(destDir)";
+
+    int destDirDepth = directoriesInPath(destDir);
+    directoriesCreated.assertDiffEquals(state, 1);
+/* TODO: uncomment once HADOOP-13222 is in
+    deleteRequests.assertDiffEquals(state, 1);
+    directoriesDeleted.assertDiffEquals(state, 0);
+    fakeDirectoriesDeleted.assertDiffEquals(state, destDirDepth);
+*/
+    reset(deleteRequests, directoriesCreated, directoriesDeleted,
+        fakeDirectoriesDeleted);
+
+    fs.rename(srcFilePath, destFilePath);
+    state = "after rename(srcFilePath, destFilePath)";
+    directoriesCreated.assertDiffEquals(state, 1);
+    // one for the renamed file, one for the parent
+    deleteRequests.assertDiffEquals(state, 2);
+    directoriesDeleted.assertDiffEquals(state, 0);
+    fakeDirectoriesDeleted.assertDiffEquals(state, destDirDepth);
+
+    reset(deleteRequests, directoriesCreated, directoriesDeleted,
+        fakeDirectoriesDeleted);
+
+    assertIsFile(destFilePath);
+    assertIsDirectory(srcDir);
+  }
+
+  private int directoriesInPath(Path path) {
+    return path.isRoot() ? 0 : 1 + directoriesInPath(path.getParent());
+  }
+
 }