HADOOP-13164 Optimize S3AFileSystem::deleteUnnecessaryFakeDirectories. Contributed by Rajesh Balamohan.

This commit is contained in:
Steve Loughran 2016-09-29 16:59:33 +01:00
parent 7296999c89
commit d24933ad3a
6 changed files with 143 additions and 34 deletions

View File

@ -359,7 +359,7 @@ public abstract class AbstractFSContractTestBase extends Assert
assertEquals(text + " wrong read result " + result, -1, result); assertEquals(text + " wrong read result " + result, -1, result);
} }
boolean rename(Path src, Path dst) throws IOException { protected boolean rename(Path src, Path dst) throws IOException {
return getFileSystem().rename(src, dst); return getFileSystem().rename(src, dst);
} }

View File

@ -778,7 +778,7 @@ public class S3AFileSystem extends FileSystem {
copyFile(summary.getKey(), newDstKey, summary.getSize()); copyFile(summary.getKey(), newDstKey, summary.getSize());
if (keysToDelete.size() == MAX_ENTRIES_TO_DELETE) { if (keysToDelete.size() == MAX_ENTRIES_TO_DELETE) {
removeKeys(keysToDelete, true); removeKeys(keysToDelete, true, false);
} }
} }
@ -786,7 +786,7 @@ public class S3AFileSystem extends FileSystem {
objects = continueListObjects(objects); objects = continueListObjects(objects);
} else { } else {
if (!keysToDelete.isEmpty()) { if (!keysToDelete.isEmpty()) {
removeKeys(keysToDelete, false); removeKeys(keysToDelete, false, false);
} }
break; break;
} }
@ -1034,17 +1034,25 @@ public class S3AFileSystem extends FileSystem {
* @param keysToDelete collection of keys to delete on the s3-backend * @param keysToDelete collection of keys to delete on the s3-backend
* @param clearKeys clears the keysToDelete-list after processing the list * @param clearKeys clears the keysToDelete-list after processing the list
* when set to true * when set to true
* @param deleteFakeDir indicates whether this is for deleting fake dirs
*/ */
private void removeKeys(List<DeleteObjectsRequest.KeyVersion> keysToDelete, private void removeKeys(List<DeleteObjectsRequest.KeyVersion> keysToDelete,
boolean clearKeys) throws AmazonClientException { boolean clearKeys, boolean deleteFakeDir) throws AmazonClientException {
if (keysToDelete.isEmpty()) {
// no keys
return;
}
if (enableMultiObjectsDelete) { if (enableMultiObjectsDelete) {
deleteObjects(new DeleteObjectsRequest(bucket).withKeys(keysToDelete)); deleteObjects(new DeleteObjectsRequest(bucket).withKeys(keysToDelete));
instrumentation.fileDeleted(keysToDelete.size());
} else { } else {
for (DeleteObjectsRequest.KeyVersion keyVersion : keysToDelete) { for (DeleteObjectsRequest.KeyVersion keyVersion : keysToDelete) {
deleteObject(keyVersion.getKey()); deleteObject(keyVersion.getKey());
} }
}
if (!deleteFakeDir) {
instrumentation.fileDeleted(keysToDelete.size()); instrumentation.fileDeleted(keysToDelete.size());
} else {
instrumentation.fakeDirsDeleted(keysToDelete.size());
} }
if (clearKeys) { if (clearKeys) {
keysToDelete.clear(); keysToDelete.clear();
@ -1132,7 +1140,7 @@ public class S3AFileSystem extends FileSystem {
LOG.debug("Got object to delete {}", summary.getKey()); LOG.debug("Got object to delete {}", summary.getKey());
if (keys.size() == MAX_ENTRIES_TO_DELETE) { if (keys.size() == MAX_ENTRIES_TO_DELETE) {
removeKeys(keys, true); removeKeys(keys, true, false);
} }
} }
@ -1140,7 +1148,7 @@ public class S3AFileSystem extends FileSystem {
objects = continueListObjects(objects); objects = continueListObjects(objects);
} else { } else {
if (!keys.isEmpty()) { if (!keys.isEmpty()) {
removeKeys(keys, false); removeKeys(keys, false, false);
} }
break; break;
} }
@ -1630,36 +1638,29 @@ public class S3AFileSystem extends FileSystem {
/** /**
* Delete mock parent directories which are no longer needed. * Delete mock parent directories which are no longer needed.
* This code swallows IO exceptions encountered * This code swallows IO exceptions encountered
* @param f path * @param path path
*/ */
private void deleteUnnecessaryFakeDirectories(Path f) { private void deleteUnnecessaryFakeDirectories(Path path) {
while (true) { List<DeleteObjectsRequest.KeyVersion> keysToRemove = new ArrayList<>();
String key = ""; while (!path.isRoot()) {
String key = pathToKey(path);
key = (key.endsWith("/")) ? key : (key + "/");
keysToRemove.add(new DeleteObjectsRequest.KeyVersion(key));
path = path.getParent();
}
try { try {
key = pathToKey(f); removeKeys(keysToRemove, false, true);
if (key.isEmpty()) { } catch(AmazonClientException e) {
break;
}
S3AFileStatus status = getFileStatus(f);
if (status.isDirectory() && status.isEmptyDirectory()) {
LOG.debug("Deleting fake directory {}/", key);
deleteObject(key + "/");
}
} catch (IOException | AmazonClientException e) {
LOG.debug("While deleting key {} ", key, e);
instrumentation.errorIgnored(); instrumentation.errorIgnored();
if (LOG.isDebugEnabled()) {
StringBuilder sb = new StringBuilder();
for(DeleteObjectsRequest.KeyVersion kv : keysToRemove) {
sb.append(kv.getKey()).append(",");
}
LOG.debug("While deleting keys {} ", sb.toString(), e);
} }
if (f.isRoot()) {
break;
}
f = f.getParent();
} }
} }
private void createFakeDirectory(final String objectName) private void createFakeDirectory(final String objectName)
throws AmazonClientException, AmazonServiceException, throws AmazonClientException, AmazonServiceException,

View File

@ -75,6 +75,7 @@ public class S3AInstrumentation {
private final MutableCounterLong numberOfFilesCopied; private final MutableCounterLong numberOfFilesCopied;
private final MutableCounterLong bytesOfFilesCopied; private final MutableCounterLong bytesOfFilesCopied;
private final MutableCounterLong numberOfFilesDeleted; private final MutableCounterLong numberOfFilesDeleted;
private final MutableCounterLong numberOfFakeDirectoryDeletes;
private final MutableCounterLong numberOfDirectoriesCreated; private final MutableCounterLong numberOfDirectoriesCreated;
private final MutableCounterLong numberOfDirectoriesDeleted; private final MutableCounterLong numberOfDirectoriesDeleted;
private final Map<String, MutableCounterLong> streamMetrics = private final Map<String, MutableCounterLong> streamMetrics =
@ -134,6 +135,7 @@ public class S3AInstrumentation {
numberOfFilesCopied = counter(FILES_COPIED); numberOfFilesCopied = counter(FILES_COPIED);
bytesOfFilesCopied = counter(FILES_COPIED_BYTES); bytesOfFilesCopied = counter(FILES_COPIED_BYTES);
numberOfFilesDeleted = counter(FILES_DELETED); numberOfFilesDeleted = counter(FILES_DELETED);
numberOfFakeDirectoryDeletes = counter(FAKE_DIRECTORIES_DELETED);
numberOfDirectoriesCreated = counter(DIRECTORIES_CREATED); numberOfDirectoriesCreated = counter(DIRECTORIES_CREATED);
numberOfDirectoriesDeleted = counter(DIRECTORIES_DELETED); numberOfDirectoriesDeleted = counter(DIRECTORIES_DELETED);
ignoredErrors = counter(IGNORED_ERRORS); ignoredErrors = counter(IGNORED_ERRORS);
@ -294,6 +296,14 @@ public class S3AInstrumentation {
numberOfFilesDeleted.incr(count); numberOfFilesDeleted.incr(count);
} }
/**
* Indicate that fake directory request was made.
* @param count number of directory entries included in the delete request.
*/
public void fakeDirsDeleted(int count) {
numberOfFakeDirectoryDeletes.incr(count);
}
/** /**
* Indicate that S3A created a directory. * Indicate that S3A created a directory.
*/ */

View File

@ -42,6 +42,10 @@ public enum Statistic {
"Total number of files created through the object store."), "Total number of files created through the object store."),
FILES_DELETED("files_deleted", FILES_DELETED("files_deleted",
"Total number of files deleted from the object store."), "Total number of files deleted from the object store."),
FAKE_DIRECTORIES_CREATED("fake_directories_created",
"Total number of fake directory entries created in the object store."),
FAKE_DIRECTORIES_DELETED("fake_directories_deleted",
"Total number of fake directory deletes submitted to object store."),
IGNORED_ERRORS("ignored_errors", "Errors caught and ignored"), IGNORED_ERRORS("ignored_errors", "Errors caught and ignored"),
INVOCATION_COPY_FROM_LOCAL_FILE(CommonStatisticNames.OP_COPY_FROM_LOCAL_FILE, INVOCATION_COPY_FROM_LOCAL_FILE(CommonStatisticNames.OP_COPY_FROM_LOCAL_FILE,
"Calls of copyFromLocalFile()"), "Calls of copyFromLocalFile()"),

View File

@ -296,13 +296,22 @@ public class S3ATestUtils {
return sb.toString(); return sb.toString();
} }
/**
* Assert that the value of {@link #diff()} matches that expected.
* @param message message to print; metric name is appended
* @param expected expected value.
*/
public void assertDiffEquals(String message, long expected) {
Assert.assertEquals(message + ": " + statistic.getSymbol(),
expected, diff());
}
/** /**
* Assert that the value of {@link #diff()} matches that expected. * Assert that the value of {@link #diff()} matches that expected.
* @param expected expected value. * @param expected expected value.
*/ */
public void assertDiffEquals(long expected) { public void assertDiffEquals(long expected) {
Assert.assertEquals("Count of " + this, assertDiffEquals("Count of " + this, expected);
expected, diff());
} }
/** /**

View File

@ -189,4 +189,89 @@ public class TestS3AFileOperationCost extends AbstractFSContractTestBase {
tmpFile.delete(); tmpFile.delete();
} }
} }
private void reset(MetricDiff... diffs) {
for (MetricDiff diff : diffs) {
diff.reset();
}
}
@Test
public void testFakeDirectoryDeletion() throws Throwable {
describe("Verify whether create file works after renaming a file. "
+ "In S3, rename deletes any fake directories as a part of "
+ "clean up activity");
S3AFileSystem fs = getFileSystem();
Path srcBaseDir = path("src");
mkdirs(srcBaseDir);
MetricDiff deleteRequests =
new MetricDiff(fs, Statistic.OBJECT_DELETE_REQUESTS);
MetricDiff directoriesDeleted =
new MetricDiff(fs, Statistic.DIRECTORIES_DELETED);
MetricDiff fakeDirectoriesDeleted =
new MetricDiff(fs, Statistic.FAKE_DIRECTORIES_DELETED);
MetricDiff directoriesCreated =
new MetricDiff(fs, Statistic.DIRECTORIES_CREATED);
Path srcDir = new Path(srcBaseDir, "1/2/3/4/5/6");
Path srcFilePath = new Path(srcDir, "source.txt");
int srcDirDepth = directoriesInPath(srcDir);
// one dir created, one removed
mkdirs(srcDir);
String state = "after mkdir(srcDir)";
directoriesCreated.assertDiffEquals(state, 1);
/* TODO: uncomment once HADOOP-13222 is in
deleteRequests.assertDiffEquals(state, 1);
directoriesDeleted.assertDiffEquals(state, 0);
fakeDirectoriesDeleted.assertDiffEquals(state, srcDirDepth);
*/
reset(deleteRequests, directoriesCreated, directoriesDeleted,
fakeDirectoriesDeleted);
// creating a file should trigger demise of the src dir
touch(fs, srcFilePath);
state = "after touch(fs, srcFilePath)";
deleteRequests.assertDiffEquals(state, 1);
directoriesCreated.assertDiffEquals(state, 0);
directoriesDeleted.assertDiffEquals(state, 0);
fakeDirectoriesDeleted.assertDiffEquals(state, srcDirDepth);
reset(deleteRequests, directoriesCreated, directoriesDeleted,
fakeDirectoriesDeleted);
Path destBaseDir = path("dest");
Path destDir = new Path(destBaseDir, "1/2/3/4/5/6");
Path destFilePath = new Path(destDir, "dest.txt");
mkdirs(destDir);
state = "after mkdir(destDir)";
int destDirDepth = directoriesInPath(destDir);
directoriesCreated.assertDiffEquals(state, 1);
/* TODO: uncomment once HADOOP-13222 is in
deleteRequests.assertDiffEquals(state,1);
directoriesDeleted.assertDiffEquals(state,0);
fakeDirectoriesDeleted.assertDiffEquals(state,destDirDepth);
*/
reset(deleteRequests, directoriesCreated, directoriesDeleted,
fakeDirectoriesDeleted);
fs.rename(srcFilePath, destFilePath);
state = "after rename(srcFilePath, destFilePath)";
directoriesCreated.assertDiffEquals(state, 1);
// one for the renamed file, one for the parent
deleteRequests.assertDiffEquals(state, 2);
directoriesDeleted.assertDiffEquals(state, 0);
fakeDirectoriesDeleted.assertDiffEquals(state, destDirDepth);
reset(deleteRequests, directoriesCreated, directoriesDeleted,
fakeDirectoriesDeleted);
assertIsFile(destFilePath);
assertIsDirectory(srcDir);
}
private int directoriesInPath(Path path) {
return path.isRoot() ? 0 : 1 + directoriesInPath(path.getParent());
}
} }