HADOOP-13164 Optimize S3AFileSystem::deleteUnnecessaryFakeDirectories. Contributed by Rajesh Balamohan.

This commit is contained in:
Steve Loughran 2016-09-29 16:59:33 +01:00
parent 7296999c89
commit d24933ad3a
6 changed files with 143 additions and 34 deletions

View File

@ -359,7 +359,7 @@ protected void assertMinusOne(String text, int result) {
assertEquals(text + " wrong read result " + result, -1, result);
}
boolean rename(Path src, Path dst) throws IOException {
protected boolean rename(Path src, Path dst) throws IOException {
return getFileSystem().rename(src, dst);
}

View File

@ -778,7 +778,7 @@ private boolean innerRename(Path src, Path dst) throws IOException,
copyFile(summary.getKey(), newDstKey, summary.getSize());
if (keysToDelete.size() == MAX_ENTRIES_TO_DELETE) {
removeKeys(keysToDelete, true);
removeKeys(keysToDelete, true, false);
}
}
@ -786,7 +786,7 @@ private boolean innerRename(Path src, Path dst) throws IOException,
objects = continueListObjects(objects);
} else {
if (!keysToDelete.isEmpty()) {
removeKeys(keysToDelete, false);
removeKeys(keysToDelete, false, false);
}
break;
}
@ -1034,17 +1034,25 @@ public void incrementPutProgressStatistics(String key, long bytes) {
* @param keysToDelete collection of keys to delete on the s3-backend
* @param clearKeys clears the keysToDelete-list after processing the list
* when set to true
* @param deleteFakeDir indicates whether this is for deleting fake dirs
*/
private void removeKeys(List<DeleteObjectsRequest.KeyVersion> keysToDelete,
boolean clearKeys) throws AmazonClientException {
boolean clearKeys, boolean deleteFakeDir) throws AmazonClientException {
if (keysToDelete.isEmpty()) {
// no keys
return;
}
if (enableMultiObjectsDelete) {
deleteObjects(new DeleteObjectsRequest(bucket).withKeys(keysToDelete));
instrumentation.fileDeleted(keysToDelete.size());
} else {
for (DeleteObjectsRequest.KeyVersion keyVersion : keysToDelete) {
deleteObject(keyVersion.getKey());
}
}
if (!deleteFakeDir) {
instrumentation.fileDeleted(keysToDelete.size());
} else {
instrumentation.fakeDirsDeleted(keysToDelete.size());
}
if (clearKeys) {
keysToDelete.clear();
@ -1132,7 +1140,7 @@ private boolean innerDelete(S3AFileStatus status, boolean recursive)
LOG.debug("Got object to delete {}", summary.getKey());
if (keys.size() == MAX_ENTRIES_TO_DELETE) {
removeKeys(keys, true);
removeKeys(keys, true, false);
}
}
@ -1140,7 +1148,7 @@ private boolean innerDelete(S3AFileStatus status, boolean recursive)
objects = continueListObjects(objects);
} else {
if (!keys.isEmpty()) {
removeKeys(keys, false);
removeKeys(keys, false, false);
}
break;
}
@ -1630,36 +1638,29 @@ public void finishedWrite(String key) {
/**
* Delete mock parent directories which are no longer needed.
* This code swallows IO exceptions encountered
* @param f path
* @param path path
*/
private void deleteUnnecessaryFakeDirectories(Path f) {
while (true) {
String key = "";
private void deleteUnnecessaryFakeDirectories(Path path) {
List<DeleteObjectsRequest.KeyVersion> keysToRemove = new ArrayList<>();
while (!path.isRoot()) {
String key = pathToKey(path);
key = (key.endsWith("/")) ? key : (key + "/");
keysToRemove.add(new DeleteObjectsRequest.KeyVersion(key));
path = path.getParent();
}
try {
key = pathToKey(f);
if (key.isEmpty()) {
break;
}
S3AFileStatus status = getFileStatus(f);
if (status.isDirectory() && status.isEmptyDirectory()) {
LOG.debug("Deleting fake directory {}/", key);
deleteObject(key + "/");
}
} catch (IOException | AmazonClientException e) {
LOG.debug("While deleting key {} ", key, e);
removeKeys(keysToRemove, false, true);
} catch(AmazonClientException e) {
instrumentation.errorIgnored();
if (LOG.isDebugEnabled()) {
StringBuilder sb = new StringBuilder();
for(DeleteObjectsRequest.KeyVersion kv : keysToRemove) {
sb.append(kv.getKey()).append(",");
}
LOG.debug("While deleting keys {} ", sb.toString(), e);
}
if (f.isRoot()) {
break;
}
f = f.getParent();
}
}
private void createFakeDirectory(final String objectName)
throws AmazonClientException, AmazonServiceException,

View File

@ -75,6 +75,7 @@ public class S3AInstrumentation {
private final MutableCounterLong numberOfFilesCopied;
private final MutableCounterLong bytesOfFilesCopied;
private final MutableCounterLong numberOfFilesDeleted;
private final MutableCounterLong numberOfFakeDirectoryDeletes;
private final MutableCounterLong numberOfDirectoriesCreated;
private final MutableCounterLong numberOfDirectoriesDeleted;
private final Map<String, MutableCounterLong> streamMetrics =
@ -134,6 +135,7 @@ public S3AInstrumentation(URI name) {
numberOfFilesCopied = counter(FILES_COPIED);
bytesOfFilesCopied = counter(FILES_COPIED_BYTES);
numberOfFilesDeleted = counter(FILES_DELETED);
numberOfFakeDirectoryDeletes = counter(FAKE_DIRECTORIES_DELETED);
numberOfDirectoriesCreated = counter(DIRECTORIES_CREATED);
numberOfDirectoriesDeleted = counter(DIRECTORIES_DELETED);
ignoredErrors = counter(IGNORED_ERRORS);
@ -294,6 +296,14 @@ public void fileDeleted(int count) {
numberOfFilesDeleted.incr(count);
}
/**
* Indicate that fake directory request was made.
* @param count number of directory entries included in the delete request.
*/
public void fakeDirsDeleted(int count) {
numberOfFakeDirectoryDeletes.incr(count);
}
/**
* Indicate that S3A created a directory.
*/

View File

@ -42,6 +42,10 @@ public enum Statistic {
"Total number of files created through the object store."),
FILES_DELETED("files_deleted",
"Total number of files deleted from the object store."),
FAKE_DIRECTORIES_CREATED("fake_directories_created",
"Total number of fake directory entries created in the object store."),
FAKE_DIRECTORIES_DELETED("fake_directories_deleted",
"Total number of fake directory deletes submitted to object store."),
IGNORED_ERRORS("ignored_errors", "Errors caught and ignored"),
INVOCATION_COPY_FROM_LOCAL_FILE(CommonStatisticNames.OP_COPY_FROM_LOCAL_FILE,
"Calls of copyFromLocalFile()"),

View File

@ -296,13 +296,22 @@ public String toString() {
return sb.toString();
}
/**
* Assert that the value of {@link #diff()} matches that expected.
* @param message message to print; metric name is appended
* @param expected expected value.
*/
public void assertDiffEquals(String message, long expected) {
Assert.assertEquals(message + ": " + statistic.getSymbol(),
expected, diff());
}
/**
* Assert that the value of {@link #diff()} matches that expected.
* @param expected expected value.
*/
public void assertDiffEquals(long expected) {
Assert.assertEquals("Count of " + this,
expected, diff());
assertDiffEquals("Count of " + this, expected);
}
/**

View File

@ -189,4 +189,89 @@ public void testCostOfCopyFromLocalFile() throws Throwable {
tmpFile.delete();
}
}
private void reset(MetricDiff... diffs) {
for (MetricDiff diff : diffs) {
diff.reset();
}
}
@Test
public void testFakeDirectoryDeletion() throws Throwable {
describe("Verify whether create file works after renaming a file. "
+ "In S3, rename deletes any fake directories as a part of "
+ "clean up activity");
S3AFileSystem fs = getFileSystem();
Path srcBaseDir = path("src");
mkdirs(srcBaseDir);
MetricDiff deleteRequests =
new MetricDiff(fs, Statistic.OBJECT_DELETE_REQUESTS);
MetricDiff directoriesDeleted =
new MetricDiff(fs, Statistic.DIRECTORIES_DELETED);
MetricDiff fakeDirectoriesDeleted =
new MetricDiff(fs, Statistic.FAKE_DIRECTORIES_DELETED);
MetricDiff directoriesCreated =
new MetricDiff(fs, Statistic.DIRECTORIES_CREATED);
Path srcDir = new Path(srcBaseDir, "1/2/3/4/5/6");
Path srcFilePath = new Path(srcDir, "source.txt");
int srcDirDepth = directoriesInPath(srcDir);
// one dir created, one removed
mkdirs(srcDir);
String state = "after mkdir(srcDir)";
directoriesCreated.assertDiffEquals(state, 1);
/* TODO: uncomment once HADOOP-13222 is in
deleteRequests.assertDiffEquals(state, 1);
directoriesDeleted.assertDiffEquals(state, 0);
fakeDirectoriesDeleted.assertDiffEquals(state, srcDirDepth);
*/
reset(deleteRequests, directoriesCreated, directoriesDeleted,
fakeDirectoriesDeleted);
// creating a file should trigger demise of the src dir
touch(fs, srcFilePath);
state = "after touch(fs, srcFilePath)";
deleteRequests.assertDiffEquals(state, 1);
directoriesCreated.assertDiffEquals(state, 0);
directoriesDeleted.assertDiffEquals(state, 0);
fakeDirectoriesDeleted.assertDiffEquals(state, srcDirDepth);
reset(deleteRequests, directoriesCreated, directoriesDeleted,
fakeDirectoriesDeleted);
Path destBaseDir = path("dest");
Path destDir = new Path(destBaseDir, "1/2/3/4/5/6");
Path destFilePath = new Path(destDir, "dest.txt");
mkdirs(destDir);
state = "after mkdir(destDir)";
int destDirDepth = directoriesInPath(destDir);
directoriesCreated.assertDiffEquals(state, 1);
/* TODO: uncomment once HADOOP-13222 is in
deleteRequests.assertDiffEquals(state,1);
directoriesDeleted.assertDiffEquals(state,0);
fakeDirectoriesDeleted.assertDiffEquals(state,destDirDepth);
*/
reset(deleteRequests, directoriesCreated, directoriesDeleted,
fakeDirectoriesDeleted);
fs.rename(srcFilePath, destFilePath);
state = "after rename(srcFilePath, destFilePath)";
directoriesCreated.assertDiffEquals(state, 1);
// one for the renamed file, one for the parent
deleteRequests.assertDiffEquals(state, 2);
directoriesDeleted.assertDiffEquals(state, 0);
fakeDirectoriesDeleted.assertDiffEquals(state, destDirDepth);
reset(deleteRequests, directoriesCreated, directoriesDeleted,
fakeDirectoriesDeleted);
assertIsFile(destFilePath);
assertIsDirectory(srcDir);
}
private int directoriesInPath(Path path) {
return path.isRoot() ? 0 : 1 + directoriesInPath(path.getParent());
}
}