HADOOP-13164 Optimize S3AFileSystem::deleteUnnecessaryFakeDirectories. Contributed by Rajesh Balamohan.

This commit is contained in:
Steve Loughran 2016-09-29 16:59:33 +01:00
parent a1b8251bf7
commit ee0c722dc8
6 changed files with 143 additions and 34 deletions

View File

@ -359,7 +359,7 @@ public abstract class AbstractFSContractTestBase extends Assert
assertEquals(text + " wrong read result " + result, -1, result);
}
boolean rename(Path src, Path dst) throws IOException {
protected boolean rename(Path src, Path dst) throws IOException {
return getFileSystem().rename(src, dst);
}

View File

@ -668,7 +668,7 @@ public class S3AFileSystem extends FileSystem {
copyFile(summary.getKey(), newDstKey, summary.getSize());
if (keysToDelete.size() == MAX_ENTRIES_TO_DELETE) {
removeKeys(keysToDelete, true);
removeKeys(keysToDelete, true, false);
}
}
@ -676,7 +676,7 @@ public class S3AFileSystem extends FileSystem {
objects = continueListObjects(objects);
} else {
if (!keysToDelete.isEmpty()) {
removeKeys(keysToDelete, false);
removeKeys(keysToDelete, false, false);
}
break;
}
@ -924,17 +924,25 @@ public class S3AFileSystem extends FileSystem {
* @param keysToDelete collection of keys to delete on the s3-backend
* @param clearKeys clears the keysToDelete-list after processing the list
* when set to true
* @param deleteFakeDir indicates whether this is for deleting fake dirs
*/
private void removeKeys(List<DeleteObjectsRequest.KeyVersion> keysToDelete,
boolean clearKeys) throws AmazonClientException {
boolean clearKeys, boolean deleteFakeDir) throws AmazonClientException {
if (keysToDelete.isEmpty()) {
// no keys
return;
}
if (enableMultiObjectsDelete) {
deleteObjects(new DeleteObjectsRequest(bucket).withKeys(keysToDelete));
instrumentation.fileDeleted(keysToDelete.size());
} else {
for (DeleteObjectsRequest.KeyVersion keyVersion : keysToDelete) {
deleteObject(keyVersion.getKey());
}
}
if (!deleteFakeDir) {
instrumentation.fileDeleted(keysToDelete.size());
} else {
instrumentation.fakeDirsDeleted(keysToDelete.size());
}
if (clearKeys) {
keysToDelete.clear();
@ -1017,7 +1025,7 @@ public class S3AFileSystem extends FileSystem {
LOG.debug("Got object to delete {}", summary.getKey());
if (keys.size() == MAX_ENTRIES_TO_DELETE) {
removeKeys(keys, true);
removeKeys(keys, true, false);
}
}
@ -1025,7 +1033,7 @@ public class S3AFileSystem extends FileSystem {
objects = continueListObjects(objects);
} else {
if (!keys.isEmpty()) {
removeKeys(keys, false);
removeKeys(keys, false, false);
}
break;
}
@ -1504,37 +1512,30 @@ public class S3AFileSystem extends FileSystem {
/**
* Delete mock parent directories which are no longer needed.
* This code swallows IO exceptions encountered
* @param f path
* @param path path
*/
private void deleteUnnecessaryFakeDirectories(Path f) {
while (true) {
String key = "";
try {
key = pathToKey(f);
if (key.isEmpty()) {
break;
private void deleteUnnecessaryFakeDirectories(Path path) {
List<DeleteObjectsRequest.KeyVersion> keysToRemove = new ArrayList<>();
while (!path.isRoot()) {
String key = pathToKey(path);
key = (key.endsWith("/")) ? key : (key + "/");
keysToRemove.add(new DeleteObjectsRequest.KeyVersion(key));
path = path.getParent();
}
try {
removeKeys(keysToRemove, false, true);
} catch(AmazonClientException e) {
instrumentation.errorIgnored();
if (LOG.isDebugEnabled()) {
StringBuilder sb = new StringBuilder();
for(DeleteObjectsRequest.KeyVersion kv : keysToRemove) {
sb.append(kv.getKey()).append(",");
}
S3AFileStatus status = getFileStatus(f);
if (status.isDirectory() && status.isEmptyDirectory()) {
LOG.debug("Deleting fake directory {}/", key);
deleteObject(key + "/");
}
} catch (IOException | AmazonClientException e) {
LOG.debug("While deleting key {} ", key, e);
instrumentation.errorIgnored();
LOG.debug("While deleting keys {} ", sb.toString(), e);
}
if (f.isRoot()) {
break;
}
f = f.getParent();
}
}
private void createFakeDirectory(final String objectName)
throws AmazonClientException, AmazonServiceException,
InterruptedIOException {

View File

@ -75,6 +75,7 @@ public class S3AInstrumentation {
private final MutableCounterLong numberOfFilesCopied;
private final MutableCounterLong bytesOfFilesCopied;
private final MutableCounterLong numberOfFilesDeleted;
private final MutableCounterLong numberOfFakeDirectoryDeletes;
private final MutableCounterLong numberOfDirectoriesCreated;
private final MutableCounterLong numberOfDirectoriesDeleted;
private final Map<String, MutableCounterLong> streamMetrics =
@ -135,6 +136,7 @@ public class S3AInstrumentation {
numberOfFilesCopied = counter(FILES_COPIED);
bytesOfFilesCopied = counter(FILES_COPIED_BYTES);
numberOfFilesDeleted = counter(FILES_DELETED);
numberOfFakeDirectoryDeletes = counter(FAKE_DIRECTORIES_DELETED);
numberOfDirectoriesCreated = counter(DIRECTORIES_CREATED);
numberOfDirectoriesDeleted = counter(DIRECTORIES_DELETED);
ignoredErrors = counter(IGNORED_ERRORS);
@ -295,6 +297,14 @@ public class S3AInstrumentation {
numberOfFilesDeleted.incr(count);
}
/**
* Indicate that fake directory request was made.
* @param count number of directory entries included in the delete request.
*/
public void fakeDirsDeleted(int count) {
numberOfFakeDirectoryDeletes.incr(count);
}
/**
* Indicate that S3A created a directory.
*/

View File

@ -42,6 +42,10 @@ public enum Statistic {
"Total number of files created through the object store."),
FILES_DELETED("files_deleted",
"Total number of files deleted from the object store."),
FAKE_DIRECTORIES_CREATED("fake_directories_created",
"Total number of fake directory entries created in the object store."),
FAKE_DIRECTORIES_DELETED("fake_directories_deleted",
"Total number of fake directory deletes submitted to object store."),
IGNORED_ERRORS("ignored_errors", "Errors caught and ignored"),
INVOCATION_COPY_FROM_LOCAL_FILE(CommonStatisticNames.OP_COPY_FROM_LOCAL_FILE,
"Calls of copyFromLocalFile()"),

View File

@ -188,4 +188,89 @@ public class ITestS3AFileOperationCost extends AbstractFSContractTestBase {
tmpFile.delete();
}
}
private void reset(MetricDiff... diffs) {
for (MetricDiff diff : diffs) {
diff.reset();
}
}
@Test
public void testFakeDirectoryDeletion() throws Throwable {
describe("Verify whether create file works after renaming a file. "
+ "In S3, rename deletes any fake directories as a part of "
+ "clean up activity");
S3AFileSystem fs = getFileSystem();
Path srcBaseDir = path("src");
mkdirs(srcBaseDir);
MetricDiff deleteRequests =
new MetricDiff(fs, Statistic.OBJECT_DELETE_REQUESTS);
MetricDiff directoriesDeleted =
new MetricDiff(fs, Statistic.DIRECTORIES_DELETED);
MetricDiff fakeDirectoriesDeleted =
new MetricDiff(fs, Statistic.FAKE_DIRECTORIES_DELETED);
MetricDiff directoriesCreated =
new MetricDiff(fs, Statistic.DIRECTORIES_CREATED);
Path srcDir = new Path(srcBaseDir, "1/2/3/4/5/6");
Path srcFilePath = new Path(srcDir, "source.txt");
int srcDirDepth = directoriesInPath(srcDir);
// one dir created, one removed
mkdirs(srcDir);
String state = "after mkdir(srcDir)";
directoriesCreated.assertDiffEquals(state, 1);
/* TODO: uncomment once HADOOP-13222 is in
deleteRequests.assertDiffEquals(state, 1);
directoriesDeleted.assertDiffEquals(state, 0);
fakeDirectoriesDeleted.assertDiffEquals(state, srcDirDepth);
*/
reset(deleteRequests, directoriesCreated, directoriesDeleted,
fakeDirectoriesDeleted);
// creating a file should trigger demise of the src dir
touch(fs, srcFilePath);
state = "after touch(fs, srcFilePath)";
deleteRequests.assertDiffEquals(state, 1);
directoriesCreated.assertDiffEquals(state, 0);
directoriesDeleted.assertDiffEquals(state, 0);
fakeDirectoriesDeleted.assertDiffEquals(state, srcDirDepth);
reset(deleteRequests, directoriesCreated, directoriesDeleted,
fakeDirectoriesDeleted);
Path destBaseDir = path("dest");
Path destDir = new Path(destBaseDir, "1/2/3/4/5/6");
Path destFilePath = new Path(destDir, "dest.txt");
mkdirs(destDir);
state = "after mkdir(destDir)";
int destDirDepth = directoriesInPath(destDir);
directoriesCreated.assertDiffEquals(state, 1);
/* TODO: uncomment once HADOOP-13222 is in
deleteRequests.assertDiffEquals(state,1);
directoriesDeleted.assertDiffEquals(state,0);
fakeDirectoriesDeleted.assertDiffEquals(state,destDirDepth);
*/
reset(deleteRequests, directoriesCreated, directoriesDeleted,
fakeDirectoriesDeleted);
fs.rename(srcFilePath, destFilePath);
state = "after rename(srcFilePath, destFilePath)";
directoriesCreated.assertDiffEquals(state, 1);
// one for the renamed file, one for the parent
deleteRequests.assertDiffEquals(state, 2);
directoriesDeleted.assertDiffEquals(state, 0);
fakeDirectoriesDeleted.assertDiffEquals(state, destDirDepth);
reset(deleteRequests, directoriesCreated, directoriesDeleted,
fakeDirectoriesDeleted);
assertIsFile(destFilePath);
assertIsDirectory(srcDir);
}
private int directoriesInPath(Path path) {
return path.isRoot() ? 0 : 1 + directoriesInPath(path.getParent());
}
}

View File

@ -296,13 +296,22 @@ public class S3ATestUtils {
return sb.toString();
}
/**
* Assert that the value of {@link #diff()} matches that expected.
* @param message message to print; metric name is appended
* @param expected expected value.
*/
public void assertDiffEquals(String message, long expected) {
Assert.assertEquals(message + ": " + statistic.getSymbol(),
expected, diff());
}
/**
* Assert that the value of {@link #diff()} matches that expected.
* @param expected expected value.
*/
public void assertDiffEquals(long expected) {
Assert.assertEquals("Count of " + this,
expected, diff());
assertDiffEquals("Count of " + this, expected);
}
/**