HADOOP-13164 Optimize S3AFileSystem::deleteUnnecessaryFakeDirectories. Contributed by Rajesh Balamohan.
This commit is contained in:
parent
7296999c89
commit
d24933ad3a
|
@ -359,7 +359,7 @@ public abstract class AbstractFSContractTestBase extends Assert
|
||||||
assertEquals(text + " wrong read result " + result, -1, result);
|
assertEquals(text + " wrong read result " + result, -1, result);
|
||||||
}
|
}
|
||||||
|
|
||||||
boolean rename(Path src, Path dst) throws IOException {
|
protected boolean rename(Path src, Path dst) throws IOException {
|
||||||
return getFileSystem().rename(src, dst);
|
return getFileSystem().rename(src, dst);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -778,7 +778,7 @@ public class S3AFileSystem extends FileSystem {
|
||||||
copyFile(summary.getKey(), newDstKey, summary.getSize());
|
copyFile(summary.getKey(), newDstKey, summary.getSize());
|
||||||
|
|
||||||
if (keysToDelete.size() == MAX_ENTRIES_TO_DELETE) {
|
if (keysToDelete.size() == MAX_ENTRIES_TO_DELETE) {
|
||||||
removeKeys(keysToDelete, true);
|
removeKeys(keysToDelete, true, false);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -786,7 +786,7 @@ public class S3AFileSystem extends FileSystem {
|
||||||
objects = continueListObjects(objects);
|
objects = continueListObjects(objects);
|
||||||
} else {
|
} else {
|
||||||
if (!keysToDelete.isEmpty()) {
|
if (!keysToDelete.isEmpty()) {
|
||||||
removeKeys(keysToDelete, false);
|
removeKeys(keysToDelete, false, false);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@ -1034,17 +1034,25 @@ public class S3AFileSystem extends FileSystem {
|
||||||
* @param keysToDelete collection of keys to delete on the s3-backend
|
* @param keysToDelete collection of keys to delete on the s3-backend
|
||||||
* @param clearKeys clears the keysToDelete-list after processing the list
|
* @param clearKeys clears the keysToDelete-list after processing the list
|
||||||
* when set to true
|
* when set to true
|
||||||
|
* @param deleteFakeDir indicates whether this is for deleting fake dirs
|
||||||
*/
|
*/
|
||||||
private void removeKeys(List<DeleteObjectsRequest.KeyVersion> keysToDelete,
|
private void removeKeys(List<DeleteObjectsRequest.KeyVersion> keysToDelete,
|
||||||
boolean clearKeys) throws AmazonClientException {
|
boolean clearKeys, boolean deleteFakeDir) throws AmazonClientException {
|
||||||
|
if (keysToDelete.isEmpty()) {
|
||||||
|
// no keys
|
||||||
|
return;
|
||||||
|
}
|
||||||
if (enableMultiObjectsDelete) {
|
if (enableMultiObjectsDelete) {
|
||||||
deleteObjects(new DeleteObjectsRequest(bucket).withKeys(keysToDelete));
|
deleteObjects(new DeleteObjectsRequest(bucket).withKeys(keysToDelete));
|
||||||
instrumentation.fileDeleted(keysToDelete.size());
|
|
||||||
} else {
|
} else {
|
||||||
for (DeleteObjectsRequest.KeyVersion keyVersion : keysToDelete) {
|
for (DeleteObjectsRequest.KeyVersion keyVersion : keysToDelete) {
|
||||||
deleteObject(keyVersion.getKey());
|
deleteObject(keyVersion.getKey());
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
if (!deleteFakeDir) {
|
||||||
instrumentation.fileDeleted(keysToDelete.size());
|
instrumentation.fileDeleted(keysToDelete.size());
|
||||||
|
} else {
|
||||||
|
instrumentation.fakeDirsDeleted(keysToDelete.size());
|
||||||
}
|
}
|
||||||
if (clearKeys) {
|
if (clearKeys) {
|
||||||
keysToDelete.clear();
|
keysToDelete.clear();
|
||||||
|
@ -1132,7 +1140,7 @@ public class S3AFileSystem extends FileSystem {
|
||||||
LOG.debug("Got object to delete {}", summary.getKey());
|
LOG.debug("Got object to delete {}", summary.getKey());
|
||||||
|
|
||||||
if (keys.size() == MAX_ENTRIES_TO_DELETE) {
|
if (keys.size() == MAX_ENTRIES_TO_DELETE) {
|
||||||
removeKeys(keys, true);
|
removeKeys(keys, true, false);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1140,7 +1148,7 @@ public class S3AFileSystem extends FileSystem {
|
||||||
objects = continueListObjects(objects);
|
objects = continueListObjects(objects);
|
||||||
} else {
|
} else {
|
||||||
if (!keys.isEmpty()) {
|
if (!keys.isEmpty()) {
|
||||||
removeKeys(keys, false);
|
removeKeys(keys, false, false);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@ -1630,36 +1638,29 @@ public class S3AFileSystem extends FileSystem {
|
||||||
/**
|
/**
|
||||||
* Delete mock parent directories which are no longer needed.
|
* Delete mock parent directories which are no longer needed.
|
||||||
* This code swallows IO exceptions encountered
|
* This code swallows IO exceptions encountered
|
||||||
* @param f path
|
* @param path path
|
||||||
*/
|
*/
|
||||||
private void deleteUnnecessaryFakeDirectories(Path f) {
|
private void deleteUnnecessaryFakeDirectories(Path path) {
|
||||||
while (true) {
|
List<DeleteObjectsRequest.KeyVersion> keysToRemove = new ArrayList<>();
|
||||||
String key = "";
|
while (!path.isRoot()) {
|
||||||
|
String key = pathToKey(path);
|
||||||
|
key = (key.endsWith("/")) ? key : (key + "/");
|
||||||
|
keysToRemove.add(new DeleteObjectsRequest.KeyVersion(key));
|
||||||
|
path = path.getParent();
|
||||||
|
}
|
||||||
try {
|
try {
|
||||||
key = pathToKey(f);
|
removeKeys(keysToRemove, false, true);
|
||||||
if (key.isEmpty()) {
|
} catch(AmazonClientException e) {
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
S3AFileStatus status = getFileStatus(f);
|
|
||||||
|
|
||||||
if (status.isDirectory() && status.isEmptyDirectory()) {
|
|
||||||
LOG.debug("Deleting fake directory {}/", key);
|
|
||||||
deleteObject(key + "/");
|
|
||||||
}
|
|
||||||
} catch (IOException | AmazonClientException e) {
|
|
||||||
LOG.debug("While deleting key {} ", key, e);
|
|
||||||
instrumentation.errorIgnored();
|
instrumentation.errorIgnored();
|
||||||
|
if (LOG.isDebugEnabled()) {
|
||||||
|
StringBuilder sb = new StringBuilder();
|
||||||
|
for(DeleteObjectsRequest.KeyVersion kv : keysToRemove) {
|
||||||
|
sb.append(kv.getKey()).append(",");
|
||||||
|
}
|
||||||
|
LOG.debug("While deleting keys {} ", sb.toString(), e);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (f.isRoot()) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
f = f.getParent();
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
private void createFakeDirectory(final String objectName)
|
private void createFakeDirectory(final String objectName)
|
||||||
throws AmazonClientException, AmazonServiceException,
|
throws AmazonClientException, AmazonServiceException,
|
||||||
|
|
|
@ -75,6 +75,7 @@ public class S3AInstrumentation {
|
||||||
private final MutableCounterLong numberOfFilesCopied;
|
private final MutableCounterLong numberOfFilesCopied;
|
||||||
private final MutableCounterLong bytesOfFilesCopied;
|
private final MutableCounterLong bytesOfFilesCopied;
|
||||||
private final MutableCounterLong numberOfFilesDeleted;
|
private final MutableCounterLong numberOfFilesDeleted;
|
||||||
|
private final MutableCounterLong numberOfFakeDirectoryDeletes;
|
||||||
private final MutableCounterLong numberOfDirectoriesCreated;
|
private final MutableCounterLong numberOfDirectoriesCreated;
|
||||||
private final MutableCounterLong numberOfDirectoriesDeleted;
|
private final MutableCounterLong numberOfDirectoriesDeleted;
|
||||||
private final Map<String, MutableCounterLong> streamMetrics =
|
private final Map<String, MutableCounterLong> streamMetrics =
|
||||||
|
@ -134,6 +135,7 @@ public class S3AInstrumentation {
|
||||||
numberOfFilesCopied = counter(FILES_COPIED);
|
numberOfFilesCopied = counter(FILES_COPIED);
|
||||||
bytesOfFilesCopied = counter(FILES_COPIED_BYTES);
|
bytesOfFilesCopied = counter(FILES_COPIED_BYTES);
|
||||||
numberOfFilesDeleted = counter(FILES_DELETED);
|
numberOfFilesDeleted = counter(FILES_DELETED);
|
||||||
|
numberOfFakeDirectoryDeletes = counter(FAKE_DIRECTORIES_DELETED);
|
||||||
numberOfDirectoriesCreated = counter(DIRECTORIES_CREATED);
|
numberOfDirectoriesCreated = counter(DIRECTORIES_CREATED);
|
||||||
numberOfDirectoriesDeleted = counter(DIRECTORIES_DELETED);
|
numberOfDirectoriesDeleted = counter(DIRECTORIES_DELETED);
|
||||||
ignoredErrors = counter(IGNORED_ERRORS);
|
ignoredErrors = counter(IGNORED_ERRORS);
|
||||||
|
@ -294,6 +296,14 @@ public class S3AInstrumentation {
|
||||||
numberOfFilesDeleted.incr(count);
|
numberOfFilesDeleted.incr(count);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Indicate that fake directory request was made.
|
||||||
|
* @param count number of directory entries included in the delete request.
|
||||||
|
*/
|
||||||
|
public void fakeDirsDeleted(int count) {
|
||||||
|
numberOfFakeDirectoryDeletes.incr(count);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Indicate that S3A created a directory.
|
* Indicate that S3A created a directory.
|
||||||
*/
|
*/
|
||||||
|
|
|
@ -42,6 +42,10 @@ public enum Statistic {
|
||||||
"Total number of files created through the object store."),
|
"Total number of files created through the object store."),
|
||||||
FILES_DELETED("files_deleted",
|
FILES_DELETED("files_deleted",
|
||||||
"Total number of files deleted from the object store."),
|
"Total number of files deleted from the object store."),
|
||||||
|
FAKE_DIRECTORIES_CREATED("fake_directories_created",
|
||||||
|
"Total number of fake directory entries created in the object store."),
|
||||||
|
FAKE_DIRECTORIES_DELETED("fake_directories_deleted",
|
||||||
|
"Total number of fake directory deletes submitted to object store."),
|
||||||
IGNORED_ERRORS("ignored_errors", "Errors caught and ignored"),
|
IGNORED_ERRORS("ignored_errors", "Errors caught and ignored"),
|
||||||
INVOCATION_COPY_FROM_LOCAL_FILE(CommonStatisticNames.OP_COPY_FROM_LOCAL_FILE,
|
INVOCATION_COPY_FROM_LOCAL_FILE(CommonStatisticNames.OP_COPY_FROM_LOCAL_FILE,
|
||||||
"Calls of copyFromLocalFile()"),
|
"Calls of copyFromLocalFile()"),
|
||||||
|
|
|
@ -296,13 +296,22 @@ public class S3ATestUtils {
|
||||||
return sb.toString();
|
return sb.toString();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Assert that the value of {@link #diff()} matches that expected.
|
||||||
|
* @param message message to print; metric name is appended
|
||||||
|
* @param expected expected value.
|
||||||
|
*/
|
||||||
|
public void assertDiffEquals(String message, long expected) {
|
||||||
|
Assert.assertEquals(message + ": " + statistic.getSymbol(),
|
||||||
|
expected, diff());
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Assert that the value of {@link #diff()} matches that expected.
|
* Assert that the value of {@link #diff()} matches that expected.
|
||||||
* @param expected expected value.
|
* @param expected expected value.
|
||||||
*/
|
*/
|
||||||
public void assertDiffEquals(long expected) {
|
public void assertDiffEquals(long expected) {
|
||||||
Assert.assertEquals("Count of " + this,
|
assertDiffEquals("Count of " + this, expected);
|
||||||
expected, diff());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
@ -189,4 +189,89 @@ public class TestS3AFileOperationCost extends AbstractFSContractTestBase {
|
||||||
tmpFile.delete();
|
tmpFile.delete();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private void reset(MetricDiff... diffs) {
|
||||||
|
for (MetricDiff diff : diffs) {
|
||||||
|
diff.reset();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testFakeDirectoryDeletion() throws Throwable {
|
||||||
|
describe("Verify whether create file works after renaming a file. "
|
||||||
|
+ "In S3, rename deletes any fake directories as a part of "
|
||||||
|
+ "clean up activity");
|
||||||
|
S3AFileSystem fs = getFileSystem();
|
||||||
|
Path srcBaseDir = path("src");
|
||||||
|
mkdirs(srcBaseDir);
|
||||||
|
MetricDiff deleteRequests =
|
||||||
|
new MetricDiff(fs, Statistic.OBJECT_DELETE_REQUESTS);
|
||||||
|
MetricDiff directoriesDeleted =
|
||||||
|
new MetricDiff(fs, Statistic.DIRECTORIES_DELETED);
|
||||||
|
MetricDiff fakeDirectoriesDeleted =
|
||||||
|
new MetricDiff(fs, Statistic.FAKE_DIRECTORIES_DELETED);
|
||||||
|
MetricDiff directoriesCreated =
|
||||||
|
new MetricDiff(fs, Statistic.DIRECTORIES_CREATED);
|
||||||
|
|
||||||
|
Path srcDir = new Path(srcBaseDir, "1/2/3/4/5/6");
|
||||||
|
Path srcFilePath = new Path(srcDir, "source.txt");
|
||||||
|
int srcDirDepth = directoriesInPath(srcDir);
|
||||||
|
// one dir created, one removed
|
||||||
|
mkdirs(srcDir);
|
||||||
|
String state = "after mkdir(srcDir)";
|
||||||
|
directoriesCreated.assertDiffEquals(state, 1);
|
||||||
|
/* TODO: uncomment once HADOOP-13222 is in
|
||||||
|
deleteRequests.assertDiffEquals(state, 1);
|
||||||
|
directoriesDeleted.assertDiffEquals(state, 0);
|
||||||
|
fakeDirectoriesDeleted.assertDiffEquals(state, srcDirDepth);
|
||||||
|
*/
|
||||||
|
reset(deleteRequests, directoriesCreated, directoriesDeleted,
|
||||||
|
fakeDirectoriesDeleted);
|
||||||
|
|
||||||
|
// creating a file should trigger demise of the src dir
|
||||||
|
touch(fs, srcFilePath);
|
||||||
|
state = "after touch(fs, srcFilePath)";
|
||||||
|
deleteRequests.assertDiffEquals(state, 1);
|
||||||
|
directoriesCreated.assertDiffEquals(state, 0);
|
||||||
|
directoriesDeleted.assertDiffEquals(state, 0);
|
||||||
|
fakeDirectoriesDeleted.assertDiffEquals(state, srcDirDepth);
|
||||||
|
|
||||||
|
reset(deleteRequests, directoriesCreated, directoriesDeleted,
|
||||||
|
fakeDirectoriesDeleted);
|
||||||
|
|
||||||
|
Path destBaseDir = path("dest");
|
||||||
|
Path destDir = new Path(destBaseDir, "1/2/3/4/5/6");
|
||||||
|
Path destFilePath = new Path(destDir, "dest.txt");
|
||||||
|
mkdirs(destDir);
|
||||||
|
state = "after mkdir(destDir)";
|
||||||
|
|
||||||
|
int destDirDepth = directoriesInPath(destDir);
|
||||||
|
directoriesCreated.assertDiffEquals(state, 1);
|
||||||
|
/* TODO: uncomment once HADOOP-13222 is in
|
||||||
|
deleteRequests.assertDiffEquals(state,1);
|
||||||
|
directoriesDeleted.assertDiffEquals(state,0);
|
||||||
|
fakeDirectoriesDeleted.assertDiffEquals(state,destDirDepth);
|
||||||
|
*/
|
||||||
|
reset(deleteRequests, directoriesCreated, directoriesDeleted,
|
||||||
|
fakeDirectoriesDeleted);
|
||||||
|
|
||||||
|
fs.rename(srcFilePath, destFilePath);
|
||||||
|
state = "after rename(srcFilePath, destFilePath)";
|
||||||
|
directoriesCreated.assertDiffEquals(state, 1);
|
||||||
|
// one for the renamed file, one for the parent
|
||||||
|
deleteRequests.assertDiffEquals(state, 2);
|
||||||
|
directoriesDeleted.assertDiffEquals(state, 0);
|
||||||
|
fakeDirectoriesDeleted.assertDiffEquals(state, destDirDepth);
|
||||||
|
|
||||||
|
reset(deleteRequests, directoriesCreated, directoriesDeleted,
|
||||||
|
fakeDirectoriesDeleted);
|
||||||
|
|
||||||
|
assertIsFile(destFilePath);
|
||||||
|
assertIsDirectory(srcDir);
|
||||||
|
}
|
||||||
|
|
||||||
|
private int directoriesInPath(Path path) {
|
||||||
|
return path.isRoot() ? 0 : 1 + directoriesInPath(path.getParent());
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue