HADOOP-13164 Optimize S3AFileSystem::deleteUnnecessaryFakeDirectories. Contributed by Rajesh Balamohan.
commit d24933ad3a
parent 7296999c89
@@ -359,7 +359,7 @@ public abstract class AbstractFSContractTestBase extends Assert
     assertEquals(text + " wrong read result " + result, -1, result);
   }

-  boolean rename(Path src, Path dst) throws IOException {
+  protected boolean rename(Path src, Path dst) throws IOException {
     return getFileSystem().rename(src, dst);
   }

@@ -778,7 +778,7 @@ public class S3AFileSystem extends FileSystem {
             copyFile(summary.getKey(), newDstKey, summary.getSize());

             if (keysToDelete.size() == MAX_ENTRIES_TO_DELETE) {
-              removeKeys(keysToDelete, true);
+              removeKeys(keysToDelete, true, false);
             }
           }

@@ -786,7 +786,7 @@ public class S3AFileSystem extends FileSystem {
           objects = continueListObjects(objects);
         } else {
           if (!keysToDelete.isEmpty()) {
-            removeKeys(keysToDelete, false);
+            removeKeys(keysToDelete, false, false);
           }
           break;
         }

@@ -1034,17 +1034,25 @@ public class S3AFileSystem extends FileSystem {
    * @param keysToDelete collection of keys to delete on the s3-backend
    * @param clearKeys clears the keysToDelete-list after processing the list
    *            when set to true
+   * @param deleteFakeDir indicates whether this is for deleting fake dirs
    */
   private void removeKeys(List<DeleteObjectsRequest.KeyVersion> keysToDelete,
-      boolean clearKeys) throws AmazonClientException {
+      boolean clearKeys, boolean deleteFakeDir) throws AmazonClientException {
+    if (keysToDelete.isEmpty()) {
+      // no keys
+      return;
+    }
     if (enableMultiObjectsDelete) {
       deleteObjects(new DeleteObjectsRequest(bucket).withKeys(keysToDelete));
-      instrumentation.fileDeleted(keysToDelete.size());
     } else {
       for (DeleteObjectsRequest.KeyVersion keyVersion : keysToDelete) {
         deleteObject(keyVersion.getKey());
       }
     }
+    if (!deleteFakeDir) {
+      instrumentation.fileDeleted(keysToDelete.size());
+    } else {
+      instrumentation.fakeDirsDeleted(keysToDelete.size());
+    }
     if (clearKeys) {
       keysToDelete.clear();

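With the new deleteFakeDir flag, removeKeys() credits each deleted batch to either the file-delete counter or the new fake-directory counter, so bulk cleanup of fake directories no longer inflates the files_deleted statistic. A minimal, self-contained sketch of that dispatch, using plain atomic counters instead of the S3A instrumentation classes (the class and field names below are illustrative, not Hadoop code):

import java.util.List;
import java.util.concurrent.atomic.AtomicLong;

class DeleteInstrumentationSketch {
  // Stand-ins for the files_deleted / fake_directories_deleted counters.
  private final AtomicLong filesDeleted = new AtomicLong();
  private final AtomicLong fakeDirsDeleted = new AtomicLong();

  /** Credit one batch of deleted keys to the appropriate metric. */
  void recordBatch(List<String> deletedKeys, boolean deleteFakeDir) {
    if (!deleteFakeDir) {
      filesDeleted.addAndGet(deletedKeys.size());
    } else {
      fakeDirsDeleted.addAndGet(deletedKeys.size());
    }
  }
}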
@@ -1132,7 +1140,7 @@ public class S3AFileSystem extends FileSystem {
           LOG.debug("Got object to delete {}", summary.getKey());

           if (keys.size() == MAX_ENTRIES_TO_DELETE) {
-            removeKeys(keys, true);
+            removeKeys(keys, true, false);
           }
         }

@@ -1140,7 +1148,7 @@ public class S3AFileSystem extends FileSystem {
         objects = continueListObjects(objects);
       } else {
         if (!keys.isEmpty()) {
-          removeKeys(keys, false);
+          removeKeys(keys, false, false);
         }
         break;
       }

@@ -1630,37 +1638,30 @@ public class S3AFileSystem extends FileSystem {
   /**
    * Delete mock parent directories which are no longer needed.
    * This code swallows IO exceptions encountered
-   * @param f path
+   * @param path path
    */
-  private void deleteUnnecessaryFakeDirectories(Path f) {
-    while (true) {
-      String key = "";
-      try {
-        key = pathToKey(f);
-        if (key.isEmpty()) {
-          break;
-        }
-
-        S3AFileStatus status = getFileStatus(f);
-
-        if (status.isDirectory() && status.isEmptyDirectory()) {
-          LOG.debug("Deleting fake directory {}/", key);
-          deleteObject(key + "/");
-        }
-      } catch (IOException | AmazonClientException e) {
-        LOG.debug("While deleting key {} ", key, e);
-        instrumentation.errorIgnored();
-      }
-
-      if (f.isRoot()) {
-        break;
-      }
-
-      f = f.getParent();
+  private void deleteUnnecessaryFakeDirectories(Path path) {
+    List<DeleteObjectsRequest.KeyVersion> keysToRemove = new ArrayList<>();
+    while (!path.isRoot()) {
+      String key = pathToKey(path);
+      key = (key.endsWith("/")) ? key : (key + "/");
+      keysToRemove.add(new DeleteObjectsRequest.KeyVersion(key));
+      path = path.getParent();
+    }
+    try {
+      removeKeys(keysToRemove, false, true);
+    } catch(AmazonClientException e) {
+      instrumentation.errorIgnored();
+      if (LOG.isDebugEnabled()) {
+        StringBuilder sb = new StringBuilder();
+        for(DeleteObjectsRequest.KeyVersion kv : keysToRemove) {
+          sb.append(kv.getKey()).append(",");
+        }
+        LOG.debug("While deleting keys {} ", sb.toString(), e);
+      }
     }
   }

   private void createFakeDirectory(final String objectName)
       throws AmazonClientException, AmazonServiceException,
       InterruptedIOException {

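The rewritten deleteUnnecessaryFakeDirectories() no longer issues a getFileStatus() probe and a single-object delete per parent directory. It walks every parent of the path up to the root, collects their trailing-slash keys, and hands the whole list to removeKeys(..., true) as one batched delete; any AmazonClientException is swallowed, logged at debug level, and counted as an ignored error. A rough, standalone sketch of the parent-prefix walk, using plain strings instead of Hadoop Path objects (illustrative only, not the S3A code):

import java.util.ArrayList;
import java.util.List;

class FakeDirKeySketch {
  /** Collect the fake-directory keys "a/b/c/", "a/b/", "a/" for the object key "a/b/c/file". */
  static List<String> parentDirKeys(String key) {
    List<String> keys = new ArrayList<>();
    int slash = key.lastIndexOf('/');
    while (slash > 0) {
      key = key.substring(0, slash);
      keys.add(key + "/");
      slash = key.lastIndexOf('/');
    }
    return keys;
  }

  public static void main(String[] args) {
    System.out.println(parentDirKeys("a/b/c/file")); // prints [a/b/c/, a/b/, a/]
  }
}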
@@ -75,6 +75,7 @@ public class S3AInstrumentation {
   private final MutableCounterLong numberOfFilesCopied;
   private final MutableCounterLong bytesOfFilesCopied;
   private final MutableCounterLong numberOfFilesDeleted;
+  private final MutableCounterLong numberOfFakeDirectoryDeletes;
   private final MutableCounterLong numberOfDirectoriesCreated;
   private final MutableCounterLong numberOfDirectoriesDeleted;
   private final Map<String, MutableCounterLong> streamMetrics =

@@ -134,6 +135,7 @@ public class S3AInstrumentation {
     numberOfFilesCopied = counter(FILES_COPIED);
     bytesOfFilesCopied = counter(FILES_COPIED_BYTES);
     numberOfFilesDeleted = counter(FILES_DELETED);
+    numberOfFakeDirectoryDeletes = counter(FAKE_DIRECTORIES_DELETED);
     numberOfDirectoriesCreated = counter(DIRECTORIES_CREATED);
     numberOfDirectoriesDeleted = counter(DIRECTORIES_DELETED);
     ignoredErrors = counter(IGNORED_ERRORS);

@@ -294,6 +296,14 @@ public class S3AInstrumentation {
     numberOfFilesDeleted.incr(count);
   }

+  /**
+   * Indicate that fake directory request was made.
+   * @param count number of directory entries included in the delete request.
+   */
+  public void fakeDirsDeleted(int count) {
+    numberOfFakeDirectoryDeletes.incr(count);
+  }
+
   /**
    * Indicate that S3A created a directory.
    */

@@ -42,6 +42,10 @@ public enum Statistic {
       "Total number of files created through the object store."),
   FILES_DELETED("files_deleted",
       "Total number of files deleted from the object store."),
+  FAKE_DIRECTORIES_CREATED("fake_directories_created",
+      "Total number of fake directory entries created in the object store."),
+  FAKE_DIRECTORIES_DELETED("fake_directories_deleted",
+      "Total number of fake directory deletes submitted to object store."),
   IGNORED_ERRORS("ignored_errors", "Errors caught and ignored"),
   INVOCATION_COPY_FROM_LOCAL_FILE(CommonStatisticNames.OP_COPY_FROM_LOCAL_FILE,
       "Calls of copyFromLocalFile()"),

@@ -296,13 +296,22 @@ public class S3ATestUtils {
       return sb.toString();
     }

+    /**
+     * Assert that the value of {@link #diff()} matches that expected.
+     * @param message message to print; metric name is appended
+     * @param expected expected value.
+     */
+    public void assertDiffEquals(String message, long expected) {
+      Assert.assertEquals(message + ": " + statistic.getSymbol(),
+          expected, diff());
+    }
+
     /**
      * Assert that the value of {@link #diff()} matches that expected.
      * @param expected expected value.
      */
     public void assertDiffEquals(long expected) {
-      Assert.assertEquals("Count of " + this,
-          expected, diff());
+      assertDiffEquals("Count of " + this, expected);
     }

     /**

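The new assertDiffEquals(String, long) overload lets callers tag each assertion with the phase of the test (the state strings in the test below), while the single-argument form now delegates to it. The MetricDiff pattern itself is: snapshot a counter, run an operation, then assert on how far the counter moved. A rough sketch of that pattern, with a LongSupplier standing in for the real S3A metric lookup (illustrative, not the Hadoop helper class):

import java.util.function.LongSupplier;

class MetricDiffSketch {
  private final LongSupplier counter;
  private long startValue;

  MetricDiffSketch(LongSupplier counter) {
    this.counter = counter;
    reset();
  }

  /** Re-snapshot the counter. */
  void reset() {
    startValue = counter.getAsLong();
  }

  /** Change in the counter since the last reset(). */
  long diff() {
    return counter.getAsLong() - startValue;
  }

  /** Fail with a labelled message if the counter did not move by the expected amount. */
  void assertDiffEquals(String message, long expected) {
    long actual = diff();
    if (actual != expected) {
      throw new AssertionError(message + ": expected a change of " + expected
          + " but the counter moved by " + actual);
    }
  }
}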
@@ -189,4 +189,89 @@ public class TestS3AFileOperationCost extends AbstractFSContractTestBase {
       tmpFile.delete();
     }
   }
+
+  private void reset(MetricDiff... diffs) {
+    for (MetricDiff diff : diffs) {
+      diff.reset();
+    }
+  }
+
+  @Test
+  public void testFakeDirectoryDeletion() throws Throwable {
+    describe("Verify whether create file works after renaming a file. "
+        + "In S3, rename deletes any fake directories as a part of "
+        + "clean up activity");
+    S3AFileSystem fs = getFileSystem();
+    Path srcBaseDir = path("src");
+    mkdirs(srcBaseDir);
+    MetricDiff deleteRequests =
+        new MetricDiff(fs, Statistic.OBJECT_DELETE_REQUESTS);
+    MetricDiff directoriesDeleted =
+        new MetricDiff(fs, Statistic.DIRECTORIES_DELETED);
+    MetricDiff fakeDirectoriesDeleted =
+        new MetricDiff(fs, Statistic.FAKE_DIRECTORIES_DELETED);
+    MetricDiff directoriesCreated =
+        new MetricDiff(fs, Statistic.DIRECTORIES_CREATED);
+
+    Path srcDir = new Path(srcBaseDir, "1/2/3/4/5/6");
+    Path srcFilePath = new Path(srcDir, "source.txt");
+    int srcDirDepth = directoriesInPath(srcDir);
+    // one dir created, one removed
+    mkdirs(srcDir);
+    String state = "after mkdir(srcDir)";
+    directoriesCreated.assertDiffEquals(state, 1);
+    /* TODO: uncomment once HADOOP-13222 is in
+    deleteRequests.assertDiffEquals(state, 1);
+    directoriesDeleted.assertDiffEquals(state, 0);
+    fakeDirectoriesDeleted.assertDiffEquals(state, srcDirDepth);
+    */
+    reset(deleteRequests, directoriesCreated, directoriesDeleted,
+        fakeDirectoriesDeleted);
+
+    // creating a file should trigger demise of the src dir
+    touch(fs, srcFilePath);
+    state = "after touch(fs, srcFilePath)";
+    deleteRequests.assertDiffEquals(state, 1);
+    directoriesCreated.assertDiffEquals(state, 0);
+    directoriesDeleted.assertDiffEquals(state, 0);
+    fakeDirectoriesDeleted.assertDiffEquals(state, srcDirDepth);
+
+    reset(deleteRequests, directoriesCreated, directoriesDeleted,
+        fakeDirectoriesDeleted);
+
+    Path destBaseDir = path("dest");
+    Path destDir = new Path(destBaseDir, "1/2/3/4/5/6");
+    Path destFilePath = new Path(destDir, "dest.txt");
+    mkdirs(destDir);
+    state = "after mkdir(destDir)";
+
+    int destDirDepth = directoriesInPath(destDir);
+    directoriesCreated.assertDiffEquals(state, 1);
+    /* TODO: uncomment once HADOOP-13222 is in
+    deleteRequests.assertDiffEquals(state, 1);
+    directoriesDeleted.assertDiffEquals(state, 0);
+    fakeDirectoriesDeleted.assertDiffEquals(state, destDirDepth);
+    */
+    reset(deleteRequests, directoriesCreated, directoriesDeleted,
+        fakeDirectoriesDeleted);
+
+    fs.rename(srcFilePath, destFilePath);
+    state = "after rename(srcFilePath, destFilePath)";
+    directoriesCreated.assertDiffEquals(state, 1);
+    // one for the renamed file, one for the parent
+    deleteRequests.assertDiffEquals(state, 2);
+    directoriesDeleted.assertDiffEquals(state, 0);
+    fakeDirectoriesDeleted.assertDiffEquals(state, destDirDepth);
+
+    reset(deleteRequests, directoriesCreated, directoriesDeleted,
+        fakeDirectoriesDeleted);
+
+    assertIsFile(destFilePath);
+    assertIsDirectory(srcDir);
+  }
+
+  private int directoriesInPath(Path path) {
+    return path.isRoot() ? 0 : 1 + directoriesInPath(path.getParent());
+  }
+
 }