diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt
index a8368cb7e58..7b61e66d613 100644
--- a/hadoop-common-project/hadoop-common/CHANGES.txt
+++ b/hadoop-common-project/hadoop-common/CHANGES.txt
@@ -387,6 +387,9 @@ Release 2.8.0 - UNRELEASED
 
     HADOOP-12755. Fix typo in defaultFS warning message. (wang)
 
+    HADOOP-12292. Make use of DeleteObjects optional.
+    (Thomas Demoor via stevel)
+
   OPTIMIZATIONS
 
     HADOOP-11785. Reduce the number of listStatus operation in distcp
diff --git a/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml b/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml
index b1286c5a0fc..b45f7bcc57b 100644
--- a/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml
+++ b/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml
@@ -888,6 +888,15 @@ for ldap providers in the same way as above does.
     Threshold before uploads or copies use parallel multipart operations.
   </description>
 </property>
+<property>
+  <name>fs.s3a.multiobjectdelete.enable</name>
+  <value>true</value>
+  <description>When enabled, multiple single-object delete requests are replaced by
+    a single 'delete multiple objects' request, reducing the number of requests.
+    Beware: legacy S3-compatible object stores might not support this request.
+  </description>
+</property>
+
 <property>
   <name>fs.s3a.acl.default</name>
   <description>Set a canned ACL for newly created and copied objects. Value may be private,
diff --git a/hadoop-project/pom.xml b/hadoop-project/pom.xml
index ab03e2cfb1b..91a38cb1b3e 100644
--- a/hadoop-project/pom.xml
+++ b/hadoop-project/pom.xml
@@ -111,6 +111,7 @@
     <exec-maven-plugin.version>1.3.1</exec-maven-plugin.version>
     <make-maven-plugin.version>1.0-beta-1</make-maven-plugin.version>
     <native-maven-plugin.version>1.0-alpha-8</native-maven-plugin.version>
+    <surefire.fork.timeout>900</surefire.fork.timeout>
   </properties>
 
   <dependencyManagement>
@@ -1172,7 +1173,7 @@
           <artifactId>maven-surefire-plugin</artifactId>
           <configuration>
             <reuseForks>false</reuseForks>
-            <forkedProcessTimeoutInSeconds>900</forkedProcessTimeoutInSeconds>
+            <forkedProcessTimeoutInSeconds>${surefire.fork.timeout}</forkedProcessTimeoutInSeconds>
             <argLine>${maven-surefire-plugin.argLine}</argLine>
             <environmentVariables>
               <HADOOP_COMMON_HOME>${hadoop.common.build.dir}</HADOOP_COMMON_HOME>
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java
index 60d4b9b875b..faa760cac38 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java
@@ -84,6 +84,9 @@ public class Constants {
   public static final String MIN_MULTIPART_THRESHOLD =
       "fs.s3a.multipart.threshold";
   public static final long DEFAULT_MIN_MULTIPART_THRESHOLD = Integer.MAX_VALUE;
 
+  // enable multiobject-delete calls?
+  public static final String ENABLE_MULTI_DELETE = "fs.s3a.multiobjectdelete.enable";
+
   // comma separated list of directories
   public static final String BUFFER_DIR = "fs.s3a.buffer.dir";
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java
index 6ede9f2d623..adf6178cc5f 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java
@@ -41,6 +41,7 @@ import com.amazonaws.auth.AWSCredentialsProviderChain;
 import com.amazonaws.auth.InstanceProfileCredentialsProvider;
 import com.amazonaws.services.s3.AmazonS3Client;
 import com.amazonaws.services.s3.model.CannedAccessControlList;
+import com.amazonaws.services.s3.model.DeleteObjectRequest;
 import com.amazonaws.services.s3.model.DeleteObjectsRequest;
 import com.amazonaws.services.s3.model.ListObjectsRequest;
 import com.amazonaws.services.s3.model.ObjectListing;
@@ -85,6 +86,7 @@ public class S3AFileSystem extends FileSystem {
   private String bucket;
   private int maxKeys;
   private long partSize;
+  private boolean enableMultiObjectsDelete;
   private TransferManager transfers;
   private ThreadPoolExecutor threadPoolExecutor;
   private long multiPartThreshold;
@@ -252,6 +254,7 @@ public class S3AFileSystem extends FileSystem {
     partSize = conf.getLong(MULTIPART_SIZE, DEFAULT_MULTIPART_SIZE);
     multiPartThreshold = conf.getLong(MIN_MULTIPART_THRESHOLD,
         DEFAULT_MIN_MULTIPART_THRESHOLD);
+    enableMultiObjectsDelete = conf.getBoolean(ENABLE_MULTI_DELETE, true);
 
     if (partSize < 5 * 1024 * 1024) {
       LOG.error(MULTIPART_SIZE + " must be at least 5 MB");
@@ -580,11 +583,7 @@ public class S3AFileSystem extends FileSystem {
           copyFile(summary.getKey(), newDstKey);
 
           if (keysToDelete.size() == MAX_ENTRIES_TO_DELETE) {
-            DeleteObjectsRequest deleteRequest =
-                new DeleteObjectsRequest(bucket).withKeys(keysToDelete);
-            s3.deleteObjects(deleteRequest);
-            statistics.incrementWriteOps(1);
-            keysToDelete.clear();
+            removeKeys(keysToDelete, true);
           }
         }
 
@@ -592,11 +591,8 @@ public class S3AFileSystem extends FileSystem {
           objects = s3.listNextBatchOfObjects(objects);
           statistics.incrementReadOps(1);
         } else {
-          if (keysToDelete.size() > 0) {
-            DeleteObjectsRequest deleteRequest =
-                new DeleteObjectsRequest(bucket).withKeys(keysToDelete);
-            s3.deleteObjects(deleteRequest);
-            statistics.incrementWriteOps(1);
+          if (!keysToDelete.isEmpty()) {
+            removeKeys(keysToDelete, false);
           }
           break;
         }
@@ -610,6 +606,36 @@ public class S3AFileSystem extends FileSystem {
     return true;
   }
 
+  /**
+   * A helper method to delete a list of keys on an S3 backend.
+   *
+   * @param keysToDelete collection of keys to delete on the S3 backend
+   * @param clearKeys when set to true, clears the keysToDelete list
+   *          after the delete requests have been issued
+   */
+  private void removeKeys(List<DeleteObjectsRequest.KeyVersion> keysToDelete,
+          boolean clearKeys) {
+    if (enableMultiObjectsDelete) {
+      DeleteObjectsRequest deleteRequest
+          = new DeleteObjectsRequest(bucket).withKeys(keysToDelete);
+      s3.deleteObjects(deleteRequest);
+      statistics.incrementWriteOps(1);
+    } else {
+      int writeops = 0;
+
+      for (DeleteObjectsRequest.KeyVersion keyVersion : keysToDelete) {
+        s3.deleteObject(
+            new DeleteObjectRequest(bucket, keyVersion.getKey()));
+        writeops++;
+      }
+
+      statistics.incrementWriteOps(writeops);
+    }
+    if (clearKeys) {
+      keysToDelete.clear();
+    }
+  }
+
   /** Delete a file.
    *
    * @param f the path to delete.
@@ -684,11 +710,7 @@ public class S3AFileSystem extends FileSystem {
           }
 
           if (keys.size() == MAX_ENTRIES_TO_DELETE) {
-            DeleteObjectsRequest deleteRequest =
-                new DeleteObjectsRequest(bucket).withKeys(keys);
-            s3.deleteObjects(deleteRequest);
-            statistics.incrementWriteOps(1);
-            keys.clear();
+            removeKeys(keys, true);
           }
         }
 
@@ -697,10 +719,7 @@ public class S3AFileSystem extends FileSystem {
           statistics.incrementReadOps(1);
         } else {
           if (!keys.isEmpty()) {
-            DeleteObjectsRequest deleteRequest =
-                new DeleteObjectsRequest(bucket).withKeys(keys);
-            s3.deleteObjects(deleteRequest);
-            statistics.incrementWriteOps(1);
+            removeKeys(keys, false);
           }
           break;
         }
diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md
index b27c0505163..2f3352ba892 100644
--- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md
+++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md
@@ -272,6 +272,15 @@ If you do any of these: change your credentials immediately!
         Threshold before uploads or copies use parallel multipart operations.
       </description>
     </property>
+    <property>
+      <name>fs.s3a.multiobjectdelete.enable</name>
+      <value>true</value>
+      <description>When enabled, multiple single-object delete requests are replaced by
+        a single 'delete multiple objects' request, reducing the number of requests.
+        Beware: legacy S3-compatible object stores might not support this request.
+      </description>
+    </property>
+
     <property>
       <name>fs.s3a.acl.default</name>
       <description>Set a canned ACL for newly created and copied objects. Value may be private,
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/S3AScaleTestBase.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/S3AScaleTestBase.java
index e0cbc92f5c9..e44a90e902e 100644
--- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/S3AScaleTestBase.java
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/S3AScaleTestBase.java
@@ -36,13 +36,21 @@ import static org.junit.Assume.assumeTrue;
 
 /**
  * Base class for scale tests; here is where the common scale configuration
- * keys are defined
+ * keys are defined.
  */
 public class S3AScaleTestBase {
 
   public static final String SCALE_TEST = "scale.test.";
+
+  /**
+   * The number of operations to perform: {@value}.
+   */
   public static final String KEY_OPERATION_COUNT =
       SCALE_TEST + "operation.count";
+
+  /**
+   * The default number of operations to perform: {@value}.
+   */
   public static final long DEFAULT_OPERATION_COUNT = 2005;
 
   protected S3AFileSystem fs;
@@ -71,6 +79,7 @@ public class S3AScaleTestBase {
   @Before
   public void setUp() throws Exception {
     conf = createConfiguration();
+    LOG.info("Scale test operation count = {}", getOperationCount());
     fs = S3ATestUtils.createTestFileSystem(conf);
   }
 
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/TestS3ADeleteFilesOneByOne.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/TestS3ADeleteFilesOneByOne.java
new file mode 100644
index 00000000000..77c85a91388
--- /dev/null
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/TestS3ADeleteFilesOneByOne.java
@@ -0,0 +1,40 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a.scale;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.s3a.Constants;
+import org.junit.Test;
+
+import java.io.IOException;
+
+public class TestS3ADeleteFilesOneByOne extends TestS3ADeleteManyFiles {
+
+  @Override
+  protected Configuration createConfiguration() {
+    Configuration configuration = super.createConfiguration();
+    configuration.setBoolean(Constants.ENABLE_MULTI_DELETE, false);
+    return configuration;
+  }
+
+  @Test
+  public void testOpenCreate() throws IOException {
+
+  }
+}
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/TestS3ADeleteManyFiles.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/TestS3ADeleteManyFiles.java
index c913a67a4a9..d521ba8ac99 100644
--- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/TestS3ADeleteManyFiles.java
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/TestS3ADeleteManyFiles.java
@@ -61,7 +61,7 @@ public class TestS3ADeleteManyFiles extends S3AScaleTestBase {
 
     // use Executor to speed up file creation
     ExecutorService exec = Executors.newFixedThreadPool(16);
     final ExecutorCompletionService<Boolean> completionService =
-        new ExecutorCompletionService(exec);
+        new ExecutorCompletionService<>(exec);
     try {
       final byte[] data = ContractTestUtils.dataset(testBufferSize, 'a', 'z');
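
Usage sketch (property name and semantics are taken from the patch above):
with this change applied, S3A still batches deletes by default, since
fs.s3a.multiobjectdelete.enable defaults to true. A deployment talking to a
legacy S3-compatible store that rejects the multi-object DELETE request can
fall back to one-request-per-key deletes by overriding the property, for
example in core-site.xml:

    <property>
      <name>fs.s3a.multiobjectdelete.enable</name>
      <value>false</value>
    </property>

When disabled, removeKeys() issues one DeleteObjectRequest per key, so the
write-operation count in the filesystem statistics grows with the number of
keys deleted rather than with the number of batched requests.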