From 1fe918cdf73980e7de3c71c323f319d0ceb41ec9 Mon Sep 17 00:00:00 2001 From: ahmarsuhail Date: Thu, 8 Jun 2023 18:19:50 +0100 Subject: [PATCH] HADOOP-18073. Upgrade S3A in 3.3 branch to AWS SDK V2.(#5707) Contributed by Ahmar Suhail --- .../fs/statistics/StoreStatisticNames.java | 4 + hadoop-project/pom.xml | 14 +- hadoop-tools/hadoop-aws/pom.xml | 12 +- .../hadoop/fs/s3a/AWSBadRequestException.java | 4 +- .../hadoop/fs/s3a/AWSClientIOException.java | 13 +- .../fs/s3a/AWSCredentialProviderList.java | 98 +- .../hadoop/fs/s3a/AWSNoResponseException.java | 4 +- .../hadoop/fs/s3a/AWSRedirectException.java | 4 +- .../hadoop/fs/s3a/AWSS3IOException.java | 25 +- .../hadoop/fs/s3a/AWSServiceIOException.java | 37 +- .../fs/s3a/AWSServiceThrottledException.java | 4 +- .../hadoop/fs/s3a/AWSStatus500Exception.java | 4 +- .../s3a/AnonymousAWSCredentialsProvider.java | 18 +- .../org/apache/hadoop/fs/s3a/ArnResource.java | 12 +- .../org/apache/hadoop/fs/s3a/Constants.java | 9 +- .../CredentialInitializationException.java | 15 +- .../hadoop/fs/s3a/DefaultS3ClientFactory.java | 437 +++----- .../hadoop/fs/s3a/FailureInjectionPolicy.java | 2 +- .../fs/s3a/InconsistentAmazonS3Client.java | 345 ------- .../fs/s3a/InconsistentS3ClientFactory.java | 85 +- .../org/apache/hadoop/fs/s3a/Invoker.java | 21 +- .../org/apache/hadoop/fs/s3a/Listing.java | 53 +- .../apache/hadoop/fs/s3a/MultipartUtils.java | 49 +- .../fs/s3a/ProgressableProgressListener.java | 45 +- .../hadoop/fs/s3a/S3ABlockOutputStream.java | 122 ++- .../apache/hadoop/fs/s3a/S3ADataBlocks.java | 8 +- .../apache/hadoop/fs/s3a/S3AFileSystem.java | 946 ++++++++++-------- .../apache/hadoop/fs/s3a/S3AInputStream.java | 116 +-- .../apache/hadoop/fs/s3a/S3ARetryPolicy.java | 16 +- .../org/apache/hadoop/fs/s3a/S3AUtils.java | 634 +++--------- .../apache/hadoop/fs/s3a/S3ClientFactory.java | 171 +++- .../apache/hadoop/fs/s3a/S3ListRequest.java | 16 +- .../apache/hadoop/fs/s3a/S3ListResult.java | 61 +- .../hadoop/fs/s3a/S3ObjectAttributes.java | 27 - .../s3a/SharedInstanceCredentialProvider.java | 1 - .../fs/s3a/SimpleAWSCredentialsProvider.java | 18 +- .../org/apache/hadoop/fs/s3a/Statistic.java | 5 + .../s3a/TemporaryAWSCredentialsProvider.java | 7 +- .../org/apache/hadoop/fs/s3a/UploadInfo.java | 12 +- .../hadoop/fs/s3a/WriteOperationHelper.java | 202 ++-- .../apache/hadoop/fs/s3a/WriteOperations.java | 107 +- .../V1ToV2AwsCredentialProviderAdapter.java | 74 ++ .../V1V2AwsCredentialProviderAdapter.java | 36 + .../hadoop/fs/s3a/adapter/package-info.java | 27 + .../hadoop/fs/s3a/api/RequestFactory.java | 218 ++-- .../fs/s3a/audit/AWSAuditEventCallbacks.java | 105 +- .../fs/s3a/audit/AWSRequestAnalyzer.java | 135 +-- .../hadoop/fs/s3a/audit/AuditIntegration.java | 28 +- .../hadoop/fs/s3a/audit/AuditManagerS3A.java | 21 +- .../fs/s3a/audit/S3AAuditConstants.java | 13 +- .../s3a/audit/impl/ActiveAuditManagerS3A.java | 408 ++++---- .../fs/s3a/audit/impl/LoggingAuditor.java | 125 ++- .../s3a/audit/impl/NoopAuditManagerS3A.java | 16 +- .../audit/impl/S3AInternalAuditConstants.java | 16 +- .../auth/AbstractAWSCredentialProvider.java | 13 +- .../AbstractSessionCredentialsProvider.java | 27 +- .../auth/AssumedRoleCredentialProvider.java | 68 +- .../s3a/auth/AwsCredentialListProvider.java | 283 ++++++ .../auth/IAMInstanceCredentialsProvider.java | 40 +- .../s3a/auth/MarshalledCredentialBinding.java | 57 +- .../auth/MarshalledCredentialProvider.java | 5 +- .../fs/s3a/auth/NoAuthWithAWSException.java | 2 +- .../hadoop/fs/s3a/auth/STSClientFactory.java | 140 +-- 
.../hadoop/fs/s3a/auth/SignerFactory.java | 114 +++ .../hadoop/fs/s3a/auth/SignerManager.java | 8 +- .../EncryptionSecretOperations.java | 38 +- .../s3a/auth/delegation/RoleTokenBinding.java | 2 +- .../auth/delegation/SessionTokenBinding.java | 38 +- .../fs/s3a/commit/AbstractS3ACommitter.java | 9 +- .../hadoop/fs/s3a/commit/PutTracker.java | 4 +- .../s3a/commit/files/SinglePendingCommit.java | 13 +- .../fs/s3a/commit/impl/CommitOperations.java | 54 +- .../s3a/commit/magic/MagicCommitTracker.java | 28 +- .../hadoop/fs/s3a/impl/AWSCannedACL.java | 43 + .../hadoop/fs/s3a/impl/AWSClientConfig.java | 379 +++++++ .../apache/hadoop/fs/s3a/impl/AWSHeaders.java | 98 ++ .../fs/s3a/impl/BulkDeleteRetryHandler.java | 12 +- .../fs/s3a/impl/ChangeDetectionPolicy.java | 125 ++- .../hadoop/fs/s3a/impl/ChangeTracker.java | 71 +- .../impl/ConfigureShadedAWSSocketFactory.java | 13 +- .../hadoop/fs/s3a/impl/CopyOutcome.java | 80 -- .../hadoop/fs/s3a/impl/DeleteOperation.java | 17 +- .../hadoop/fs/s3a/impl/ErrorTranslation.java | 14 +- .../hadoop/fs/s3a/impl/HeaderProcessing.java | 206 ++-- .../hadoop/fs/s3a/impl/InternalConstants.java | 51 +- ...t.java => MultiObjectDeleteException.java} | 67 +- .../hadoop/fs/s3a/impl/NetworkBinding.java | 11 +- .../fs/s3a/impl/OperationCallbacks.java | 15 +- .../hadoop/fs/s3a/impl/ProgressListener.java | 26 + .../fs/s3a/impl/ProgressListenerEvent.java | 29 + .../hadoop/fs/s3a/impl/RenameOperation.java | 23 +- .../fs/s3a/impl/RequestFactoryImpl.java | 669 ++++++------- .../fs/s3a/impl/S3AMultipartUploader.java | 31 +- .../hadoop/fs/s3a/impl/SDKStreamDrainer.java | 43 +- .../hadoop/fs/s3a/impl/V2Migration.java | 12 + .../fs/s3a/prefetch/S3ARemoteObject.java | 54 +- .../s3a/prefetch/S3ARemoteObjectReader.java | 7 +- .../hadoop/fs/s3a/s3guard/S3GuardTool.java | 8 +- .../fs/s3a/select/BlockingEnumeration.java | 156 +++ .../hadoop/fs/s3a/select/SelectBinding.java | 126 ++- .../select/SelectEventStreamPublisher.java | 124 +++ .../fs/s3a/select/SelectInputStream.java | 31 +- .../s3a/select/SelectObjectContentHelper.java | 114 +++ .../impl/AwsStatisticsCollector.java | 160 ++- .../hadoop/fs/s3a/tools/MarkerTool.java | 17 +- .../fs/s3a/tools/MarkerToolOperations.java | 13 +- .../s3a/tools/MarkerToolOperationsImpl.java | 11 +- .../tools/hadoop-aws/aws_sdk_v2_changelog.md | 340 +++++++ .../site/markdown/tools/hadoop-aws/testing.md | 22 +- .../hadoop/fs/s3a/AbstractS3AMockTest.java | 28 +- .../hadoop/fs/s3a/EncryptionTestUtils.java | 20 +- .../s3a/ITestS3AAWSCredentialsProvider.java | 25 +- .../fs/s3a/ITestS3ABucketExistence.java | 33 +- .../hadoop/fs/s3a/ITestS3ACannedACLs.java | 31 +- .../s3a/ITestS3AClientSideEncryptionKms.java | 6 +- .../hadoop/fs/s3a/ITestS3AConfiguration.java | 158 ++- .../hadoop/fs/s3a/ITestS3AEncryptionSSEC.java | 2 +- .../ITestS3AEncryptionSSEKMSDefaultKey.java | 8 +- ...estS3AEncryptionWithDefaultS3Settings.java | 2 +- .../hadoop/fs/s3a/ITestS3AEndpointRegion.java | 221 ++-- .../fs/s3a/ITestS3AFailureHandling.java | 28 +- .../hadoop/fs/s3a/ITestS3AMiscOperations.java | 33 +- .../hadoop/fs/s3a/ITestS3AMultipartUtils.java | 5 +- .../hadoop/fs/s3a/ITestS3ARequesterPays.java | 2 +- .../hadoop/fs/s3a/ITestS3AStorageClass.java | 1 - .../fs/s3a/ITestS3ATemporaryCredentials.java | 26 +- .../hadoop/fs/s3a/MockS3AFileSystem.java | 17 +- .../hadoop/fs/s3a/MockS3ClientFactory.java | 49 +- .../hadoop/fs/s3a/MultipartTestUtils.java | 35 +- .../apache/hadoop/fs/s3a/S3ATestUtils.java | 10 +- .../apache/hadoop/fs/s3a/TestArnResource.java | 9 +- 
.../org/apache/hadoop/fs/s3a/TestInvoker.java | 105 +- .../fs/s3a/TestS3AAWSCredentialsProvider.java | 76 +- .../fs/s3a/TestS3ABlockOutputStream.java | 11 +- .../hadoop/fs/s3a/TestS3ADeleteOnExit.java | 25 +- .../fs/s3a/TestS3AExceptionTranslation.java | 112 ++- .../hadoop/fs/s3a/TestS3AGetFileStatus.java | 95 +- .../fs/s3a/TestS3AInputStreamRetry.java | 124 +-- .../apache/hadoop/fs/s3a/TestS3AProxy.java | 18 +- .../apache/hadoop/fs/s3a/TestS3AUnbuffer.java | 50 +- .../fs/s3a/TestStreamChangeTracker.java | 107 +- .../fs/s3a/TestWildflyAndOpenSSLBinding.java | 9 +- .../fs/s3a/audit/AbstractAuditingTest.java | 97 +- .../hadoop/fs/s3a/audit/AuditTestSupport.java | 4 +- .../fs/s3a/audit/ITestAuditManager.java | 10 +- ...ava => SimpleAWSExecutionInterceptor.java} | 20 +- .../fs/s3a/audit/TestAuditIntegration.java | 82 +- .../fs/s3a/audit/TestAuditSpanLifecycle.java | 11 +- .../audit/TestHttpReferrerAuditHeader.java | 55 +- .../fs/s3a/audit/TestLoggingAuditor.java | 41 +- .../hadoop/fs/s3a/auth/ITestAssumeRole.java | 26 +- .../hadoop/fs/s3a/auth/ITestCustomSigner.java | 45 +- .../hadoop/fs/s3a/auth/RoleTestUtils.java | 1 - .../s3a/auth/TestMarshalledCredentials.java | 10 +- .../hadoop/fs/s3a/auth/TestSignerManager.java | 41 +- .../delegation/CountInvocationsProvider.java | 13 +- .../ITestSessionDelegationInFilesystem.java | 28 +- .../ITestSessionDelegationTokens.java | 24 +- .../TestS3ADelegationTokenSupport.java | 1 - .../s3a/commit/AbstractITCommitProtocol.java | 2 +- .../s3a/commit/staging/StagingTestBase.java | 178 ++-- .../staging/TestDirectoryCommitterScale.java | 7 +- .../commit/staging/TestStagingCommitter.java | 32 +- .../TestStagingPartitionedTaskCommit.java | 8 +- .../ITestS3AFileContextStatistics.java | 3 +- .../s3a/impl/ITestPartialRenamesDeletes.java | 1 - .../fs/s3a/impl/ITestRenameDeleteRace.java | 5 +- .../fs/s3a/impl/TestHeaderProcessing.java | 32 +- .../fs/s3a/impl/TestNetworkBinding.java | 43 - .../fs/s3a/impl/TestRequestFactory.java | 107 +- .../fs/s3a/impl/TestSDKStreamDrainer.java | 16 +- .../ITestDirectoryMarkerListing.java | 23 +- .../fs/s3a/prefetch/MockS3ARemoteObject.java | 27 +- .../fs/s3a/prefetch/S3APrefetchFakes.java | 48 +- .../s3a/scale/AbstractSTestS3AHugeFiles.java | 25 +- .../ILoadTestS3ABulkDeleteThrottling.java | 13 +- .../scale/ITestS3ADirectoryPerformance.java | 22 +- .../scale/ITestS3AHugeFilesStorageClass.java | 2 +- .../fs/s3a/select/AbstractS3SelectTest.java | 4 +- .../fs/s3a/select/ITestS3SelectLandsat.java | 2 +- .../hadoop/fs/s3a/select/StreamPublisher.java | 89 ++ .../s3a/select/TestBlockingEnumeration.java | 200 ++++ .../TestSelectEventStreamPublisher.java | 188 ++++ .../hadoop/fs/s3a/test/ExtraAssertions.java | 2 +- .../s3a/test/MinimalOperationCallbacks.java | 14 +- .../MinimalWriteOperationHelperCallbacks.java | 16 +- .../hadoop/fs/s3a/tools/ITestMarkerTool.java | 7 - 187 files changed, 7044 insertions(+), 5228 deletions(-) delete mode 100644 hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/InconsistentAmazonS3Client.java create mode 100644 hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/adapter/V1ToV2AwsCredentialProviderAdapter.java create mode 100644 hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/adapter/V1V2AwsCredentialProviderAdapter.java create mode 100644 hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/adapter/package-info.java create mode 100644 hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/AwsCredentialListProvider.java create mode 100644 
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/SignerFactory.java create mode 100644 hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/AWSCannedACL.java create mode 100644 hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/AWSClientConfig.java create mode 100644 hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/AWSHeaders.java delete mode 100644 hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/CopyOutcome.java rename hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/{MultiObjectDeleteSupport.java => MultiObjectDeleteException.java} (62%) create mode 100644 hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/ProgressListener.java create mode 100644 hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/ProgressListenerEvent.java create mode 100644 hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/select/BlockingEnumeration.java create mode 100644 hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/select/SelectEventStreamPublisher.java create mode 100644 hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/select/SelectObjectContentHelper.java create mode 100644 hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/aws_sdk_v2_changelog.md rename hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/audit/{SimpleAWSRequestHandler.java => SimpleAWSExecutionInterceptor.java} (68%) create mode 100644 hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/select/StreamPublisher.java create mode 100644 hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/select/TestBlockingEnumeration.java create mode 100644 hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/select/TestSelectEventStreamPublisher.java diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/StoreStatisticNames.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/StoreStatisticNames.java index c04c1bb47fc..3a8927aba49 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/StoreStatisticNames.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/StoreStatisticNames.java @@ -407,6 +407,10 @@ public final class StoreStatisticNames { public static final String MULTIPART_UPLOAD_LIST = "multipart_upload_list"; + /** Probe for store region: {@value}. 
*/ + public static final String STORE_REGION_PROBE + = "store_region_probe"; + private StoreStatisticNames() { } diff --git a/hadoop-project/pom.xml b/hadoop-project/pom.xml index 8a27afbf85b..9933cfa883c 100644 --- a/hadoop-project/pom.xml +++ b/hadoop-project/pom.xml @@ -189,6 +189,8 @@ 900 1.12.316 2.7.1 + 2.19.12 + 0.21.0 1.11.2 2.1 0.7 @@ -1108,8 +1110,13 @@ com.amazonaws - aws-java-sdk-bundle + aws-java-sdk-core ${aws-java-sdk.version} + + + software.amazon.awssdk + bundle + ${aws-java-sdk-v2.version} io.netty @@ -1117,6 +1124,11 @@ + + software.amazon.awssdk.crt + aws-crt + ${awscrt.version} + org.apache.mina mina-core diff --git a/hadoop-tools/hadoop-aws/pom.xml b/hadoop-tools/hadoop-aws/pom.xml index 207b1c0f941..5dd8cc8dfba 100644 --- a/hadoop-tools/hadoop-aws/pom.xml +++ b/hadoop-tools/hadoop-aws/pom.xml @@ -485,7 +485,17 @@ com.amazonaws - aws-java-sdk-bundle + aws-java-sdk-core + compile + + + software.amazon.awssdk + bundle + compile + + + software.amazon.awssdk.crt + aws-crt compile diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AWSBadRequestException.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AWSBadRequestException.java index 482c5a1db7a..c5867eeab4f 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AWSBadRequestException.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AWSBadRequestException.java @@ -18,7 +18,7 @@ package org.apache.hadoop.fs.s3a; -import com.amazonaws.AmazonServiceException; +import software.amazon.awssdk.awscore.exception.AwsServiceException; /** * A 400 "Bad Request" exception was received. @@ -36,7 +36,7 @@ public class AWSBadRequestException extends AWSServiceIOException { * @param cause the underlying cause */ public AWSBadRequestException(String operation, - AmazonServiceException cause) { + AwsServiceException cause) { super(operation, cause); } } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AWSClientIOException.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AWSClientIOException.java index f31f3c79b35..43bf368671d 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AWSClientIOException.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AWSClientIOException.java @@ -18,34 +18,33 @@ package org.apache.hadoop.fs.s3a; -import com.amazonaws.AmazonClientException; -import com.amazonaws.SdkBaseException; +import software.amazon.awssdk.core.exception.SdkException; import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import java.io.IOException; + /** - * IOException equivalent of an {@link AmazonClientException}. + * IOException equivalent of an {@link SdkException}. 
*/ public class AWSClientIOException extends IOException { private final String operation; public AWSClientIOException(String operation, - SdkBaseException cause) { + SdkException cause) { super(cause); Preconditions.checkArgument(operation != null, "Null 'operation' argument"); Preconditions.checkArgument(cause != null, "Null 'cause' argument"); this.operation = operation; } - public AmazonClientException getCause() { - return (AmazonClientException) super.getCause(); + public SdkException getCause() { + return (SdkException) super.getCause(); } @Override public String getMessage() { return operation + ": " + getCause().getMessage(); } - } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AWSCredentialProviderList.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AWSCredentialProviderList.java index 75b09a4ed74..2c76c45fee9 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AWSCredentialProviderList.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AWSCredentialProviderList.java @@ -27,10 +27,11 @@ import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicInteger; import java.util.stream.Collectors; -import com.amazonaws.AmazonClientException; import com.amazonaws.auth.AWSCredentials; import com.amazonaws.auth.AWSCredentialsProvider; -import com.amazonaws.auth.AnonymousAWSCredentials; +import com.amazonaws.auth.BasicAWSCredentials; +import com.amazonaws.auth.BasicSessionCredentials; +import org.apache.hadoop.fs.s3a.adapter.V1V2AwsCredentialProviderAdapter; import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.slf4j.Logger; @@ -43,6 +44,12 @@ import org.apache.hadoop.fs.s3a.auth.NoAuthWithAWSException; import org.apache.hadoop.fs.s3a.auth.NoAwsCredentialsException; import org.apache.hadoop.io.IOUtils; +import software.amazon.awssdk.auth.credentials.AnonymousCredentialsProvider; +import software.amazon.awssdk.auth.credentials.AwsCredentials; +import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider; +import software.amazon.awssdk.auth.credentials.AwsSessionCredentials; +import software.amazon.awssdk.core.exception.SdkException; + /** * A list of providers. * @@ -51,17 +58,17 @@ import org.apache.hadoop.io.IOUtils; *
 * <ol>
 *   <li>Allows extra providers to be added dynamically.</li>
 *   <li>If any provider in the chain throws an exception other than
- *   an {@link AmazonClientException}, that is rethrown, rather than
+ *   an {@link SdkException}, that is rethrown, rather than
 *   swallowed.</li>
 *   <li>Has some more diagnostics.</li>
- *   <li>On failure, the last "relevant" AmazonClientException raised is
+ *   <li>On failure, the last "relevant" {@link SdkException} raised is
 *   rethrown; exceptions other than 'no credentials' have priority.</li>
- *   <li>Special handling of {@link AnonymousAWSCredentials}.</li>
+ *   <li>Special handling of {@link AnonymousCredentialsProvider}.</li>
 * </ol>
*/ @InterfaceAudience.Private @InterfaceStability.Evolving -public final class AWSCredentialProviderList implements AWSCredentialsProvider, +public final class AWSCredentialProviderList implements AwsCredentialsProvider, AutoCloseable { private static final Logger LOG = LoggerFactory.getLogger( @@ -73,9 +80,9 @@ public final class AWSCredentialProviderList implements AWSCredentialsProvider, CREDENTIALS_REQUESTED_WHEN_CLOSED = "Credentials requested after provider list was closed"; - private final List providers = new ArrayList<>(1); + private final List providers = new ArrayList<>(1); private boolean reuseLastProvider = true; - private AWSCredentialsProvider lastProvider; + private AwsCredentialsProvider lastProvider; private final AtomicInteger refCount = new AtomicInteger(1); @@ -99,7 +106,9 @@ public final class AWSCredentialProviderList implements AWSCredentialsProvider, */ public AWSCredentialProviderList( Collection providers) { - this.providers.addAll(providers); + for (AWSCredentialsProvider provider: providers) { + this.providers.add(V1V2AwsCredentialProviderAdapter.adapt(provider)); + } } /** @@ -110,6 +119,19 @@ public final class AWSCredentialProviderList implements AWSCredentialsProvider, public AWSCredentialProviderList(final String name, final AWSCredentialsProvider... providerArgs) { setName(name); + for (AWSCredentialsProvider provider: providerArgs) { + this.providers.add(V1V2AwsCredentialProviderAdapter.adapt(provider)); + } + } + + /** + * Create with an initial list of SDK V2 credential providers. + * @param name name for error messages, may be "" + * @param providerArgs provider list. + */ + public AWSCredentialProviderList(final String name, + final AwsCredentialsProvider... providerArgs) { + setName(name); Collections.addAll(providers, providerArgs); } @@ -127,12 +149,21 @@ public final class AWSCredentialProviderList implements AWSCredentialsProvider, /** * Add a new provider. - * @param p provider + * @param provider provider */ - public void add(AWSCredentialsProvider p) { - providers.add(p); + public void add(AWSCredentialsProvider provider) { + providers.add(V1V2AwsCredentialProviderAdapter.adapt(provider)); } + /** + * Add a new SDK V2 provider. + * @param provider provider + */ + public void add(AwsCredentialsProvider provider) { + providers.add(provider); + } + + /** * Add all providers from another list to this one. * @param other the other list. @@ -142,15 +173,18 @@ public final class AWSCredentialProviderList implements AWSCredentialsProvider, } /** - * Refresh all child entries. + * This method will get credentials using SDK V2's resolveCredentials and then convert it into + * V1 credentials. This required by delegation token binding classes. + * @return SDK V1 credentials */ - @Override - public void refresh() { - if (isClosed()) { - return; - } - for (AWSCredentialsProvider provider : providers) { - provider.refresh(); + public AWSCredentials getCredentials() { + AwsCredentials credentials = resolveCredentials(); + if (credentials instanceof AwsSessionCredentials) { + return new BasicSessionCredentials(credentials.accessKeyId(), + credentials.secretAccessKey(), + ((AwsSessionCredentials) credentials).sessionToken()); + } else { + return new BasicAWSCredentials(credentials.accessKeyId(), credentials.secretAccessKey()); } } @@ -160,7 +194,7 @@ public final class AWSCredentialProviderList implements AWSCredentialsProvider, * @return a set of credentials (possibly anonymous), for authenticating. 
*/ @Override - public AWSCredentials getCredentials() { + public AwsCredentials resolveCredentials() { if (isClosed()) { LOG.warn(CREDENTIALS_REQUESTED_WHEN_CLOSED); throw new NoAuthWithAWSException(name + @@ -168,18 +202,18 @@ public final class AWSCredentialProviderList implements AWSCredentialsProvider, } checkNotEmpty(); if (reuseLastProvider && lastProvider != null) { - return lastProvider.getCredentials(); + return lastProvider.resolveCredentials(); } - AmazonClientException lastException = null; - for (AWSCredentialsProvider provider : providers) { + SdkException lastException = null; + for (AwsCredentialsProvider provider : providers) { try { - AWSCredentials credentials = provider.getCredentials(); + AwsCredentials credentials = provider.resolveCredentials(); Preconditions.checkNotNull(credentials, "Null credentials returned by %s", provider); - if ((credentials.getAWSAccessKeyId() != null && - credentials.getAWSSecretKey() != null) - || (credentials instanceof AnonymousAWSCredentials)) { + if ((credentials.accessKeyId() != null && credentials.secretAccessKey() != null) || ( + provider instanceof AnonymousCredentialsProvider + || provider instanceof AnonymousAWSCredentialsProvider)) { lastProvider = provider; LOG.debug("Using credentials from {}", provider); return credentials; @@ -196,7 +230,7 @@ public final class AWSCredentialProviderList implements AWSCredentialsProvider, } LOG.debug("No credentials from {}: {}", provider, e.toString()); - } catch (AmazonClientException e) { + } catch (SdkException e) { lastException = e; LOG.debug("No credentials provided by {}: {}", provider, e.toString(), e); @@ -223,13 +257,13 @@ public final class AWSCredentialProviderList implements AWSCredentialsProvider, * @return providers */ @VisibleForTesting - List getProviders() { + List getProviders() { return providers; } /** * Verify that the provider list is not empty. - * @throws AmazonClientException if there are no providers. + * @throws SdkException if there are no providers. */ public void checkNotEmpty() { if (providers.isEmpty()) { @@ -317,7 +351,7 @@ public final class AWSCredentialProviderList implements AWSCredentialsProvider, } // do this outside the synchronized block. - for (AWSCredentialsProvider p : providers) { + for (AwsCredentialsProvider p : providers) { if (p instanceof Closeable) { IOUtils.closeStream((Closeable) p); } else if (p instanceof AutoCloseable) { diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AWSNoResponseException.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AWSNoResponseException.java index e6a23b2361d..e4adc69bec6 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AWSNoResponseException.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AWSNoResponseException.java @@ -18,14 +18,14 @@ package org.apache.hadoop.fs.s3a; -import com.amazonaws.AmazonServiceException; +import software.amazon.awssdk.awscore.exception.AwsServiceException; /** * Status code 443, no response from server. This is considered idempotent. 
*/ public class AWSNoResponseException extends AWSServiceIOException { public AWSNoResponseException(String operation, - AmazonServiceException cause) { + AwsServiceException cause) { super(operation, cause); } } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AWSRedirectException.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AWSRedirectException.java index bb337ee5eeb..cb478482a8e 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AWSRedirectException.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AWSRedirectException.java @@ -18,7 +18,7 @@ package org.apache.hadoop.fs.s3a; -import com.amazonaws.AmazonServiceException; +import software.amazon.awssdk.awscore.exception.AwsServiceException; /** * Request is redirected. @@ -32,7 +32,7 @@ public class AWSRedirectException extends AWSServiceIOException { * @param cause the underlying cause */ public AWSRedirectException(String operation, - AmazonServiceException cause) { + AwsServiceException cause) { super(operation, cause); } } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AWSS3IOException.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AWSS3IOException.java index 014d217b6a4..de1dd8b4a7a 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AWSS3IOException.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AWSS3IOException.java @@ -18,14 +18,13 @@ package org.apache.hadoop.fs.s3a; -import com.amazonaws.services.s3.model.AmazonS3Exception; +import software.amazon.awssdk.services.s3.model.S3Exception; + import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; -import java.util.Map; - /** - * Wrap a {@link AmazonS3Exception} as an IOE, relaying all + * Wrap a {@link S3Exception} as an IOE, relaying all * getters. */ @InterfaceAudience.Public @@ -38,24 +37,12 @@ public class AWSS3IOException extends AWSServiceIOException { * @param cause the underlying cause */ public AWSS3IOException(String operation, - AmazonS3Exception cause) { + S3Exception cause) { super(operation, cause); } - public AmazonS3Exception getCause() { - return (AmazonS3Exception) super.getCause(); - } - - public String getErrorResponseXml() { - return getCause().getErrorResponseXml(); - } - - public Map getAdditionalDetails() { - return getCause().getAdditionalDetails(); - } - - public String getExtendedRequestId() { - return getCause().getExtendedRequestId(); + public S3Exception getCause() { + return (S3Exception) super.getCause(); } } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AWSServiceIOException.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AWSServiceIOException.java index a9c2c984020..72d10951880 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AWSServiceIOException.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AWSServiceIOException.java @@ -18,13 +18,15 @@ package org.apache.hadoop.fs.s3a; -import com.amazonaws.AmazonServiceException; +import software.amazon.awssdk.awscore.exception.AwsServiceException; +import software.amazon.awssdk.awscore.exception.AwsErrorDetails; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; + /** * A specific exception from AWS operations. 
- * The exception must always be created with an {@link AmazonServiceException}. + * The exception must always be created with an {@link AwsServiceException}. * The attributes of this exception can all be directly accessed. */ @InterfaceAudience.Public @@ -37,36 +39,31 @@ public class AWSServiceIOException extends AWSClientIOException { * @param cause the underlying cause */ public AWSServiceIOException(String operation, - AmazonServiceException cause) { + AwsServiceException cause) { super(operation, cause); } - public AmazonServiceException getCause() { - return (AmazonServiceException) super.getCause(); + public AwsServiceException getCause() { + return (AwsServiceException) super.getCause(); } - public String getRequestId() { - return getCause().getRequestId(); + public boolean retryable() { + return getCause().retryable(); } - public String getServiceName() { - return getCause().getServiceName(); + public String requestId() { + return getCause().requestId(); } - public String getErrorCode() { - return getCause().getErrorCode(); + public AwsErrorDetails awsErrorDetails() { + return getCause().awsErrorDetails(); } - public int getStatusCode() { - return getCause().getStatusCode(); + public int statusCode() { + return getCause().statusCode(); } - public String getRawResponseContent() { - return getCause().getRawResponseContent(); + public String extendedRequestId() { + return getCause().extendedRequestId(); } - - public boolean isRetryable() { - return getCause().isRetryable(); - } - } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AWSServiceThrottledException.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AWSServiceThrottledException.java index 131cea75622..ba9f1b0ac1f 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AWSServiceThrottledException.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AWSServiceThrottledException.java @@ -18,7 +18,7 @@ package org.apache.hadoop.fs.s3a; -import com.amazonaws.AmazonServiceException; +import software.amazon.awssdk.awscore.exception.AwsServiceException; /** * Exception raised when a service was throttled. @@ -36,7 +36,7 @@ public class AWSServiceThrottledException extends AWSServiceIOException { * @param cause the underlying cause */ public AWSServiceThrottledException(String operation, - AmazonServiceException cause) { + AwsServiceException cause) { super(operation, cause); } } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AWSStatus500Exception.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AWSStatus500Exception.java index 83be294fac7..1e13690bf9d 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AWSStatus500Exception.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AWSStatus500Exception.java @@ -18,7 +18,7 @@ package org.apache.hadoop.fs.s3a; -import com.amazonaws.AmazonServiceException; +import software.amazon.awssdk.awscore.exception.AwsServiceException; /** * A 500 response came back from a service. 
@@ -31,7 +31,7 @@ import com.amazonaws.AmazonServiceException; */ public class AWSStatus500Exception extends AWSServiceIOException { public AWSStatus500Exception(String operation, - AmazonServiceException cause) { + AwsServiceException cause) { super(operation, cause); } } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AnonymousAWSCredentialsProvider.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AnonymousAWSCredentialsProvider.java index 564c03bf731..dcfc2a03b12 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AnonymousAWSCredentialsProvider.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AnonymousAWSCredentialsProvider.java @@ -18,9 +18,10 @@ package org.apache.hadoop.fs.s3a; -import com.amazonaws.auth.AWSCredentialsProvider; -import com.amazonaws.auth.AnonymousAWSCredentials; -import com.amazonaws.auth.AWSCredentials; +import software.amazon.awssdk.auth.credentials.AnonymousCredentialsProvider; +import software.amazon.awssdk.auth.credentials.AwsCredentials; +import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider; + import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; @@ -35,23 +36,18 @@ import org.apache.hadoop.classification.InterfaceStability; * property fs.s3a.aws.credentials.provider. Therefore, changing the class name * would be a backward-incompatible change. * - * @deprecated This class will be replaced by one that implements AWS SDK V2's AwsCredentialProvider - * as part of upgrading S3A to SDK V2. See HADOOP-18073. */ @InterfaceAudience.Private @InterfaceStability.Stable -@Deprecated -public class AnonymousAWSCredentialsProvider implements AWSCredentialsProvider { +public class AnonymousAWSCredentialsProvider implements AwsCredentialsProvider { public static final String NAME = "org.apache.hadoop.fs.s3a.AnonymousAWSCredentialsProvider"; - public AWSCredentials getCredentials() { - return new AnonymousAWSCredentials(); + public AwsCredentials resolveCredentials() { + return AnonymousCredentialsProvider.create().resolveCredentials(); } - public void refresh() {} - @Override public String toString() { return getClass().getSimpleName(); diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/ArnResource.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/ArnResource.java index 0294f772290..7c0283b1cec 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/ArnResource.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/ArnResource.java @@ -20,7 +20,7 @@ package org.apache.hadoop.fs.s3a; import javax.annotation.Nonnull; -import com.amazonaws.arn.Arn; +import software.amazon.awssdk.arns.Arn; /** * Represents an Arn Resource, this can be an accesspoint or bucket. 
@@ -120,14 +120,14 @@ public final class ArnResource { public static ArnResource accessPointFromArn(String arn) throws IllegalArgumentException { Arn parsed = Arn.fromString(arn); - if (parsed.getRegion().isEmpty() || parsed.getAccountId().isEmpty() || - parsed.getResourceAsString().isEmpty()) { + if (!parsed.region().isPresent() || !parsed.accountId().isPresent() || + parsed.resourceAsString().isEmpty()) { throw new IllegalArgumentException( String.format("Access Point Arn %s has an invalid format or missing properties", arn)); } - String resourceName = parsed.getResource().getResource(); - return new ArnResource(resourceName, parsed.getAccountId(), parsed.getRegion(), - parsed.getPartition(), arn); + String resourceName = parsed.resource().resource(); + return new ArnResource(resourceName, parsed.accountId().get(), parsed.region().get(), + parsed.partition(), arn); } } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java index 451b9b0ee24..92fc50bed09 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java @@ -140,7 +140,6 @@ public final class Constants { public static final String ASSUMED_ROLE_POLICY = "fs.s3a.assumed.role.policy"; - @SuppressWarnings("deprecation") public static final String ASSUMED_ROLE_CREDENTIALS_DEFAULT = SimpleAWSCredentialsProvider.NAME; @@ -586,7 +585,7 @@ public final class Constants { public static final String SIGNING_ALGORITHM_STS = "fs.s3a." + Constants.AWS_SERVICE_IDENTIFIER_STS.toLowerCase() - + "signing-algorithm"; + + ".signing-algorithm"; public static final String S3N_FOLDER_SUFFIX = "_$folder$"; public static final String FS_S3A_BLOCK_SIZE = "fs.s3a.block.size"; @@ -735,7 +734,6 @@ public final class Constants { @InterfaceAudience.Private @InterfaceStability.Unstable - @SuppressWarnings("deprecation") public static final Class DEFAULT_S3_CLIENT_FACTORY_IMPL = DefaultS3ClientFactory.class; @@ -1275,4 +1273,9 @@ public final class Constants { */ public static final String STORE_CAPABILITY_DIRECTORY_MARKER_MULTIPART_UPLOAD_ENABLED = "fs.s3a.capability.multipart.uploads.enabled"; + + /** + * The bucket region header. + */ + public static final String BUCKET_REGION_HEADER = "x-amz-bucket-region"; } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/CredentialInitializationException.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/CredentialInitializationException.java index 2f0cfd37ad3..ff6dc6a6037 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/CredentialInitializationException.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/CredentialInitializationException.java @@ -18,7 +18,7 @@ package org.apache.hadoop.fs.s3a; -import com.amazonaws.AmazonClientException; +import software.amazon.awssdk.core.exception.SdkException; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; @@ -26,8 +26,8 @@ import org.apache.hadoop.classification.InterfaceStability; /** * Exception which Hadoop's AWSCredentialsProvider implementations should * throw when there is a problem with the credential setup. This - * is a subclass of {@link AmazonClientException} which sets - * {@link #isRetryable()} to false, so as to fail fast. 
+ * is a subclass of {@link SdkException} which sets + * {@link #retryable()} to false, so as to fail fast. * This is used in credential providers and elsewhere. * When passed through {@code S3AUtils.translateException()} it * is mapped to an AccessDeniedException. As a result, the Invoker @@ -35,13 +35,14 @@ import org.apache.hadoop.classification.InterfaceStability; */ @InterfaceAudience.Public @InterfaceStability.Stable -public class CredentialInitializationException extends AmazonClientException { +public class CredentialInitializationException extends SdkException { + public CredentialInitializationException(String message, Throwable t) { - super(message, t); + super(builder().message(message).cause(t)); } public CredentialInitializationException(String message) { - super(message); + super(builder().message(message)); } /** @@ -49,7 +50,7 @@ public class CredentialInitializationException extends AmazonClientException { * @return false, always. */ @Override - public boolean isRetryable() { + public boolean retryable() { return false; } } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java index bab4ec896ff..1b2c129a642 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java @@ -20,32 +20,25 @@ package org.apache.hadoop.fs.s3a; import java.io.IOException; import java.net.URI; +import java.net.URISyntaxException; -import com.amazonaws.ClientConfiguration; -import com.amazonaws.SdkClientException; -import com.amazonaws.client.builder.AwsClientBuilder; -import com.amazonaws.handlers.RequestHandler2; -import com.amazonaws.regions.RegionUtils; -import com.amazonaws.services.s3.AmazonS3; -import com.amazonaws.services.s3.AmazonS3Builder; -import com.amazonaws.services.s3.AmazonS3Client; -import com.amazonaws.services.s3.AmazonS3ClientBuilder; -import com.amazonaws.services.s3.AmazonS3EncryptionClientV2Builder; -import com.amazonaws.services.s3.AmazonS3EncryptionV2; -import com.amazonaws.services.s3.S3ClientOptions; -import com.amazonaws.services.s3.internal.ServiceUtils; -import com.amazonaws.services.s3.model.CryptoConfigurationV2; -import com.amazonaws.services.s3.model.CryptoMode; -import com.amazonaws.services.s3.model.CryptoRangeGetMode; -import com.amazonaws.services.s3.model.EncryptionMaterialsProvider; -import com.amazonaws.services.s3.model.KMSEncryptionMaterialsProvider; -import com.amazonaws.util.AwsHostNameUtils; -import com.amazonaws.util.RuntimeHttpUtils; -import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; -import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.fs.s3a.impl.AWSClientConfig; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import software.amazon.awssdk.core.client.config.ClientOverrideConfiguration; +import software.amazon.awssdk.core.client.config.SdkAdvancedClientOption; +import software.amazon.awssdk.core.interceptor.ExecutionInterceptor; +import software.amazon.awssdk.core.retry.RetryPolicy; +import software.amazon.awssdk.http.apache.ApacheHttpClient; +import software.amazon.awssdk.http.nio.netty.NettyNioAsyncHttpClient; +import software.amazon.awssdk.regions.Region; +import software.amazon.awssdk.services.s3.S3AsyncClient; +import software.amazon.awssdk.services.s3.S3BaseClientBuilder; 
+import software.amazon.awssdk.services.s3.S3Client; +import software.amazon.awssdk.services.s3.S3Configuration; +import software.amazon.awssdk.transfer.s3.S3TransferManager; + import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; @@ -54,15 +47,11 @@ import org.apache.hadoop.conf.Configured; import org.apache.hadoop.fs.s3a.statistics.impl.AwsStatisticsCollector; import org.apache.hadoop.fs.store.LogExactlyOnce; -import static com.amazonaws.services.s3.Headers.REQUESTER_PAYS_HEADER; -import static org.apache.hadoop.fs.s3a.Constants.AWS_REGION; -import static org.apache.hadoop.fs.s3a.Constants.AWS_S3_CENTRAL_REGION; -import static org.apache.hadoop.fs.s3a.Constants.EXPERIMENTAL_AWS_INTERNAL_THROTTLING; -import static org.apache.hadoop.fs.s3a.Constants.EXPERIMENTAL_AWS_INTERNAL_THROTTLING_DEFAULT; -import static org.apache.hadoop.fs.s3a.Constants.S3_ENCRYPTION_KEY; -import static org.apache.hadoop.fs.s3a.S3AUtils.getEncryptionAlgorithm; -import static org.apache.hadoop.fs.s3a.S3AUtils.getS3EncryptionKey; -import static org.apache.hadoop.fs.s3a.S3AUtils.translateException; +import static org.apache.hadoop.fs.s3a.impl.AWSHeaders.REQUESTER_PAYS_HEADER; +import static org.apache.hadoop.fs.s3a.Constants.DEFAULT_SECURE_CONNECTIONS; +import static org.apache.hadoop.fs.s3a.Constants.SECURE_CONNECTIONS; +import static org.apache.hadoop.fs.s3a.Constants.AWS_SERVICE_IDENTIFIER_S3; + /** * The default {@link S3ClientFactory} implementation. @@ -71,12 +60,9 @@ import static org.apache.hadoop.fs.s3a.S3AUtils.translateException; */ @InterfaceAudience.Private @InterfaceStability.Unstable -@SuppressWarnings("deprecation") public class DefaultS3ClientFactory extends Configured implements S3ClientFactory { - private static final String S3_SERVICE_NAME = "s3"; - private static final String REQUESTER_PAYS_HEADER_VALUE = "requester"; /** @@ -85,310 +71,153 @@ public class DefaultS3ClientFactory extends Configured protected static final Logger LOG = LoggerFactory.getLogger(DefaultS3ClientFactory.class); - /** - * A one-off warning of default region chains in use. - */ - private static final LogExactlyOnce WARN_OF_DEFAULT_REGION_CHAIN = - new LogExactlyOnce(LOG); - - /** - * Warning message printed when the SDK Region chain is in use. - */ - private static final String SDK_REGION_CHAIN_IN_USE = - "S3A filesystem client is using" - + " the SDK region resolution chain."; /** Exactly once log to inform about ignoring the AWS-SDK Warnings for CSE. */ private static final LogExactlyOnce IGNORE_CSE_WARN = new LogExactlyOnce(LOG); - /** Bucket name. */ - private String bucket; - - /** - * Create the client by preparing the AwsConf configuration - * and then invoking {@code buildAmazonS3Client()}. 
- */ @Override - public AmazonS3 createS3Client( + public S3Client createS3Client( final URI uri, final S3ClientCreationParameters parameters) throws IOException { + Configuration conf = getConf(); - bucket = uri.getHost(); - final ClientConfiguration awsConf = S3AUtils - .createAwsConf(conf, - bucket, - Constants.AWS_SERVICE_IDENTIFIER_S3); + String bucket = uri.getHost(); + + ApacheHttpClient.Builder httpClientBuilder = AWSClientConfig + .createHttpClientBuilder(conf) + .proxyConfiguration(AWSClientConfig.createProxyConfiguration(conf, bucket)); + return configureClientBuilder(S3Client.builder(), parameters, conf, bucket) + .httpClientBuilder(httpClientBuilder) + .build(); + } + + @Override + public S3AsyncClient createS3AsyncClient( + final URI uri, + final S3ClientCreationParameters parameters) throws IOException { + + Configuration conf = getConf(); + String bucket = uri.getHost(); + NettyNioAsyncHttpClient.Builder httpClientBuilder = AWSClientConfig + .createAsyncHttpClientBuilder(conf) + .proxyConfiguration(AWSClientConfig.createAsyncProxyConfiguration(conf, bucket)); + return configureClientBuilder(S3AsyncClient.builder(), parameters, conf, bucket) + .httpClientBuilder(httpClientBuilder) + .build(); + } + + @Override + public S3TransferManager createS3TransferManager(final S3AsyncClient s3AsyncClient) { + + return S3TransferManager.builder() + .s3Client(s3AsyncClient) + .build(); + } + + /** + * Configure a sync or async S3 client builder. + * This method handles all shared configuration. + * @param builder S3 client builder + * @param parameters parameter object + * @param conf configuration object + * @param bucket bucket name + * @return the builder object + * @param S3 client builder type + * @param S3 client type + */ + private , ClientT> BuilderT configureClientBuilder( + BuilderT builder, S3ClientCreationParameters parameters, Configuration conf, String bucket) + throws IOException { + + Region region = parameters.getRegion(); + LOG.debug("Using region {}", region); + + URI endpoint = getS3Endpoint(parameters.getEndpoint(), conf); + + if (endpoint != null) { + builder.endpointOverride(endpoint); + LOG.debug("Using endpoint {}", endpoint); + } + + S3Configuration serviceConfiguration = S3Configuration.builder() + .pathStyleAccessEnabled(parameters.isPathStyleAccess()) + .build(); + + return builder + .overrideConfiguration(createClientOverrideConfiguration(parameters, conf)) + .credentialsProvider(parameters.getCredentialSet()) + .region(region) + .serviceConfiguration(serviceConfiguration); + } + + /** + * Create an override configuration for an S3 client. + * @param parameters parameter object + * @param conf configuration object + * @throws IOException any IOE raised, or translated exception + * @return the override configuration + */ + protected ClientOverrideConfiguration createClientOverrideConfiguration( + S3ClientCreationParameters parameters, Configuration conf) throws IOException { + final ClientOverrideConfiguration.Builder clientOverrideConfigBuilder = + AWSClientConfig.createClientConfigBuilder(conf, AWS_SERVICE_IDENTIFIER_S3); + // add any headers - parameters.getHeaders().forEach((h, v) -> - awsConf.addHeader(h, v)); + parameters.getHeaders().forEach((h, v) -> clientOverrideConfigBuilder.putHeader(h, v)); if (parameters.isRequesterPays()) { // All calls must acknowledge requester will pay via header. 
- awsConf.addHeader(REQUESTER_PAYS_HEADER, REQUESTER_PAYS_HEADER_VALUE); + clientOverrideConfigBuilder.putHeader(REQUESTER_PAYS_HEADER, REQUESTER_PAYS_HEADER_VALUE); } - // When EXPERIMENTAL_AWS_INTERNAL_THROTTLING is false - // throttling is explicitly disabled on the S3 client so that - // all failures are collected in S3A instrumentation, and its - // retry policy is the only one used. - // This may cause problems in copy/rename. - awsConf.setUseThrottleRetries( - conf.getBoolean(EXPERIMENTAL_AWS_INTERNAL_THROTTLING, - EXPERIMENTAL_AWS_INTERNAL_THROTTLING_DEFAULT)); - if (!StringUtils.isEmpty(parameters.getUserAgentSuffix())) { - awsConf.setUserAgentSuffix(parameters.getUserAgentSuffix()); + clientOverrideConfigBuilder.putAdvancedOption(SdkAdvancedClientOption.USER_AGENT_SUFFIX, + parameters.getUserAgentSuffix()); } - // Get the encryption method for this bucket. - S3AEncryptionMethods encryptionMethods = - getEncryptionAlgorithm(bucket, conf); - try { - // If CSE is enabled then build a S3EncryptionClient. - if (S3AEncryptionMethods.CSE_KMS.getMethod() - .equals(encryptionMethods.getMethod())) { - return buildAmazonS3EncryptionClient( - awsConf, - parameters); - } else { - return buildAmazonS3Client( - awsConf, - parameters); + if (parameters.getExecutionInterceptors() != null) { + for (ExecutionInterceptor interceptor : parameters.getExecutionInterceptors()) { + clientOverrideConfigBuilder.addExecutionInterceptor(interceptor); } - } catch (SdkClientException e) { - // SDK refused to build. - throw translateException("creating AWS S3 client", uri.toString(), e); } - } - - /** - * Create an {@link AmazonS3} client of type - * {@link AmazonS3EncryptionV2} if CSE is enabled. - * - * @param awsConf AWS configuration. - * @param parameters parameters. - * - * @return new AmazonS3 client. - * @throws IOException if lookupPassword() has any problem. - */ - protected AmazonS3 buildAmazonS3EncryptionClient( - final ClientConfiguration awsConf, - final S3ClientCreationParameters parameters) throws IOException { - - AmazonS3 client; - AmazonS3EncryptionClientV2Builder builder = - new AmazonS3EncryptionClientV2Builder(); - Configuration conf = getConf(); - - // CSE-KMS Method - String kmsKeyId = getS3EncryptionKey(bucket, conf, true); - // Check if kmsKeyID is not null - Preconditions.checkArgument(!StringUtils.isBlank(kmsKeyId), "CSE-KMS " - + "method requires KMS key ID. Use " + S3_ENCRYPTION_KEY - + " property to set it. "); - - EncryptionMaterialsProvider materialsProvider = - new KMSEncryptionMaterialsProvider(kmsKeyId); - builder.withEncryptionMaterialsProvider(materialsProvider); - //Configure basic params of a S3 builder. - configureBasicParams(builder, awsConf, parameters); - - // Configuring endpoint. - AmazonS3EncryptionClientV2Builder.EndpointConfiguration epr - = createEndpointConfiguration(parameters.getEndpoint(), - awsConf, getConf().getTrimmed(AWS_REGION)); - configureEndpoint(builder, epr); - - // Create cryptoConfig. 
- CryptoConfigurationV2 cryptoConfigurationV2 = - new CryptoConfigurationV2(CryptoMode.AuthenticatedEncryption) - .withRangeGetMode(CryptoRangeGetMode.ALL); - if (epr != null) { - cryptoConfigurationV2 - .withAwsKmsRegion(RegionUtils.getRegion(epr.getSigningRegion())); - LOG.debug("KMS region used: {}", cryptoConfigurationV2.getAwsKmsRegion()); - } - builder.withCryptoConfiguration(cryptoConfigurationV2); - client = builder.build(); - IGNORE_CSE_WARN.info("S3 client-side encryption enabled: Ignore S3-CSE " - + "Warnings."); - - return client; - } - - /** - * Use the Builder API to create an AWS S3 client. - *
- * This has a more complex endpoint configuration mechanism - * which initially caused problems; the - * {@code withForceGlobalBucketAccessEnabled(true)} - * command is critical here. - * @param awsConf AWS configuration - * @param parameters parameters - * @return new AmazonS3 client - * @throws SdkClientException if the configuration is invalid. - */ - protected AmazonS3 buildAmazonS3Client( - final ClientConfiguration awsConf, - final S3ClientCreationParameters parameters) { - AmazonS3ClientBuilder b = AmazonS3Client.builder(); - configureBasicParams(b, awsConf, parameters); - - // endpoint set up is a PITA - AwsClientBuilder.EndpointConfiguration epr - = createEndpointConfiguration(parameters.getEndpoint(), - awsConf, getConf().getTrimmed(AWS_REGION)); - configureEndpoint(b, epr); - final AmazonS3 client = b.build(); - return client; - } - - /** - * A method to configure basic AmazonS3Builder parameters. - * - * @param builder Instance of AmazonS3Builder used. - * @param awsConf ClientConfiguration used. - * @param parameters Parameters used to set in the builder. - */ - private void configureBasicParams(AmazonS3Builder builder, - ClientConfiguration awsConf, S3ClientCreationParameters parameters) { - builder.withCredentials(parameters.getCredentialSet()); - builder.withClientConfiguration(awsConf); - builder.withPathStyleAccessEnabled(parameters.isPathStyleAccess()); if (parameters.getMetrics() != null) { - builder.withMetricsCollector( + clientOverrideConfigBuilder.addMetricPublisher( new AwsStatisticsCollector(parameters.getMetrics())); } - if (parameters.getRequestHandlers() != null) { - builder.withRequestHandlers( - parameters.getRequestHandlers().toArray(new RequestHandler2[0])); - } - if (parameters.getMonitoringListener() != null) { - builder.withMonitoringListener(parameters.getMonitoringListener()); - } + final RetryPolicy.Builder retryPolicyBuilder = AWSClientConfig.createRetryPolicyBuilder(conf); + clientOverrideConfigBuilder.retryPolicy(retryPolicyBuilder.build()); + + return clientOverrideConfigBuilder.build(); } /** - * A method to configure endpoint and Region for an AmazonS3Builder. - * - * @param builder Instance of AmazonS3Builder used. - * @param epr EndpointConfiguration used to set in builder. - */ - private void configureEndpoint( - AmazonS3Builder builder, - AmazonS3Builder.EndpointConfiguration epr) { - if (epr != null) { - // an endpoint binding was constructed: use it. - builder.withEndpointConfiguration(epr); - } else { - // no idea what the endpoint is, so tell the SDK - // to work it out at the cost of an extra HEAD request - builder.withForceGlobalBucketAccessEnabled(true); - // HADOOP-17771 force set the region so the build process doesn't halt. - String region = getConf().getTrimmed(AWS_REGION, AWS_S3_CENTRAL_REGION); - LOG.debug("fs.s3a.endpoint.region=\"{}\"", region); - if (!region.isEmpty()) { - // there's either an explicit region or we have fallen back - // to the central one. - LOG.debug("Using default endpoint; setting region to {}", region); - builder.setRegion(region); - } else { - // no region. - // allow this if people really want it; it is OK to rely on this - // when deployed in EC2. - WARN_OF_DEFAULT_REGION_CHAIN.warn(SDK_REGION_CHAIN_IN_USE); - LOG.debug(SDK_REGION_CHAIN_IN_USE); - } - } - } - - /** - * Configure classic S3 client. - *
- * This includes: endpoint, Path Access and possibly other - * options. - * - * @param s3 S3 Client. - * @param endPoint s3 endpoint, may be empty - * @param pathStyleAccess enable path style access? - * @return S3 client - * @throws IllegalArgumentException if misconfigured - */ - protected static AmazonS3 configureAmazonS3Client(AmazonS3 s3, - final String endPoint, - final boolean pathStyleAccess) - throws IllegalArgumentException { - if (!endPoint.isEmpty()) { - try { - s3.setEndpoint(endPoint); - } catch (IllegalArgumentException e) { - String msg = "Incorrect endpoint: " + e.getMessage(); - LOG.error(msg); - throw new IllegalArgumentException(msg, e); - } - } - if (pathStyleAccess) { - LOG.debug("Enabling path style access!"); - s3.setS3ClientOptions(S3ClientOptions.builder() - .setPathStyleAccess(true) - .build()); - } - return s3; - } - - /** - * Given an endpoint string, return an endpoint config, or null, if none - * is needed. - *
- * This is a pretty painful piece of code. It is trying to replicate - * what AwsClient.setEndpoint() does, because you can't - * call that setter on an AwsClient constructed via - * the builder, and you can't pass a metrics collector - * down except through the builder. - *
- * Note also that AWS signing is a mystery which nobody fully - * understands, especially given all problems surface in a - * "400 bad request" response, which, like all security systems, - * provides minimal diagnostics out of fear of leaking - * secrets. + * Given a endpoint string, create the endpoint URI. * * @param endpoint possibly null endpoint. - * @param awsConf config to build the URI from. - * @param awsRegion AWS S3 Region if the corresponding config is set. - * @return a configuration for the S3 client builder. + * @param conf config to build the URI from. + * @return an endpoint uri */ - @VisibleForTesting - public static AwsClientBuilder.EndpointConfiguration - createEndpointConfiguration( - final String endpoint, final ClientConfiguration awsConf, - String awsRegion) { - LOG.debug("Creating endpoint configuration for \"{}\"", endpoint); + private static URI getS3Endpoint(String endpoint, final Configuration conf) { + + boolean secureConnections = conf.getBoolean(SECURE_CONNECTIONS, DEFAULT_SECURE_CONNECTIONS); + + String protocol = secureConnections ? "https" : "http"; + if (endpoint == null || endpoint.isEmpty()) { - // the default endpoint...we should be using null at this point. - LOG.debug("Using default endpoint -no need to generate a configuration"); + // don't set an endpoint if none is configured, instead let the SDK figure it out. return null; } - final URI epr = RuntimeHttpUtils.toUri(endpoint, awsConf); - LOG.debug("Endpoint URI = {}", epr); - String region = awsRegion; - if (StringUtils.isBlank(region)) { - if (!ServiceUtils.isS3USStandardEndpoint(endpoint)) { - LOG.debug("Endpoint {} is not the default; parsing", epr); - region = AwsHostNameUtils.parseRegion( - epr.getHost(), - S3_SERVICE_NAME); - } else { - // US-east, set region == null. - LOG.debug("Endpoint {} is the standard one; declare region as null", - epr); - region = null; - } + if (!endpoint.contains("://")) { + endpoint = String.format("%s://%s", protocol, endpoint); + } + + try { + return new URI(endpoint); + } catch (URISyntaxException e) { + throw new IllegalArgumentException(e); } - LOG.debug("Region for endpoint {}, URI {} is determined as {}", - endpoint, epr, region); - return new AwsClientBuilder.EndpointConfiguration(endpoint, region); } } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/FailureInjectionPolicy.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/FailureInjectionPolicy.java index f3e00cc61d7..f5c9c43a49c 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/FailureInjectionPolicy.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/FailureInjectionPolicy.java @@ -36,7 +36,7 @@ public class FailureInjectionPolicy { public static final String DEFAULT_DELAY_KEY_SUBSTRING = "DELAY_LISTING_ME"; private static final Logger LOG = - LoggerFactory.getLogger(InconsistentAmazonS3Client.class); + LoggerFactory.getLogger(FailureInjectionPolicy.class); /** * Probability of throttling a request. diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/InconsistentAmazonS3Client.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/InconsistentAmazonS3Client.java deleted file mode 100644 index 317f2c78576..00000000000 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/InconsistentAmazonS3Client.java +++ /dev/null @@ -1,345 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.s3a; - -import java.util.List; -import java.util.concurrent.atomic.AtomicLong; - -import com.amazonaws.AmazonClientException; -import com.amazonaws.AmazonServiceException; -import com.amazonaws.ClientConfiguration; -import com.amazonaws.SdkClientException; -import com.amazonaws.auth.AWSCredentialsProvider; -import com.amazonaws.services.s3.AmazonS3; -import com.amazonaws.services.s3.AmazonS3Client; -import com.amazonaws.services.s3.model.CompleteMultipartUploadRequest; -import com.amazonaws.services.s3.model.CompleteMultipartUploadResult; -import com.amazonaws.services.s3.model.DeleteObjectRequest; -import com.amazonaws.services.s3.model.DeleteObjectsRequest; -import com.amazonaws.services.s3.model.DeleteObjectsResult; -import com.amazonaws.services.s3.model.GetObjectRequest; -import com.amazonaws.services.s3.model.InitiateMultipartUploadRequest; -import com.amazonaws.services.s3.model.InitiateMultipartUploadResult; -import com.amazonaws.services.s3.model.ListMultipartUploadsRequest; -import com.amazonaws.services.s3.model.ListObjectsRequest; -import com.amazonaws.services.s3.model.ListObjectsV2Request; -import com.amazonaws.services.s3.model.ListObjectsV2Result; -import com.amazonaws.services.s3.model.MultipartUploadListing; -import com.amazonaws.services.s3.model.ObjectListing; -import com.amazonaws.services.s3.model.PutObjectRequest; -import com.amazonaws.services.s3.model.PutObjectResult; -import com.amazonaws.services.s3.model.S3Object; -import com.amazonaws.services.s3.model.S3ObjectSummary; -import com.amazonaws.services.s3.model.UploadPartRequest; -import com.amazonaws.services.s3.model.UploadPartResult; -import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import org.apache.hadoop.classification.InterfaceAudience; -import org.apache.hadoop.classification.InterfaceStability; -import org.apache.hadoop.conf.Configuration; - -/** - * A wrapper around {@link com.amazonaws.services.s3.AmazonS3} that injects - * failures. - * It used to also inject inconsistency, but this was removed with S3Guard; - * what is retained is the ability to throttle AWS operations and for the - * input stream to be inconsistent. - */ -@InterfaceAudience.Private -@InterfaceStability.Unstable -public class InconsistentAmazonS3Client extends AmazonS3Client { - - private static final Logger LOG = - LoggerFactory.getLogger(InconsistentAmazonS3Client.class); - - private FailureInjectionPolicy policy; - - /** - * Counter of failures since last reset. - */ - private final AtomicLong failureCounter = new AtomicLong(0); - - - /** - * Instantiate. 
- * This subclasses a deprecated constructor of the parent - * {@code AmazonS3Client} class; we can't use the builder API because, - * that only creates the consistent client. - * @param credentials credentials to auth. - * @param clientConfiguration connection settings - * @param conf hadoop configuration. - */ - @SuppressWarnings("deprecation") - public InconsistentAmazonS3Client(AWSCredentialsProvider credentials, - ClientConfiguration clientConfiguration, Configuration conf) { - super(credentials, clientConfiguration); - policy = new FailureInjectionPolicy(conf); - } - - /** - * A way for tests to patch in a different fault injection policy at runtime. - * @param fs filesystem under test - * @param policy failure injection settings to set - * @throws Exception on failure - */ - public static void setFailureInjectionPolicy(S3AFileSystem fs, - FailureInjectionPolicy policy) throws Exception { - AmazonS3 s3 = fs.getAmazonS3ClientForTesting("s3guard"); - InconsistentAmazonS3Client ic = InconsistentAmazonS3Client.castFrom(s3); - ic.replacePolicy(policy); - } - - private void replacePolicy(FailureInjectionPolicy pol) { - this.policy = pol; - } - - @Override - public String toString() { - return String.format("Inconsistent S3 Client: %s; failure count %d", - policy, failureCounter.get()); - } - - /** - * Convenience function for test code to cast from supertype. - * @param c supertype to cast from - * @return subtype, not null - * @throws Exception on error - */ - public static InconsistentAmazonS3Client castFrom(AmazonS3 c) throws - Exception { - InconsistentAmazonS3Client ic = null; - if (c instanceof InconsistentAmazonS3Client) { - ic = (InconsistentAmazonS3Client) c; - } - Preconditions.checkNotNull(ic, "Not an instance of " + - "InconsistentAmazonS3Client"); - return ic; - } - - @Override - public DeleteObjectsResult deleteObjects(DeleteObjectsRequest - deleteObjectsRequest) - throws AmazonClientException, AmazonServiceException { - maybeFail(); - return super.deleteObjects(deleteObjectsRequest); - } - - @Override - public void deleteObject(DeleteObjectRequest deleteObjectRequest) - throws AmazonClientException, AmazonServiceException { - String key = deleteObjectRequest.getKey(); - LOG.debug("key {}", key); - maybeFail(); - super.deleteObject(deleteObjectRequest); - } - - /* We should only need to override this version of putObject() */ - @Override - public PutObjectResult putObject(PutObjectRequest putObjectRequest) - throws AmazonClientException, AmazonServiceException { - LOG.debug("key {}", putObjectRequest.getKey()); - maybeFail(); - return super.putObject(putObjectRequest); - } - - /* We should only need to override these versions of listObjects() */ - @Override - public ObjectListing listObjects(ListObjectsRequest listObjectsRequest) - throws AmazonClientException, AmazonServiceException { - maybeFail(); - return super.listObjects(listObjectsRequest); - } - - /* consistent listing with possibility of failing. 
*/ - @Override - public ListObjectsV2Result listObjectsV2(ListObjectsV2Request request) - throws AmazonClientException, AmazonServiceException { - maybeFail(); - return super.listObjectsV2(request); - } - - - @Override - public CompleteMultipartUploadResult completeMultipartUpload( - CompleteMultipartUploadRequest completeMultipartUploadRequest) - throws SdkClientException, AmazonServiceException { - maybeFail(); - return super.completeMultipartUpload(completeMultipartUploadRequest); - } - - @Override - public UploadPartResult uploadPart(UploadPartRequest uploadPartRequest) - throws SdkClientException, AmazonServiceException { - maybeFail(); - return super.uploadPart(uploadPartRequest); - } - - @Override - public InitiateMultipartUploadResult initiateMultipartUpload( - InitiateMultipartUploadRequest initiateMultipartUploadRequest) - throws SdkClientException, AmazonServiceException { - maybeFail(); - return super.initiateMultipartUpload(initiateMultipartUploadRequest); - } - - @Override - public MultipartUploadListing listMultipartUploads( - ListMultipartUploadsRequest listMultipartUploadsRequest) - throws SdkClientException, AmazonServiceException { - maybeFail(); - return super.listMultipartUploads(listMultipartUploadsRequest); - } - - /** - * Set the probability of throttling a request. - * @param throttleProbability the probability of a request being throttled. - */ - public void setThrottleProbability(float throttleProbability) { - policy.setThrottleProbability(throttleProbability); - } - - /** - * Conditionally fail the operation. - * @param errorMsg description of failure - * @param statusCode http status code for error - * @throws AmazonClientException if the client chooses to fail - * the request. - */ - private void maybeFail(String errorMsg, int statusCode) - throws AmazonClientException { - // code structure here is to line up for more failures later - AmazonServiceException ex = null; - if (FailureInjectionPolicy.trueWithProbability(policy.getThrottleProbability())) { - // throttle the request - ex = new AmazonServiceException(errorMsg - + " count = " + (failureCounter.get() + 1), null); - ex.setStatusCode(statusCode); - } - - int failureLimit = policy.getFailureLimit(); - if (ex != null) { - long count = failureCounter.incrementAndGet(); - if (failureLimit == 0 - || (failureLimit > 0 && count < failureLimit)) { - throw ex; - } - } - } - - private void maybeFail() { - maybeFail("throttled", 503); - } - - /** - * Set the limit on failures before all operations pass through. - * This resets the failure count. - * @param limit limit; "0" means "no limit" - */ - public void setFailureLimit(int limit) { - policy.setFailureLimit(limit); - failureCounter.set(0); - } - - @Override - public S3Object getObject(GetObjectRequest var1) throws SdkClientException, - AmazonServiceException { - maybeFail(); - return super.getObject(var1); - } - - @Override - public S3Object getObject(String bucketName, String key) - throws SdkClientException, AmazonServiceException { - maybeFail(); - return super.getObject(bucketName, key); - - } - - /** Since ObjectListing is immutable, we just override it with wrapper. 
*/ - @SuppressWarnings("serial") - private static class CustomObjectListing extends ObjectListing { - - private final List customListing; - private final List customPrefixes; - - CustomObjectListing(ObjectListing rawListing, - List customListing, - List customPrefixes) { - super(); - this.customListing = customListing; - this.customPrefixes = customPrefixes; - - this.setBucketName(rawListing.getBucketName()); - this.setCommonPrefixes(rawListing.getCommonPrefixes()); - this.setDelimiter(rawListing.getDelimiter()); - this.setEncodingType(rawListing.getEncodingType()); - this.setMarker(rawListing.getMarker()); - this.setMaxKeys(rawListing.getMaxKeys()); - this.setNextMarker(rawListing.getNextMarker()); - this.setPrefix(rawListing.getPrefix()); - this.setTruncated(rawListing.isTruncated()); - } - - @Override - public List getObjectSummaries() { - return customListing; - } - - @Override - public List getCommonPrefixes() { - return customPrefixes; - } - } - - @SuppressWarnings("serial") - private static class CustomListObjectsV2Result extends ListObjectsV2Result { - - private final List customListing; - private final List customPrefixes; - - CustomListObjectsV2Result(ListObjectsV2Result raw, - List customListing, List customPrefixes) { - super(); - this.customListing = customListing; - this.customPrefixes = customPrefixes; - - this.setBucketName(raw.getBucketName()); - this.setCommonPrefixes(raw.getCommonPrefixes()); - this.setDelimiter(raw.getDelimiter()); - this.setEncodingType(raw.getEncodingType()); - this.setStartAfter(raw.getStartAfter()); - this.setMaxKeys(raw.getMaxKeys()); - this.setContinuationToken(raw.getContinuationToken()); - this.setPrefix(raw.getPrefix()); - this.setTruncated(raw.isTruncated()); - } - - @Override - public List getObjectSummaries() { - return customListing; - } - - @Override - public List getCommonPrefixes() { - return customPrefixes; - } - } -} diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/InconsistentS3ClientFactory.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/InconsistentS3ClientFactory.java index 4bfcc8aba3a..d519c1c0763 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/InconsistentS3ClientFactory.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/InconsistentS3ClientFactory.java @@ -18,11 +18,19 @@ package org.apache.hadoop.fs.s3a; -import com.amazonaws.ClientConfiguration; -import com.amazonaws.services.s3.AmazonS3; +import java.io.IOException; +import java.util.concurrent.atomic.AtomicLong; + +import software.amazon.awssdk.awscore.exception.AwsServiceException; +import software.amazon.awssdk.core.client.config.ClientOverrideConfiguration; +import software.amazon.awssdk.core.exception.SdkException; +import software.amazon.awssdk.core.interceptor.Context; +import software.amazon.awssdk.core.interceptor.ExecutionAttributes; +import software.amazon.awssdk.core.interceptor.ExecutionInterceptor; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.conf.Configuration; /** * S3 Client factory used for testing with eventual consistency fault injection. @@ -30,25 +38,74 @@ import org.apache.hadoop.classification.InterfaceStability; * {@code hadoop-aws} module to enable integration tests to use this * just by editing the Hadoop configuration used to bring up the client. 
* - * The factory uses the older constructor-based instantiation/configuration - * of the client, so does not wire up metrics, handlers etc. + * The factory injects an {@link ExecutionInterceptor} to inject failures. */ @InterfaceAudience.Private @InterfaceStability.Unstable public class InconsistentS3ClientFactory extends DefaultS3ClientFactory { @Override - protected AmazonS3 buildAmazonS3Client( - final ClientConfiguration awsConf, - final S3ClientCreationParameters parameters) { + protected ClientOverrideConfiguration createClientOverrideConfiguration( + S3ClientCreationParameters parameters, Configuration conf) throws IOException { LOG.warn("** FAILURE INJECTION ENABLED. Do not run in production! **"); LOG.warn("List inconsistency is no longer emulated; only throttling and read errors"); - InconsistentAmazonS3Client s3 - = new InconsistentAmazonS3Client( - parameters.getCredentialSet(), awsConf, getConf()); - configureAmazonS3Client(s3, - parameters.getEndpoint(), - parameters.isPathStyleAccess()); - return s3; + return super.createClientOverrideConfiguration(parameters, conf) + .toBuilder() + .addExecutionInterceptor(new FailureInjectionInterceptor( + new FailureInjectionPolicy(conf))) + .build(); + } + + private static class FailureInjectionInterceptor implements ExecutionInterceptor { + + private final FailureInjectionPolicy policy; + + /** + * Counter of failures since last reset. + */ + private final AtomicLong failureCounter = new AtomicLong(0); + + FailureInjectionInterceptor(FailureInjectionPolicy policy) { + this.policy = policy; + } + + @Override + public void beforeExecution(Context.BeforeExecution context, + ExecutionAttributes executionAttributes) { + maybeFail(); + } + + private void maybeFail() { + maybeFail("throttled", 503); + } + + /** + * Conditionally fail the operation. + * @param errorMsg description of failure + * @param statusCode http status code for error + * @throws SdkException if the client chooses to fail + * the request. 
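As a usage illustration that is not part of the patch, this is roughly how an ExecutionInterceptor such as the one above is attached to a v2 client through its override configuration; the InterceptorWiringSketch and AlwaysThrottle names are invented for the example.

import software.amazon.awssdk.awscore.exception.AwsServiceException;
import software.amazon.awssdk.core.client.config.ClientOverrideConfiguration;
import software.amazon.awssdk.core.interceptor.Context;
import software.amazon.awssdk.core.interceptor.ExecutionAttributes;
import software.amazon.awssdk.core.interceptor.ExecutionInterceptor;
import software.amazon.awssdk.services.s3.S3Client;

public final class InterceptorWiringSketch {
  /** Interceptor that fails every request with an HTTP 503, for illustration. */
  private static final class AlwaysThrottle implements ExecutionInterceptor {
    @Override
    public void beforeExecution(Context.BeforeExecution context,
        ExecutionAttributes executionAttributes) {
      throw AwsServiceException.builder()
          .message("throttled (injected)")
          .statusCode(503)
          .build();
    }
  }

  public static S3Client throttlingClient() {
    ClientOverrideConfiguration override = ClientOverrideConfiguration.builder()
        .addExecutionInterceptor(new AlwaysThrottle())
        .build();
    return S3Client.builder()
        .overrideConfiguration(override)
        .build();
  }
}

The same addExecutionInterceptor() hook is what lets the factory inject failures without subclassing the client, which is why the old InconsistentAmazonS3Client subclass could be deleted.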
+ */ + private void maybeFail(String errorMsg, int statusCode) + throws SdkException { + // code structure here is to line up for more failures later + AwsServiceException ex = null; + if (FailureInjectionPolicy.trueWithProbability(policy.getThrottleProbability())) { + // throttle the request + ex = AwsServiceException.builder() + .message(errorMsg + " count = " + (failureCounter.get() + 1)) + .statusCode(statusCode) + .build(); + } + + int failureLimit = policy.getFailureLimit(); + if (ex != null) { + long count = failureCounter.incrementAndGet(); + if (failureLimit == 0 + || (failureLimit > 0 && count < failureLimit)) { + throw ex; + } + } + } } } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Invoker.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Invoker.java index 279bfeba987..ec232728eee 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Invoker.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Invoker.java @@ -24,8 +24,7 @@ import java.util.Optional; import java.util.concurrent.Future; import javax.annotation.Nullable; -import com.amazonaws.AmazonClientException; -import com.amazonaws.SdkBaseException; +import software.amazon.awssdk.core.exception.SdkException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -39,6 +38,7 @@ import org.apache.hadoop.util.functional.FutureIO; import org.apache.hadoop.util.functional.InvocationRaisingIOE; import org.apache.hadoop.util.Preconditions; + import static org.apache.hadoop.fs.statistics.impl.IOStatisticsBinding.invokeTrackingDuration; /** @@ -120,7 +120,7 @@ public class Invoker { throws IOException { try (DurationInfo ignored = new DurationInfo(LOG, false, "%s", action)) { return operation.apply(); - } catch (AmazonClientException e) { + } catch (SdkException e) { throw S3AUtils.translateException(action, path, e); } } @@ -145,7 +145,7 @@ public class Invoker { throws IOException { try { return invokeTrackingDuration(tracker, operation); - } catch (AmazonClientException e) { + } catch (SdkException e) { throw S3AUtils.translateException(action, path, e); } } @@ -170,7 +170,7 @@ public class Invoker { /** * - * Wait for a future, translating AmazonClientException into an IOException. + * Wait for a future, translating SdkException into an IOException. * @param action action to execute (used in error messages) * @param path path of work (used in error messages) * @param future future to await for @@ -186,7 +186,7 @@ public class Invoker { throws IOException { try (DurationInfo ignored = new DurationInfo(LOG, false, "%s", action)) { return FutureIO.awaitFuture(future); - } catch (AmazonClientException e) { + } catch (SdkException e) { throw S3AUtils.translateException(action, path, e); } } @@ -444,7 +444,7 @@ public class Invoker { * @param operation operation to execute * @return the result of the call * @throws IOException any IOE raised - * @throws SdkBaseException any AWS exception raised + * @throws SdkException any AWS exception raised * @throws RuntimeException : these are never caught and retries. 
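A minimal sketch, not part of the patch, of the translation idiom the reworked Invoker relies on; only SdkException and S3AUtils.translateException come from the change, the helper class and method below are invented.

import java.io.IOException;
import java.util.concurrent.Callable;

import software.amazon.awssdk.core.exception.SdkException;

import org.apache.hadoop.fs.s3a.S3AUtils;

public final class TranslationSketch {
  /** Run an operation once, converting any SDK v2 exception into an IOException. */
  public static <T> T once(String action, String path, Callable<T> operation)
      throws IOException {
    try {
      return operation.call();
    } catch (SdkException e) {
      // Replaces the v1 catch of AmazonClientException.
      throw S3AUtils.translateException(action, path, e);
    } catch (Exception e) {
      throw new IOException(action + " on " + path, e);
    }
  }
}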
*/ @Retries.RetryRaw @@ -466,7 +466,7 @@ public class Invoker { } // execute the operation, returning if successful return operation.apply(); - } catch (IOException | SdkBaseException e) { + } catch (IOException | SdkException e) { caught = e; } // you only get here if the operation didn't complete @@ -478,7 +478,7 @@ public class Invoker { translated = (IOException) caught; } else { translated = S3AUtils.translateException(text, "", - (SdkBaseException)caught); + (SdkException) caught); } try { @@ -517,11 +517,10 @@ public class Invoker { if (caught instanceof IOException) { throw (IOException) caught; } else { - throw (SdkBaseException) caught; + throw (SdkException) caught; } } - /** * Execute an operation; any exception raised is simply caught and * logged at debug. diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Listing.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Listing.java index 6c39cc4b642..490deaaab04 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Listing.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Listing.java @@ -18,7 +18,8 @@ package org.apache.hadoop.fs.s3a; -import com.amazonaws.services.s3.model.S3ObjectSummary; +import software.amazon.awssdk.services.s3.model.CommonPrefix; +import software.amazon.awssdk.services.s3.model.S3Object; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.VisibleForTesting; @@ -277,19 +278,19 @@ public class Listing extends AbstractStoreOperation { } /** - * Interface to implement by the logic deciding whether to accept a summary + * Interface to implement the logic deciding whether to accept a s3Object * entry or path as a valid file or directory. */ interface FileStatusAcceptor { /** - * Predicate to decide whether or not to accept a summary entry. + * Predicate to decide whether or not to accept a s3Object entry. * @param keyPath qualified path to the entry - * @param summary summary entry + * @param s3Object s3Object entry * @return true if the entry is accepted (i.e. that a status entry * should be generated. */ - boolean accept(Path keyPath, S3ObjectSummary summary); + boolean accept(Path keyPath, S3Object s3Object); /** * Predicate to decide whether or not to accept a prefix. @@ -451,21 +452,21 @@ public class Listing extends AbstractStoreOperation { int added = 0, ignored = 0; // list to fill in with results. Initial size will be list maximum. 
List stats = new ArrayList<>( - objects.getObjectSummaries().size() + + objects.getS3Objects().size() + objects.getCommonPrefixes().size()); // objects - for (S3ObjectSummary summary : objects.getObjectSummaries()) { - String key = summary.getKey(); + for (S3Object s3Object : objects.getS3Objects()) { + String key = s3Object.key(); Path keyPath = getStoreContext().getContextAccessors().keyToPath(key); if (LOG.isDebugEnabled()) { - LOG.debug("{}: {}", keyPath, stringify(summary)); + LOG.debug("{}: {}", keyPath, stringify(s3Object)); } // Skip over keys that are ourselves and old S3N _$folder$ files - if (acceptor.accept(keyPath, summary) && filter.accept(keyPath)) { - S3AFileStatus status = createFileStatus(keyPath, summary, + if (acceptor.accept(keyPath, s3Object) && filter.accept(keyPath)) { + S3AFileStatus status = createFileStatus(keyPath, s3Object, listingOperationCallbacks.getDefaultBlockSize(keyPath), getStoreContext().getUsername(), - summary.getETag(), null, isCSEEnabled); + s3Object.eTag(), null, isCSEEnabled); LOG.debug("Adding: {}", status); stats.add(status); added++; @@ -476,11 +477,11 @@ public class Listing extends AbstractStoreOperation { } // prefixes: always directories - for (String prefix : objects.getCommonPrefixes()) { + for (CommonPrefix prefix : objects.getCommonPrefixes()) { Path keyPath = getStoreContext() .getContextAccessors() - .keyToPath(prefix); - if (acceptor.accept(keyPath, prefix) && filter.accept(keyPath)) { + .keyToPath(prefix.prefix()); + if (acceptor.accept(keyPath, prefix.prefix()) && filter.accept(keyPath)) { S3AFileStatus status = new S3AFileStatus(Tristate.FALSE, keyPath, getStoreContext().getUsername()); LOG.debug("Adding directory: {}", status); @@ -731,18 +732,18 @@ public class Listing extends AbstractStoreOperation { } /** - * Reject a summary entry if the key path is the qualified Path, or + * Reject a s3Object entry if the key path is the qualified Path, or * it ends with {@code "_$folder$"}. * @param keyPath key path of the entry - * @param summary summary entry + * @param s3Object s3Object entry * @return true if the entry is accepted (i.e. that a status entry * should be generated. */ @Override - public boolean accept(Path keyPath, S3ObjectSummary summary) { + public boolean accept(Path keyPath, S3Object s3Object) { return !keyPath.equals(qualifiedPath) - && !summary.getKey().endsWith(S3N_FOLDER_SUFFIX) - && !objectRepresentsDirectory(summary.getKey()); + && !s3Object.key().endsWith(S3N_FOLDER_SUFFIX) + && !objectRepresentsDirectory(s3Object.key()); } /** @@ -767,8 +768,8 @@ public class Listing extends AbstractStoreOperation { */ static class AcceptAllButS3nDirs implements FileStatusAcceptor { - public boolean accept(Path keyPath, S3ObjectSummary summary) { - return !summary.getKey().endsWith(S3N_FOLDER_SUFFIX); + public boolean accept(Path keyPath, S3Object s3Object) { + return !s3Object.key().endsWith(S3N_FOLDER_SUFFIX); } public boolean accept(Path keyPath, String prefix) { @@ -799,17 +800,17 @@ public class Listing extends AbstractStoreOperation { } /** - * Reject a summary entry if the key path is the qualified Path, or + * Reject a s3Object entry if the key path is the qualified Path, or * it ends with {@code "_$folder$"}. * @param keyPath key path of the entry - * @param summary summary entry + * @param s3Object s3Object entry * @return true if the entry is accepted (i.e. that a status entry * should be generated.) 
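Outside the patch itself, the accessor renaming visible above (getKey()/getETag() becoming key()/eTag(), common prefixes becoming CommonPrefix objects) looks like this against the bare v2 client; the bucket, prefix and ListingSketch class are placeholders.

import software.amazon.awssdk.services.s3.S3Client;
import software.amazon.awssdk.services.s3.model.CommonPrefix;
import software.amazon.awssdk.services.s3.model.ListObjectsV2Request;
import software.amazon.awssdk.services.s3.model.ListObjectsV2Response;
import software.amazon.awssdk.services.s3.model.S3Object;

public final class ListingSketch {
  public static void listOnePage(S3Client s3) {
    ListObjectsV2Response response = s3.listObjectsV2(
        ListObjectsV2Request.builder()
            .bucket("example-bucket")   // placeholder
            .prefix("data/")            // placeholder
            .delimiter("/")
            .build());
    for (S3Object object : response.contents()) {            // v1: getObjectSummaries()
      System.out.println(object.key() + " etag=" + object.eTag());
    }
    for (CommonPrefix prefix : response.commonPrefixes()) {   // v2 returns objects, not Strings
      System.out.println("dir: " + prefix.prefix());
    }
  }
}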
*/ @Override - public boolean accept(Path keyPath, S3ObjectSummary summary) { + public boolean accept(Path keyPath, S3Object s3Object) { return !keyPath.equals(qualifiedPath) && - !summary.getKey().endsWith(S3N_FOLDER_SUFFIX); + !s3Object.key().endsWith(S3N_FOLDER_SUFFIX); } /** diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/MultipartUtils.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/MultipartUtils.java index d8c820cd8a1..296ec18dcf1 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/MultipartUtils.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/MultipartUtils.java @@ -23,13 +23,14 @@ import java.util.ListIterator; import java.util.NoSuchElementException; import javax.annotation.Nullable; -import com.amazonaws.services.s3.AmazonS3; -import com.amazonaws.services.s3.model.ListMultipartUploadsRequest; -import com.amazonaws.services.s3.model.MultipartUpload; -import com.amazonaws.services.s3.model.MultipartUploadListing; +import software.amazon.awssdk.services.s3.S3Client; +import software.amazon.awssdk.services.s3.model.ListMultipartUploadsRequest; +import software.amazon.awssdk.services.s3.model.ListMultipartUploadsResponse; +import software.amazon.awssdk.services.s3.model.MultipartUpload; import org.slf4j.Logger; import org.slf4j.LoggerFactory; + import org.apache.hadoop.fs.RemoteIterator; import org.apache.hadoop.fs.s3a.api.RequestFactory; import org.apache.hadoop.fs.s3a.impl.StoreContext; @@ -43,7 +44,7 @@ import static org.apache.hadoop.fs.statistics.impl.IOStatisticsBinding.trackDura * MultipartUtils upload-specific functions for use by S3AFileSystem and Hadoop * CLI. * The Audit span active when - * {@link #listMultipartUploads(StoreContext, AmazonS3, String, int)} + * {@link #listMultipartUploads(StoreContext, S3Client, String, int)} * was invoked is retained for all subsequent operations. */ public final class MultipartUtils { @@ -67,7 +68,7 @@ public final class MultipartUtils { */ static MultipartUtils.UploadIterator listMultipartUploads( final StoreContext storeContext, - AmazonS3 s3, + S3Client s3, @Nullable String prefix, int maxKeys) throws IOException { @@ -84,14 +85,14 @@ public final class MultipartUtils { * at the time the iterator was constructed. */ static class ListingIterator implements - RemoteIterator { + RemoteIterator { private final String prefix; private final RequestFactory requestFactory; private final int maxKeys; - private final AmazonS3 s3; + private final S3Client s3; private final Invoker invoker; private final AuditSpan auditSpan; @@ -101,7 +102,7 @@ public final class MultipartUtils { /** * Most recent listing results. */ - private MultipartUploadListing listing; + private ListMultipartUploadsResponse listing; /** * Indicator that this is the first listing. 
@@ -114,7 +115,7 @@ public final class MultipartUtils { private int listCount = 0; ListingIterator(final StoreContext storeContext, - AmazonS3 s3, + S3Client s3, @Nullable String prefix, int maxKeys) throws IOException { this.storeContext = storeContext; @@ -153,7 +154,7 @@ public final class MultipartUtils { */ @Override @Retries.RetryTranslated - public MultipartUploadListing next() throws IOException { + public ListMultipartUploadsResponse next() throws IOException { if (firstListing) { firstListing = false; } else { @@ -171,32 +172,34 @@ public final class MultipartUtils { public String toString() { return "Upload iterator: prefix " + prefix + "; list count " + listCount - + "; upload count " + listing.getMultipartUploads().size() + + "; upload count " + listing.uploads().size() + "; isTruncated=" + listing.isTruncated(); } @Retries.RetryTranslated private void requestNextBatch() throws IOException { try (AuditSpan span = auditSpan.activate()) { - ListMultipartUploadsRequest req = requestFactory - .newListMultipartUploadsRequest(prefix); + ListMultipartUploadsRequest.Builder requestBuilder = requestFactory + .newListMultipartUploadsRequestBuilder(prefix); if (!firstListing) { - req.setKeyMarker(listing.getNextKeyMarker()); - req.setUploadIdMarker(listing.getNextUploadIdMarker()); + requestBuilder.keyMarker(listing.nextKeyMarker()); + requestBuilder.uploadIdMarker(listing.nextUploadIdMarker()); } - req.setMaxUploads(maxKeys); + requestBuilder.maxUploads(maxKeys); + + ListMultipartUploadsRequest request = requestBuilder.build(); LOG.debug("[{}], Requesting next {} uploads prefix {}, " + "next key {}, next upload id {}", listCount, maxKeys, prefix, - req.getKeyMarker(), req.getUploadIdMarker()); + request.keyMarker(), request.uploadIdMarker()); listCount++; listing = invoker.retry("listMultipartUploads", prefix, true, trackDurationOfOperation(storeContext.getInstrumentation(), MULTIPART_UPLOAD_LIST.getSymbol(), - () -> s3.listMultipartUploads(req))); + () -> s3.listMultipartUploads(requestBuilder.build()))); LOG.debug("Listing found {} upload(s)", - listing.getMultipartUploads().size()); + listing.uploads().size()); LOG.debug("New listing state: {}", this); } } @@ -216,14 +219,14 @@ public final class MultipartUtils { */ private ListingIterator lister; /** Current listing: the last upload listing we fetched. */ - private MultipartUploadListing listing; + private ListMultipartUploadsResponse listing; /** Iterator over the current listing. 
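For reference, a standalone sketch of the same pagination pattern against the raw v2 client, without the Hadoop request factory or Invoker; the bucket, page size and class name are arbitrary.

import software.amazon.awssdk.services.s3.S3Client;
import software.amazon.awssdk.services.s3.model.ListMultipartUploadsRequest;
import software.amazon.awssdk.services.s3.model.ListMultipartUploadsResponse;
import software.amazon.awssdk.services.s3.model.MultipartUpload;

public final class ListUploadsSketch {
  /** Page through all pending multipart uploads under a prefix. */
  public static void listAll(S3Client s3, String bucket, String prefix) {
    String keyMarker = null;
    String uploadIdMarker = null;
    ListMultipartUploadsResponse page;
    do {
      ListMultipartUploadsRequest.Builder builder = ListMultipartUploadsRequest.builder()
          .bucket(bucket)
          .prefix(prefix)
          .maxUploads(100);          // arbitrary page size
      if (keyMarker != null) {
        // continuation markers from the previous page
        builder.keyMarker(keyMarker).uploadIdMarker(uploadIdMarker);
      }
      page = s3.listMultipartUploads(builder.build());
      for (MultipartUpload upload : page.uploads()) {
        System.out.println(upload.key() + " " + upload.uploadId());
      }
      keyMarker = page.nextKeyMarker();
      uploadIdMarker = page.nextUploadIdMarker();
    } while (Boolean.TRUE.equals(page.isTruncated()));
  }
}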
*/ private ListIterator batchIterator; @Retries.RetryTranslated public UploadIterator( final StoreContext storeContext, - AmazonS3 s3, + S3Client s3, int maxKeys, @Nullable String prefix) throws IOException { @@ -249,7 +252,7 @@ public final class MultipartUtils { private boolean requestNextBatch() throws IOException { if (lister.hasNext()) { listing = lister.next(); - batchIterator = listing.getMultipartUploads().listIterator(); + batchIterator = listing.uploads().listIterator(); return batchIterator.hasNext(); } return false; diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/ProgressableProgressListener.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/ProgressableProgressListener.java index 0ce022aa885..b614b379bd6 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/ProgressableProgressListener.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/ProgressableProgressListener.java @@ -18,59 +18,55 @@ package org.apache.hadoop.fs.s3a; -import com.amazonaws.event.ProgressEvent; -import com.amazonaws.event.ProgressEventType; -import com.amazonaws.event.ProgressListener; -import com.amazonaws.services.s3.transfer.Upload; +import software.amazon.awssdk.transfer.s3.model.ObjectTransfer; +import software.amazon.awssdk.transfer.s3.progress.TransferListener; import org.apache.hadoop.util.Progressable; import org.slf4j.Logger; -import static com.amazonaws.event.ProgressEventType.TRANSFER_COMPLETED_EVENT; -import static com.amazonaws.event.ProgressEventType.TRANSFER_PART_STARTED_EVENT; /** * Listener to progress from AWS regarding transfers. */ -public class ProgressableProgressListener implements ProgressListener { +public class ProgressableProgressListener implements TransferListener { private static final Logger LOG = S3AFileSystem.LOG; private final S3AFileSystem fs; private final String key; private final Progressable progress; private long lastBytesTransferred; - private final Upload upload; /** * Instantiate. * @param fs filesystem: will be invoked with statistics updates * @param key key for the upload - * @param upload source of events * @param progress optional callback for progress. 
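A rough sketch, not taken from the patch, of how a TransferListener such as this one can be attached to a v2 transfer manager upload; the bucket, key, file path and TransferListenerSketch class are placeholders.

import java.nio.file.Paths;

import software.amazon.awssdk.services.s3.S3AsyncClient;
import software.amazon.awssdk.transfer.s3.S3TransferManager;
import software.amazon.awssdk.transfer.s3.model.CompletedFileUpload;
import software.amazon.awssdk.transfer.s3.model.FileUpload;
import software.amazon.awssdk.transfer.s3.model.UploadFileRequest;
import software.amazon.awssdk.transfer.s3.progress.TransferListener;

public final class TransferListenerSketch {
  public static CompletedFileUpload upload(S3AsyncClient asyncClient, TransferListener listener) {
    S3TransferManager transferManager = S3TransferManager.builder()
        .s3Client(asyncClient)
        .build();
    FileUpload upload = transferManager.uploadFile(
        UploadFileRequest.builder()
            .putObjectRequest(b -> b.bucket("example-bucket").key("example-key"))  // placeholders
            .source(Paths.get("/tmp/example.dat"))                                 // placeholder
            .addTransferListener(listener)
            .build());
    return upload.completionFuture().join();
  }
}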
*/ public ProgressableProgressListener(S3AFileSystem fs, String key, - Upload upload, Progressable progress) { this.fs = fs; this.key = key; - this.upload = upload; this.progress = progress; this.lastBytesTransferred = 0; } @Override - public void progressChanged(ProgressEvent progressEvent) { - if (progress != null) { + public void transferInitiated(TransferListener.Context.TransferInitiated context) { + fs.incrementWriteOperations(); + } + + @Override + public void transferComplete(TransferListener.Context.TransferComplete context) { + fs.incrementWriteOperations(); + } + + @Override + public void bytesTransferred(TransferListener.Context.BytesTransferred context) { + + if(progress != null) { progress.progress(); } - // There are 3 http ops here, but this should be close enough for now - ProgressEventType pet = progressEvent.getEventType(); - if (pet == TRANSFER_PART_STARTED_EVENT || - pet == TRANSFER_COMPLETED_EVENT) { - fs.incrementWriteOperations(); - } - - long transferred = upload.getProgress().getBytesTransferred(); + long transferred = context.progressSnapshot().transferredBytes(); long delta = transferred - lastBytesTransferred; fs.incrementPutProgressStatistics(key, delta); lastBytesTransferred = transferred; @@ -81,9 +77,10 @@ public class ProgressableProgressListener implements ProgressListener { * This can handle race conditions in setup/teardown. * @return the number of bytes which were transferred after the notification */ - public long uploadCompleted() { - long delta = upload.getProgress().getBytesTransferred() - - lastBytesTransferred; + public long uploadCompleted(ObjectTransfer upload) { + + long delta = + upload.progress().snapshot().transferredBytes() - lastBytesTransferred; if (delta > 0) { LOG.debug("S3A write delta changed after finished: {} bytes", delta); fs.incrementPutProgressStatistics(key, delta); diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ABlockOutputStream.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ABlockOutputStream.java index 43a2b7e0dbd..de0f59154e9 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ABlockOutputStream.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ABlockOutputStream.java @@ -31,15 +31,16 @@ import java.util.concurrent.ExecutorService; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicInteger; -import com.amazonaws.SdkBaseException; -import com.amazonaws.event.ProgressEvent; -import com.amazonaws.event.ProgressEventType; -import com.amazonaws.event.ProgressListener; -import com.amazonaws.services.s3.model.PartETag; -import com.amazonaws.services.s3.model.PutObjectRequest; -import com.amazonaws.services.s3.model.PutObjectResult; -import com.amazonaws.services.s3.model.UploadPartRequest; +import software.amazon.awssdk.core.exception.SdkException; +import software.amazon.awssdk.core.sync.RequestBody; +import software.amazon.awssdk.services.s3.model.CompletedPart; +import software.amazon.awssdk.services.s3.model.PutObjectRequest; +import software.amazon.awssdk.services.s3.model.PutObjectResponse; +import software.amazon.awssdk.services.s3.model.UploadPartRequest; +import software.amazon.awssdk.services.s3.model.UploadPartResponse; +import org.apache.hadoop.fs.s3a.impl.ProgressListener; +import org.apache.hadoop.fs.s3a.impl.ProgressListenerEvent; import org.apache.hadoop.fs.s3a.impl.PutObjectOptions; import org.apache.hadoop.fs.statistics.IOStatisticsAggregator; import 
org.apache.hadoop.util.Preconditions; @@ -69,6 +70,7 @@ import org.apache.hadoop.util.Progressable; import static java.util.Objects.requireNonNull; import static org.apache.hadoop.fs.s3a.S3AUtils.*; import static org.apache.hadoop.fs.s3a.Statistic.*; +import static org.apache.hadoop.fs.s3a.impl.ProgressListenerEvent.*; import static org.apache.hadoop.fs.s3a.statistics.impl.EmptyS3AStatisticsContext.EMPTY_BLOCK_OUTPUT_STREAM_STATISTICS; import static org.apache.hadoop.fs.statistics.impl.IOStatisticsBinding.trackDuration; import static org.apache.hadoop.fs.statistics.impl.IOStatisticsBinding.trackDurationOfInvocation; @@ -192,7 +194,7 @@ class S3ABlockOutputStream extends OutputStream implements this.executorService = MoreExecutors.listeningDecorator( builder.executorService); this.multiPartUpload = null; - final Progressable progress = builder.progress; + Progressable progress = builder.progress; this.progressListener = (progress instanceof ProgressListener) ? (ProgressListener) progress : new ProgressableListener(progress); @@ -439,7 +441,7 @@ class S3ABlockOutputStream extends OutputStream implements uploadCurrentBlock(true); } // wait for the partial uploads to finish - final List partETags = + final List partETags = multiPartUpload.waitForAllPartUploads(); bytes = bytesSubmitted; @@ -597,27 +599,28 @@ class S3ABlockOutputStream extends OutputStream implements final PutObjectRequest putObjectRequest = uploadData.hasFile() ? writeOperationHelper.createPutObjectRequest( key, - uploadData.getFile(), - builder.putOptions) + uploadData.getFile().length(), + builder.putOptions, + true) : writeOperationHelper.createPutObjectRequest( key, - uploadData.getUploadStream(), size, - builder.putOptions); - BlockUploadProgress callback = - new BlockUploadProgress( - block, progressListener, now()); - putObjectRequest.setGeneralProgressListener(callback); + builder.putOptions, + false); + + BlockUploadProgress progressCallback = + new BlockUploadProgress(block, progressListener, now()); statistics.blockUploadQueued(size); - ListenableFuture putObjectResult = + ListenableFuture putObjectResult = executorService.submit(() -> { try { // the putObject call automatically closes the input // stream afterwards. - return writeOperationHelper.putObject( - putObjectRequest, - builder.putOptions, - statistics); + PutObjectResponse response = + writeOperationHelper.putObject(putObjectRequest, builder.putOptions, uploadData, + uploadData.hasFile(), statistics); + progressCallback.progressChanged(REQUEST_BYTE_TRANSFER_EVENT); + return response; } finally { cleanupWithLogger(LOG, uploadData, block); } @@ -761,7 +764,7 @@ class S3ABlockOutputStream extends OutputStream implements */ private class MultiPartUpload { private final String uploadId; - private final List> partETagsFutures; + private final List> partETagsFutures; private int partsSubmitted; private int partsUploaded; private long bytesSubmitted; @@ -866,18 +869,19 @@ class S3ABlockOutputStream extends OutputStream implements final int currentPartNumber = partETagsFutures.size() + 1; final UploadPartRequest request; final S3ADataBlocks.BlockUploadData uploadData; + final RequestBody requestBody; try { uploadData = block.startUpload(); - request = writeOperationHelper.newUploadPartRequest( + requestBody = uploadData.hasFile() + ? 
RequestBody.fromFile(uploadData.getFile()) + : RequestBody.fromInputStream(uploadData.getUploadStream(), size); + + request = writeOperationHelper.newUploadPartRequestBuilder( key, uploadId, currentPartNumber, - size, - uploadData.getUploadStream(), - uploadData.getFile(), - 0L); - request.setLastPart(isLast); - } catch (SdkBaseException aws) { + size).build(); + } catch (SdkException aws) { // catch and translate IOException e = translateException("upload", key, aws); // failure to start the upload. @@ -888,28 +892,38 @@ class S3ABlockOutputStream extends OutputStream implements noteUploadFailure(e); throw e; } - BlockUploadProgress callback = - new BlockUploadProgress( - block, progressListener, now()); - request.setGeneralProgressListener(callback); + + BlockUploadProgress progressCallback = + new BlockUploadProgress(block, progressListener, now()); + statistics.blockUploadQueued(block.dataSize()); - ListenableFuture partETagFuture = + ListenableFuture partETagFuture = executorService.submit(() -> { // this is the queued upload operation // do the upload try { LOG.debug("Uploading part {} for id '{}'", currentPartNumber, uploadId); - PartETag partETag = writeOperationHelper.uploadPart(request, statistics) - .getPartETag(); + + progressCallback.progressChanged(TRANSFER_PART_STARTED_EVENT); + + UploadPartResponse response = writeOperationHelper + .uploadPart(request, requestBody, statistics); LOG.debug("Completed upload of {} to part {}", - block, partETag.getETag()); + block, response.eTag()); LOG.debug("Stream statistics of {}", statistics); partsUploaded++; - return partETag; + + progressCallback.progressChanged(TRANSFER_PART_COMPLETED_EVENT); + + return CompletedPart.builder() + .eTag(response.eTag()) + .partNumber(currentPartNumber) + .build(); } catch (IOException e) { // save immediately. noteUploadFailure(e); + progressCallback.progressChanged(TRANSFER_PART_FAILED_EVENT); throw e; } finally { // close the stream and block @@ -924,7 +938,7 @@ class S3ABlockOutputStream extends OutputStream implements * @return list of results * @throws IOException IO Problems */ - private List waitForAllPartUploads() throws IOException { + private List waitForAllPartUploads() throws IOException { LOG.debug("Waiting for {} uploads to complete", partETagsFutures.size()); try { return Futures.allAsList(partETagsFutures).get(); @@ -948,7 +962,7 @@ class S3ABlockOutputStream extends OutputStream implements */ private void cancelAllActiveFutures() { LOG.debug("Cancelling futures"); - for (ListenableFuture future : partETagsFutures) { + for (ListenableFuture future : partETagsFutures) { future.cancel(true); } } @@ -960,7 +974,7 @@ class S3ABlockOutputStream extends OutputStream implements * @param partETags list of partial uploads * @throws IOException on any problem */ - private void complete(List partETags) + private void complete(List partETags) throws IOException { maybeRethrowUploadFailure(); AtomicInteger errorCount = new AtomicInteger(0); @@ -1005,22 +1019,24 @@ class S3ABlockOutputStream extends OutputStream implements } } + /** * The upload progress listener registered for events returned * during the upload of a single block. * It updates statistics and handles the end of the upload. * Transfer failures are logged at WARN. 
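A reduced sketch of the same part-upload flow against the bare v2 client, showing the RequestBody choice and the CompletedPart bookkeeping that replaces PartETag; the bucket, key and UploadPartSketch class are placeholders, not code from the patch.

import java.io.File;

import software.amazon.awssdk.core.sync.RequestBody;
import software.amazon.awssdk.services.s3.S3Client;
import software.amazon.awssdk.services.s3.model.CompletedPart;
import software.amazon.awssdk.services.s3.model.UploadPartRequest;
import software.amazon.awssdk.services.s3.model.UploadPartResponse;

public final class UploadPartSketch {
  public static CompletedPart uploadPart(S3Client s3, File blockFile,
      String uploadId, int partNumber) {
    UploadPartRequest request = UploadPartRequest.builder()
        .bucket("example-bucket")   // placeholder
        .key("example-key")         // placeholder
        .uploadId(uploadId)
        .partNumber(partNumber)
        .build();
    // A file-backed block streams from disk; an in-memory block would use
    // RequestBody.fromInputStream(stream, size) instead.
    UploadPartResponse response = s3.uploadPart(request, RequestBody.fromFile(blockFile));
    return CompletedPart.builder()
        .eTag(response.eTag())
        .partNumber(partNumber)
        .build();
  }
}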
*/ - private final class BlockUploadProgress implements ProgressListener { + private final class BlockUploadProgress { + private final S3ADataBlocks.DataBlock block; private final ProgressListener nextListener; private final Instant transferQueueTime; private Instant transferStartTime; + private long size; /** * Track the progress of a single block upload. * @param block block to monitor - * @param nextListener optional next progress listener * @param transferQueueTime time the block was transferred * into the queue */ @@ -1029,20 +1045,17 @@ class S3ABlockOutputStream extends OutputStream implements Instant transferQueueTime) { this.block = block; this.transferQueueTime = transferQueueTime; + this.size = block.dataSize(); this.nextListener = nextListener; } - @Override - public void progressChanged(ProgressEvent progressEvent) { - ProgressEventType eventType = progressEvent.getEventType(); - long bytesTransferred = progressEvent.getBytesTransferred(); + public void progressChanged(ProgressListenerEvent eventType) { - long size = block.dataSize(); switch (eventType) { case REQUEST_BYTE_TRANSFER_EVENT: // bytes uploaded - statistics.bytesTransferred(bytesTransferred); + statistics.bytesTransferred(size); break; case TRANSFER_PART_STARTED_EVENT: @@ -1057,6 +1070,7 @@ class S3ABlockOutputStream extends OutputStream implements statistics.blockUploadCompleted( Duration.between(transferStartTime, now()), size); + statistics.bytesTransferred(size); break; case TRANSFER_PART_FAILED_EVENT: @@ -1071,13 +1085,13 @@ class S3ABlockOutputStream extends OutputStream implements } if (nextListener != null) { - nextListener.progressChanged(progressEvent); + nextListener.progressChanged(eventType, size); } } } /** - * Bridge from AWS {@code ProgressListener} to Hadoop {@link Progressable}. + * Bridge from {@link ProgressListener} to Hadoop {@link Progressable}. */ private static class ProgressableListener implements ProgressListener { private final Progressable progress; @@ -1086,7 +1100,7 @@ class S3ABlockOutputStream extends OutputStream implements this.progress = progress; } - public void progressChanged(ProgressEvent progressEvent) { + public void progressChanged(ProgressListenerEvent eventType, int bytesTransferred) { if (progress != null) { progress.progress(); } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ADataBlocks.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ADataBlocks.java index ddaf059b055..5aa43245c2f 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ADataBlocks.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ADataBlocks.java @@ -47,7 +47,7 @@ import static org.apache.hadoop.io.IOUtils.cleanupWithLogger; * Set of classes to support output streaming into blocks which are then * uploaded as to S3 as a single PUT, or as part of a multipart request. */ -final class S3ADataBlocks { +public final class S3ADataBlocks { private static final Logger LOG = LoggerFactory.getLogger(S3ADataBlocks.class); @@ -101,7 +101,7 @@ final class S3ADataBlocks { * It can be one of a file or an input stream. * When closed, any stream is closed. Any source file is untouched. */ - static final class BlockUploadData implements Closeable { + public static final class BlockUploadData implements Closeable { private final File file; private final InputStream uploadStream; @@ -109,7 +109,7 @@ final class S3ADataBlocks { * File constructor; input stream will be null. 
* @param file file to upload */ - BlockUploadData(File file) { + public BlockUploadData(File file) { Preconditions.checkArgument(file.exists(), "No file: " + file); this.file = file; this.uploadStream = null; @@ -119,7 +119,7 @@ final class S3ADataBlocks { * Stream constructor, file field will be null. * @param uploadStream stream to upload */ - BlockUploadData(InputStream uploadStream) { + public BlockUploadData(InputStream uploadStream) { Preconditions.checkNotNull(uploadStream, "rawUploadStream"); this.uploadStream = uploadStream; this.file = null; diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java index 155a91a8af1..624ade47599 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java @@ -21,17 +21,20 @@ package org.apache.hadoop.fs.s3a; import java.io.File; import java.io.FileNotFoundException; import java.io.IOException; +import java.io.InputStream; import java.io.InterruptedIOException; import java.io.UncheckedIOException; import java.net.URI; import java.nio.file.AccessDeniedException; import java.text.DateFormat; import java.text.SimpleDateFormat; +import java.time.Instant; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; import java.util.Date; import java.util.EnumSet; +import java.util.HashMap; import java.util.Iterator; import java.util.List; import java.util.Locale; @@ -41,6 +44,7 @@ import java.util.Set; import java.util.Objects; import java.util.TreeSet; import java.util.concurrent.CompletableFuture; +import java.util.concurrent.CompletionException; import java.util.concurrent.ExecutorService; import java.util.concurrent.LinkedBlockingQueue; import java.util.concurrent.ThreadPoolExecutor; @@ -48,43 +52,53 @@ import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; import javax.annotation.Nullable; -import com.amazonaws.AmazonClientException; -import com.amazonaws.AmazonServiceException; -import com.amazonaws.SdkBaseException; -import com.amazonaws.services.s3.AmazonS3; -import com.amazonaws.services.s3.Headers; -import com.amazonaws.services.s3.model.CannedAccessControlList; -import com.amazonaws.services.s3.model.CompleteMultipartUploadRequest; -import com.amazonaws.services.s3.model.CompleteMultipartUploadResult; -import com.amazonaws.services.s3.model.CopyObjectRequest; -import com.amazonaws.services.s3.model.DeleteObjectsRequest; -import com.amazonaws.services.s3.model.DeleteObjectsResult; -import com.amazonaws.services.s3.model.GetObjectMetadataRequest; -import com.amazonaws.services.s3.model.GetObjectRequest; -import com.amazonaws.services.s3.model.InitiateMultipartUploadRequest; -import com.amazonaws.services.s3.model.InitiateMultipartUploadResult; -import com.amazonaws.services.s3.model.ListMultipartUploadsRequest; -import com.amazonaws.services.s3.model.ListObjectsRequest; -import com.amazonaws.services.s3.model.ListObjectsV2Request; -import com.amazonaws.services.s3.model.MultiObjectDeleteException; -import com.amazonaws.services.s3.model.MultipartUpload; -import com.amazonaws.services.s3.model.ObjectMetadata; -import com.amazonaws.services.s3.model.PutObjectRequest; -import com.amazonaws.services.s3.model.PutObjectResult; -import com.amazonaws.services.s3.model.S3Object; -import com.amazonaws.services.s3.model.SelectObjectContentRequest; -import 
com.amazonaws.services.s3.model.SelectObjectContentResult; -import com.amazonaws.services.s3.model.StorageClass; -import com.amazonaws.services.s3.model.UploadPartRequest; -import com.amazonaws.services.s3.model.UploadPartResult; -import com.amazonaws.services.s3.transfer.Copy; -import com.amazonaws.services.s3.transfer.TransferManager; -import com.amazonaws.services.s3.transfer.TransferManagerConfiguration; -import com.amazonaws.services.s3.transfer.Upload; -import com.amazonaws.services.s3.transfer.model.CopyResult; -import com.amazonaws.services.s3.transfer.model.UploadResult; -import com.amazonaws.event.ProgressListener; +import software.amazon.awssdk.core.ResponseInputStream; +import software.amazon.awssdk.core.exception.SdkException; +import software.amazon.awssdk.regions.Region; +import software.amazon.awssdk.services.s3.S3AsyncClient; +import software.amazon.awssdk.services.s3.S3Client; +import software.amazon.awssdk.services.s3.model.CompleteMultipartUploadRequest; +import software.amazon.awssdk.services.s3.model.CompleteMultipartUploadResponse; +import software.amazon.awssdk.services.s3.model.GetBucketLocationRequest; +import software.amazon.awssdk.services.s3.model.GetObjectRequest; +import software.amazon.awssdk.services.s3.model.GetObjectResponse; +import software.amazon.awssdk.services.s3.model.HeadBucketRequest; +import software.amazon.awssdk.services.s3.model.HeadBucketResponse; +import software.amazon.awssdk.services.s3.model.MultipartUpload; +import software.amazon.awssdk.services.s3.model.CreateMultipartUploadRequest; +import software.amazon.awssdk.services.s3.model.CreateMultipartUploadResponse; +import software.amazon.awssdk.services.s3.model.ListMultipartUploadsRequest; +import software.amazon.awssdk.services.s3.model.ListObjectsRequest; +import software.amazon.awssdk.services.s3.model.ListObjectsV2Request; +import software.amazon.awssdk.awscore.exception.AwsServiceException; +import software.amazon.awssdk.core.sync.RequestBody; +import software.amazon.awssdk.services.s3.model.CopyObjectRequest; +import software.amazon.awssdk.services.s3.model.CopyObjectResponse; +import software.amazon.awssdk.services.s3.model.DeleteObjectsRequest; +import software.amazon.awssdk.services.s3.model.DeleteObjectsResponse; +import software.amazon.awssdk.services.s3.model.HeadObjectRequest; +import software.amazon.awssdk.services.s3.model.HeadObjectResponse; +import software.amazon.awssdk.services.s3.model.NoSuchBucketException; +import software.amazon.awssdk.services.s3.model.ObjectIdentifier; +import software.amazon.awssdk.services.s3.model.PutObjectRequest; +import software.amazon.awssdk.services.s3.model.PutObjectResponse; +import software.amazon.awssdk.services.s3.model.S3Error; +import software.amazon.awssdk.services.s3.model.S3Exception; +import software.amazon.awssdk.services.s3.model.S3Object; +import software.amazon.awssdk.services.s3.model.SelectObjectContentRequest; +import software.amazon.awssdk.services.s3.model.SelectObjectContentResponseHandler; +import software.amazon.awssdk.services.s3.model.StorageClass; +import software.amazon.awssdk.services.s3.model.UploadPartRequest; +import software.amazon.awssdk.services.s3.model.UploadPartResponse; +import software.amazon.awssdk.transfer.s3.model.CompletedCopy; +import software.amazon.awssdk.transfer.s3.model.CompletedFileUpload; +import software.amazon.awssdk.transfer.s3.model.Copy; +import software.amazon.awssdk.transfer.s3.S3TransferManager; +import software.amazon.awssdk.transfer.s3.model.CopyRequest; +import 
software.amazon.awssdk.transfer.s3.model.FileUpload; +import software.amazon.awssdk.transfer.s3.model.UploadFileRequest; +import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.fs.impl.prefetch.ExecutorServiceFuturePool; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -108,11 +122,12 @@ import org.apache.hadoop.fs.s3a.audit.AuditSpanS3A; import org.apache.hadoop.fs.s3a.auth.SignerManager; import org.apache.hadoop.fs.s3a.auth.delegation.DelegationOperations; import org.apache.hadoop.fs.s3a.auth.delegation.DelegationTokenProvider; +import org.apache.hadoop.fs.s3a.impl.AWSCannedACL; +import org.apache.hadoop.fs.s3a.impl.AWSHeaders; import org.apache.hadoop.fs.s3a.impl.BulkDeleteRetryHandler; import org.apache.hadoop.fs.s3a.impl.ChangeDetectionPolicy; import org.apache.hadoop.fs.s3a.impl.ContextAccessors; import org.apache.hadoop.fs.s3a.impl.CopyFromLocalOperation; -import org.apache.hadoop.fs.s3a.impl.CopyOutcome; import org.apache.hadoop.fs.s3a.impl.CreateFileBuilder; import org.apache.hadoop.fs.s3a.impl.DeleteOperation; import org.apache.hadoop.fs.s3a.impl.DirectoryPolicy; @@ -122,6 +137,7 @@ import org.apache.hadoop.fs.s3a.impl.HeaderProcessing; import org.apache.hadoop.fs.s3a.impl.InternalConstants; import org.apache.hadoop.fs.s3a.impl.ListingOperationCallbacks; import org.apache.hadoop.fs.s3a.impl.MkdirOperation; +import org.apache.hadoop.fs.s3a.impl.MultiObjectDeleteException; import org.apache.hadoop.fs.s3a.impl.OpenFileSupport; import org.apache.hadoop.fs.s3a.impl.OperationCallbacks; import org.apache.hadoop.fs.s3a.impl.PutObjectOptions; @@ -141,6 +157,7 @@ import org.apache.hadoop.fs.statistics.IOStatistics; import org.apache.hadoop.fs.statistics.IOStatisticsSource; import org.apache.hadoop.fs.statistics.IOStatisticsContext; import org.apache.hadoop.fs.statistics.impl.IOStatisticsStore; +import org.apache.hadoop.fs.store.LogExactlyOnce; import org.apache.hadoop.fs.store.audit.AuditEntryPoint; import org.apache.hadoop.fs.store.audit.ActiveThreadSpanSource; import org.apache.hadoop.fs.store.audit.AuditSpan; @@ -213,6 +230,7 @@ import static org.apache.hadoop.fs.s3a.Listing.toLocatedFileStatusIterator; import static org.apache.hadoop.fs.s3a.S3AUtils.*; import static org.apache.hadoop.fs.s3a.Statistic.*; import static org.apache.hadoop.fs.s3a.audit.S3AAuditConstants.INITIALIZE_SPAN; +import static org.apache.hadoop.fs.s3a.auth.AwsCredentialListProvider.createAWSCredentialProviderSet; import static org.apache.hadoop.fs.s3a.auth.RolePolicies.STATEMENT_ALLOW_SSE_KMS_RW; import static org.apache.hadoop.fs.s3a.auth.RolePolicies.allowS3Operations; import static org.apache.hadoop.fs.s3a.auth.delegation.S3ADelegationTokens.TokenIssuingPolicy.NoTokensAvailable; @@ -224,14 +242,14 @@ import static org.apache.hadoop.fs.s3a.impl.CreateFileBuilder.OPTIONS_CREATE_FIL import static org.apache.hadoop.fs.s3a.impl.CreateFileBuilder.OPTIONS_CREATE_FILE_OVERWRITE; import static org.apache.hadoop.fs.s3a.impl.ErrorTranslation.isObjectNotFound; import static org.apache.hadoop.fs.s3a.impl.ErrorTranslation.isUnknownBucket; -import static org.apache.hadoop.fs.s3a.impl.InternalConstants.AP_INACCESSIBLE; import static org.apache.hadoop.fs.s3a.impl.InternalConstants.AP_REQUIRED_EXCEPTION; import static org.apache.hadoop.fs.s3a.impl.InternalConstants.ARN_BUCKET_OPTION; import static org.apache.hadoop.fs.s3a.impl.InternalConstants.CSE_PADDING_LENGTH; import static org.apache.hadoop.fs.s3a.impl.InternalConstants.DEFAULT_UPLOAD_PART_COUNT_LIMIT; import static 
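For illustration only, the v2 existence probe reduces to a HeadBucket call; a sketch against the bare client follows, with simplified status-code handling compared to verifyBucketExists() below (the BucketProbeSketch class is invented).

import software.amazon.awssdk.awscore.exception.AwsServiceException;
import software.amazon.awssdk.services.s3.S3Client;
import software.amazon.awssdk.services.s3.model.HeadBucketRequest;

public final class BucketProbeSketch {
  /** Return true if the bucket exists and is reachable with these credentials. */
  public static boolean bucketExists(S3Client s3, String bucket) {
    try {
      s3.headBucket(HeadBucketRequest.builder().bucket(bucket).build());
      return true;
    } catch (AwsServiceException e) {
      if (e.statusCode() == 404) {
        return false;   // no such bucket
      }
      throw e;          // 403 and others: surface to the caller for translation
    }
  }
}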
org.apache.hadoop.fs.s3a.impl.InternalConstants.DELETE_CONSIDERED_IDEMPOTENT; -import static org.apache.hadoop.fs.s3a.impl.InternalConstants.SC_403; -import static org.apache.hadoop.fs.s3a.impl.InternalConstants.SC_404; +import static org.apache.hadoop.fs.s3a.impl.InternalConstants.SC_301_MOVED_PERMANENTLY; +import static org.apache.hadoop.fs.s3a.impl.InternalConstants.SC_403_FORBIDDEN; +import static org.apache.hadoop.fs.s3a.impl.InternalConstants.SC_404_NOT_FOUND; import static org.apache.hadoop.fs.s3a.impl.InternalConstants.UPLOAD_PART_COUNT_LIMIT; import static org.apache.hadoop.fs.s3a.impl.NetworkBinding.fixBucketRegion; import static org.apache.hadoop.fs.s3a.impl.NetworkBinding.logDnsLookup; @@ -275,7 +293,8 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities, private URI uri; private Path workingDir; private String username; - private AmazonS3 s3; + private S3Client s3Client; + private S3AsyncClient s3AsyncClient; // initial callback policy is fail-once; it's there just to assist // some mock tests and other codepaths trying to call the low level // APIs on an uninitialized filesystem. @@ -294,7 +313,7 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities, private Listing listing; private long partSize; private boolean enableMultiObjectsDelete; - private TransferManager transfers; + private S3TransferManager transferManager; private ExecutorService boundedThreadPool; private ThreadPoolExecutor unboundedThreadPool; @@ -313,10 +332,12 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities, private int executorCapacity; private long multiPartThreshold; public static final Logger LOG = LoggerFactory.getLogger(S3AFileSystem.class); + /** Exactly once log to warn about setting the region in config to avoid probe. */ + private static final LogExactlyOnce SET_REGION_WARNING = new LogExactlyOnce(LOG); private static final Logger PROGRESS = LoggerFactory.getLogger("org.apache.hadoop.fs.s3a.S3AFileSystem.Progress"); private LocalDirAllocator directoryAllocator; - private CannedAccessControlList cannedACL; + private String cannedACL; /** * This must never be null; until initialized it just declares that there @@ -429,6 +450,8 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities, */ private String scheme = FS_S3A; + private final static Map BUCKET_REGIONS = new HashMap<>(); + /** Add any deprecated keys. */ @SuppressWarnings("deprecation") private static void addDeprecatedKeys() { @@ -585,9 +608,6 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities, // the encryption algorithms) bindAWSClient(name, delegationTokensEnabled); - initTransferManager(); - - // This initiates a probe against S3 for the bucket existing. doBucketProbing(); @@ -655,7 +675,7 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities, AWS_S3_VECTOR_ACTIVE_RANGE_READS, DEFAULT_AWS_S3_VECTOR_ACTIVE_RANGE_READS, 1); vectoredIOContext = populateVectoredIOContext(conf); scheme = (this.uri != null && this.uri.getScheme() != null) ? this.uri.getScheme() : FS_S3A; - } catch (AmazonClientException e) { + } catch (SdkException e) { // amazon client exception: stop all services then throw the translation cleanupWithLogger(LOG, span); stopAllServices(); @@ -707,7 +727,7 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities, * bucket existence check is not done to improve performance of * S3AFileSystem initialization. 
When set to 1 or 2, bucket existence check * will be performed which is potentially slow. - * If 3 or higher: warn and use the v2 check. + * If 3 or higher: warn and skip check. * Also logging DNS address of the s3 endpoint if the bucket probe value is * greater than 0 else skipping it for increased performance. * @throws UnknownStoreException the bucket is absent @@ -724,18 +744,14 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities, LOG.debug("skipping check for bucket existence"); break; case 1: + case 2: logDnsLookup(getConf()); verifyBucketExists(); break; - case 2: - logDnsLookup(getConf()); - verifyBucketExistsV2(); - break; default: // we have no idea what this is, assume it is from a later release. - LOG.warn("Unknown bucket probe option {}: {}; falling back to check #2", + LOG.warn("Unknown bucket probe option {}: {}; skipping check for bucket existence", S3A_BUCKET_PROBE, bucketProbe); - verifyBucketExistsV2(); break; } } @@ -828,54 +844,37 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities, } /** - * Verify that the bucket exists. This does not check permissions, - * not even read access. + * Verify that the bucket exists. * Retry policy: retrying, translated. * @throws UnknownStoreException the bucket is absent * @throws IOException any other problem talking to S3 */ @Retries.RetryTranslated - protected void verifyBucketExists() - throws UnknownStoreException, IOException { - if (!invoker.retry("doesBucketExist", bucket, true, - trackDurationOfOperation(getDurationTrackerFactory(), - STORE_EXISTS_PROBE.getSymbol(), - () -> s3.doesBucketExist(bucket)))) { - throw new UnknownStoreException("s3a://" + bucket + "/", " Bucket does " - + "not exist"); - } - } + protected void verifyBucketExists() throws UnknownStoreException, IOException { - /** - * Verify that the bucket exists. This will correctly throw an exception - * when credentials are invalid. - * Retry policy: retrying, translated. - * @throws UnknownStoreException the bucket is absent - * @throws IOException any other problem talking to S3 - */ - @Retries.RetryTranslated - protected void verifyBucketExistsV2() - throws UnknownStoreException, IOException { - if (!invoker.retry("doesBucketExistV2", bucket, true, - trackDurationOfOperation(getDurationTrackerFactory(), - STORE_EXISTS_PROBE.getSymbol(), - () -> { - // Bug in SDK always returns `true` for AccessPoint ARNs with `doesBucketExistV2()` - // expanding implementation to use ARNs and buckets correctly + if(!trackDurationAndSpan( + STORE_EXISTS_PROBE, bucket, null, () -> + invoker.retry("doestBucketExist", bucket, true, () -> { try { - s3.getBucketAcl(bucket); - } catch (AmazonServiceException ex) { - int statusCode = ex.getStatusCode(); - if (statusCode == SC_404 || - (statusCode == SC_403 && ex.getMessage().contains(AP_INACCESSIBLE))) { + if (BUCKET_REGIONS.containsKey(bucket)) { + return true; + } + s3Client.headBucket(HeadBucketRequest.builder().bucket(bucket).build()); + return true; + } catch (AwsServiceException ex) { + int statusCode = ex.statusCode(); + if (statusCode == SC_404_NOT_FOUND || + (statusCode == SC_403_FORBIDDEN && accessPoint != null)) { return false; } } return true; }))) { - throw new UnknownStoreException("s3a://" + bucket + "/", " Bucket does " - + "not exist"); + + throw new UnknownStoreException("s3a://" + bucket + "/", + " Bucket does " + "not exist. 
" + "Accessing with " + ENDPOINT + " set to " + + getConf().getTrimmed(ENDPOINT, null)); } } @@ -916,7 +915,6 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities, * @param dtEnabled are delegation tokens enabled? * @throws IOException failure. */ - @SuppressWarnings("deprecation") private void bindAWSClient(URI name, boolean dtEnabled) throws IOException { Configuration conf = getConf(); credentials = null; @@ -967,8 +965,14 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities, ? conf.getTrimmed(ENDPOINT, DEFAULT_ENDPOINT) : accessPoint.getEndpoint(); - S3ClientFactory.S3ClientCreationParameters parameters = null; - parameters = new S3ClientFactory.S3ClientCreationParameters() + String configuredRegion = accessPoint == null + ? conf.getTrimmed(AWS_REGION) + : accessPoint.getRegion(); + + Region region = getS3Region(configuredRegion); + + S3ClientFactory.S3ClientCreationParameters parameters = + new S3ClientFactory.S3ClientCreationParameters() .withCredentialSet(credentials) .withPathUri(name) .withEndpoint(endpoint) @@ -976,11 +980,96 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities, .withPathStyleAccess(conf.getBoolean(PATH_STYLE_ACCESS, false)) .withUserAgentSuffix(uaSuffix) .withRequesterPays(conf.getBoolean(ALLOW_REQUESTER_PAYS, DEFAULT_ALLOW_REQUESTER_PAYS)) - .withRequestHandlers(auditManager.createRequestHandlers()); + .withExecutionInterceptors(auditManager.createExecutionInterceptors()) + .withMinimumPartSize(partSize) + .withTransferManagerExecutor(unboundedThreadPool) + .withRegion(region); - s3 = ReflectionUtils.newInstance(s3ClientFactoryClass, conf) - .createS3Client(getUri(), - parameters); + S3ClientFactory clientFactory = ReflectionUtils.newInstance(s3ClientFactoryClass, conf); + s3Client = clientFactory.createS3Client(getUri(), parameters); + createS3AsyncClient(clientFactory, parameters); + transferManager = clientFactory.createS3TransferManager(s3AsyncClient); + } + + /** + * Creates and configures the S3AsyncClient. + * Uses synchronized method to suppress spotbugs error. + * + * @param clientFactory factory used to create S3AsyncClient + * @param parameters parameter object + * @throws IOException on any IO problem + */ + private synchronized void createS3AsyncClient(S3ClientFactory clientFactory, + S3ClientFactory.S3ClientCreationParameters parameters) throws IOException { + s3AsyncClient = clientFactory.createS3AsyncClient(getUri(), parameters); + } + + /** + * Get the bucket region. + * + * @param region AWS S3 Region set in the config. This property may not be set, in which case + * ask S3 for the region. + * @return region of the bucket. + */ + private Region getS3Region(String region) throws IOException { + + if (!StringUtils.isBlank(region)) { + return Region.of(region); + } + + Region cachedRegion = BUCKET_REGIONS.get(bucket); + + if (cachedRegion != null) { + LOG.debug("Got region {} for bucket {} from cache", cachedRegion, bucket); + return cachedRegion; + } + + Region s3Region = trackDurationAndSpan(STORE_REGION_PROBE, bucket, null, + () -> invoker.retry("getS3Region", bucket, true, () -> { + try { + + SET_REGION_WARNING.warn( + "Getting region for bucket {} from S3, this will slow down FS initialisation. " + + "To avoid this, set the region using property {}", bucket, + FS_S3A_BUCKET_PREFIX + bucket + ".endpoint.region"); + + // build a s3 client with region eu-west-1 that can be used to get the region of the + // bucket. 
Using eu-west-1, as headBucket() doesn't work with us-east-1. This is because + // us-east-1 uses the endpoint s3.amazonaws.com, which resolves bucket.s3.amazonaws.com + // to the actual region the bucket is in. As the request is signed with us-east-1 and + // not the bucket's region, it fails. + S3Client getRegionS3Client = + S3Client.builder().region(Region.EU_WEST_1).credentialsProvider(credentials) + .build(); + + HeadBucketResponse headBucketResponse = + getRegionS3Client.headBucket(HeadBucketRequest.builder().bucket(bucket).build()); + + Region bucketRegion = Region.of( + headBucketResponse.sdkHttpResponse().headers().get(BUCKET_REGION_HEADER).get(0)); + BUCKET_REGIONS.put(bucket, bucketRegion); + + return bucketRegion; + } catch (S3Exception exception) { + if (exception.statusCode() == SC_301_MOVED_PERMANENTLY) { + Region bucketRegion = Region.of( + exception.awsErrorDetails().sdkHttpResponse().headers().get(BUCKET_REGION_HEADER) + .get(0)); + BUCKET_REGIONS.put(bucket, bucketRegion); + + return bucketRegion; + } + + if (exception.statusCode() == SC_404_NOT_FOUND) { + throw new UnknownStoreException("s3a://" + bucket + "/", + " Bucket does " + "not exist"); + } + + throw exception; + } + })); + + return s3Region; } /** @@ -1080,12 +1169,14 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities, .toUpperCase(Locale.US); StorageClass storageClass = null; if (!storageClassConf.isEmpty()) { - try { - storageClass = StorageClass.fromValue(storageClassConf); - } catch (IllegalArgumentException e) { + storageClass = StorageClass.fromValue(storageClassConf); + + if (storageClass.equals(StorageClass.UNKNOWN_TO_SDK_VERSION)) { LOG.warn("Unknown storage class property {}: {}; falling back to default storage class", STORAGE_CLASS, storageClassConf); + storageClass = null; } + } else { LOG.debug("Unset storage class property {}; falling back to default storage class", STORAGE_CLASS); @@ -1151,22 +1242,10 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities, return encryptionSecrets; } - private void initTransferManager() { - TransferManagerConfiguration transferConfiguration = - new TransferManagerConfiguration(); - transferConfiguration.setMinimumUploadPartSize(partSize); - transferConfiguration.setMultipartUploadThreshold(multiPartThreshold); - transferConfiguration.setMultipartCopyPartSize(partSize); - transferConfiguration.setMultipartCopyThreshold(multiPartThreshold); - - transfers = new TransferManager(s3, unboundedThreadPool); - transfers.setConfiguration(transferConfiguration); - } - private void initCannedAcls(Configuration conf) { String cannedACLName = conf.get(CANNED_ACL, DEFAULT_CANNED_ACL); if (!cannedACLName.isEmpty()) { - cannedACL = CannedAccessControlList.valueOf(cannedACLName); + cannedACL = AWSCannedACL.valueOf(cannedACLName).toString(); } else { cannedACL = null; } @@ -1199,12 +1278,22 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities, public void abortOutstandingMultipartUploads(long seconds) throws IOException { Preconditions.checkArgument(seconds >= 0); - Date purgeBefore = - new Date(new Date().getTime() - seconds * 1000); + Instant purgeBefore = + Instant.now().minusSeconds(seconds); LOG.debug("Purging outstanding multipart uploads older than {}", purgeBefore); invoker.retry("Purging multipart uploads", bucket, true, - () -> transfers.abortMultipartUploads(bucket, purgeBefore)); + () -> { + MultipartUtils.UploadIterator uploadIterator = + 
MultipartUtils.listMultipartUploads(createStoreContext(), s3Client, null, maxKeys); + + while (uploadIterator.hasNext()) { + MultipartUpload upload = uploadIterator.next(); + if (upload.initiated().compareTo(purgeBefore) < 0) { + abortMultipartUpload(upload); + } + } + }); } /** @@ -1252,44 +1341,27 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities, return 0; } - /** - * Returns the S3 client used by this filesystem. - * This is for internal use within the S3A code itself. - * @return AmazonS3Client - */ - private AmazonS3 getAmazonS3Client() { - return s3; - } - /** * Returns the S3 client used by this filesystem. * Warning: this must only be used for testing, as it bypasses core * S3A operations. * @param reason a justification for requesting access. - * @return AmazonS3Client + * @return S3Client */ @VisibleForTesting - public AmazonS3 getAmazonS3ClientForTesting(String reason) { - LOG.warn("Access to S3A client requested, reason {}", reason); - V2Migration.v1S3ClientRequested(); - return s3; + public S3Client getAmazonS3ClientForTesting(String reason) { + LOG.warn("Access to S3 client requested, reason {}", reason); + return s3Client; } /** * Set the client -used in mocking tests to force in a different client. * @param client client. */ - protected void setAmazonS3Client(AmazonS3 client) { - Preconditions.checkNotNull(client, "client"); - LOG.debug("Setting S3 client to {}", client); - s3 = client; - - // Need to use a new TransferManager that uses the new client. - // Also, using a new TransferManager requires a new threadpool as the old - // TransferManager will shut the thread pool down when it is garbage - // collected. - initThreadPools(getConf()); - initTransferManager(); + protected void setAmazonS3Client(S3Client client) { + Preconditions.checkNotNull(client, "clientV2"); + LOG.debug("Setting S3V2 client to {}", client); + s3Client = client; } /** @@ -1308,6 +1380,7 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities, /** * Get the region of a bucket; fixing up the region so it can be used * in the builders of other AWS clients. + * TODO: Review. Used only for S3Guard? * Requires the caller to have the AWS role permission * {@code s3:GetBucketLocation}. * Retry policy: retrying, translated. @@ -1326,7 +1399,10 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities, // If accessPoint then region is known from Arn accessPoint != null ? accessPoint.getRegion() - : s3.getBucketLocation(bucketName))); + : s3Client.getBucketLocation(GetBucketLocationRequest.builder() + .bucket(bucketName) + .build()) + .locationConstraintAsString())); return fixBucketRegion(region); } @@ -1414,7 +1490,7 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities, * Get the canned ACL of this FS. 
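A standalone sketch of the v2 list-and-abort pattern used by the purge loop above, reduced to a single listing page; the client, bucket and class names are assumptions for illustration rather than S3A code.

```java
import java.time.Instant;
import software.amazon.awssdk.services.s3.S3Client;
import software.amazon.awssdk.services.s3.model.AbortMultipartUploadRequest;
import software.amazon.awssdk.services.s3.model.ListMultipartUploadsRequest;
import software.amazon.awssdk.services.s3.model.MultipartUpload;

public final class MultipartPurge {
  public static void purgeOlderThan(S3Client s3, String bucket, Instant purgeBefore) {
    ListMultipartUploadsRequest listRequest =
        ListMultipartUploadsRequest.builder().bucket(bucket).build();
    // Note: only the first page of uploads is examined in this sketch.
    for (MultipartUpload upload : s3.listMultipartUploads(listRequest).uploads()) {
      // initiated() is a java.time.Instant in the v2 model.
      if (upload.initiated().isBefore(purgeBefore)) {
        s3.abortMultipartUpload(AbortMultipartUploadRequest.builder()
            .bucket(bucket)
            .key(upload.key())
            .uploadId(upload.uploadId())
            .build());
      }
    }
  }
}
```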
* @return an ACL, if any */ - CannedAccessControlList getCannedACL() { + String getCannedACL() { return cannedACL; } @@ -1644,18 +1720,18 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities, } @Override - public GetObjectRequest newGetRequest(final String key) { + public GetObjectRequest.Builder newGetRequestBuilder(final String key) { // active the audit span used for the operation try (AuditSpan span = auditSpan.activate()) { - return getRequestFactory().newGetObjectRequest(key); + return getRequestFactory().newGetObjectRequestBuilder(key); } } @Override - public S3Object getObject(GetObjectRequest request) { + public ResponseInputStream getObject(GetObjectRequest request) { // active the audit span used for the operation try (AuditSpan span = auditSpan.activate()) { - return s3.getObject(request); + return s3Client.getObject(request); } } @@ -1682,18 +1758,19 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities, implements WriteOperationHelper.WriteOperationHelperCallbacks { @Override - public SelectObjectContentResult selectObjectContent(SelectObjectContentRequest request) { - return s3.selectObjectContent(request); + public CompletableFuture selectObjectContent( + SelectObjectContentRequest request, + SelectObjectContentResponseHandler responseHandler) { + return s3AsyncClient.selectObjectContent(request, responseHandler); } @Override - public CompleteMultipartUploadResult completeMultipartUpload( + public CompleteMultipartUploadResponse completeMultipartUpload( CompleteMultipartUploadRequest request) { - return s3.completeMultipartUpload(request); + return s3Client.completeMultipartUpload(request); } } - /** * Create the read context for reading from the referenced file, * using FS state as well as the status. @@ -2055,7 +2132,7 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities, innerRename(src, dst)); LOG.debug("Copied {} bytes", bytesCopied); return true; - } catch (AmazonClientException e) { + } catch (SdkException e) { throw translateException("rename(" + src +", " + dst + ")", src, e); } catch (RenameFailedException e) { LOG.info("{}", e.getMessage()); @@ -2166,7 +2243,7 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities, * This operation throws an exception on any failure which needs to be * reported and downgraded to a failure. * Retries: retry translated, assuming all operations it is called do - * so. For safely, consider catch and handle AmazonClientException + * so. For safely, consider catch and handle SdkException * because this is such a complex method there's a risk it could surface. * @param source path to be renamed * @param dest new path after rename @@ -2177,12 +2254,12 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities, * @return the number of bytes copied. * @throws FileNotFoundException there's no source file. * @throws IOException on IO failure. 
- * @throws AmazonClientException on failures inside the AWS SDK + * @throws SdkException on failures inside the AWS SDK */ @Retries.RetryMixed private long innerRename(Path source, Path dest) throws RenameFailedException, FileNotFoundException, IOException, - AmazonClientException { + SdkException { Path src = qualify(source); Path dst = qualify(dest); @@ -2277,7 +2354,7 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities, } @Override - public CopyResult copyFile(final String srcKey, + public CopyObjectResponse copyFile(final String srcKey, final String destKey, final S3ObjectAttributes srcAttributes, final S3AReadOpContext readContext) throws IOException { @@ -2288,9 +2365,9 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities, @Override public void removeKeys( - final List keysToDelete, + final List keysToDelete, final boolean deleteFakeDir) - throws MultiObjectDeleteException, AmazonClientException, IOException { + throws MultiObjectDeleteException, SdkException, IOException { auditSpan.activate(); S3AFileSystem.this.removeKeys(keysToDelete, deleteFakeDir); } @@ -2399,7 +2476,7 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities, @InterfaceAudience.LimitedPrivate("utilities") @Retries.RetryTranslated @InterfaceStability.Evolving - public ObjectMetadata getObjectMetadata(Path path) throws IOException { + public HeadObjectResponse getObjectMetadata(Path path) throws IOException { V2Migration.v1GetObjectMetadataCalled(); return trackDurationAndSpan(INVOCATION_GET_FILE_STATUS, path, () -> getObjectMetadata(makeQualified(path), null, invoker, @@ -2416,7 +2493,7 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities, * @throws IOException IO and object access problems. 
*/ @Retries.RetryTranslated - private ObjectMetadata getObjectMetadata(Path path, + private HeadObjectResponse getObjectMetadata(Path path, ChangeTracker changeTracker, Invoker changeInvoker, String operation) throws IOException { String key = pathToKey(path); @@ -2629,7 +2706,7 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities, @Retries.RetryRaw @VisibleForTesting @InterfaceAudience.LimitedPrivate("external utilities") - ObjectMetadata getObjectMetadata(String key) throws IOException { + HeadObjectResponse getObjectMetadata(String key) throws IOException { return getObjectMetadata(key, null, invoker, "getObjectMetadata"); } @@ -2646,28 +2723,28 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities, * @throws RemoteFileChangedException if an unexpected version is detected */ @Retries.RetryRaw - protected ObjectMetadata getObjectMetadata(String key, + protected HeadObjectResponse getObjectMetadata(String key, ChangeTracker changeTracker, Invoker changeInvoker, String operation) throws IOException { - ObjectMetadata meta = changeInvoker.retryUntranslated("GET " + key, true, + HeadObjectResponse response = changeInvoker.retryUntranslated("GET " + key, true, () -> { - GetObjectMetadataRequest request - = getRequestFactory().newGetObjectMetadataRequest(key); + HeadObjectRequest.Builder requestBuilder = + getRequestFactory().newHeadObjectRequestBuilder(key); incrementStatistic(OBJECT_METADATA_REQUESTS); DurationTracker duration = getDurationTrackerFactory() .trackDuration(ACTION_HTTP_HEAD_REQUEST.getSymbol()); try { LOG.debug("HEAD {} with change tracker {}", key, changeTracker); if (changeTracker != null) { - changeTracker.maybeApplyConstraint(request); + changeTracker.maybeApplyConstraint(requestBuilder); } - ObjectMetadata objectMetadata = s3.getObjectMetadata(request); + HeadObjectResponse headObjectResponse = s3Client.headObject(requestBuilder.build()); if (changeTracker != null) { - changeTracker.processMetadata(objectMetadata, operation); + changeTracker.processMetadata(headObjectResponse, operation); } - return objectMetadata; - } catch(AmazonServiceException ase) { + return headObjectResponse; + } catch (AwsServiceException ase) { if (!isObjectNotFound(ase)) { // file not found is not considered a failure of the call, // so only switch the duration tracker to update failure @@ -2681,7 +2758,27 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities, } }); incrementReadOperations(); - return meta; + return response; + } + + /** + * Request bucket metadata. 
+ * @return the metadata + * @throws UnknownStoreException the bucket is absent + * @throws IOException any other problem talking to S3 + */ + @Retries.RetryRaw + protected HeadBucketResponse getBucketMetadata() throws IOException { + final HeadBucketResponse response = trackDurationAndSpan(STORE_EXISTS_PROBE, bucket, null, + () -> invoker.retry("getBucketMetadata()", bucket, true, () -> { + try { + return s3Client.headBucket( + getRequestFactory().newHeadBucketRequestBuilder(bucket).build()); + } catch (NoSuchBucketException e) { + throw new UnknownStoreException("s3a://" + bucket + "/", " Bucket does " + "not exist"); + } + })); + return response; } /** @@ -2710,9 +2807,9 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities, OBJECT_LIST_REQUEST, () -> { if (useListV1) { - return S3ListResult.v1(s3.listObjects(request.getV1())); + return S3ListResult.v1(s3Client.listObjects(request.getV1())); } else { - return S3ListResult.v2(s3.listObjectsV2(request.getV2())); + return S3ListResult.v2(s3Client.listObjectsV2(request.getV2())); } })); } @@ -2755,15 +2852,21 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities, OBJECT_CONTINUE_LIST_REQUEST, () -> { if (useListV1) { - return S3ListResult.v1( - s3.listNextBatchOfObjects( - getRequestFactory() - .newListNextBatchOfObjectsRequest( - prevResult.getV1()))); + List prevListResult = prevResult.getV1().contents(); + + // Next markers are only present when a delimiter is specified. + String nextMarker; + if (prevResult.getV1().nextMarker() != null) { + nextMarker = prevResult.getV1().nextMarker(); + } else { + nextMarker = prevListResult.get(prevListResult.size() - 1).key(); + } + + return S3ListResult.v1(s3Client.listObjects( + request.getV1().toBuilder().marker(nextMarker).build())); } else { - request.getV2().setContinuationToken(prevResult.getV2() - .getNextContinuationToken()); - return S3ListResult.v2(s3.listObjectsV2(request.getV2())); + return S3ListResult.v2(s3Client.listObjectsV2(request.getV2().toBuilder() + .continuationToken(prevResult.getV2().nextContinuationToken()).build())); } })); } @@ -2793,14 +2896,14 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities, * * Retry policy: retry untranslated; delete considered idempotent. * @param key key to blob to delete. - * @throws AmazonClientException problems working with S3 + * @throws SdkException problems working with S3 * @throws InvalidRequestException if the request was rejected due to * a mistaken attempt to delete the root directory. 
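For reference, paging a v2 ListObjectsV2 listing with continuation tokens, as the continuation logic above does, can look roughly like this. Names are illustrative; v2 requests are immutable, so each page is fetched by rebuilding the request.

```java
import software.amazon.awssdk.services.s3.S3Client;
import software.amazon.awssdk.services.s3.model.ListObjectsV2Request;
import software.amazon.awssdk.services.s3.model.ListObjectsV2Response;
import software.amazon.awssdk.services.s3.model.S3Object;

public final class ListAllKeys {
  public static void listAll(S3Client s3, String bucket, String prefix) {
    ListObjectsV2Request request = ListObjectsV2Request.builder()
        .bucket(bucket)
        .prefix(prefix)
        .maxKeys(1000)
        .build();
    ListObjectsV2Response response;
    do {
      response = s3.listObjectsV2(request);
      for (S3Object object : response.contents()) {
        System.out.println(object.key() + " " + object.size());
      }
      // Continue by rebuilding the request with the next continuation token.
      request = request.toBuilder()
          .continuationToken(response.nextContinuationToken())
          .build();
    } while (Boolean.TRUE.equals(response.isTruncated()));
  }
}
```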
*/ @VisibleForTesting @Retries.RetryRaw protected void deleteObject(String key) - throws AmazonClientException, IOException { + throws SdkException, IOException { blockRootDelete(key); incrementWriteOperations(); try (DurationInfo ignored = @@ -2812,8 +2915,9 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities, incrementStatistic(OBJECT_DELETE_OBJECTS); trackDurationOfInvocation(getDurationTrackerFactory(), OBJECT_DELETE_REQUEST.getSymbol(), - () -> s3.deleteObject(getRequestFactory() - .newDeleteObjectRequest(key))); + () -> s3Client.deleteObject(getRequestFactory() + .newDeleteObjectRequestBuilder(key) + .build())); return null; }); } @@ -2826,14 +2930,14 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities, * @param f path path to delete * @param key key of entry * @param isFile is the path a file (used for instrumentation only) - * @throws AmazonClientException problems working with S3 + * @throws SdkException problems working with S3 * @throws IOException from invoker signature only -should not be raised. */ @Retries.RetryRaw void deleteObjectAtPath(Path f, String key, boolean isFile) - throws AmazonClientException, IOException { + throws SdkException, IOException { if (isFile) { instrumentation.fileDeleted(1); } else { @@ -2875,66 +2979,58 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities, * @return the AWS response * @throws MultiObjectDeleteException one or more of the keys could not * be deleted. - * @throws AmazonClientException amazon-layer failure. + * @throws SdkException amazon-layer failure. */ @Retries.RetryRaw - private DeleteObjectsResult deleteObjects(DeleteObjectsRequest deleteRequest) - throws MultiObjectDeleteException, AmazonClientException, IOException { + private DeleteObjectsResponse deleteObjects(DeleteObjectsRequest deleteRequest) + throws MultiObjectDeleteException, SdkException, IOException { incrementWriteOperations(); BulkDeleteRetryHandler retryHandler = new BulkDeleteRetryHandler(createStoreContext()); - int keyCount = deleteRequest.getKeys().size(); - try(DurationInfo ignored = + int keyCount = deleteRequest.delete().objects().size(); + try (DurationInfo ignored = new DurationInfo(LOG, false, "DELETE %d keys", keyCount)) { - return invoker.retryUntranslated("delete", - DELETE_CONSIDERED_IDEMPOTENT, - (text, e, r, i) -> { - // handle the failure - retryHandler.bulkDeleteRetried(deleteRequest, e); - }, - // duration is tracked in the bulk delete counters - trackDurationOfOperation(getDurationTrackerFactory(), - OBJECT_BULK_DELETE_REQUEST.getSymbol(), () -> { - incrementStatistic(OBJECT_DELETE_OBJECTS, keyCount); - return s3.deleteObjects(deleteRequest); - })); - } catch (MultiObjectDeleteException e) { - // one or more of the keys could not be deleted. 
- // log and rethrow - List errors = e.getErrors(); - LOG.debug("Partial failure of delete, {} errors", errors.size(), e); - for (MultiObjectDeleteException.DeleteError error : errors) { - LOG.debug("{}: \"{}\" - {}", - error.getKey(), error.getCode(), error.getMessage()); + DeleteObjectsResponse response = + invoker.retryUntranslated("delete", DELETE_CONSIDERED_IDEMPOTENT, + (text, e, r, i) -> { + // handle the failure + retryHandler.bulkDeleteRetried(deleteRequest, e); + }, + // duration is tracked in the bulk delete counters + trackDurationOfOperation(getDurationTrackerFactory(), + OBJECT_BULK_DELETE_REQUEST.getSymbol(), () -> { + incrementStatistic(OBJECT_DELETE_OBJECTS, keyCount); + return s3Client.deleteObjects(deleteRequest); + })); + + if (!response.errors().isEmpty()) { + // one or more of the keys could not be deleted. + // log and then throw + List errors = response.errors(); + LOG.debug("Partial failure of delete, {} errors", errors.size()); + for (S3Error error : errors) { + LOG.debug("{}: \"{}\" - {}", error.key(), error.code(), error.message()); + } + throw new MultiObjectDeleteException(errors); } - throw e; + + return response; } } /** - * Create a putObject request. + * Create a putObject request builder. * Adds the ACL and metadata * @param key key of object - * @param metadata metadata header - * @param srcfile source file + * @param length length of object to be uploaded + * @param isDirectoryMarker true if object to be uploaded is a directory marker * @return the request */ - public PutObjectRequest newPutObjectRequest(String key, - ObjectMetadata metadata, File srcfile) { - return requestFactory.newPutObjectRequest(key, metadata, null, srcfile); - } - - /** - * Create a new object metadata instance. - * Any standard metadata headers are added here, for example: - * encryption. - * - * @param length length of data to set in header. - * @return a new metadata instance - */ - public ObjectMetadata newObjectMetadata(long length) { - return requestFactory.newObjectMetadata(length); + public PutObjectRequest.Builder newPutObjectRequestBuilder(String key, + long length, + boolean isDirectoryMarker) { + return requestFactory.newPutObjectRequestBuilder(key, null, length, isDirectoryMarker); } /** @@ -2951,15 +3047,24 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities, * Retry policy: N/A: the transfer manager is performing the upload. * Auditing: must be inside an audit span. 
* @param putObjectRequest the request + * @param file the file to be uploaded + * @param listener the progress listener for the request * @return the upload initiated */ @Retries.OnceRaw - public UploadInfo putObject(PutObjectRequest putObjectRequest) { + public UploadInfo putObject(PutObjectRequest putObjectRequest, File file, + ProgressableProgressListener listener) { long len = getPutRequestLength(putObjectRequest); - LOG.debug("PUT {} bytes to {} via transfer manager ", - len, putObjectRequest.getKey()); + LOG.debug("PUT {} bytes to {} via transfer manager ", len, putObjectRequest.key()); incrementPutStartStatistics(len); - Upload upload = transfers.upload(putObjectRequest); + + FileUpload upload = transferManager.uploadFile( + UploadFileRequest.builder() + .putObjectRequest(putObjectRequest) + .source(file) + .addTransferListener(listener) + .build()); + return new UploadInfo(upload, len); } @@ -2974,30 +3079,37 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities, * @param putObjectRequest the request * @param putOptions put object options * @param durationTrackerFactory factory for duration tracking + * @param uploadData data to be uploaded + * @param isFile represents if data to be uploaded is a file * @return the upload initiated - * @throws AmazonClientException on problems + * @throws SdkException on problems */ @VisibleForTesting @Retries.OnceRaw("For PUT; post-PUT actions are RetryExceptionsSwallowed") - PutObjectResult putObjectDirect(PutObjectRequest putObjectRequest, + PutObjectResponse putObjectDirect(PutObjectRequest putObjectRequest, PutObjectOptions putOptions, + S3ADataBlocks.BlockUploadData uploadData, boolean isFile, DurationTrackerFactory durationTrackerFactory) - throws AmazonClientException { + throws SdkException { long len = getPutRequestLength(putObjectRequest); - LOG.debug("PUT {} bytes to {}", len, putObjectRequest.getKey()); + LOG.debug("PUT {} bytes to {}", len, putObjectRequest.key()); incrementPutStartStatistics(len); try { - PutObjectResult result = trackDurationOfSupplier( - nonNullDurationTrackerFactory(durationTrackerFactory), - OBJECT_PUT_REQUESTS.getSymbol(), () -> - s3.putObject(putObjectRequest)); + PutObjectResponse response = + trackDurationOfSupplier(nonNullDurationTrackerFactory(durationTrackerFactory), + OBJECT_PUT_REQUESTS.getSymbol(), + () -> isFile ? + s3Client.putObject(putObjectRequest, RequestBody.fromFile(uploadData.getFile())) : + s3Client.putObject(putObjectRequest, + RequestBody.fromInputStream(uploadData.getUploadStream(), + putObjectRequest.contentLength()))); incrementPutCompletedStatistics(true, len); // apply any post-write actions. 
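A minimal sketch of the two v2 PUT styles exercised by the paths above: from a local file, and from a stream with an explicit content length. Bucket names, keys and paths are placeholders.

```java
import java.io.ByteArrayInputStream;
import java.nio.charset.StandardCharsets;
import java.nio.file.Paths;
import software.amazon.awssdk.core.sync.RequestBody;
import software.amazon.awssdk.services.s3.S3Client;
import software.amazon.awssdk.services.s3.model.PutObjectRequest;

public final class PutExamples {
  public static void put(S3Client s3, String bucket) {
    // PUT from a file: the SDK derives the content length itself.
    s3.putObject(
        PutObjectRequest.builder().bucket(bucket).key("from-file").build(),
        RequestBody.fromFile(Paths.get("/tmp/data.bin")));

    // PUT from a stream: the caller must supply the content length up front.
    byte[] bytes = "hello".getBytes(StandardCharsets.UTF_8);
    s3.putObject(
        PutObjectRequest.builder()
            .bucket(bucket)
            .key("from-stream")
            .contentLength((long) bytes.length)
            .build(),
        RequestBody.fromInputStream(new ByteArrayInputStream(bytes), bytes.length));
  }
}
```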
- finishedWrite(putObjectRequest.getKey(), len, - result.getETag(), result.getVersionId(), + finishedWrite(putObjectRequest.key(), len, + response.eTag(), response.versionId(), putOptions); - return result; - } catch (SdkBaseException e) { + return response; + } catch (SdkException e) { incrementPutCompletedStatistics(false, len); throw e; } @@ -3010,12 +3122,8 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities, * @throws IllegalArgumentException if the length is negative */ private long getPutRequestLength(PutObjectRequest putObjectRequest) { - long len; - if (putObjectRequest.getFile() != null) { - len = putObjectRequest.getFile().length(); - } else { - len = putObjectRequest.getMetadata().getContentLength(); - } + long len = putObjectRequest.contentLength(); + Preconditions.checkState(len >= 0, "Cannot PUT object of unknown length"); return len; } @@ -3023,28 +3131,29 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities, /** * Upload part of a multi-partition file. * Increments the write and put counters. - * Important: this call does not close any input stream in the request. + * Important: this call does not close any input stream in the body. * * Retry Policy: none. - * @param request request * @param durationTrackerFactory duration tracker factory for operation + * @param request the upload part request. + * @param body the request body. * @return the result of the operation. - * @throws AmazonClientException on problems + * @throws AwsServiceException on problems */ @Retries.OnceRaw - UploadPartResult uploadPart(UploadPartRequest request, + UploadPartResponse uploadPart(UploadPartRequest request, RequestBody body, final DurationTrackerFactory durationTrackerFactory) - throws AmazonClientException { - long len = request.getPartSize(); + throws AwsServiceException { + long len = request.contentLength(); incrementPutStartStatistics(len); try { - UploadPartResult uploadPartResult = trackDurationOfSupplier( + UploadPartResponse uploadPartResponse = trackDurationOfSupplier( nonNullDurationTrackerFactory(durationTrackerFactory), MULTIPART_UPLOAD_PART_PUT.getSymbol(), () -> - s3.uploadPart(request)); + s3Client.uploadPart(request, body)); incrementPutCompletedStatistics(true, len); - return uploadPartResult; - } catch (AmazonClientException e) { + return uploadPartResponse; + } catch (AwsServiceException e) { incrementPutCompletedStatistics(false, len); throw e; } @@ -3109,56 +3218,57 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities, * be deleted in a multiple object delete operation. * The number of rejected objects will be added to the metric * {@link Statistic#FILES_DELETE_REJECTED}. - * @throws AmazonClientException other amazon-layer failure. + * @throws AwsServiceException other amazon-layer failure. */ @Retries.RetryRaw private void removeKeysS3( - List keysToDelete, + List keysToDelete, boolean deleteFakeDir) - throws MultiObjectDeleteException, AmazonClientException, - IOException { + throws MultiObjectDeleteException, AwsServiceException, IOException { if (LOG.isDebugEnabled()) { LOG.debug("Initiating delete operation for {} objects", keysToDelete.size()); - for (DeleteObjectsRequest.KeyVersion key : keysToDelete) { - LOG.debug(" {} {}", key.getKey(), - key.getVersion() != null ? key.getVersion() : ""); + for (ObjectIdentifier objectIdentifier : keysToDelete) { + LOG.debug(" {} {}", objectIdentifier.key(), + objectIdentifier.versionId() != null ? 
objectIdentifier.versionId() : ""); } } if (keysToDelete.isEmpty()) { // exit fast if there are no keys to delete return; } - for (DeleteObjectsRequest.KeyVersion keyVersion : keysToDelete) { - blockRootDelete(keyVersion.getKey()); + for (ObjectIdentifier objectIdentifier : keysToDelete) { + blockRootDelete(objectIdentifier.key()); } try { if (enableMultiObjectsDelete) { if (keysToDelete.size() <= pageSize) { deleteObjects(getRequestFactory() - .newBulkDeleteRequest(keysToDelete)); + .newBulkDeleteRequestBuilder(keysToDelete) + .build()); } else { // Multi object deletion of more than 1000 keys is not supported // by s3. So we are paging the keys by page size. LOG.debug("Partitioning the keys to delete as it is more than " + "page size. Number of keys: {}, Page size: {}", keysToDelete.size(), pageSize); - for (List batchOfKeysToDelete : + for (List batchOfKeysToDelete : Lists.partition(keysToDelete, pageSize)) { deleteObjects(getRequestFactory() - .newBulkDeleteRequest(batchOfKeysToDelete)); + .newBulkDeleteRequestBuilder(batchOfKeysToDelete) + .build()); } } } else { - for (DeleteObjectsRequest.KeyVersion keyVersion : keysToDelete) { - deleteObject(keyVersion.getKey()); + for (ObjectIdentifier objectIdentifier : keysToDelete) { + deleteObject(objectIdentifier.key()); } } } catch (MultiObjectDeleteException ex) { // partial delete. // Update the stats with the count of the actual number of successful // deletions. - int rejected = ex.getErrors().size(); + int rejected = ex.errors().size(); noteDeleted(keysToDelete.size() - rejected, deleteFakeDir); incrementStatistic(FILES_DELETE_REJECTED, rejected); throw ex; @@ -3191,15 +3301,15 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities, * a mistaken attempt to delete the root directory. * @throws MultiObjectDeleteException one or more of the keys could not * be deleted in a multiple object delete operation. - * @throws AmazonClientException amazon-layer failure. + * @throws AwsServiceException amazon-layer failure. * @throws IOException other IO Exception. 
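A standalone sketch of a v2 bulk delete with per-key error checking, in the spirit of the paged delete above. Names are illustrative; unlike the v1 SDK, the v2 call does not throw on partial failure, so the error list must be inspected.

```java
import java.util.List;
import java.util.stream.Collectors;
import software.amazon.awssdk.services.s3.S3Client;
import software.amazon.awssdk.services.s3.model.Delete;
import software.amazon.awssdk.services.s3.model.DeleteObjectsRequest;
import software.amazon.awssdk.services.s3.model.DeleteObjectsResponse;
import software.amazon.awssdk.services.s3.model.ObjectIdentifier;
import software.amazon.awssdk.services.s3.model.S3Error;

public final class BulkDelete {
  public static void deleteAll(S3Client s3, String bucket, List<String> keys) {
    List<ObjectIdentifier> ids = keys.stream()
        .map(k -> ObjectIdentifier.builder().key(k).build())
        .collect(Collectors.toList());
    DeleteObjectsResponse response = s3.deleteObjects(DeleteObjectsRequest.builder()
        .bucket(bucket)
        .delete(Delete.builder().objects(ids).build())
        .build());
    // Partial failures are reported per key rather than thrown.
    for (S3Error error : response.errors()) {
      System.err.printf("%s: %s - %s%n", error.key(), error.code(), error.message());
    }
  }
}
```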
*/ @VisibleForTesting @Retries.RetryRaw public void removeKeys( - final List keysToDelete, + final List keysToDelete, final boolean deleteFakeDir) - throws MultiObjectDeleteException, AmazonClientException, + throws MultiObjectDeleteException, AwsServiceException, IOException { try (DurationInfo ignored = new DurationInfo(LOG, false, "Deleting %d keys", keysToDelete.size())) { @@ -3269,7 +3379,7 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities, LOG.debug("Couldn't delete {} - does not exist: {}", path, e.toString()); instrumentation.errorIgnored(); return false; - } catch (AmazonClientException e) { + } catch (SdkException e) { throw translateException("delete", path, e); } } @@ -3283,7 +3393,7 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities, */ @Retries.RetryTranslated private void createFakeDirectoryIfNecessary(Path f) - throws IOException, AmazonClientException { + throws IOException, SdkException { String key = pathToKey(f); // we only make the LIST call; the codepaths to get here should not // be reached if there is an empty dir marker -and if they do, it @@ -3303,7 +3413,7 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities, @Retries.RetryTranslated @VisibleForTesting protected void maybeCreateFakeParentDirectory(Path path) - throws IOException, AmazonClientException { + throws IOException, SdkException { Path parent = path.getParent(); if (parent != null && !parent.isRoot() && !isUnderMagicCommitPath(parent)) { createFakeDirectoryIfNecessary(parent); @@ -3357,11 +3467,11 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities, * @return the statuses of the files/directories in the given patch * @throws FileNotFoundException when the path does not exist; * @throws IOException due to an IO problem. - * @throws AmazonClientException on failures inside the AWS SDK + * @throws SdkException on failures inside the AWS SDK */ private RemoteIterator innerListStatus(Path f) throws FileNotFoundException, - IOException, AmazonClientException { + IOException, SdkException { Path path = qualify(f); LOG.debug("List status for path: {}", path); @@ -3425,15 +3535,15 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities, private S3ListRequest createListObjectsRequest(String key, String delimiter, int limit) { if (!useListV1) { - ListObjectsV2Request request = - getRequestFactory().newListObjectsV2Request( + ListObjectsV2Request.Builder requestBuilder = + getRequestFactory().newListObjectsV2RequestBuilder( key, delimiter, limit); - return S3ListRequest.v2(request); + return S3ListRequest.v2(requestBuilder.build()); } else { - ListObjectsRequest request = - getRequestFactory().newListObjectsV1Request( + ListObjectsRequest.Builder requestBuilder = + getRequestFactory().newListObjectsV1RequestBuilder( key, delimiter, limit); - return S3ListRequest.v1(request); + return S3ListRequest.v1(requestBuilder.build()); } } @@ -3721,31 +3831,31 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities, && probes.contains(StatusProbeEnum.Head)) { try { // look for the simple file - ObjectMetadata meta = getObjectMetadata(key); + HeadObjectResponse meta = getObjectMetadata(key); LOG.debug("Found exact file: normal file {}", key); - long contentLength = meta.getContentLength(); + long contentLength = meta.contentLength(); // check if CSE is enabled, then strip padded length. 
- if (isCSEEnabled - && meta.getUserMetaDataOf(Headers.CRYPTO_CEK_ALGORITHM) != null + if (isCSEEnabled && + meta.metadata().get(AWSHeaders.CRYPTO_CEK_ALGORITHM) != null && contentLength >= CSE_PADDING_LENGTH) { contentLength -= CSE_PADDING_LENGTH; } return new S3AFileStatus(contentLength, - dateToLong(meta.getLastModified()), + meta.lastModified().toEpochMilli(), path, getDefaultBlockSize(path), username, - meta.getETag(), - meta.getVersionId()); - } catch (AmazonServiceException e) { + meta.eTag(), + meta.versionId()); + } catch (AwsServiceException e) { // if the response is a 404 error, it just means that there is // no file at that path...the remaining checks will be needed. // But: an empty bucket is also a 404, so check for that // and fail. - if (e.getStatusCode() != SC_404 || isUnknownBucket(e)) { + if (e.statusCode() != SC_404_NOT_FOUND || isUnknownBucket(e)) { throw translateException("getFileStatus", path, e); } - } catch (AmazonClientException e) { + } catch (SdkException e) { throw translateException("getFileStatus", path, e); } } @@ -3788,11 +3898,11 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities, LOG.debug("Found root directory"); return new S3AFileStatus(Tristate.TRUE, path, username); } - } catch (AmazonServiceException e) { - if (e.getStatusCode() != SC_404 || isUnknownBucket(e)) { + } catch (AwsServiceException e) { + if (e.statusCode() != SC_404_NOT_FOUND || isUnknownBucket(e)) { throw translateException("getFileStatus", path, e); } - } catch (AmazonClientException e) { + } catch (SdkException e) { throw translateException("getFileStatus", path, e); } } @@ -3836,7 +3946,7 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities, * @throws IOException IO problem * @throws FileAlreadyExistsException the destination file exists and * overwrite==false - * @throws AmazonClientException failure in the AWS SDK + * @throws SdkException failure in the AWS SDK */ @Override @AuditEntryPoint @@ -3891,13 +4001,12 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities, to, () -> { final String key = pathToKey(to); - final ObjectMetadata om = newObjectMetadata(file.length()); Progressable progress = null; - PutObjectRequest putObjectRequest = newPutObjectRequest(key, om, file); - S3AFileSystem.this.invoker.retry( - "putObject(" + "" + ")", to.toString(), - true, - () -> executePut(putObjectRequest, progress, putOptionsForPath(to))); + PutObjectRequest.Builder putObjectRequestBuilder = + newPutObjectRequestBuilder(key, file.length(), false); + S3AFileSystem.this.invoker.retry("putObject(" + "" + ")", to.toString(), true, + () -> executePut(putObjectRequestBuilder.build(), progress, putOptionsForPath(to), + file)); return null; }); @@ -3922,40 +4031,35 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities, /** * Execute a PUT via the transfer manager, blocking for completion. - * If the waiting for completion is interrupted, the upload will be - * aborted before an {@code InterruptedIOException} is thrown. * @param putObjectRequest request * @param progress optional progress callback * @param putOptions put object options * @return the upload result - * @throws InterruptedIOException if the blocking was interrupted. 
+ * @throws IOException IO failure */ @Retries.OnceRaw("For PUT; post-PUT actions are RetrySwallowed") - UploadResult executePut( + PutObjectResponse executePut( final PutObjectRequest putObjectRequest, final Progressable progress, - final PutObjectOptions putOptions) - throws InterruptedIOException { - String key = putObjectRequest.getKey(); + final PutObjectOptions putOptions, + final File file) + throws IOException { + String key = putObjectRequest.key(); long len = getPutRequestLength(putObjectRequest); - UploadInfo info = putObject(putObjectRequest); - Upload upload = info.getUpload(); - ProgressableProgressListener listener = new ProgressableProgressListener( - this, key, upload, progress); - upload.addProgressListener(listener); - UploadResult result = waitForUploadCompletion(key, info); - listener.uploadCompleted(); + ProgressableProgressListener listener = + new ProgressableProgressListener(this, putObjectRequest.key(), progress); + UploadInfo info = putObject(putObjectRequest, file, listener); + PutObjectResponse result = waitForUploadCompletion(key, info).response(); + listener.uploadCompleted(info.getFileUpload()); // post-write actions finishedWrite(key, len, - result.getETag(), result.getVersionId(), putOptions); + result.eTag(), result.versionId(), putOptions); return result; } /** * Wait for an upload to complete. - * If the waiting for completion is interrupted, the upload will be - * aborted before an {@code InterruptedIOException} is thrown. * If the upload (or its result collection) failed, this is where * the failure is raised as an AWS exception. * Calls {@link #incrementPutCompletedStatistics(boolean, long)} @@ -3963,24 +4067,20 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities, * @param key destination key * @param uploadInfo upload to wait for * @return the upload result - * @throws InterruptedIOException if the blocking was interrupted. + * @throws IOException IO failure */ @Retries.OnceRaw - UploadResult waitForUploadCompletion(String key, UploadInfo uploadInfo) - throws InterruptedIOException { - Upload upload = uploadInfo.getUpload(); + CompletedFileUpload waitForUploadCompletion(String key, UploadInfo uploadInfo) + throws IOException { + FileUpload upload = uploadInfo.getFileUpload(); try { - UploadResult result = upload.waitForUploadResult(); + CompletedFileUpload result = upload.completionFuture().join(); incrementPutCompletedStatistics(true, uploadInfo.getLength()); return result; - } catch (InterruptedException e) { + } catch (CompletionException e) { LOG.info("Interrupted: aborting upload"); incrementPutCompletedStatistics(false, uploadInfo.getLength()); - upload.abort(); - throw (InterruptedIOException) - new InterruptedIOException("Interrupted in PUT to " - + keyToQualifiedPath(key)) - .initCause(e); + throw extractException("upload", key, e); } } @@ -4073,17 +4173,13 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities, * both the expected state of this FS and of failures while being stopped. */ protected synchronized void stopAllServices() { - // shutting down the transfer manager also shuts - // down the S3 client it is bonded to. - if (transfers != null) { - try { - transfers.shutdownNow(true); - } catch (RuntimeException e) { - // catch and swallow for resilience. 
- LOG.debug("When shutting down", e); - } - transfers = null; - } + closeAutocloseables(LOG, transferManager, + s3Client, + s3AsyncClient); + transferManager = null; + s3Client = null; + s3AsyncClient = null; + // At this point the S3A client is shut down, // now the executor pools are closed HadoopExecutors.shutdown(boundedThreadPool, LOG, @@ -4238,21 +4334,11 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities, * @throws IOException Other IO problems */ @Retries.RetryTranslated - private CopyResult copyFile(String srcKey, String dstKey, long size, + private CopyObjectResponse copyFile(String srcKey, String dstKey, long size, S3ObjectAttributes srcAttributes, S3AReadOpContext readContext) - throws IOException, InterruptedIOException { + throws IOException { LOG.debug("copyFile {} -> {} ", srcKey, dstKey); - ProgressListener progressListener = progressEvent -> { - switch (progressEvent.getEventType()) { - case TRANSFER_PART_COMPLETED_EVENT: - incrementWriteOperations(); - break; - default: - break; - } - }; - ChangeTracker changeTracker = new ChangeTracker( keyToQualifiedPath(srcKey).toString(), changeDetectionPolicy, @@ -4264,7 +4350,7 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities, String action = "copyFile(" + srcKey + ", " + dstKey + ")"; Invoker readInvoker = readContext.getReadInvoker(); - ObjectMetadata srcom; + HeadObjectResponse srcom; try { srcom = once(action, srcKey, () -> @@ -4287,33 +4373,32 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities, action, srcKey, true, () -> { - CopyObjectRequest copyObjectRequest = - getRequestFactory().newCopyObjectRequest(srcKey, dstKey, srcom); - changeTracker.maybeApplyConstraint(copyObjectRequest); + CopyObjectRequest.Builder copyObjectRequestBuilder = + getRequestFactory().newCopyObjectRequestBuilder(srcKey, dstKey, srcom); + changeTracker.maybeApplyConstraint(copyObjectRequestBuilder); incrementStatistic(OBJECT_COPY_REQUESTS); - Copy copy = transfers.copy(copyObjectRequest, - getAuditManager().createStateChangeListener()); - copy.addProgressListener(progressListener); - CopyOutcome copyOutcome = CopyOutcome.waitForCopy(copy); - InterruptedException interruptedException = - copyOutcome.getInterruptedException(); - if (interruptedException != null) { - // copy interrupted: convert to an IOException. 
- throw (IOException)new InterruptedIOException( - "Interrupted copying " + srcKey - + " to " + dstKey + ", cancelling") - .initCause(interruptedException); + + Copy copy = transferManager.copy( + CopyRequest.builder() + .copyObjectRequest(copyObjectRequestBuilder.build()) + .build()); + + try { + CompletedCopy completedCopy = copy.completionFuture().join(); + CopyObjectResponse result = completedCopy.response(); + changeTracker.processResponse(result); + incrementWriteOperations(); + instrumentation.filesCopied(1, size); + return result; + } catch (CompletionException e) { + Throwable cause = e.getCause(); + if (cause instanceof SdkException) { + SdkException awsException = (SdkException)cause; + changeTracker.processException(awsException, "copy"); + throw awsException; + } + throw extractException(action, srcKey, e); } - SdkBaseException awsException = copyOutcome.getAwsException(); - if (awsException != null) { - changeTracker.processException(awsException, "copy"); - throw awsException; - } - CopyResult result = copyOutcome.getCopyResult(); - changeTracker.processResponse(result); - incrementWriteOperations(); - instrumentation.filesCopied(1, size); - return result; }); } @@ -4322,16 +4407,16 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities, * Retry policy: none + untranslated. * @param request request to initiate * @return the result of the call - * @throws AmazonClientException on failures inside the AWS SDK + * @throws SdkException on failures inside the AWS SDK * @throws IOException Other IO problems */ @Retries.OnceRaw - InitiateMultipartUploadResult initiateMultipartUpload( - InitiateMultipartUploadRequest request) throws IOException { - LOG.debug("Initiate multipart upload to {}", request.getKey()); + CreateMultipartUploadResponse initiateMultipartUpload( + CreateMultipartUploadRequest request) throws IOException { + LOG.debug("Initiate multipart upload to {}", request.key()); return trackDurationOfSupplier(getDurationTrackerFactory(), OBJECT_MULTIPART_UPLOAD_INITIATED.getSymbol(), - () -> getAmazonS3Client().initiateMultipartUpload(request)); + () -> s3Client.createMultipartUpload(request)); } /** @@ -4404,22 +4489,22 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities, */ @Retries.RetryExceptionsSwallowed private void deleteUnnecessaryFakeDirectories(Path path) { - List keysToRemove = new ArrayList<>(); + List keysToRemove = new ArrayList<>(); while (!path.isRoot()) { String key = pathToKey(path); key = (key.endsWith("/")) ? 
key : (key + "/"); LOG.trace("To delete unnecessary fake directory {} for {}", key, path); - keysToRemove.add(new DeleteObjectsRequest.KeyVersion(key)); + keysToRemove.add(ObjectIdentifier.builder().key(key).build()); path = path.getParent(); } try { removeKeys(keysToRemove, true); - } catch(AmazonClientException | IOException e) { + } catch (AwsServiceException | IOException e) { instrumentation.errorIgnored(); if (LOG.isDebugEnabled()) { StringBuilder sb = new StringBuilder(); - for(DeleteObjectsRequest.KeyVersion kv : keysToRemove) { - sb.append(kv.getKey()).append(","); + for (ObjectIdentifier objectIdentifier : keysToRemove) { + sb.append(objectIdentifier.key()).append(","); } LOG.debug("While deleting keys {} ", sb.toString(), e); } @@ -4452,11 +4537,18 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities, @Retries.RetryTranslated private void createEmptyObject(final String objectName, PutObjectOptions putOptions) throws IOException { - invoker.retry("PUT 0-byte object ", objectName, - true, () -> - putObjectDirect(getRequestFactory().newDirectoryMarkerRequest(objectName), - putOptions, - getDurationTrackerFactory())); + final InputStream im = new InputStream() { + @Override + public int read() throws IOException { + return -1; + } + }; + + S3ADataBlocks.BlockUploadData uploadData = new S3ADataBlocks.BlockUploadData(im); + + invoker.retry("PUT 0-byte object ", objectName, true, + () -> putObjectDirect(getRequestFactory().newDirectoryMarkerRequest(objectName).build(), + putOptions, uploadData, false, getDurationTrackerFactory())); incrementPutProgressStatistics(objectName, 0); instrumentation.directoryCreated(); } @@ -4713,10 +4805,10 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities, ETAG_CHECKSUM_ENABLED_DEFAULT)) { return trackDurationAndSpan(INVOCATION_GET_FILE_CHECKSUM, path, () -> { LOG.debug("getFileChecksum({})", path); - ObjectMetadata headers = getObjectMetadata(path, null, + HeadObjectResponse headers = getObjectMetadata(path, null, invoker, "getFileChecksum are"); - String eTag = headers.getETag(); + String eTag = headers.eTag(); return eTag != null ? new EtagChecksum(eTag) : null; }); } else { @@ -4798,11 +4890,18 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities, HeaderProcessing.HeaderProcessingCallbacks { @Override - public ObjectMetadata getObjectMetadata(final String key) + public HeadObjectResponse getObjectMetadata(final String key) throws IOException { return once("getObjectMetadata", key, () -> S3AFileSystem.this.getObjectMetadata(key)); } + + @Override + public HeadBucketResponse getBucketMetadata() + throws IOException { + return once("getBucketMetadata", bucket, () -> + S3AFileSystem.this.getBucketMetadata()); + } } /** * {@inheritDoc}. @@ -4915,7 +5014,7 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities, // If we have reached here, it means either there are files // in this directory or it is empty. return listFilesAssumingDir; - } catch (AmazonClientException e) { + } catch (SdkException e) { throw translateException("listFiles", path, e); } } @@ -5013,8 +5112,7 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities, // span is picked up retained in the listing. 
return trackDurationAndSpan(MULTIPART_UPLOAD_LIST, prefix, null, () -> MultipartUtils.listMultipartUploads( - createStoreContext(), - s3, prefix, maxKeys + createStoreContext(), s3Client, prefix, maxKeys )); } @@ -5025,7 +5123,7 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities, * Retry policy: retry, translated. * @return a listing of multipart uploads. * @param prefix prefix to scan for, "" for none - * @throws IOException IO failure, including any uprated AmazonClientException + * @throws IOException IO failure, including any uprated SdkException */ @InterfaceAudience.Private @Retries.RetryTranslated @@ -5037,9 +5135,9 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities, } String p = prefix; return invoker.retry("listMultipartUploads", p, true, () -> { - ListMultipartUploadsRequest request = getRequestFactory() - .newListMultipartUploadsRequest(p); - return s3.listMultipartUploads(request).getMultipartUploads(); + ListMultipartUploadsRequest.Builder requestBuilder = getRequestFactory() + .newListMultipartUploadsRequestBuilder(p); + return s3Client.listMultipartUploads(requestBuilder.build()).uploads(); }); } @@ -5052,10 +5150,10 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities, @Retries.OnceRaw void abortMultipartUpload(String destKey, String uploadId) { LOG.info("Aborting multipart upload {} to {}", uploadId, destKey); - getAmazonS3Client().abortMultipartUpload( - getRequestFactory().newAbortMultipartUploadRequest( + s3Client.abortMultipartUpload( + getRequestFactory().newAbortMultipartUploadRequestBuilder( destKey, - uploadId)); + uploadId).build()); } /** @@ -5067,18 +5165,18 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities, void abortMultipartUpload(MultipartUpload upload) { String destKey; String uploadId; - destKey = upload.getKey(); - uploadId = upload.getUploadId(); + destKey = upload.key(); + uploadId = upload.uploadId(); if (LOG.isInfoEnabled()) { DateFormat df = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); LOG.debug("Aborting multipart upload {} to {} initiated by {} on {}", - uploadId, destKey, upload.getInitiator(), - df.format(upload.getInitiated())); + uploadId, destKey, upload.initiator(), + df.format(Date.from(upload.initiated()))); } - getAmazonS3Client().abortMultipartUpload( - getRequestFactory().newAbortMultipartUploadRequest( + s3Client.abortMultipartUpload( + getRequestFactory().newAbortMultipartUploadRequestBuilder( destKey, - uploadId)); + uploadId).build()); } /** diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AInputStream.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AInputStream.java index 4b50ab2c04b..2ed9083efcd 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AInputStream.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AInputStream.java @@ -22,6 +22,7 @@ import javax.annotation.Nullable; import java.io.Closeable; import java.io.EOFException; import java.io.IOException; +import java.io.InputStream; import java.io.InterruptedIOException; import java.net.SocketTimeoutException; import java.nio.ByteBuffer; @@ -31,9 +32,9 @@ import java.util.concurrent.ExecutorService; import java.util.concurrent.atomic.AtomicBoolean; import java.util.function.IntFunction; -import com.amazonaws.services.s3.model.GetObjectRequest; -import com.amazonaws.services.s3.model.S3Object; -import com.amazonaws.services.s3.model.S3ObjectInputStream; +import 
software.amazon.awssdk.core.ResponseInputStream; +import software.amazon.awssdk.services.s3.model.GetObjectRequest; +import software.amazon.awssdk.services.s3.model.GetObjectResponse; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -46,7 +47,6 @@ import org.apache.hadoop.fs.CanUnbuffer; import org.apache.hadoop.fs.FSExceptionMessages; import org.apache.hadoop.fs.FSInputStream; import org.apache.hadoop.fs.FileRange; -import org.apache.hadoop.fs.PathIOException; import org.apache.hadoop.fs.StreamCapabilities; import org.apache.hadoop.fs.impl.CombinedFileRange; import org.apache.hadoop.fs.VectoredReadUtils; @@ -61,6 +61,7 @@ import org.apache.hadoop.fs.statistics.IOStatisticsSource; import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.util.functional.CallableRaisingIOE; + import static java.util.Objects.requireNonNull; import static org.apache.commons.lang3.StringUtils.isNotEmpty; import static org.apache.hadoop.fs.VectoredReadUtils.isOrderedDisjoint; @@ -125,14 +126,9 @@ public class S3AInputStream extends FSInputStream implements CanSetReadahead, */ private volatile boolean closed; /** - * wrappedStream is associated with an object (instance of S3Object). When - * the object is garbage collected, the associated wrappedStream will be - * closed. Keep a reference to this object to prevent the wrapperStream - * still in use from being closed unexpectedly due to garbage collection. - * See HADOOP-17338 for details. + * Input stream returned by a getObject call. */ - private S3Object object; - private S3ObjectInputStream wrappedStream; + private ResponseInputStream wrappedStream; private final S3AReadOpContext context; private final InputStreamCallbacks client; @@ -271,28 +267,22 @@ public class S3AInputStream extends FSInputStream implements CanSetReadahead, uri, reason, targetPos, contentRangeFinish, length, pos, nextReadPos, inputPolicy); + GetObjectRequest request = client.newGetRequestBuilder(key) + .range(S3AUtils.formatRange(targetPos, contentRangeFinish - 1)) + .applyMutation(changeTracker::maybeApplyConstraint) + .build(); long opencount = streamStatistics.streamOpened(); - GetObjectRequest request = client.newGetRequest(key) - .withRange(targetPos, contentRangeFinish - 1); String operation = opencount == 0 ? OPERATION_OPEN : OPERATION_REOPEN; String text = String.format("%s %s at %d", operation, uri, targetPos); - changeTracker.maybeApplyConstraint(request); - - object = onceTrackingDuration(text, uri, + wrappedStream = onceTrackingDuration(text, uri, streamStatistics.initiateGetRequest(), () -> client.getObject(request)); - - changeTracker.processResponse(object, operation, + changeTracker.processResponse(wrappedStream.response(), operation, targetPos); - wrappedStream = object.getObjectContent(); - contentRangeStart = targetPos; - if (wrappedStream == null) { - throw new PathIOException(uri, - "Null IO stream from " + operation + " of (" + reason + ") "); - } + contentRangeStart = targetPos; this.pos = targetPos; } @@ -505,14 +495,15 @@ public class S3AInputStream extends FSInputStream implements CanSetReadahead, */ @Retries.OnceTranslated private void onReadFailure(IOException ioe, boolean forceAbort) { + GetObjectResponse objectResponse = wrappedStream == null ? 
null : wrappedStream.response(); if (LOG.isDebugEnabled()) { LOG.debug("Got exception while trying to read from stream {}, " + "client: {} object: {}, trying to recover: ", - uri, client, object, ioe); + uri, client, objectResponse, ioe); } else { LOG.info("Got exception while trying to read from stream {}, " + "client: {} object: {}, trying to recover: " + ioe, - uri, client, object); + uri, client, objectResponse); } streamStatistics.readException(); closeStream("failure recovery", forceAbort, false); @@ -672,7 +663,6 @@ public class S3AInputStream extends FSInputStream implements CanSetReadahead, CompletableFuture operation; SDKStreamDrainer drainer = new SDKStreamDrainer( uri, - object, wrappedStream, shouldAbort, (int) remaining, @@ -694,7 +684,6 @@ public class S3AInputStream extends FSInputStream implements CanSetReadahead, // either the stream is closed in the blocking call or the async call is // submitted with its own copy of the references wrappedStream = null; - object = null; return operation; } @@ -910,23 +899,19 @@ public class S3AInputStream extends FSInputStream implements CanSetReadahead, private void readCombinedRangeAndUpdateChildren(CombinedFileRange combinedFileRange, IntFunction allocate) { LOG.debug("Start reading combined range {} from path {} ", combinedFileRange, pathStr); - // This reference must be kept till all buffers are populated as this is a - // finalizable object which closes the internal stream when gc triggers. - S3Object objectRange = null; - S3ObjectInputStream objectContent = null; + ResponseInputStream rangeContent = null; try { - objectRange = getS3ObjectAndValidateNotNull("readCombinedFileRange", + rangeContent = getS3ObjectInputStream("readCombinedFileRange", combinedFileRange.getOffset(), combinedFileRange.getLength()); - objectContent = objectRange.getObjectContent(); - populateChildBuffers(combinedFileRange, objectContent, allocate); + populateChildBuffers(combinedFileRange, rangeContent, allocate); } catch (Exception ex) { LOG.debug("Exception while reading a range {} from path {} ", combinedFileRange, pathStr, ex); for(FileRange child : combinedFileRange.getUnderlying()) { child.getData().completeExceptionally(ex); } } finally { - IOUtils.cleanupWithLogger(LOG, objectRange, objectContent); + IOUtils.cleanupWithLogger(LOG, rangeContent); } LOG.debug("Finished reading range {} from path {} ", combinedFileRange, pathStr); } @@ -939,7 +924,7 @@ public class S3AInputStream extends FSInputStream implements CanSetReadahead, * @throws IOException any IOE. */ private void populateChildBuffers(CombinedFileRange combinedFileRange, - S3ObjectInputStream objectContent, + InputStream objectContent, IntFunction allocate) throws IOException { // If the combined file range just contains a single child // range, we only have to fill that one child buffer else @@ -971,7 +956,7 @@ public class S3AInputStream extends FSInputStream implements CanSetReadahead, * @param drainQuantity how many bytes to drain. * @throws IOException any IOE. 
*/ - private void drainUnnecessaryData(S3ObjectInputStream objectContent, long drainQuantity) + private void drainUnnecessaryData(InputStream objectContent, long drainQuantity) throws IOException { int drainBytes = 0; int readCount; @@ -1013,28 +998,24 @@ public class S3AInputStream extends FSInputStream implements CanSetReadahead, */ private void readSingleRange(FileRange range, ByteBuffer buffer) { LOG.debug("Start reading range {} from path {} ", range, pathStr); - // This reference must be kept till all buffers are populated as this is a - // finalizable object which closes the internal stream when gc triggers. - S3Object objectRange = null; - S3ObjectInputStream objectContent = null; + ResponseInputStream objectRange = null; try { long position = range.getOffset(); int length = range.getLength(); - objectRange = getS3ObjectAndValidateNotNull("readSingleRange", position, length); - objectContent = objectRange.getObjectContent(); - populateBuffer(length, buffer, objectContent); + objectRange = getS3ObjectInputStream("readSingleRange", position, length); + populateBuffer(length, buffer, objectRange); range.getData().complete(buffer); } catch (Exception ex) { LOG.warn("Exception while reading a range {} from path {} ", range, pathStr, ex); range.getData().completeExceptionally(ex); } finally { - IOUtils.cleanupWithLogger(LOG, objectRange, objectContent); + IOUtils.cleanupWithLogger(LOG, objectRange); } LOG.debug("Finished reading range {} from path {} ", range, pathStr); } /** - * Get the s3 object for S3 server for a specified range. + * Get the s3 object input stream for S3 server for a specified range. * Also checks if the vectored io operation has been stopped before and after * the http get request such that we don't waste time populating the buffers. * @param operationName name of the operation for which get object on S3 is called. @@ -1043,15 +1024,11 @@ public class S3AInputStream extends FSInputStream implements CanSetReadahead, * @return result s3 object. * @throws IOException exception if any. */ - private S3Object getS3ObjectAndValidateNotNull(final String operationName, - final long position, - final int length) throws IOException { + private ResponseInputStream getS3ObjectInputStream( + final String operationName, final long position, final int length) throws IOException { checkIfVectoredIOStopped(); - S3Object objectRange = getS3Object(operationName, position, length); - if (objectRange.getObjectContent() == null) { - throw new PathIOException(uri, - "Null IO stream received during " + operationName); - } + ResponseInputStream objectRange = + getS3Object(operationName, position, length); checkIfVectoredIOStopped(); return objectRange; } @@ -1066,7 +1043,7 @@ public class S3AInputStream extends FSInputStream implements CanSetReadahead, */ private void populateBuffer(int length, ByteBuffer buffer, - S3ObjectInputStream objectContent) throws IOException { + InputStream objectContent) throws IOException { if (buffer.isDirect()) { VectoredReadUtils.readInDirectBuffer(length, buffer, @@ -1091,7 +1068,7 @@ public class S3AInputStream extends FSInputStream implements CanSetReadahead, * @param length number of bytes to fill in dest. * @throws IOException any IOE. */ - private void readByteArray(S3ObjectInputStream objectContent, + private void readByteArray(InputStream objectContent, byte[] dest, int offset, int length) throws IOException { @@ -1118,13 +1095,16 @@ public class S3AInputStream extends FSInputStream implements CanSetReadahead, * @return S3Object result s3 object. 
* @throws IOException exception if any. */ - private S3Object getS3Object(String operationName, long position, - int length) throws IOException { - final GetObjectRequest request = client.newGetRequest(key) - .withRange(position, position + length - 1); - changeTracker.maybeApplyConstraint(request); + private ResponseInputStream getS3Object(String operationName, + long position, + int length) + throws IOException { + final GetObjectRequest request = client.newGetRequestBuilder(key) + .range(S3AUtils.formatRange(position, position + length - 1)) + .applyMutation(changeTracker::maybeApplyConstraint) + .build(); DurationTracker tracker = streamStatistics.initiateGetRequest(); - S3Object objectRange; + ResponseInputStream objectRange; Invoker invoker = context.getReadInvoker(); try { objectRange = invoker.retry(operationName, pathStr, true, @@ -1139,7 +1119,7 @@ public class S3AInputStream extends FSInputStream implements CanSetReadahead, } finally { tracker.close(); } - changeTracker.processResponse(objectRange, operationName, + changeTracker.processResponse(objectRange.response(), operationName, position); return objectRange; } @@ -1293,11 +1273,11 @@ public class S3AInputStream extends FSInputStream implements CanSetReadahead, public interface InputStreamCallbacks extends Closeable { /** - * Create a GET request. + * Create a GET request builder. * @param key object key - * @return the request + * @return the request builder */ - GetObjectRequest newGetRequest(String key); + GetObjectRequest.Builder newGetRequestBuilder(String key); /** * Execute the request. @@ -1305,7 +1285,7 @@ public class S3AInputStream extends FSInputStream implements CanSetReadahead, * @return the response */ @Retries.OnceRaw - S3Object getObject(GetObjectRequest request); + ResponseInputStream getObject(GetObjectRequest request); /** * Submit some asynchronous work, for example, draining a stream. diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ARetryPolicy.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ARetryPolicy.java index 528a99f5e09..2f21be9e6d1 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ARetryPolicy.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ARetryPolicy.java @@ -30,7 +30,8 @@ import java.util.HashMap; import java.util.Map; import java.util.concurrent.TimeUnit; -import com.amazonaws.AmazonClientException; +import software.amazon.awssdk.core.exception.SdkException; +import org.apache.hadoop.util.Preconditions; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -42,7 +43,7 @@ import org.apache.hadoop.fs.s3a.auth.NoAuthWithAWSException; import org.apache.hadoop.io.retry.RetryPolicies; import org.apache.hadoop.io.retry.RetryPolicy; import org.apache.hadoop.net.ConnectTimeoutException; -import org.apache.hadoop.util.Preconditions; + import static org.apache.hadoop.io.retry.RetryPolicies.*; @@ -68,9 +69,9 @@ import static org.apache.hadoop.fs.s3a.Constants.*; * * The retry policy is all built around that of the normal IO exceptions, * particularly those extracted from - * {@link S3AUtils#translateException(String, Path, AmazonClientException)}. + * {@link S3AUtils#translateException(String, Path, SdkException)}. 
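The reopen()/getS3Object() changes above replace S3Object and S3ObjectInputStream with a single ResponseInputStream&lt;GetObjectResponse&gt;, built from a GetObjectRequest.Builder carrying an explicit Range header. A minimal sketch of that v2 ranged-read shape, with illustrative bucket/key names and the same "bytes=start-end" format produced by the formatRange helper this patch adds to S3AUtils:

import java.io.IOException;
import software.amazon.awssdk.core.ResponseInputStream;
import software.amazon.awssdk.services.s3.S3Client;
import software.amazon.awssdk.services.s3.model.GetObjectRequest;
import software.amazon.awssdk.services.s3.model.GetObjectResponse;

public final class RangedGetSketch {
  /** Read the inclusive byte range [start, end] of an object. */
  public static byte[] readRange(S3Client s3, String bucket, String key,
      long start, long end) throws IOException {
    GetObjectRequest request = GetObjectRequest.builder()
        .bucket(bucket)
        .key(key)
        // same header format as S3AUtils.formatRange(start, end)
        .range(String.format("bytes=%d-%d", start, end))
        .build();
    // The v2 stream carries the GetObjectResponse with it, so no separate
    // S3Object reference has to be kept alive while reading.
    try (ResponseInputStream<GetObjectResponse> in = s3.getObject(request)) {
      byte[] buffer = new byte[(int) (end - start + 1)];
      int read = 0;
      while (read < buffer.length) {
        int n = in.read(buffer, read, buffer.length - read);
        if (n < 0) {
          break;
        }
        read += n;
      }
      return buffer;
    }
  }

  private RangedGetSketch() {
  }
}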
* Because the {@link #shouldRetry(Exception, int, int, boolean)} method - * does this translation if an {@code AmazonClientException} is processed, + * does this translation if an {@code SdkException} is processed, * the policy defined for the IOEs also applies to the original exceptions. * * Put differently: this retry policy aims to work for handlers of the @@ -242,11 +243,10 @@ public class S3ARetryPolicy implements RetryPolicy { boolean idempotent) throws Exception { Preconditions.checkArgument(exception != null, "Null exception"); Exception ex = exception; - if (exception instanceof AmazonClientException) { - // uprate the amazon client exception for the purpose of exception + if (exception instanceof SdkException) { + // update the sdk exception for the purpose of exception // processing. - ex = S3AUtils.translateException("", "", - (AmazonClientException) exception); + ex = S3AUtils.translateException("", "", (SdkException) exception); } return retryPolicy.shouldRetry(ex, retries, failovers, idempotent); } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java index dc4ee8a9493..100450aecb7 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java @@ -18,19 +18,12 @@ package org.apache.hadoop.fs.s3a; -import com.amazonaws.AbortedException; -import com.amazonaws.AmazonClientException; -import com.amazonaws.AmazonServiceException; -import com.amazonaws.ClientConfiguration; -import com.amazonaws.Protocol; -import com.amazonaws.SdkBaseException; -import com.amazonaws.auth.AWSCredentialsProvider; -import com.amazonaws.auth.EnvironmentVariableCredentialsProvider; -import com.amazonaws.retry.RetryUtils; -import com.amazonaws.services.s3.model.AmazonS3Exception; -import com.amazonaws.services.s3.model.MultiObjectDeleteException; -import com.amazonaws.services.s3.model.S3ObjectSummary; -import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import software.amazon.awssdk.awscore.exception.AwsServiceException; +import software.amazon.awssdk.core.exception.AbortedException; +import software.amazon.awssdk.core.exception.SdkException; +import software.amazon.awssdk.core.retry.RetryUtils; +import software.amazon.awssdk.services.s3.model.S3Exception; +import software.amazon.awssdk.services.s3.model.S3Object; import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.apache.commons.lang3.StringUtils; @@ -47,13 +40,11 @@ import org.apache.hadoop.util.functional.RemoteIterators; import org.apache.hadoop.fs.s3a.audit.AuditFailureException; import org.apache.hadoop.fs.s3a.audit.AuditIntegration; import org.apache.hadoop.fs.s3a.auth.delegation.EncryptionSecrets; -import org.apache.hadoop.fs.s3a.auth.IAMInstanceCredentialsProvider; -import org.apache.hadoop.fs.s3a.impl.NetworkBinding; -import org.apache.hadoop.fs.s3a.impl.V2Migration; +import org.apache.hadoop.fs.s3a.impl.MultiObjectDeleteException; import org.apache.hadoop.fs.s3native.S3xLoginHelper; import org.apache.hadoop.net.ConnectTimeoutException; import org.apache.hadoop.security.ProviderUtils; -import org.apache.hadoop.util.VersionInfo; + import org.apache.hadoop.thirdparty.com.google.common.collect.Lists; import org.slf4j.Logger; @@ -74,23 +65,18 @@ import java.net.SocketTimeoutException; import java.net.URI; import java.nio.file.AccessDeniedException; import 
java.util.ArrayList; -import java.util.Arrays; import java.util.Collection; -import java.util.Collections; import java.util.Date; -import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Optional; -import java.util.Set; +import java.util.concurrent.CompletionException; import java.util.concurrent.ExecutionException; -import java.util.concurrent.TimeUnit; import static org.apache.commons.lang3.StringUtils.isEmpty; import static org.apache.hadoop.fs.s3a.Constants.*; import static org.apache.hadoop.fs.s3a.impl.ErrorTranslation.isUnknownBucket; -import static org.apache.hadoop.fs.s3a.impl.InternalConstants.CSE_PADDING_LENGTH; -import static org.apache.hadoop.fs.s3a.impl.MultiObjectDeleteSupport.translateDeleteException; +import static org.apache.hadoop.fs.s3a.impl.InternalConstants.*; import static org.apache.hadoop.io.IOUtils.cleanupWithLogger; import static org.apache.hadoop.util.functional.RemoteIterators.filteringRemoteIterator; @@ -105,10 +91,7 @@ public final class S3AUtils { static final String CONSTRUCTOR_EXCEPTION = "constructor exception"; static final String INSTANTIATION_EXCEPTION = "instantiation exception"; - static final String NOT_AWS_PROVIDER = - "does not implement AWSCredentialsProvider"; - static final String ABSTRACT_PROVIDER = - "is abstract and therefore cannot be created"; + static final String ENDPOINT_KEY = "Endpoint"; /** Filesystem is closed; kept here to keep the errors close. */ @@ -143,21 +126,13 @@ public final class S3AUtils { private static final String BUCKET_PATTERN = FS_S3A_BUCKET_PREFIX + "%s.%s"; - /** - * Error message when the AWS provider list built up contains a forbidden - * entry. - */ - @VisibleForTesting - public static final String E_FORBIDDEN_AWS_PROVIDER - = "AWS provider class cannot be used"; - private S3AUtils() { } /** * Translate an exception raised in an operation into an IOException. * The specific type of IOException depends on the class of - * {@link AmazonClientException} passed in, and any status codes included + * {@link SdkException} passed in, and any status codes included * in the operation. That is: HTTP error codes are examined and can be * used to build a more specific response. * @@ -170,14 +145,14 @@ public final class S3AUtils { */ public static IOException translateException(String operation, Path path, - AmazonClientException exception) { + SdkException exception) { return translateException(operation, path.toString(), exception); } /** * Translate an exception raised in an operation into an IOException. * The specific type of IOException depends on the class of - * {@link AmazonClientException} passed in, and any status codes included + * {@link SdkException} passed in, and any status codes included * in the operation. That is: HTTP error codes are examined and can be * used to build a more specific response. * @param operation operation @@ -188,12 +163,12 @@ public final class S3AUtils { @SuppressWarnings("ThrowableInstanceNeverThrown") public static IOException translateException(@Nullable String operation, String path, - SdkBaseException exception) { + SdkException exception) { String message = String.format("%s%s: %s", operation, StringUtils.isNotEmpty(path)? 
(" on " + path) : "", exception); - if (!(exception instanceof AmazonServiceException)) { + if (!(exception instanceof AwsServiceException)) { Exception innerCause = containsInterruptedException(exception); if (innerCause != null) { // interrupted IO, or a socket exception underneath that class @@ -217,45 +192,44 @@ public final class S3AUtils { return new AWSClientIOException(message, exception); } else { IOException ioe; - AmazonServiceException ase = (AmazonServiceException) exception; + AwsServiceException ase = (AwsServiceException) exception; // this exception is non-null if the service exception is an s3 one - AmazonS3Exception s3Exception = ase instanceof AmazonS3Exception - ? (AmazonS3Exception) ase + S3Exception s3Exception = ase instanceof S3Exception + ? (S3Exception) ase : null; - int status = ase.getStatusCode(); - message = message + ":" + ase.getErrorCode(); + int status = ase.statusCode(); + if (ase.awsErrorDetails() != null) { + message = message + ":" + ase.awsErrorDetails().errorCode(); + } switch (status) { - case 301: - case 307: + case SC_301_MOVED_PERMANENTLY: + case SC_307_TEMPORARY_REDIRECT: if (s3Exception != null) { - if (s3Exception.getAdditionalDetails() != null && - s3Exception.getAdditionalDetails().containsKey(ENDPOINT_KEY)) { - message = String.format("Received permanent redirect response to " - + "endpoint %s. This likely indicates that the S3 endpoint " - + "configured in %s does not match the AWS region containing " - + "the bucket.", - s3Exception.getAdditionalDetails().get(ENDPOINT_KEY), ENDPOINT); - } + message = String.format("Received permanent redirect response to " + + "region %s. This likely indicates that the S3 region " + + "configured in %s does not match the AWS region containing " + "the bucket.", + s3Exception.awsErrorDetails().sdkHttpResponse().headers().get(BUCKET_REGION_HEADER), + AWS_REGION); ioe = new AWSRedirectException(message, s3Exception); } else { ioe = new AWSRedirectException(message, ase); } break; - case 400: + case SC_400_BAD_REQUEST: ioe = new AWSBadRequestException(message, ase); break; // permissions - case 401: - case 403: + case SC_401_UNAUTHORIZED: + case SC_403_FORBIDDEN: ioe = new AccessDeniedException(path, null, message); ioe.initCause(ase); break; // the object isn't there - case 404: + case SC_404_NOT_FOUND: if (isUnknownBucket(ase)) { // this is a missing bucket ioe = new UnknownStoreException(path, message, ase); @@ -268,20 +242,20 @@ public final class S3AUtils { // this also surfaces sometimes and is considered to // be ~ a not found exception. - case 410: + case SC_410_GONE: ioe = new FileNotFoundException(message); ioe.initCause(ase); break; // method not allowed; seen on S3 Select. // treated as a bad request - case 405: + case SC_405_METHOD_NOT_ALLOWED: ioe = new AWSBadRequestException(message, s3Exception); break; // out of range. This may happen if an object is overwritten with // a shorter one while it is being read. - case 416: + case SC_416_RANGE_NOT_SATISFIABLE: ioe = new EOFException(message); ioe.initCause(ase); break; @@ -289,26 +263,26 @@ public final class S3AUtils { // this has surfaced as a "no response from server" message. // so rare we haven't replicated it. // Treating as an idempotent proxy error. 
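The translation switch above reads the HTTP status through AwsServiceException.statusCode() and the error code through awsErrorDetails(), where v1 used getStatusCode()/getErrorCode(). A small sketch of the same accessors from a caller's point of view; the bucket, key and the mapping below are illustrative only, not the S3A translation logic itself:

import java.io.FileNotFoundException;
import java.io.IOException;
import java.nio.file.AccessDeniedException;
import software.amazon.awssdk.awscore.exception.AwsServiceException;
import software.amazon.awssdk.services.s3.S3Client;
import software.amazon.awssdk.services.s3.model.HeadObjectRequest;

public final class StatusCodeSketch {
  /** HEAD an object and map a few of the status codes handled above. */
  public static void probe(S3Client s3, String bucket, String key) throws IOException {
    try {
      s3.headObject(HeadObjectRequest.builder().bucket(bucket).key(key).build());
    } catch (AwsServiceException e) {
      // v2 exposes status and error details through accessor methods
      int status = e.statusCode();
      String errorCode = e.awsErrorDetails() != null
          ? e.awsErrorDetails().errorCode() : "";
      if (status == 403) {
        AccessDeniedException denied = new AccessDeniedException(key, null, errorCode);
        denied.initCause(e);
        throw denied;
      } else if (status == 404) {
        FileNotFoundException notFound =
            new FileNotFoundException(key + ": " + errorCode);
        notFound.initCause(e);
        throw notFound;
      } else {
        throw new IOException("HEAD " + key + " failed: " + errorCode, e);
      }
    }
  }

  private StatusCodeSketch() {
  }
}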
- case 443: - case 444: + case SC_443_NO_RESPONSE: + case SC_444_NO_RESPONSE: ioe = new AWSNoResponseException(message, ase); break; // throttling - case 503: + case SC_503_SERVICE_UNAVAILABLE: ioe = new AWSServiceThrottledException(message, ase); break; // internal error - case 500: + case SC_500_INTERNAL_SERVER_ERROR: ioe = new AWSStatus500Exception(message, ase); break; - case 200: + case SC_200_OK: if (exception instanceof MultiObjectDeleteException) { // failure during a bulk delete - return translateDeleteException(message, - (MultiObjectDeleteException) exception); + return ((MultiObjectDeleteException) exception) + .translateException(message); } // other 200: FALL THROUGH @@ -334,10 +308,35 @@ public final class S3AUtils { public static IOException extractException(String operation, String path, ExecutionException ee) { + return convertExceptionCause(operation, path, ee.getCause()); + } + + /** + * Extract an exception from a failed future, and convert to an IOE. + * @param operation operation which failed + * @param path path operated on (may be null) + * @param ce completion exception + * @return an IOE which can be thrown + */ + public static IOException extractException(String operation, + String path, + CompletionException ce) { + return convertExceptionCause(operation, path, ce.getCause()); + } + + /** + * Convert the cause of a concurrent exception to an IOE. + * @param operation operation which failed + * @param path path operated on (may be null) + * @param cause cause of a concurrent exception + * @return an IOE which can be thrown + */ + private static IOException convertExceptionCause(String operation, + String path, + Throwable cause) { IOException ioe; - Throwable cause = ee.getCause(); - if (cause instanceof AmazonClientException) { - ioe = translateException(operation, path, (AmazonClientException) cause); + if (cause instanceof SdkException) { + ioe = translateException(operation, path, (SdkException) cause); } else if (cause instanceof IOException) { ioe = (IOException) cause; } else { @@ -375,7 +374,7 @@ public final class S3AUtils { * @return an IOE which can be rethrown */ private static InterruptedIOException translateInterruptedException( - SdkBaseException exception, + SdkException exception, final Exception innerCause, String message) { InterruptedIOException ioe; @@ -386,6 +385,7 @@ public final class S3AUtils { if (name.endsWith(".ConnectTimeoutException") || name.endsWith(".ConnectionPoolTimeoutException") || name.endsWith("$ConnectTimeoutException")) { + // TODO: review in v2 // TCP connection http timeout from the shaded or unshaded filenames // com.amazonaws.thirdparty.apache.http.conn.ConnectTimeoutException ioe = new ConnectTimeoutException(message); @@ -409,10 +409,10 @@ public final class S3AUtils { */ public static boolean isThrottleException(Exception ex) { return ex instanceof AWSServiceThrottledException - || (ex instanceof AmazonServiceException - && 503 == ((AmazonServiceException)ex).getStatusCode()) - || (ex instanceof SdkBaseException - && RetryUtils.isThrottlingException((SdkBaseException) ex)); + || (ex instanceof AwsServiceException + && 503 == ((AwsServiceException)ex).statusCode()) + || (ex instanceof SdkException + && RetryUtils.isThrottlingException((SdkException) ex)); } /** @@ -422,7 +422,8 @@ public final class S3AUtils { * @param ex exception * @return true if this is believed to be a sign the connection was broken. 
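The new CompletionException overload of extractException mirrors the existing ExecutionException path, since the v2 async clients surface failures from CompletableFuture.join() as CompletionException. A minimal sketch of how a caller might use it; the operation and path strings are illustrative:

import java.io.IOException;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.CompletionException;
import org.apache.hadoop.fs.s3a.S3AUtils;

public final class FutureUnwrapSketch {
  /** Wait for an async S3 call, converting any SdkException cause to an IOE. */
  public static <T> T awaitS3Result(String operation, String path,
      CompletableFuture<T> future) throws IOException {
    try {
      // join() wraps failures in CompletionException, not ExecutionException
      return future.join();
    } catch (CompletionException e) {
      throw S3AUtils.extractException(operation, path, e);
    }
  }

  private FutureUnwrapSketch() {
  }
}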
*/ - public static boolean isMessageTranslatableToEOF(SdkBaseException ex) { + public static boolean isMessageTranslatableToEOF(SdkException ex) { + // TODO: review in v2 return ex.toString().contains(EOF_MESSAGE_IN_XML_PARSER) || ex.toString().contains(EOF_READ_DIFFERENT_LENGTH); } @@ -432,47 +433,26 @@ public final class S3AUtils { * @param e exception * @return string details */ - public static String stringify(AmazonServiceException e) { + public static String stringify(AwsServiceException e) { StringBuilder builder = new StringBuilder( - String.format("%s: %s error %d: %s; %s%s%n", - e.getErrorType(), - e.getServiceName(), - e.getStatusCode(), - e.getErrorCode(), - e.getErrorMessage(), - (e.isRetryable() ? " (retryable)": "") + String.format("%s error %d: %s; %s%s%n", + e.awsErrorDetails().serviceName(), + e.statusCode(), + e.awsErrorDetails().errorCode(), + e.awsErrorDetails().errorMessage(), + (e.retryable() ? " (retryable)": "") )); - String rawResponseContent = e.getRawResponseContent(); + String rawResponseContent = e.awsErrorDetails().rawResponse().asUtf8String(); if (rawResponseContent != null) { builder.append(rawResponseContent); } return builder.toString(); } - /** - * Get low level details of an amazon exception for logging; multi-line. - * @param e exception - * @return string details - */ - public static String stringify(AmazonS3Exception e) { - // get the low level details of an exception, - StringBuilder builder = new StringBuilder( - stringify((AmazonServiceException) e)); - Map details = e.getAdditionalDetails(); - if (details != null) { - builder.append('\n'); - for (Map.Entry d : details.entrySet()) { - builder.append(d.getKey()).append('=') - .append(d.getValue()).append('\n'); - } - } - return builder.toString(); - } - /** * Create a files status instance from a listing. * @param keyPath path to entry - * @param summary summary from AWS + * @param s3Object s3Object entry * @param blockSize block size to declare. * @param owner owner of the file * @param eTag S3 object eTag or null if unavailable @@ -481,20 +461,20 @@ public final class S3AUtils { * @return a status entry */ public static S3AFileStatus createFileStatus(Path keyPath, - S3ObjectSummary summary, + S3Object s3Object, long blockSize, String owner, String eTag, String versionId, boolean isCSEEnabled) { - long size = summary.getSize(); + long size = s3Object.size(); // check if cse is enabled; strip out constant padding length. if (isCSEEnabled && size >= CSE_PADDING_LENGTH) { size -= CSE_PADDING_LENGTH; } return createFileStatus(keyPath, - objectRepresentsDirectory(summary.getKey()), - size, summary.getLastModified(), blockSize, owner, eTag, versionId); + objectRepresentsDirectory(s3Object.key()), + size, Date.from(s3Object.lastModified()), blockSize, owner, eTag, versionId); } /** @@ -555,115 +535,8 @@ public final class S3AUtils { return date.getTime(); } - /** - * The standard AWS provider list for AWS connections. - */ - @SuppressWarnings("deprecation") - public static final List> - STANDARD_AWS_PROVIDERS = Collections.unmodifiableList( - Arrays.asList( - TemporaryAWSCredentialsProvider.class, - SimpleAWSCredentialsProvider.class, - EnvironmentVariableCredentialsProvider.class, - IAMInstanceCredentialsProvider.class)); - - /** - * Create the AWS credentials from the providers, the URI and - * the key {@link Constants#AWS_CREDENTIALS_PROVIDER} in the configuration. 
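createFileStatus and stringify now take the v2 listing type software.amazon.awssdk.services.s3.model.S3Object, whose accessors are key(), size() and lastModified() (an Instant rather than a Date). A short sketch of walking a v2 listing with those accessors; the bucket and prefix are illustrative:

import java.util.Date;
import software.amazon.awssdk.services.s3.S3Client;
import software.amazon.awssdk.services.s3.model.ListObjectsV2Request;
import software.amazon.awssdk.services.s3.model.ListObjectsV2Response;
import software.amazon.awssdk.services.s3.model.S3Object;

public final class ListingSketch {
  /** Print key, size and last-modified time for each entry under a prefix. */
  public static void listPrefix(S3Client s3, String bucket, String prefix) {
    ListObjectsV2Response response = s3.listObjectsV2(
        ListObjectsV2Request.builder().bucket(bucket).prefix(prefix).build());
    for (S3Object object : response.contents()) {
      // v2 model getters drop the "get" prefix; lastModified() is an Instant
      System.out.printf("%s size=%d modified=%s%n",
          object.key(), object.size(), Date.from(object.lastModified()));
    }
  }

  private ListingSketch() {
  }
}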
- * @param binding Binding URI -may be null - * @param conf filesystem configuration - * @return a credentials provider list - * @throws IOException Problems loading the providers (including reading - * secrets from credential files). - */ - public static AWSCredentialProviderList createAWSCredentialProviderSet( - @Nullable URI binding, - Configuration conf) throws IOException { - // this will reject any user:secret entries in the URI - S3xLoginHelper.rejectSecretsInURIs(binding); - AWSCredentialProviderList credentials = - buildAWSProviderList(binding, - conf, - AWS_CREDENTIALS_PROVIDER, - STANDARD_AWS_PROVIDERS, - new HashSet<>()); - // make sure the logging message strips out any auth details - LOG.debug("For URI {}, using credentials {}", - binding, credentials); - return credentials; - } - - /** - * Load list of AWS credential provider/credential provider factory classes. - * @param conf configuration - * @param key key - * @param defaultValue list of default values - * @return the list of classes, possibly empty - * @throws IOException on a failure to load the list. - */ - public static List> loadAWSProviderClasses(Configuration conf, - String key, - Class... defaultValue) throws IOException { - try { - return Arrays.asList(conf.getClasses(key, defaultValue)); - } catch (RuntimeException e) { - Throwable c = e.getCause() != null ? e.getCause() : e; - throw new IOException("From option " + key + ' ' + c, c); - } - } - - /** - * Load list of AWS credential provider/credential provider factory classes; - * support a forbidden list to prevent loops, mandate full secrets, etc. - * @param binding Binding URI -may be null - * @param conf configuration - * @param key key - * @param forbidden a possibly empty set of forbidden classes. - * @param defaultValues list of default providers. - * @return the list of classes, possibly empty - * @throws IOException on a failure to load the list. - */ - public static AWSCredentialProviderList buildAWSProviderList( - @Nullable final URI binding, - final Configuration conf, - final String key, - final List> defaultValues, - final Set> forbidden) throws IOException { - - // build up the base provider - List> awsClasses = loadAWSProviderClasses(conf, - key, - defaultValues.toArray(new Class[defaultValues.size()])); - // and if the list is empty, switch back to the defaults. - // this is to address the issue that configuration.getClasses() - // doesn't return the default if the config value is just whitespace. - if (awsClasses.isEmpty()) { - awsClasses = defaultValues; - } - // iterate through, checking for blacklists and then instantiating - // each provider - AWSCredentialProviderList providers = new AWSCredentialProviderList(); - for (Class aClass : awsClasses) { - - // List of V1 credential providers that will be migrated with V2 upgrade - if (!Arrays.asList("EnvironmentVariableCredentialsProvider", - "EC2ContainerCredentialsProviderWrapper", "InstanceProfileCredentialsProvider") - .contains(aClass.getSimpleName()) && aClass.getName().contains(AWS_AUTH_CLASS_PREFIX)) { - V2Migration.v1ProviderReferenced(aClass.getName()); - } - - if (forbidden.contains(aClass)) { - throw new IOException(E_FORBIDDEN_AWS_PROVIDER - + " in option " + key + ": " + aClass); - } - providers.add(createAWSCredentialProvider(conf, - aClass, binding)); - } - return providers; - } - - /** - * Create an AWS credential provider from its class by using reflection. The + /*** + * Creates an instance of a class using reflection. 
The
 * class must implement one of the following means of construction, which are
 * attempted in order:
 *
 * <ol>
 * <li>a public constructor accepting java.net.URI and
 *     org.apache.hadoop.conf.Configuration</li>
 * <li>a public constructor accepting
 *     org.apache.hadoop.conf.Configuration</li>
- * <li>a public static method named getInstance that accepts no
+ * <li>a public static method named as per methodName, that accepts no
 *    arguments and returns an instance of
- *    com.amazonaws.auth.AWSCredentialsProvider, or</li>
+ *    specified type, or</li>
 * <li>a public default constructor.</li>
 * </ol>
  • * * + * @param instanceClass Class for which instance is to be created * @param conf configuration - * @param credClass credential class * @param uri URI of the FS - * @return the instantiated class - * @throws IOException on any instantiation failure. + * @param interfaceImplemented interface that this class implements + * @param methodName name of factory method to be invoked + * @param configKey config key under which this class is specified + * @param Instance of class + * @return instance of the specified class + * @throws IOException on any problem */ - private static AWSCredentialsProvider createAWSCredentialProvider( - Configuration conf, - Class credClass, - @Nullable URI uri) throws IOException { - AWSCredentialsProvider credentials = null; - String className = credClass.getName(); - if (!AWSCredentialsProvider.class.isAssignableFrom(credClass)) { - throw new IOException("Class " + credClass + " " + NOT_AWS_PROVIDER); - } - if (Modifier.isAbstract(credClass.getModifiers())) { - throw new IOException("Class " + credClass + " " + ABSTRACT_PROVIDER); - } - LOG.debug("Credential provider class is {}", className); + @SuppressWarnings("unchecked") + public static InstanceT getInstanceFromReflection(Class instanceClass, + Configuration conf, @Nullable URI uri, Class interfaceImplemented, String methodName, + String configKey) throws IOException { + + String className = instanceClass.getName(); try { - // new X(uri, conf) - Constructor cons = getConstructor(credClass, URI.class, - Configuration.class); - if (cons != null) { - credentials = (AWSCredentialsProvider)cons.newInstance(uri, conf); - return credentials; - } - // new X(conf) - cons = getConstructor(credClass, Configuration.class); - if (cons != null) { - credentials = (AWSCredentialsProvider)cons.newInstance(conf); - return credentials; + Constructor cons = null; + if (conf != null) { + // new X(uri, conf) + cons = getConstructor(instanceClass, URI.class, Configuration.class); + + if (cons != null) { + return (InstanceT) cons.newInstance(uri, conf); + } + // new X(conf) + cons = getConstructor(instanceClass, Configuration.class); + if (cons != null) { + return (InstanceT) cons.newInstance(conf); + } } - // X.getInstance() - Method factory = getFactoryMethod(credClass, AWSCredentialsProvider.class, - "getInstance"); + // X.methodName() + Method factory = getFactoryMethod(instanceClass, interfaceImplemented, methodName); if (factory != null) { - credentials = (AWSCredentialsProvider)factory.invoke(null); - return credentials; + return (InstanceT) factory.invoke(null); } // new X() - cons = getConstructor(credClass); + cons = getConstructor(instanceClass); if (cons != null) { - credentials = (AWSCredentialsProvider)cons.newInstance(); - return credentials; + return (InstanceT) cons.newInstance(); } // no supported constructor or factory method found throw new IOException(String.format("%s " + CONSTRUCTOR_EXCEPTION + ". 
A class specified in %s must provide a public constructor " + "of a supported signature, or a public factory method named " - + "getInstance that accepts no arguments.", - className, AWS_CREDENTIALS_PROVIDER)); + + "create that accepts no arguments.", className, configKey)); } catch (InvocationTargetException e) { Throwable targetException = e.getTargetException(); if (targetException == null) { - targetException = e; + targetException = e; } if (targetException instanceof IOException) { throw (IOException) targetException; - } else if (targetException instanceof SdkBaseException) { - throw translateException("Instantiate " + className, "", - (SdkBaseException) targetException); + } else if (targetException instanceof SdkException) { + throw translateException("Instantiate " + className, "", (SdkException) targetException); } else { // supported constructor or factory method found, but the call failed - throw new IOException(className + " " + INSTANTIATION_EXCEPTION - + ": " + targetException, + throw new IOException(className + " " + INSTANTIATION_EXCEPTION + ": " + targetException, targetException); } } catch (ReflectiveOperationException | IllegalArgumentException e) { // supported constructor or factory method found, but the call failed - throw new IOException(className + " " + INSTANTIATION_EXCEPTION - + ": " + e, - e); + throw new IOException(className + " " + INSTANTIATION_EXCEPTION + ": " + e, e); } } + /** * Set a key if the value is non-empty. * @param config config to patch @@ -941,13 +805,13 @@ public final class S3AUtils { /** * String information about a summary entry for debug messages. - * @param summary summary object + * @param s3Object s3Object entry * @return string value */ - public static String stringify(S3ObjectSummary summary) { - StringBuilder builder = new StringBuilder(summary.getKey().length() + 100); - builder.append(summary.getKey()).append(' '); - builder.append("size=").append(summary.getSize()); + public static String stringify(S3Object s3Object) { + StringBuilder builder = new StringBuilder(s3Object.key().length() + 100); + builder.append(s3Object.key()).append(' '); + builder.append("size=").append(s3Object.size()); return builder.toString(); } @@ -1215,213 +1079,6 @@ public final class S3AUtils { } } - /** - * Create a new AWS {@code ClientConfiguration}. - * All clients to AWS services MUST use this for consistent setup - * of connectivity, UA, proxy settings. - * @param conf The Hadoop configuration - * @param bucket Optional bucket to use to look up per-bucket proxy secrets - * @return new AWS client configuration - * @throws IOException problem creating AWS client configuration - * - * @deprecated use {@link #createAwsConf(Configuration, String, String)} - */ - @Deprecated - public static ClientConfiguration createAwsConf(Configuration conf, - String bucket) - throws IOException { - return createAwsConf(conf, bucket, null); - } - - /** - * Create a new AWS {@code ClientConfiguration}. All clients to AWS services - * MUST use this or the equivalents for the specific service for - * consistent setup of connectivity, UA, proxy settings. - * - * @param conf The Hadoop configuration - * @param bucket Optional bucket to use to look up per-bucket proxy secrets - * @param awsServiceIdentifier a string representing the AWS service (S3, - * etc) for which the ClientConfiguration is being created. 
- * @return new AWS client configuration - * @throws IOException problem creating AWS client configuration - */ - public static ClientConfiguration createAwsConf(Configuration conf, - String bucket, String awsServiceIdentifier) - throws IOException { - final ClientConfiguration awsConf = new ClientConfiguration(); - initConnectionSettings(conf, awsConf); - initProxySupport(conf, bucket, awsConf); - initUserAgent(conf, awsConf); - if (StringUtils.isNotEmpty(awsServiceIdentifier)) { - String configKey = null; - switch (awsServiceIdentifier) { - case AWS_SERVICE_IDENTIFIER_S3: - configKey = SIGNING_ALGORITHM_S3; - break; - case AWS_SERVICE_IDENTIFIER_STS: - configKey = SIGNING_ALGORITHM_STS; - break; - default: - // Nothing to do. The original signer override is already setup - } - if (configKey != null) { - String signerOverride = conf.getTrimmed(configKey, ""); - if (!signerOverride.isEmpty()) { - LOG.debug("Signer override for {}} = {}", awsServiceIdentifier, - signerOverride); - awsConf.setSignerOverride(signerOverride); - } - } - } - return awsConf; - } - - /** - * Initializes all AWS SDK settings related to connection management. - * - * @param conf Hadoop configuration - * @param awsConf AWS SDK configuration - * - * @throws IOException if there was an error initializing the protocol - * settings - */ - public static void initConnectionSettings(Configuration conf, - ClientConfiguration awsConf) throws IOException { - awsConf.setMaxConnections(intOption(conf, MAXIMUM_CONNECTIONS, - DEFAULT_MAXIMUM_CONNECTIONS, 1)); - initProtocolSettings(conf, awsConf); - awsConf.setMaxErrorRetry(intOption(conf, MAX_ERROR_RETRIES, - DEFAULT_MAX_ERROR_RETRIES, 0)); - awsConf.setConnectionTimeout(intOption(conf, ESTABLISH_TIMEOUT, - DEFAULT_ESTABLISH_TIMEOUT, 0)); - awsConf.setSocketTimeout(intOption(conf, SOCKET_TIMEOUT, - DEFAULT_SOCKET_TIMEOUT, 0)); - int sockSendBuffer = intOption(conf, SOCKET_SEND_BUFFER, - DEFAULT_SOCKET_SEND_BUFFER, 2048); - int sockRecvBuffer = intOption(conf, SOCKET_RECV_BUFFER, - DEFAULT_SOCKET_RECV_BUFFER, 2048); - long requestTimeoutMillis = conf.getTimeDuration(REQUEST_TIMEOUT, - DEFAULT_REQUEST_TIMEOUT, TimeUnit.SECONDS, TimeUnit.MILLISECONDS); - - if (requestTimeoutMillis > Integer.MAX_VALUE) { - LOG.debug("Request timeout is too high({} ms). Setting to {} ms instead", - requestTimeoutMillis, Integer.MAX_VALUE); - requestTimeoutMillis = Integer.MAX_VALUE; - } - awsConf.setRequestTimeout((int) requestTimeoutMillis); - awsConf.setSocketBufferSizeHints(sockSendBuffer, sockRecvBuffer); - String signerOverride = conf.getTrimmed(SIGNING_ALGORITHM, ""); - if (!signerOverride.isEmpty()) { - LOG.debug("Signer override = {}", signerOverride); - awsConf.setSignerOverride(signerOverride); - } - } - - /** - * Initializes the connection protocol settings when connecting to S3 (e.g. - * either HTTP or HTTPS). If secure connections are enabled, this method - * will load the configured SSL providers. - * - * @param conf Hadoop configuration - * @param awsConf AWS SDK configuration - * - * @throws IOException if there is an error initializing the configured - * {@link javax.net.ssl.SSLSocketFactory} - */ - private static void initProtocolSettings(Configuration conf, - ClientConfiguration awsConf) throws IOException { - boolean secureConnections = conf.getBoolean(SECURE_CONNECTIONS, - DEFAULT_SECURE_CONNECTIONS); - awsConf.setProtocol(secureConnections ? 
Protocol.HTTPS : Protocol.HTTP); - if (secureConnections) { - NetworkBinding.bindSSLChannelMode(conf, awsConf); - } - } - - /** - * Initializes AWS SDK proxy support in the AWS client configuration - * if the S3A settings enable it. - * - * @param conf Hadoop configuration - * @param bucket Optional bucket to use to look up per-bucket proxy secrets - * @param awsConf AWS SDK configuration to update - * @throws IllegalArgumentException if misconfigured - * @throws IOException problem getting username/secret from password source. - */ - public static void initProxySupport(Configuration conf, - String bucket, - ClientConfiguration awsConf) throws IllegalArgumentException, - IOException { - String proxyHost = conf.getTrimmed(PROXY_HOST, ""); - int proxyPort = conf.getInt(PROXY_PORT, -1); - if (!proxyHost.isEmpty()) { - awsConf.setProxyHost(proxyHost); - if (proxyPort >= 0) { - awsConf.setProxyPort(proxyPort); - } else { - if (conf.getBoolean(SECURE_CONNECTIONS, DEFAULT_SECURE_CONNECTIONS)) { - LOG.warn("Proxy host set without port. Using HTTPS default 443"); - awsConf.setProxyPort(443); - } else { - LOG.warn("Proxy host set without port. Using HTTP default 80"); - awsConf.setProxyPort(80); - } - } - final String proxyUsername = lookupPassword(bucket, conf, PROXY_USERNAME, - null, null); - final String proxyPassword = lookupPassword(bucket, conf, PROXY_PASSWORD, - null, null); - if ((proxyUsername == null) != (proxyPassword == null)) { - String msg = "Proxy error: " + PROXY_USERNAME + " or " + - PROXY_PASSWORD + " set without the other."; - LOG.error(msg); - throw new IllegalArgumentException(msg); - } - boolean isProxySecured = conf.getBoolean(PROXY_SECURED, false); - awsConf.setProxyUsername(proxyUsername); - awsConf.setProxyPassword(proxyPassword); - awsConf.setProxyDomain(conf.getTrimmed(PROXY_DOMAIN)); - awsConf.setProxyWorkstation(conf.getTrimmed(PROXY_WORKSTATION)); - awsConf.setProxyProtocol(isProxySecured ? Protocol.HTTPS : Protocol.HTTP); - if (LOG.isDebugEnabled()) { - LOG.debug("Using proxy server {}://{}:{} as user {} with password {} " - + "on domain {} as workstation {}", - awsConf.getProxyProtocol(), - awsConf.getProxyHost(), - awsConf.getProxyPort(), - String.valueOf(awsConf.getProxyUsername()), - awsConf.getProxyPassword(), awsConf.getProxyDomain(), - awsConf.getProxyWorkstation()); - } - } else if (proxyPort >= 0) { - String msg = - "Proxy error: " + PROXY_PORT + " set without " + PROXY_HOST; - LOG.error(msg); - throw new IllegalArgumentException(msg); - } - } - - /** - * Initializes the User-Agent header to send in HTTP requests to AWS - * services. We always include the Hadoop version number. The user also - * may set an optional custom prefix to put in front of the Hadoop version - * number. The AWS SDK internally appends its own information, which seems - * to include the AWS SDK version, OS and JVM version. - * - * @param conf Hadoop configuration - * @param awsConf AWS SDK configuration to update - */ - private static void initUserAgent(Configuration conf, - ClientConfiguration awsConf) { - String userAgent = "Hadoop " + VersionInfo.getVersion(); - String userAgentPrefix = conf.getTrimmed(USER_AGENT_PREFIX, ""); - if (!userAgentPrefix.isEmpty()) { - userAgent = userAgentPrefix + ", " + userAgent; - } - LOG.debug("Using User-Agent: {}", userAgent); - awsConf.setUserAgentPrefix(userAgent); - } - /** * Convert the data of an iterator of {@link S3AFileStatus} to * an array. 
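The removed createAwsConf/initConnectionSettings/initProxySupport/initUserAgent methods built a v1 ClientConfiguration; in the v2 SDK these concerns are split between the HTTP client builder and ClientOverrideConfiguration, and this patch centralizes them in the new AWSClientConfig class (not shown in this hunk). A minimal sketch of the v2 shape only, with illustrative values:

import java.time.Duration;
import software.amazon.awssdk.core.client.config.ClientOverrideConfiguration;
import software.amazon.awssdk.core.client.config.SdkAdvancedClientOption;
import software.amazon.awssdk.http.apache.ApacheHttpClient;
import software.amazon.awssdk.regions.Region;
import software.amazon.awssdk.services.s3.S3Client;

public final class ClientConfigSketch {
  /** Build an S3 client with explicit connection, timeout and UA settings. */
  public static S3Client buildClient() {
    // connection pool size and socket/establish timeouts now live on the
    // HTTP client builder (values here are illustrative, not S3A defaults)
    ApacheHttpClient.Builder httpClient = ApacheHttpClient.builder()
        .maxConnections(96)
        .connectionTimeout(Duration.ofSeconds(5))
        .socketTimeout(Duration.ofSeconds(200));

    // request timeout and User-Agent prefix move to the override configuration
    ClientOverrideConfiguration overrides = ClientOverrideConfiguration.builder()
        .apiCallAttemptTimeout(Duration.ofMinutes(5))
        .putAdvancedOption(SdkAdvancedClientOption.USER_AGENT_PREFIX, "Hadoop")
        .build();

    return S3Client.builder()
        .region(Region.US_EAST_1)
        .httpClientBuilder(httpClient)
        .overrideConfiguration(overrides)
        .build();
  }

  private ClientConfigSketch() {
  }
}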
@@ -1915,4 +1572,15 @@ public final class S3AUtils { } }; + /** + * Format a byte range for a request header. + * See https://www.rfc-editor.org/rfc/rfc9110.html#section-14.1.2 + * + * @param rangeStart the start byte offset + * @param rangeEnd the end byte offset (inclusive) + * @return a formatted byte range + */ + public static String formatRange(long rangeStart, long rangeEnd) { + return String.format("bytes=%d-%d", rangeStart, rangeEnd); + } } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ClientFactory.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ClientFactory.java index 9010f34dc25..fa2c0769d26 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ClientFactory.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ClientFactory.java @@ -24,11 +24,14 @@ import java.net.URI; import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.concurrent.Executor; -import com.amazonaws.auth.AWSCredentialsProvider; -import com.amazonaws.handlers.RequestHandler2; -import com.amazonaws.monitoring.MonitoringListener; -import com.amazonaws.services.s3.AmazonS3; +import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider; +import software.amazon.awssdk.core.interceptor.ExecutionInterceptor; +import software.amazon.awssdk.regions.Region; +import software.amazon.awssdk.services.s3.S3AsyncClient; +import software.amazon.awssdk.services.s3.S3Client; +import software.amazon.awssdk.transfer.s3.S3TransferManager; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; @@ -37,7 +40,7 @@ import org.apache.hadoop.fs.s3a.statistics.StatisticsFromAwsSdk; import static org.apache.hadoop.fs.s3a.Constants.DEFAULT_ENDPOINT; /** - * Factory for creation of {@link AmazonS3} client instances. + * Factory for creation of {@link S3Client} client instances. * Important: HBase's HBoss module implements this interface in its * tests. * Take care when updating this interface to ensure that a client @@ -49,20 +52,44 @@ import static org.apache.hadoop.fs.s3a.Constants.DEFAULT_ENDPOINT; */ @InterfaceAudience.LimitedPrivate("HBoss") @InterfaceStability.Evolving -@Deprecated public interface S3ClientFactory { /** - * Creates a new {@link AmazonS3} client. + * Creates a new {@link S3Client}. + * The client returned supports synchronous operations. For + * asynchronous operations, use + * {@link #createS3AsyncClient(URI, S3ClientCreationParameters)}. * * @param uri S3A file system URI * @param parameters parameter object * @return S3 client - * @throws IOException IO problem + * @throws IOException on any IO problem */ - AmazonS3 createS3Client(URI uri, + S3Client createS3Client(URI uri, S3ClientCreationParameters parameters) throws IOException; + /** + * Creates a new {@link S3AsyncClient}. + * The client returned supports asynchronous operations. For + * synchronous operations, use + * {@link #createS3Client(URI, S3ClientCreationParameters)}. + * + * @param uri S3A file system URI + * @param parameters parameter object + * @return Async S3 client + * @throws IOException on any IO problem + */ + S3AsyncClient createS3AsyncClient(URI uri, + S3ClientCreationParameters parameters) throws IOException; + + /** + * Creates a new {@link S3TransferManager}. + * + * @param s3AsyncClient the async client to be used by the TM. 
+ * @return S3 transfer manager + */ + S3TransferManager createS3TransferManager(S3AsyncClient s3AsyncClient); + /** * Settings for the S3 Client. * Implemented as a class to pass in so that adding @@ -74,7 +101,7 @@ public interface S3ClientFactory { /** * Credentials. */ - private AWSCredentialsProvider credentialSet; + private AwsCredentialsProvider credentialSet; /** * Endpoint. @@ -86,11 +113,6 @@ public interface S3ClientFactory { */ private final Map headers = new HashMap<>(); - /** - * Monitoring listener. - */ - private MonitoringListener monitoringListener; - /** * RequestMetricCollector metrics...if not-null will be wrapped * with an {@code AwsStatisticsCollector} and passed to @@ -109,9 +131,9 @@ public interface S3ClientFactory { private boolean requesterPays; /** - * Request handlers; used for auditing, X-Ray etc. - */ - private List requestHandlers; + * Execution interceptors; used for auditing, X-Ray etc. + * */ + private List executionInterceptors; /** * Suffix to UA. @@ -125,37 +147,38 @@ public interface S3ClientFactory { private URI pathUri; /** - * List of request handlers to include in the chain - * of request execution in the SDK. - * @return the handler list + * Minimum part size for transfer parts. */ - public List getRequestHandlers() { - return requestHandlers; + private long minimumPartSize; + + /** + * Executor that the transfer manager will use to execute background tasks. + */ + private Executor transferManagerExecutor; + + /** + * Region of the S3 bucket. + */ + private Region region; + + + /** + * List of execution interceptors to include in the chain + * of interceptors in the SDK. + * @return the interceptors list + */ + public List getExecutionInterceptors() { + return executionInterceptors; } /** - * List of request handlers. - * @param handlers handler list. + * List of execution interceptors. + * @param interceptors interceptors list. * @return this object */ - public S3ClientCreationParameters withRequestHandlers( - @Nullable final List handlers) { - requestHandlers = handlers; - return this; - } - - public MonitoringListener getMonitoringListener() { - return monitoringListener; - } - - /** - * listener for AWS monitoring events. - * @param listener listener - * @return this object - */ - public S3ClientCreationParameters withMonitoringListener( - @Nullable final MonitoringListener listener) { - monitoringListener = listener; + public S3ClientCreationParameters withExecutionInterceptors( + @Nullable final List interceptors) { + executionInterceptors = interceptors; return this; } @@ -191,7 +214,7 @@ public interface S3ClientFactory { return requesterPays; } - public AWSCredentialsProvider getCredentialSet() { + public AwsCredentialsProvider getCredentialSet() { return credentialSet; } @@ -202,7 +225,7 @@ public interface S3ClientFactory { */ public S3ClientCreationParameters withCredentialSet( - final AWSCredentialsProvider value) { + final AwsCredentialsProvider value) { credentialSet = value; return this; } @@ -294,5 +317,63 @@ public interface S3ClientFactory { pathUri = value; return this; } + + /** + * Get the minimum part size for transfer parts. + * @return part size + */ + public long getMinimumPartSize() { + return minimumPartSize; + } + + /** + * Set the minimum part size for transfer parts. 
+ * @param value new value + * @return the builder + */ + public S3ClientCreationParameters withMinimumPartSize( + final long value) { + minimumPartSize = value; + return this; + } + + /** + * Get the executor that the transfer manager will use to execute background tasks. + * @return part size + */ + public Executor getTransferManagerExecutor() { + return transferManagerExecutor; + } + + /** + * Set the executor that the transfer manager will use to execute background tasks. + * @param value new value + * @return the builder + */ + public S3ClientCreationParameters withTransferManagerExecutor( + final Executor value) { + transferManagerExecutor = value; + return this; + } + + /** + * Set region. + * + * @param value new value + * @return the builder + */ + public S3ClientCreationParameters withRegion( + final Region value) { + region = value; + return this; + } + + /** + * Get the region. + * @return invoker + */ + public Region getRegion() { + return region; + } } } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ListRequest.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ListRequest.java index d51211516f2..c729f3de15f 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ListRequest.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ListRequest.java @@ -18,8 +18,8 @@ package org.apache.hadoop.fs.s3a; -import com.amazonaws.services.s3.model.ListObjectsRequest; -import com.amazonaws.services.s3.model.ListObjectsV2Request; +import software.amazon.awssdk.services.s3.model.ListObjectsRequest; +import software.amazon.awssdk.services.s3.model.ListObjectsV2Request; /** * API version-independent container for S3 List requests. @@ -78,14 +78,14 @@ public final class S3ListRequest { public String toString() { if (isV1()) { return String.format(DESCRIPTION, - v1Request.getBucketName(), v1Request.getPrefix(), - v1Request.getDelimiter(), v1Request.getMaxKeys(), - v1Request.isRequesterPays()); + v1Request.bucket(), v1Request.prefix(), + v1Request.delimiter(), v1Request.maxKeys(), + v1Request.requestPayerAsString()); } else { return String.format(DESCRIPTION, - v2Request.getBucketName(), v2Request.getPrefix(), - v2Request.getDelimiter(), v2Request.getMaxKeys(), - v2Request.isRequesterPays()); + v2Request.bucket(), v2Request.prefix(), + v2Request.delimiter(), v2Request.maxKeys(), + v2Request.requestPayerAsString()); } } } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ListResult.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ListResult.java index 69c42bfe147..c77311211ab 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ListResult.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ListResult.java @@ -22,19 +22,21 @@ import java.util.Collection; import java.util.List; import java.util.stream.Collectors; -import com.amazonaws.services.s3.model.ListObjectsV2Result; -import com.amazonaws.services.s3.model.ObjectListing; -import com.amazonaws.services.s3.model.S3ObjectSummary; +import software.amazon.awssdk.services.s3.model.CommonPrefix; +import software.amazon.awssdk.services.s3.model.ListObjectsResponse; +import software.amazon.awssdk.services.s3.model.ListObjectsV2Response; +import software.amazon.awssdk.services.s3.model.S3Object; + import org.slf4j.Logger; /** * API version-independent container for S3 List responses. 
*/ public class S3ListResult { - private ObjectListing v1Result; - private ListObjectsV2Result v2Result; + private ListObjectsResponse v1Result; + private ListObjectsV2Response v2Result; - protected S3ListResult(ObjectListing v1, ListObjectsV2Result v2) { + protected S3ListResult(ListObjectsResponse v1, ListObjectsV2Response v2) { v1Result = v1; v2Result = v2; } @@ -44,7 +46,7 @@ public class S3ListResult { * @param result v1 result * @return new list result container */ - public static S3ListResult v1(ObjectListing result) { + public static S3ListResult v1(ListObjectsResponse result) { return new S3ListResult(result, null); } @@ -53,7 +55,7 @@ public class S3ListResult { * @param result v2 result * @return new list result container */ - public static S3ListResult v2(ListObjectsV2Result result) { + public static S3ListResult v2(ListObjectsV2Response result) { return new S3ListResult(null, result); } @@ -65,19 +67,19 @@ public class S3ListResult { return v1Result != null; } - public ObjectListing getV1() { + public ListObjectsResponse getV1() { return v1Result; } - public ListObjectsV2Result getV2() { + public ListObjectsV2Response getV2() { return v2Result; } - public List getObjectSummaries() { + public List getS3Objects() { if (isV1()) { - return v1Result.getObjectSummaries(); + return v1Result.contents(); } else { - return v2Result.getObjectSummaries(); + return v2Result.contents(); } } @@ -89,21 +91,21 @@ public class S3ListResult { } } - public List getCommonPrefixes() { + public List getCommonPrefixes() { if (isV1()) { - return v1Result.getCommonPrefixes(); + return v1Result.commonPrefixes(); } else { - return v2Result.getCommonPrefixes(); + return v2Result.commonPrefixes(); } } /** - * Get the list of keys in the object summary. + * Get the list of keys in the list result. * @return a possibly empty list */ - private List objectSummaryKeys() { - return getObjectSummaries().stream() - .map(S3ObjectSummary::getKey) + private List objectKeys() { + return getS3Objects().stream() + .map(S3Object::key) .collect(Collectors.toList()); } @@ -112,9 +114,8 @@ public class S3ListResult { * @return true if the result is non-empty */ public boolean hasPrefixesOrObjects() { - return !(getCommonPrefixes()).isEmpty() - || !getObjectSummaries().isEmpty(); + || !getS3Objects().isEmpty(); } /** @@ -128,7 +129,7 @@ public class S3ListResult { // no children. 
// So the listing must contain the marker entry only as an object, // and prefixes is null - List keys = objectSummaryKeys(); + List keys = objectKeys(); return keys.size() == 1 && keys.contains(dirKey) && getCommonPrefixes().isEmpty(); } @@ -138,15 +139,15 @@ public class S3ListResult { * @param log log to use */ public void logAtDebug(Logger log) { - Collection prefixes = getCommonPrefixes(); - Collection summaries = getObjectSummaries(); + Collection prefixes = getCommonPrefixes(); + Collection s3Objects = getS3Objects(); log.debug("Prefix count = {}; object count={}", - prefixes.size(), summaries.size()); - for (S3ObjectSummary summary : summaries) { - log.debug("Summary: {} {}", summary.getKey(), summary.getSize()); + prefixes.size(), s3Objects.size()); + for (S3Object s3Object : s3Objects) { + log.debug("Summary: {} {}", s3Object.key(), s3Object.size()); } - for (String prefix : prefixes) { - log.debug("Prefix: {}", prefix); + for (CommonPrefix prefix : prefixes) { + log.debug("Prefix: {}", prefix.prefix()); } } } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ObjectAttributes.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ObjectAttributes.java index 0a0454854b2..4fc5b8658b6 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ObjectAttributes.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ObjectAttributes.java @@ -18,8 +18,6 @@ package org.apache.hadoop.fs.s3a; -import com.amazonaws.services.s3.transfer.model.CopyResult; - import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.fs.Path; @@ -74,31 +72,6 @@ public class S3ObjectAttributes { this.len = len; } - /** - * Construct from the result of a copy and those parameters - * which aren't included in an AWS SDK response. - * @param path path - * @param copyResult copy result. - * @param serverSideEncryptionAlgorithm current encryption algorithm - * @param serverSideEncryptionKey any server side encryption key? 
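As a sketch of the renamed list accessors above (the bucket, prefix, S3Client instance "s3" and SLF4J logger "LOG" are assumptions for the example, not values from this change):

    ListObjectsV2Response response = s3.listObjectsV2(
        ListObjectsV2Request.builder()
            .bucket("example-bucket")
            .prefix("dir/")
            .delimiter("/")
            .build());
    S3ListResult listing = S3ListResult.v2(response);
    for (S3Object object : listing.getS3Objects()) {
      LOG.debug("object {} ({} bytes)", object.key(), object.size());
    }
    for (CommonPrefix commonPrefix : listing.getCommonPrefixes()) {
      LOG.debug("prefix {}", commonPrefix.prefix());
    }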
- * @param len object length - */ - public S3ObjectAttributes( - final Path path, - final CopyResult copyResult, - final S3AEncryptionMethods serverSideEncryptionAlgorithm, - final String serverSideEncryptionKey, - final long len) { - this.bucket = copyResult.getDestinationBucketName(); - this.key = copyResult.getDestinationKey(); - this.path = path; - this.serverSideEncryptionAlgorithm = serverSideEncryptionAlgorithm; - this.serverSideEncryptionKey = serverSideEncryptionKey; - this.eTag = copyResult.getETag(); - this.versionId = copyResult.getVersionId(); - this.len = len; - } - public String getBucket() { return bucket; } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/SharedInstanceCredentialProvider.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/SharedInstanceCredentialProvider.java index 6579a2bc3e7..b7de937f513 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/SharedInstanceCredentialProvider.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/SharedInstanceCredentialProvider.java @@ -39,6 +39,5 @@ import org.apache.hadoop.fs.s3a.auth.NoAwsCredentialsException; */ @InterfaceAudience.Public @InterfaceStability.Evolving -@SuppressWarnings("deprecation") public final class SharedInstanceCredentialProvider extends IAMInstanceCredentialsProvider { } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/SimpleAWSCredentialsProvider.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/SimpleAWSCredentialsProvider.java index 657bd437b5b..6da468142b9 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/SimpleAWSCredentialsProvider.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/SimpleAWSCredentialsProvider.java @@ -18,9 +18,9 @@ package org.apache.hadoop.fs.s3a; -import com.amazonaws.auth.AWSCredentials; -import com.amazonaws.auth.AWSCredentialsProvider; -import com.amazonaws.auth.BasicAWSCredentials; +import software.amazon.awssdk.auth.credentials.AwsBasicCredentials; +import software.amazon.awssdk.auth.credentials.AwsCredentials; +import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider; import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import org.apache.commons.lang3.StringUtils; @@ -42,13 +42,10 @@ import static org.apache.hadoop.fs.s3a.S3AUtils.getAWSAccessKeys; * property fs.s3a.aws.credentials.provider. Therefore, changing the class name * would be a backward-incompatible change. * - * @deprecated This class will be replaced by one that implements AWS SDK V2's AwsCredentialProvider - * as part of upgrading S3A to SDK V2. See HADOOP-18073. 
*/ @InterfaceAudience.Public @InterfaceStability.Stable -@Deprecated -public class SimpleAWSCredentialsProvider implements AWSCredentialsProvider { +public class SimpleAWSCredentialsProvider implements AwsCredentialsProvider { public static final String NAME = "org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider"; @@ -80,17 +77,14 @@ public class SimpleAWSCredentialsProvider implements AWSCredentialsProvider { } @Override - public AWSCredentials getCredentials() { + public AwsCredentials resolveCredentials() { if (!StringUtils.isEmpty(accessKey) && !StringUtils.isEmpty(secretKey)) { - return new BasicAWSCredentials(accessKey, secretKey); + return AwsBasicCredentials.create(accessKey, secretKey); } throw new NoAwsCredentialsException("SimpleAWSCredentialsProvider", "No AWS credentials in the Hadoop configuration"); } - @Override - public void refresh() {} - @Override public String toString() { return getClass().getSimpleName(); diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Statistic.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Statistic.java index 651769ff283..ae761fe270f 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Statistic.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Statistic.java @@ -538,6 +538,11 @@ public enum Statistic { StoreStatisticNames.STORE_IO_THROTTLE_RATE, "Rate of S3 request throttling", TYPE_QUANTILE), + STORE_REGION_PROBE( + StoreStatisticNames.STORE_REGION_PROBE, + "Store Region Probe", + TYPE_DURATION + ), /* * Delegation Token Operations. diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/TemporaryAWSCredentialsProvider.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/TemporaryAWSCredentialsProvider.java index db3d0bb1329..5587b11a36f 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/TemporaryAWSCredentialsProvider.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/TemporaryAWSCredentialsProvider.java @@ -21,7 +21,7 @@ package org.apache.hadoop.fs.s3a; import javax.annotation.Nullable; import java.io.IOException; -import com.amazonaws.auth.AWSCredentials; +import software.amazon.awssdk.auth.credentials.AwsCredentials; import java.net.URI; @@ -44,12 +44,9 @@ import org.apache.hadoop.fs.s3a.auth.NoAwsCredentialsException; * This credential provider must not fail in creation because that will * break a chain of credential providers. * - * @deprecated This class will be replaced by one that implements AWS SDK V2's AwsCredentialProvider - * as part of upgrading S3A to SDK V2. See HADOOP-18073. */ @InterfaceAudience.Public @InterfaceStability.Stable -@Deprecated public class TemporaryAWSCredentialsProvider extends AbstractSessionCredentialsProvider { public static final String NAME @@ -92,7 +89,7 @@ public class TemporaryAWSCredentialsProvider extends AbstractSessionCredentialsP * @throws NoAwsCredentialsException the credentials are actually empty. 
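Because these provider class names are part of the public configuration surface, existing settings continue to bind after the move to the V2 interface; an illustrative configuration (all values are placeholders):

    Configuration conf = new Configuration();
    conf.set("fs.s3a.aws.credentials.provider",
        "org.apache.hadoop.fs.s3a.TemporaryAWSCredentialsProvider");
    conf.set("fs.s3a.access.key", "<access key>");        // placeholder
    conf.set("fs.s3a.secret.key", "<secret key>");        // placeholder
    conf.set("fs.s3a.session.token", "<session token>");  // placeholder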
*/ @Override - protected AWSCredentials createCredentials(Configuration config) + protected AwsCredentials createCredentials(Configuration config) throws IOException { MarshalledCredentials creds = MarshalledCredentialBinding.fromFileSystem( getUri(), config); diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/UploadInfo.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/UploadInfo.java index 238cd97ed8f..85ed69e14bc 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/UploadInfo.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/UploadInfo.java @@ -18,22 +18,22 @@ package org.apache.hadoop.fs.s3a; -import com.amazonaws.services.s3.transfer.Upload; +import software.amazon.awssdk.transfer.s3.model.FileUpload; /** * Simple struct that contains information about a S3 upload. */ public class UploadInfo { - private final Upload upload; + private final FileUpload fileUpload; private final long length; - public UploadInfo(Upload upload, long length) { - this.upload = upload; + public UploadInfo(FileUpload upload, long length) { + this.fileUpload = upload; this.length = length; } - public Upload getUpload() { - return upload; + public FileUpload getFileUpload() { + return fileUpload; } public long getLength() { diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/WriteOperationHelper.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/WriteOperationHelper.java index 0bbb8a35f51..1eacc4cbe07 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/WriteOperationHelper.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/WriteOperationHelper.java @@ -19,29 +19,28 @@ package org.apache.hadoop.fs.s3a; import javax.annotation.Nullable; -import java.io.File; import java.io.FileNotFoundException; import java.io.IOException; -import java.io.InputStream; import java.util.List; +import java.util.concurrent.CompletableFuture; import java.util.concurrent.atomic.AtomicInteger; -import com.amazonaws.services.s3.model.AmazonS3Exception; -import com.amazonaws.services.s3.model.CompleteMultipartUploadRequest; -import com.amazonaws.services.s3.model.CompleteMultipartUploadResult; -import com.amazonaws.services.s3.model.InitiateMultipartUploadRequest; -import com.amazonaws.services.s3.model.MultipartUpload; -import com.amazonaws.services.s3.model.ObjectMetadata; -import com.amazonaws.services.s3.model.PartETag; -import com.amazonaws.services.s3.model.PutObjectRequest; -import com.amazonaws.services.s3.model.PutObjectResult; -import com.amazonaws.services.s3.model.SelectObjectContentRequest; -import com.amazonaws.services.s3.model.SelectObjectContentResult; -import com.amazonaws.services.s3.model.UploadPartRequest; -import com.amazonaws.services.s3.model.UploadPartResult; +import software.amazon.awssdk.core.sync.RequestBody; +import software.amazon.awssdk.services.s3.model.CompleteMultipartUploadRequest; +import software.amazon.awssdk.services.s3.model.CompleteMultipartUploadResponse; +import software.amazon.awssdk.services.s3.model.CompletedPart; +import software.amazon.awssdk.services.s3.model.CreateMultipartUploadRequest; +import software.amazon.awssdk.services.s3.model.MultipartUpload; +import software.amazon.awssdk.services.s3.model.PutObjectRequest; +import software.amazon.awssdk.services.s3.model.PutObjectResponse; +import software.amazon.awssdk.services.s3.model.SelectObjectContentRequest; +import 
software.amazon.awssdk.services.s3.model.SelectObjectContentResponseHandler; +import software.amazon.awssdk.services.s3.model.UploadPartRequest; +import software.amazon.awssdk.services.s3.model.UploadPartResponse; import org.slf4j.Logger; import org.slf4j.LoggerFactory; + import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.conf.Configuration; @@ -50,6 +49,8 @@ import org.apache.hadoop.fs.PathIOException; import org.apache.hadoop.fs.s3a.api.RequestFactory; import org.apache.hadoop.fs.s3a.impl.PutObjectOptions; import org.apache.hadoop.fs.s3a.impl.StoreContext; +import org.apache.hadoop.fs.s3a.select.SelectEventStreamPublisher; +import org.apache.hadoop.fs.s3a.select.SelectObjectContentHelper; import org.apache.hadoop.fs.s3a.statistics.S3AStatisticsContext; import org.apache.hadoop.fs.s3a.select.SelectBinding; import org.apache.hadoop.fs.statistics.DurationTrackerFactory; @@ -239,48 +240,24 @@ public class WriteOperationHelper implements WriteOperations { /** * Create a {@link PutObjectRequest} request against the specific key. * @param destKey destination key - * @param inputStream source data. * @param length size, if known. Use -1 for not known * @param options options for the request + * @param isFile is data to be uploaded a file * @return the request */ @Retries.OnceRaw public PutObjectRequest createPutObjectRequest(String destKey, - InputStream inputStream, long length, - final PutObjectOptions options) { + final PutObjectOptions options, + boolean isFile) { + activateAuditSpan(); - ObjectMetadata objectMetadata = newObjectMetadata(length); - return getRequestFactory().newPutObjectRequest( - destKey, - objectMetadata, - options, - inputStream); + + return getRequestFactory() + .newPutObjectRequestBuilder(destKey, options, length, false) + .build(); } - /** - * Create a {@link PutObjectRequest} request to upload a file. - * @param dest key to PUT to. - * @param sourceFile source file - * @param options options for the request - * @return the request - */ - @Retries.OnceRaw - public PutObjectRequest createPutObjectRequest( - String dest, - File sourceFile, - final PutObjectOptions options) { - activateAuditSpan(); - final ObjectMetadata objectMetadata = - newObjectMetadata((int) sourceFile.length()); - - PutObjectRequest putObjectRequest = getRequestFactory(). - newPutObjectRequest(dest, - objectMetadata, - options, - sourceFile); - return putObjectRequest; - } /** * Callback on a successful write. @@ -297,17 +274,6 @@ public class WriteOperationHelper implements WriteOperations { LOG.debug("Write to {} failed", this, ex); } - /** - * Create a new object metadata instance. - * Any standard metadata headers are added here, for example: - * encryption. - * @param length size, if known. 
Use -1 for not known - * @return a new metadata instance - */ - public ObjectMetadata newObjectMetadata(long length) { - return getRequestFactory().newObjectMetadata(length); - } - /** * {@inheritDoc} */ @@ -320,11 +286,11 @@ public class WriteOperationHelper implements WriteOperations { try (AuditSpan span = activateAuditSpan()) { return retry("initiate MultiPartUpload", destKey, true, () -> { - final InitiateMultipartUploadRequest initiateMPURequest = - getRequestFactory().newMultipartUploadRequest( + final CreateMultipartUploadRequest.Builder initiateMPURequestBuilder = + getRequestFactory().newMultipartUploadRequestBuilder( destKey, options); - return owner.initiateMultipartUpload(initiateMPURequest) - .getUploadId(); + return owner.initiateMultipartUpload(initiateMPURequestBuilder.build()) + .uploadId(); }); } } @@ -345,10 +311,10 @@ public class WriteOperationHelper implements WriteOperations { * @throws IOException on problems. */ @Retries.RetryTranslated - private CompleteMultipartUploadResult finalizeMultipartUpload( + private CompleteMultipartUploadResponse finalizeMultipartUpload( String destKey, String uploadId, - List partETags, + List partETags, long length, PutObjectOptions putOptions, Retried retrying) throws IOException { @@ -357,18 +323,18 @@ public class WriteOperationHelper implements WriteOperations { "No upload parts in multipart upload"); } try (AuditSpan span = activateAuditSpan()) { - CompleteMultipartUploadResult uploadResult; + CompleteMultipartUploadResponse uploadResult; uploadResult = invoker.retry("Completing multipart upload", destKey, true, retrying, () -> { - final CompleteMultipartUploadRequest request = - getRequestFactory().newCompleteMultipartUploadRequest( + final CompleteMultipartUploadRequest.Builder requestBuilder = + getRequestFactory().newCompleteMultipartUploadRequestBuilder( destKey, uploadId, partETags); - return writeOperationHelperCallbacks.completeMultipartUpload(request); + return writeOperationHelperCallbacks.completeMultipartUpload(requestBuilder.build()); }); - owner.finishedWrite(destKey, length, uploadResult.getETag(), - uploadResult.getVersionId(), + owner.finishedWrite(destKey, length, uploadResult.eTag(), + uploadResult.versionId(), putOptions); return uploadResult; } @@ -391,10 +357,10 @@ public class WriteOperationHelper implements WriteOperations { * the retry count was exceeded */ @Retries.RetryTranslated - public CompleteMultipartUploadResult completeMPUwithRetries( + public CompleteMultipartUploadResponse completeMPUwithRetries( String destKey, String uploadId, - List partETags, + List partETags, long length, AtomicInteger errorCount, PutObjectOptions putOptions) @@ -452,7 +418,7 @@ public class WriteOperationHelper implements WriteOperations { @Retries.RetryTranslated public void abortMultipartUpload(MultipartUpload upload) throws IOException { - invoker.retry("Aborting multipart commit", upload.getKey(), true, + invoker.retry("Aborting multipart commit", upload.key(), true, withinAuditSpan(getAuditSpan(), () -> owner.abortMultipartUpload(upload))); } @@ -477,7 +443,7 @@ public class WriteOperationHelper implements WriteOperations { abortMultipartUpload(upload); count++; } catch (FileNotFoundException e) { - LOG.debug("Already aborted: {}", upload.getKey(), e); + LOG.debug("Already aborted: {}", upload.key(), e); } } return count; @@ -506,45 +472,31 @@ public class WriteOperationHelper implements WriteOperations { } /** - * Create and initialize a part request of a multipart upload. 
- * Exactly one of: {@code uploadStream} or {@code sourceFile} - * must be specified. - * A subset of the file may be posted, by providing the starting point - * in {@code offset} and a length of block in {@code size} equal to - * or less than the remaining bytes. + * Create and initialize a part request builder of a multipart upload. * The part number must be less than 10000. * Retry policy is once-translated; to much effort * @param destKey destination key of ongoing operation * @param uploadId ID of ongoing upload * @param partNumber current part number of the upload * @param size amount of data - * @param uploadStream source of data to upload - * @param sourceFile optional source file. - * @param offset offset in file to start reading. - * @return the request. + * @return the request builder. * @throws IllegalArgumentException if the parameters are invalid. * @throws PathIOException if the part number is out of range. */ @Override @Retries.OnceTranslated - public UploadPartRequest newUploadPartRequest( + public UploadPartRequest.Builder newUploadPartRequestBuilder( String destKey, String uploadId, int partNumber, - long size, - InputStream uploadStream, - File sourceFile, - Long offset) throws IOException { + long size) throws IOException { return once("upload part request", destKey, withinAuditSpan(getAuditSpan(), () -> - getRequestFactory().newUploadPartRequest( + getRequestFactory().newUploadPartRequestBuilder( destKey, uploadId, partNumber, - size, - uploadStream, - sourceFile, - offset))); + size))); } /** @@ -566,18 +518,20 @@ public class WriteOperationHelper implements WriteOperations { * @param putObjectRequest the request * @param putOptions put object options * @param durationTrackerFactory factory for duration tracking + * @param uploadData data to be uploaded + * @param isFile is data to be uploaded a file + * * @return the upload initiated * @throws IOException on problems */ @Retries.RetryTranslated - public PutObjectResult putObject(PutObjectRequest putObjectRequest, - PutObjectOptions putOptions, + public PutObjectResponse putObject(PutObjectRequest putObjectRequest, + PutObjectOptions putOptions, S3ADataBlocks.BlockUploadData uploadData, boolean isFile, DurationTrackerFactory durationTrackerFactory) throws IOException { - return retry("Writing Object", - putObjectRequest.getKey(), true, - withinAuditSpan(getAuditSpan(), () -> - owner.putObjectDirect(putObjectRequest, putOptions, durationTrackerFactory))); + return retry("Writing Object", putObjectRequest.key(), true, withinAuditSpan(getAuditSpan(), + () -> owner.putObjectDirect(putObjectRequest, putOptions, uploadData, isFile, + durationTrackerFactory))); } /** @@ -613,10 +567,10 @@ public class WriteOperationHelper implements WriteOperations { * the retry count was exceeded */ @Retries.RetryTranslated - public CompleteMultipartUploadResult commitUpload( + public CompleteMultipartUploadResponse commitUpload( String destKey, String uploadId, - List partETags, + List partETags, long length) throws IOException { checkNotNull(uploadId); @@ -635,19 +589,21 @@ public class WriteOperationHelper implements WriteOperations { * Upload part of a multi-partition file. * @param request request * @param durationTrackerFactory duration tracker factory for operation + * @param request the upload part request. + * @param body the request body. * @return the result of the operation. 
* @throws IOException on problems */ @Retries.RetryTranslated - public UploadPartResult uploadPart(UploadPartRequest request, + public UploadPartResponse uploadPart(UploadPartRequest request, RequestBody body, final DurationTrackerFactory durationTrackerFactory) throws IOException { - return retry("upload part #" + request.getPartNumber() - + " upload ID " + request.getUploadId(), - request.getKey(), + return retry("upload part #" + request.partNumber() + + " upload ID " + request.uploadId(), + request.key(), true, withinAuditSpan(getAuditSpan(), - () -> owner.uploadPart(request, durationTrackerFactory))); + () -> owner.uploadPart(request, body, durationTrackerFactory))); } /** @@ -659,15 +615,9 @@ public class WriteOperationHelper implements WriteOperations { return conf; } - /** - * Create a S3 Select request for the destination path. - * This does not build the query. - * @param path pre-qualified path for query - * @return the request - */ - public SelectObjectContentRequest newSelectRequest(Path path) { + public SelectObjectContentRequest.Builder newSelectRequestBuilder(Path path) { try (AuditSpan span = getAuditSpan()) { - return getRequestFactory().newSelectRequest( + return getRequestFactory().newSelectRequestBuilder( storeContext.pathToKey(path)); } } @@ -676,26 +626,27 @@ public class WriteOperationHelper implements WriteOperations { * Execute an S3 Select operation. * On a failure, the request is only logged at debug to avoid the * select exception being printed. - * @param source source for selection + * + * @param source source for selection * @param request Select request to issue. - * @param action the action for use in exception creation + * @param action the action for use in exception creation * @return response * @throws IOException failure */ @Retries.RetryTranslated - public SelectObjectContentResult select( + public SelectEventStreamPublisher select( final Path source, final SelectObjectContentRequest request, final String action) throws IOException { // no setting of span here as the select binding is (statically) created // without any span. - String bucketName = request.getBucketName(); + String bucketName = request.bucket(); Preconditions.checkArgument(bucket.equals(bucketName), "wrong bucket: %s", bucketName); if (LOG.isDebugEnabled()) { LOG.debug("Initiating select call {} {}", - source, request.getExpression()); + source, request.expression()); LOG.debug(SelectBinding.toString(request)); } return invoker.retry( @@ -706,8 +657,9 @@ public class WriteOperationHelper implements WriteOperations { try (DurationInfo ignored = new DurationInfo(LOG, "S3 Select operation")) { try { - return writeOperationHelperCallbacks.selectObjectContent(request); - } catch (AmazonS3Exception e) { + return SelectObjectContentHelper.select( + writeOperationHelperCallbacks, source, request, action); + } catch (Throwable e) { LOG.error("Failure of S3 Select request against {}", source); LOG.debug("S3 Select request against {}:\n{}", @@ -756,16 +708,18 @@ public class WriteOperationHelper implements WriteOperations { /** * Initiates a select request. * @param request selectObjectContent request + * @param t selectObjectContent request handler * @return selectObjectContentResult */ - SelectObjectContentResult selectObjectContent(SelectObjectContentRequest request); + CompletableFuture selectObjectContent(SelectObjectContentRequest request, + SelectObjectContentResponseHandler t); /** * Initiates a complete multi-part upload request. 
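A rough sketch of how these helper methods chain together for a multipart upload with the V2 builders; "writeHelper" is an assumed, already-constructed WriteOperationHelper, the key and data are placeholders, PutObjectOptions.keepingDirs() stands in for whatever options the caller needs, and error handling is omitted:

    byte[] bytes = "example data".getBytes(StandardCharsets.UTF_8);
    String key = "dir/data.bin";
    String uploadId = writeHelper.initiateMultiPartUpload(key, PutObjectOptions.keepingDirs());
    UploadPartRequest partRequest =
        writeHelper.newUploadPartRequestBuilder(key, uploadId, 1, bytes.length).build();
    UploadPartResponse partResponse =
        writeHelper.uploadPart(partRequest, RequestBody.fromBytes(bytes),
            null /* no duration tracking in this sketch */);
    List<CompletedPart> parts = Collections.singletonList(
        CompletedPart.builder().partNumber(1).eTag(partResponse.eTag()).build());
    writeHelper.completeMPUwithRetries(key, uploadId, parts, bytes.length,
        new AtomicInteger(0), PutObjectOptions.keepingDirs());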
* @param request Complete multi-part upload request * @return completeMultipartUploadResult */ - CompleteMultipartUploadResult completeMultipartUpload(CompleteMultipartUploadRequest request); + CompleteMultipartUploadResponse completeMultipartUpload(CompleteMultipartUploadRequest request); } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/WriteOperations.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/WriteOperations.java index 1c3d3688575..a3d3a13043c 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/WriteOperations.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/WriteOperations.java @@ -20,29 +20,28 @@ package org.apache.hadoop.fs.s3a; import javax.annotation.Nullable; import java.io.Closeable; -import java.io.File; import java.io.FileNotFoundException; import java.io.IOException; -import java.io.InputStream; import java.util.List; import java.util.concurrent.atomic.AtomicInteger; -import com.amazonaws.services.s3.model.CompleteMultipartUploadResult; -import com.amazonaws.services.s3.model.MultipartUpload; -import com.amazonaws.services.s3.model.ObjectMetadata; -import com.amazonaws.services.s3.model.PartETag; -import com.amazonaws.services.s3.model.PutObjectRequest; -import com.amazonaws.services.s3.model.PutObjectResult; -import com.amazonaws.services.s3.model.SelectObjectContentRequest; -import com.amazonaws.services.s3.model.SelectObjectContentResult; -import com.amazonaws.services.s3.model.UploadPartRequest; -import com.amazonaws.services.s3.model.UploadPartResult; +import software.amazon.awssdk.core.sync.RequestBody; +import software.amazon.awssdk.services.s3.model.CompleteMultipartUploadResponse; +import software.amazon.awssdk.services.s3.model.CompletedPart; +import software.amazon.awssdk.services.s3.model.MultipartUpload; +import software.amazon.awssdk.services.s3.model.PutObjectRequest; +import software.amazon.awssdk.services.s3.model.PutObjectResponse; +import software.amazon.awssdk.services.s3.model.SelectObjectContentRequest; +import software.amazon.awssdk.services.s3.model.UploadPartRequest; +import software.amazon.awssdk.services.s3.model.UploadPartResponse; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.PathIOException; import org.apache.hadoop.fs.s3a.impl.PutObjectOptions; import org.apache.hadoop.fs.statistics.DurationTrackerFactory; +import org.apache.hadoop.fs.store.audit.AuditSpan; +import org.apache.hadoop.fs.s3a.select.SelectEventStreamPublisher; import org.apache.hadoop.fs.store.audit.AuditSpanSource; import org.apache.hadoop.util.functional.CallableRaisingIOE; @@ -77,27 +76,15 @@ public interface WriteOperations extends AuditSpanSource, Closeable { /** * Create a {@link PutObjectRequest} request against the specific key. * @param destKey destination key - * @param inputStream source data. * @param length size, if known. Use -1 for not known * @param options options for the request + * @param isFile is data to be uploaded a file * @return the request */ PutObjectRequest createPutObjectRequest(String destKey, - InputStream inputStream, long length, - @Nullable PutObjectOptions options); - - /** - * Create a {@link PutObjectRequest} request to upload a file. - * @param dest key to PUT to. 
- * @param sourceFile source file - * @param options options for the request - * @return the request - */ - PutObjectRequest createPutObjectRequest( - String dest, - File sourceFile, - @Nullable PutObjectOptions options); + @Nullable PutObjectOptions options, + boolean isFile); /** * Callback on a successful write. @@ -111,15 +98,6 @@ public interface WriteOperations extends AuditSpanSource, Closeable { */ void writeFailed(Exception ex); - /** - * Create a new object metadata instance. - * Any standard metadata headers are added here, for example: - * encryption. - * @param length size, if known. Use -1 for not known - * @return a new metadata instance - */ - ObjectMetadata newObjectMetadata(long length); - /** * Start the multipart upload process. * Retry policy: retrying, translated. @@ -148,10 +126,10 @@ public interface WriteOperations extends AuditSpanSource, Closeable { * the retry count was exceeded */ @Retries.RetryTranslated - CompleteMultipartUploadResult completeMPUwithRetries( + CompleteMultipartUploadResponse completeMPUwithRetries( String destKey, String uploadId, - List partETags, + List partETags, long length, AtomicInteger errorCount, PutObjectOptions putOptions) @@ -213,31 +191,20 @@ public interface WriteOperations extends AuditSpanSource, Closeable { throws IOException; /** - * Create and initialize a part request of a multipart upload. - * Exactly one of: {@code uploadStream} or {@code sourceFile} - * must be specified. - * A subset of the file may be posted, by providing the starting point - * in {@code offset} and a length of block in {@code size} equal to - * or less than the remaining bytes. + * Create and initialize a part request builder of a multipart upload. * @param destKey destination key of ongoing operation * @param uploadId ID of ongoing upload * @param partNumber current part number of the upload * @param size amount of data - * @param uploadStream source of data to upload - * @param sourceFile optional source file. - * @param offset offset in file to start reading. - * @return the request. + * @return the request builder. * @throws IllegalArgumentException if the parameters are invalid * @throws PathIOException if the part number is out of range. */ - UploadPartRequest newUploadPartRequest( + UploadPartRequest.Builder newUploadPartRequestBuilder( String destKey, String uploadId, int partNumber, - long size, - InputStream uploadStream, - File sourceFile, - Long offset) throws IOException; + long size) throws IOException; /** * PUT an object directly (i.e. not via the transfer manager). 
@@ -246,12 +213,14 @@ public interface WriteOperations extends AuditSpanSource, Closeable { * @param putObjectRequest the request * @param putOptions put object options * @param durationTrackerFactory factory for duration tracking + * @param uploadData data to be uploaded + * @param isFile is data to be uploaded a file * @return the upload initiated * @throws IOException on problems */ @Retries.RetryTranslated - PutObjectResult putObject(PutObjectRequest putObjectRequest, - PutObjectOptions putOptions, + PutObjectResponse putObject(PutObjectRequest putObjectRequest, + PutObjectOptions putOptions, S3ADataBlocks.BlockUploadData uploadData, boolean isFile, DurationTrackerFactory durationTrackerFactory) throws IOException; @@ -279,22 +248,23 @@ public interface WriteOperations extends AuditSpanSource, Closeable { * the retry count was exceeded */ @Retries.RetryTranslated - CompleteMultipartUploadResult commitUpload( + CompleteMultipartUploadResponse commitUpload( String destKey, String uploadId, - List partETags, + List partETags, long length) throws IOException; /** * Upload part of a multi-partition file. - * @param request request + * @param request the upload part request. + * @param body the request body. * @param durationTrackerFactory factory for duration tracking * @return the result of the operation. * @throws IOException on problems */ @Retries.RetryTranslated - UploadPartResult uploadPart(UploadPartRequest request, + UploadPartResponse uploadPart(UploadPartRequest request, RequestBody body, DurationTrackerFactory durationTrackerFactory) throws IOException; @@ -306,25 +276,32 @@ public interface WriteOperations extends AuditSpanSource, Closeable { Configuration getConf(); /** - * Create a S3 Select request for the destination path. + * Get the audit span this object was created with. + * @return the audit span + */ + AuditSpan getAuditSpan(); + + /** + * Create a S3 Select request builder for the destination path. * This does not build the query. * @param path pre-qualified path for query - * @return the request + * @return the request builder */ - SelectObjectContentRequest newSelectRequest(Path path); + SelectObjectContentRequest.Builder newSelectRequestBuilder(Path path); /** * Execute an S3 Select operation. * On a failure, the request is only logged at debug to avoid the * select exception being printed. - * @param source source for selection + * + * @param source source for selection * @param request Select request to issue. - * @param action the action for use in exception creation + * @param action the action for use in exception creation * @return response * @throws IOException failure */ @Retries.RetryTranslated - SelectObjectContentResult select( + SelectEventStreamPublisher select( Path source, SelectObjectContentRequest request, String action) diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/adapter/V1ToV2AwsCredentialProviderAdapter.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/adapter/V1ToV2AwsCredentialProviderAdapter.java new file mode 100644 index 00000000000..242a29fe213 --- /dev/null +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/adapter/V1ToV2AwsCredentialProviderAdapter.java @@ -0,0 +1,74 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.s3a.adapter; + +import com.amazonaws.auth.AWSCredentials; +import com.amazonaws.auth.AWSCredentialsProvider; +import com.amazonaws.auth.AWSSessionCredentials; +import com.amazonaws.auth.AnonymousAWSCredentials; +import software.amazon.awssdk.auth.credentials.AnonymousCredentialsProvider; +import software.amazon.awssdk.auth.credentials.AwsBasicCredentials; +import software.amazon.awssdk.auth.credentials.AwsCredentials; +import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider; +import software.amazon.awssdk.auth.credentials.AwsSessionCredentials; + +/** + * Adapts a V1 {@link AWSCredentialsProvider} to the V2 {@link AwsCredentialsProvider} interface. + * Implements both interfaces so can be used with either the V1 or V2 AWS SDK. + */ +final class V1ToV2AwsCredentialProviderAdapter implements V1V2AwsCredentialProviderAdapter { + + private final AWSCredentialsProvider v1CredentialsProvider; + + private V1ToV2AwsCredentialProviderAdapter(AWSCredentialsProvider v1CredentialsProvider) { + this.v1CredentialsProvider = v1CredentialsProvider; + } + + @Override + public AwsCredentials resolveCredentials() { + AWSCredentials toAdapt = v1CredentialsProvider.getCredentials(); + if (toAdapt instanceof AWSSessionCredentials) { + return AwsSessionCredentials.create(toAdapt.getAWSAccessKeyId(), + toAdapt.getAWSSecretKey(), + ((AWSSessionCredentials) toAdapt).getSessionToken()); + } else if (toAdapt instanceof AnonymousAWSCredentials) { + return AnonymousCredentialsProvider.create().resolveCredentials(); + } else { + return AwsBasicCredentials.create(toAdapt.getAWSAccessKeyId(), toAdapt.getAWSSecretKey()); + } + } + + @Override + public AWSCredentials getCredentials() { + return v1CredentialsProvider.getCredentials(); + } + + @Override + public void refresh() { + v1CredentialsProvider.refresh(); + } + + /** + * @param v1CredentialsProvider V1 credential provider to adapt. + * @return A new instance of the credentials provider adapter. + */ + static V1ToV2AwsCredentialProviderAdapter create(AWSCredentialsProvider v1CredentialsProvider) { + return new V1ToV2AwsCredentialProviderAdapter(v1CredentialsProvider); + } +} diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/adapter/V1V2AwsCredentialProviderAdapter.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/adapter/V1V2AwsCredentialProviderAdapter.java new file mode 100644 index 00000000000..f27166a9ef9 --- /dev/null +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/adapter/V1V2AwsCredentialProviderAdapter.java @@ -0,0 +1,36 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.s3a.adapter; + +import com.amazonaws.auth.AWSCredentialsProvider; +import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider; + +public interface V1V2AwsCredentialProviderAdapter extends AWSCredentialsProvider, + AwsCredentialsProvider { + + /** + * Creates a two-way adapter from a V1 {@link AWSCredentialsProvider} interface. + * + * @param v1CredentialsProvider V1 credentials provider. + * @return Two-way credential provider adapter. + */ + static V1V2AwsCredentialProviderAdapter adapt(AWSCredentialsProvider v1CredentialsProvider) { + return V1ToV2AwsCredentialProviderAdapter.create(v1CredentialsProvider); + } +} diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/adapter/package-info.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/adapter/package-info.java new file mode 100644 index 00000000000..8d03c915e17 --- /dev/null +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/adapter/package-info.java @@ -0,0 +1,27 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Adapter classes for allowing V1 credential providers to be used with SDKV2. 
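A brief sketch of the adapter in use; the V1 EnvironmentVariableCredentialsProvider is only an example source provider, not something this patch wires up:

    com.amazonaws.auth.AWSCredentialsProvider v1Provider =
        new com.amazonaws.auth.EnvironmentVariableCredentialsProvider();
    software.amazon.awssdk.auth.credentials.AwsCredentialsProvider v2Provider =
        V1V2AwsCredentialProviderAdapter.adapt(v1Provider);
    software.amazon.awssdk.auth.credentials.AwsCredentials credentials =
        v2Provider.resolveCredentials();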
+ */ +@InterfaceAudience.Private +@InterfaceStability.Unstable +package org.apache.hadoop.fs.s3a.adapter; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; \ No newline at end of file diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/api/RequestFactory.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/api/RequestFactory.java index 2a4771925f0..c7984aa4e26 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/api/RequestFactory.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/api/RequestFactory.java @@ -19,33 +19,27 @@ package org.apache.hadoop.fs.s3a.api; import javax.annotation.Nullable; -import java.io.File; -import java.io.InputStream; import java.util.List; -import java.util.Optional; -import com.amazonaws.services.s3.model.AbortMultipartUploadRequest; -import com.amazonaws.services.s3.model.CannedAccessControlList; -import com.amazonaws.services.s3.model.CompleteMultipartUploadRequest; -import com.amazonaws.services.s3.model.CopyObjectRequest; -import com.amazonaws.services.s3.model.DeleteObjectRequest; -import com.amazonaws.services.s3.model.DeleteObjectsRequest; -import com.amazonaws.services.s3.model.GetObjectMetadataRequest; -import com.amazonaws.services.s3.model.GetObjectRequest; -import com.amazonaws.services.s3.model.InitiateMultipartUploadRequest; -import com.amazonaws.services.s3.model.ListMultipartUploadsRequest; -import com.amazonaws.services.s3.model.ListNextBatchOfObjectsRequest; -import com.amazonaws.services.s3.model.ListObjectsRequest; -import com.amazonaws.services.s3.model.ListObjectsV2Request; -import com.amazonaws.services.s3.model.ObjectListing; -import com.amazonaws.services.s3.model.ObjectMetadata; -import com.amazonaws.services.s3.model.PartETag; -import com.amazonaws.services.s3.model.PutObjectRequest; -import com.amazonaws.services.s3.model.SSEAwsKeyManagementParams; -import com.amazonaws.services.s3.model.SSECustomerKey; -import com.amazonaws.services.s3.model.SelectObjectContentRequest; -import com.amazonaws.services.s3.model.StorageClass; -import com.amazonaws.services.s3.model.UploadPartRequest; +import software.amazon.awssdk.services.s3.model.AbortMultipartUploadRequest; +import software.amazon.awssdk.services.s3.model.CompleteMultipartUploadRequest; +import software.amazon.awssdk.services.s3.model.CompletedPart; +import software.amazon.awssdk.services.s3.model.CopyObjectRequest; +import software.amazon.awssdk.services.s3.model.CreateMultipartUploadRequest; +import software.amazon.awssdk.services.s3.model.DeleteObjectRequest; +import software.amazon.awssdk.services.s3.model.DeleteObjectsRequest; +import software.amazon.awssdk.services.s3.model.GetObjectRequest; +import software.amazon.awssdk.services.s3.model.HeadBucketRequest; +import software.amazon.awssdk.services.s3.model.HeadObjectRequest; +import software.amazon.awssdk.services.s3.model.HeadObjectResponse; +import software.amazon.awssdk.services.s3.model.ListMultipartUploadsRequest; +import software.amazon.awssdk.services.s3.model.ListObjectsRequest; +import software.amazon.awssdk.services.s3.model.ListObjectsV2Request; +import software.amazon.awssdk.services.s3.model.ObjectIdentifier; +import software.amazon.awssdk.services.s3.model.PutObjectRequest; +import software.amazon.awssdk.services.s3.model.SelectObjectContentRequest; +import software.amazon.awssdk.services.s3.model.StorageClass; +import 
software.amazon.awssdk.services.s3.model.UploadPartRequest; import org.apache.hadoop.fs.PathIOException; import org.apache.hadoop.fs.s3a.S3AEncryptionMethods; @@ -79,22 +73,7 @@ public interface RequestFactory { * Get the canned ACL of this FS. * @return an ACL, if any */ - CannedAccessControlList getCannedACL(); - - /** - * Create the AWS SDK structure used to configure SSE, - * if the encryption secrets contain the information/settings for this. - * @return an optional set of KMS Key settings - */ - Optional generateSSEAwsKeyParams(); - - /** - * Create the SSE-C structure for the AWS SDK, if the encryption secrets - * contain the information/settings for this. - * This will contain a secret extracted from the bucket/configuration. - * @return an optional customer key. - */ - Optional generateSSECustomerKey(); + String getCannedACL(); /** * Get the encryption algorithm of this endpoint. @@ -115,79 +94,58 @@ public interface RequestFactory { StorageClass getStorageClass(); /** - * Create a new object metadata instance. - * Any standard metadata headers are added here, for example: - * encryption. - * - * @param length length of data to set in header; Ignored if negative - * @return a new metadata instance - */ - ObjectMetadata newObjectMetadata(long length); - - /** - * Create a copy request. + * Create a copy request builder. * This includes the work of copying the relevant parts * of the metadata from the source * @param srcKey source * @param dstKey destination * @param srcom source object metadata. - * @return the request + * @return the request builder */ - CopyObjectRequest newCopyObjectRequest(String srcKey, + CopyObjectRequest.Builder newCopyObjectRequestBuilder(String srcKey, String dstKey, - ObjectMetadata srcom); + HeadObjectResponse srcom); + /** - * Create a putObject request. - * Adds the ACL and metadata - * @param key key of object - * @param metadata metadata header - * @param options options for the request - * @param srcfile source file - * @return the request - */ - PutObjectRequest newPutObjectRequest(String key, - ObjectMetadata metadata, PutObjectOptions options, File srcfile); - - /** - * Create a {@link PutObjectRequest} request. + * Create a {@link PutObjectRequest} request builder. * The metadata is assumed to have been configured with the size of the * operation. * @param key key of object - * @param metadata metadata header * @param options options for the request - * @param inputStream source data. - * @return the request + * @param length length of object to be uploaded + * @param isDirectoryMarker true if object to be uploaded is a directory marker + * @return the request builder */ - PutObjectRequest newPutObjectRequest(String key, - ObjectMetadata metadata, + PutObjectRequest.Builder newPutObjectRequestBuilder(String key, PutObjectOptions options, - InputStream inputStream); + long length, + boolean isDirectoryMarker); /** * Create a {@link PutObjectRequest} request for creating * an empty directory. * * @param directory destination directory. - * @return request for a zero byte upload. + * @return request builder for a zero byte upload. */ - PutObjectRequest newDirectoryMarkerRequest(String directory); + PutObjectRequest.Builder newDirectoryMarkerRequest(String directory); /** * List all multipart uploads under a prefix. * @param prefix prefix to list under - * @return the request. + * @return the request builder. 
*/ - ListMultipartUploadsRequest newListMultipartUploadsRequest( + ListMultipartUploadsRequest.Builder newListMultipartUploadsRequestBuilder( @Nullable String prefix); /** * Abort a multipart upload. * @param destKey destination object key * @param uploadId ID of initiated upload - * @return the request. + * @return the request builder. */ - AbortMultipartUploadRequest newAbortMultipartUploadRequest( + AbortMultipartUploadRequest.Builder newAbortMultipartUploadRequestBuilder( String destKey, String uploadId); @@ -195,10 +153,10 @@ public interface RequestFactory { * Start a multipart upload. * @param destKey destination object key * @param options options for the request - * @return the request. + * @return the request builder. * @throws PathIOException if multipart uploads are disabled */ - InitiateMultipartUploadRequest newMultipartUploadRequest( + CreateMultipartUploadRequest.Builder newMultipartUploadRequestBuilder( String destKey, @Nullable PutObjectOptions options) throws PathIOException; @@ -207,107 +165,95 @@ public interface RequestFactory { * @param destKey destination object key * @param uploadId ID of initiated upload * @param partETags ordered list of etags - * @return the request. + * @return the request builder. */ - CompleteMultipartUploadRequest newCompleteMultipartUploadRequest( + CompleteMultipartUploadRequest.Builder newCompleteMultipartUploadRequestBuilder( String destKey, String uploadId, - List partETags); + List partETags); /** - * Create a HEAD request. + * Create a HEAD object request builder. * @param key key, may have trailing / - * @return the request. + * @return the request builder. */ - GetObjectMetadataRequest newGetObjectMetadataRequest(String key); + HeadObjectRequest.Builder newHeadObjectRequestBuilder(String key); /** - * Create a GET request. + * Create a HEAD bucket request builder. + * @param bucket bucket to get metadata for + * @return the request builder. + */ + HeadBucketRequest.Builder newHeadBucketRequestBuilder(String bucket); + + + /** + * Create a GET request builder. * @param key object key - * @return the request. + * @return the request builder. */ - GetObjectRequest newGetObjectRequest(String key); + GetObjectRequest.Builder newGetObjectRequestBuilder(String key); /** - * Create and initialize a part request of a multipart upload. - * Exactly one of: {@code uploadStream} or {@code sourceFile} - * must be specified. - * A subset of the file may be posted, by providing the starting point - * in {@code offset} and a length of block in {@code size} equal to - * or less than the remaining bytes. - * @param destKey destination key of ongoing operation - * @param uploadId ID of ongoing upload - * @param partNumber current part number of the upload - * @param size amount of data - * @param uploadStream source of data to upload - * @param sourceFile optional source file. - * @param offset offset in file to start reading. - * @return the request. + * Create and initialize a part request builder of a multipart upload. + * + * @param destKey destination key of ongoing operation + * @param uploadId ID of ongoing upload + * @param partNumber current part number of the upload + * @param size amount of data + * @return the request builder. * @throws PathIOException if the part number is out of range. 
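As a sketch of the intended calling pattern (requestFactory is an assumed RequestFactory instance; the key and range are placeholders), the factory returns a builder already carrying the store-wide settings it owns, such as encryption, and the caller layers request-specific details on top before building:

    GetObjectRequest getRequest = requestFactory
        .newGetObjectRequestBuilder("dir/file")
        .range("bytes=0-1023")   // caller-specific detail added to the prepared builder
        .build();
    HeadObjectRequest headRequest = requestFactory
        .newHeadObjectRequestBuilder("dir/file")
        .build();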
*/ - UploadPartRequest newUploadPartRequest( + UploadPartRequest.Builder newUploadPartRequestBuilder( String destKey, String uploadId, int partNumber, - long size, - InputStream uploadStream, - File sourceFile, - long offset) throws PathIOException; + long size) throws PathIOException; /** - * Create a S3 Select request for the destination object. + * Create a S3 Select request builder for the destination object. * This does not build the query. * @param key object key - * @return the request + * @return the request builder */ - SelectObjectContentRequest newSelectRequest(String key); + SelectObjectContentRequest.Builder newSelectRequestBuilder(String key); /** - * Create the (legacy) V1 list request. + * Create the (legacy) V1 list request builder. * @param key key to list under * @param delimiter delimiter for keys * @param maxKeys maximum number in a list page. - * @return the request + * @return the request builder. */ - ListObjectsRequest newListObjectsV1Request(String key, + ListObjectsRequest.Builder newListObjectsV1RequestBuilder(String key, String delimiter, int maxKeys); /** - * Create the next V1 page list request, following - * on from the previous response. - * @param prev previous response - * @return the request - */ - - ListNextBatchOfObjectsRequest newListNextBatchOfObjectsRequest( - ObjectListing prev); - - /** - * Create a V2 list request. + * Create a V2 list request builder. * This will be recycled for any subsequent requests. * @param key key to list under * @param delimiter delimiter for keys * @param maxKeys maximum number in a list page. - * @return the request + * @return the request builder. */ - ListObjectsV2Request newListObjectsV2Request(String key, + ListObjectsV2Request.Builder newListObjectsV2RequestBuilder(String key, String delimiter, int maxKeys); /** - * Create a request to delete a single object. + * Create a request builder to delete a single object. * @param key object to delete - * @return the request + * @return the request builder. */ - DeleteObjectRequest newDeleteObjectRequest(String key); + DeleteObjectRequest.Builder newDeleteObjectRequestBuilder(String key); /** - * Bulk delete request. + * Create a request builder to delete objects in bulk. * @param keysToDelete list of keys to delete. - * @return the request + * @return the request builder. */ - DeleteObjectsRequest newBulkDeleteRequest( - List keysToDelete); + DeleteObjectsRequest.Builder newBulkDeleteRequestBuilder( + List keysToDelete); } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/AWSAuditEventCallbacks.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/AWSAuditEventCallbacks.java index 8134d5cea94..712246c2196 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/AWSAuditEventCallbacks.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/AWSAuditEventCallbacks.java @@ -18,15 +18,9 @@ package org.apache.hadoop.fs.s3a.audit; -import com.amazonaws.AmazonWebServiceRequest; -import com.amazonaws.Request; -import com.amazonaws.Response; -import com.amazonaws.SdkBaseException; -import com.amazonaws.handlers.HandlerAfterAttemptContext; -import com.amazonaws.handlers.HandlerBeforeAttemptContext; -import com.amazonaws.http.HttpResponse; +import software.amazon.awssdk.core.SdkRequest; +import software.amazon.awssdk.core.interceptor.ExecutionInterceptor; -import org.apache.hadoop.fs.s3a.Retries; /** * Callbacks for audit spans. 
This is implemented @@ -37,10 +31,10 @@ import org.apache.hadoop.fs.s3a.Retries; * detect this and raise an exception. * * Look at the documentation for - * {@code com.amazonaws.handlers.IRequestHandler2} for details + * {@code ExecutionInterceptor} for details * on the callbacks. */ -public interface AWSAuditEventCallbacks { +public interface AWSAuditEventCallbacks extends ExecutionInterceptor { /** * Return a span ID which must be unique for all spans within @@ -66,95 +60,8 @@ public interface AWSAuditEventCallbacks { * It is not invoked on any AWS requests created in the SDK. * Avoid raising exceptions or talking to any remote service; * this callback is for annotation rather than validation. - * @param request request request. - * @param type of request - * @return the request, possibly modified. + * @param builder the request builder. */ - default T requestCreated(T request) { - return request; - } + default void requestCreated(SdkRequest.Builder builder) {} - /** - * Preflight preparation of AWS request. - * @param request request - * @param type of request - * @return an updated request. - * @throws AuditFailureException for generic audit failures - * @throws SdkBaseException for other reasons. - */ - @Retries.OnceRaw - default T beforeExecution(T request) - throws AuditFailureException, SdkBaseException { - return request; - } - - /** - * Callback after S3 responded to a request. - * @param request request - * @param response response. - * @throws AuditFailureException for generic audit failures - * @throws SdkBaseException for other reasons. - */ - default void afterResponse(Request request, - Response response) - throws AuditFailureException, SdkBaseException { - } - - /** - * Callback after a request resulted in an error. - * @param request request - * @param response response. - * @param exception exception raised. - * @throws AuditFailureException for generic audit failures - * @throws SdkBaseException for other reasons. - */ - default void afterError(Request request, - Response response, - Exception exception) - throws AuditFailureException, SdkBaseException { - } - - /** - * Request before marshalling. - * @param request request - * @return possibly modified request. - */ - default AmazonWebServiceRequest beforeMarshalling( - AmazonWebServiceRequest request) { - return request; - } - - /** - * Request before marshalling. - * @param request request - */ - default void beforeRequest(Request request) { - } - - /** - * Before any attempt is made. - * @param context full context, including the request. - */ - default void beforeAttempt(HandlerBeforeAttemptContext context) { - } - - /** - * After any attempt is made. - * @param context full context, including the request. - */ - default void afterAttempt( - HandlerAfterAttemptContext context) { - } - - /** - * Before unmarshalling the response. - * @param request request made. - * @param httpResponse response received - * @return updated response. 
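For comparison, a minimal V2-style interceptor; this is only a sketch of the ExecutionInterceptor contract that the callbacks now extend, not code from this patch, and such an interceptor would be registered through S3ClientCreationParameters.withExecutionInterceptors():

    import software.amazon.awssdk.core.interceptor.Context;
    import software.amazon.awssdk.core.interceptor.ExecutionAttributes;
    import software.amazon.awssdk.core.interceptor.ExecutionInterceptor;

    public class RequestLoggingInterceptor implements ExecutionInterceptor {
      @Override
      public void beforeExecution(Context.BeforeExecution context,
          ExecutionAttributes executionAttributes) {
        // annotation/logging only; avoid mutating state or failing the request here
        System.out.println("Issuing " + context.request().getClass().getSimpleName());
      }
    }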
- */ - default HttpResponse beforeUnmarshalling( - final Request request, - final HttpResponse httpResponse) { - return httpResponse; - } } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/AWSRequestAnalyzer.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/AWSRequestAnalyzer.java index b4be341c912..8a24a4e14db 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/AWSRequestAnalyzer.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/AWSRequestAnalyzer.java @@ -20,24 +20,23 @@ package org.apache.hadoop.fs.s3a.audit; import java.util.List; -import com.amazonaws.AmazonWebServiceRequest; -import com.amazonaws.services.s3.model.AbortMultipartUploadRequest; -import com.amazonaws.services.s3.model.CompleteMultipartUploadRequest; -import com.amazonaws.services.s3.model.CopyPartRequest; -import com.amazonaws.services.s3.model.DeleteObjectRequest; -import com.amazonaws.services.s3.model.DeleteObjectsRequest; -import com.amazonaws.services.s3.model.GetBucketLocationRequest; -import com.amazonaws.services.s3.model.GetObjectMetadataRequest; -import com.amazonaws.services.s3.model.GetObjectRequest; -import com.amazonaws.services.s3.model.InitiateMultipartUploadRequest; -import com.amazonaws.services.s3.model.ListMultipartUploadsRequest; -import com.amazonaws.services.s3.model.ListNextBatchOfObjectsRequest; -import com.amazonaws.services.s3.model.ListObjectsRequest; -import com.amazonaws.services.s3.model.ListObjectsV2Request; -import com.amazonaws.services.s3.model.ObjectListing; -import com.amazonaws.services.s3.model.PutObjectRequest; -import com.amazonaws.services.s3.model.SelectObjectContentRequest; -import com.amazonaws.services.s3.model.UploadPartRequest; +import software.amazon.awssdk.core.SdkRequest; +import software.amazon.awssdk.services.s3.model.AbortMultipartUploadRequest; +import software.amazon.awssdk.services.s3.model.CompleteMultipartUploadRequest; +import software.amazon.awssdk.services.s3.model.CreateMultipartUploadRequest; +import software.amazon.awssdk.services.s3.model.DeleteObjectRequest; +import software.amazon.awssdk.services.s3.model.DeleteObjectsRequest; +import software.amazon.awssdk.services.s3.model.GetBucketLocationRequest; +import software.amazon.awssdk.services.s3.model.GetObjectRequest; +import software.amazon.awssdk.services.s3.model.HeadObjectRequest; +import software.amazon.awssdk.services.s3.model.ListMultipartUploadsRequest; +import software.amazon.awssdk.services.s3.model.ListObjectsRequest; +import software.amazon.awssdk.services.s3.model.ListObjectsV2Request; +import software.amazon.awssdk.services.s3.model.ObjectIdentifier; +import software.amazon.awssdk.services.s3.model.PutObjectRequest; +import software.amazon.awssdk.services.s3.model.SelectObjectContentRequest; +import software.amazon.awssdk.services.s3.model.UploadPartCopyRequest; +import software.amazon.awssdk.services.s3.model.UploadPartRequest; import static org.apache.hadoop.fs.statistics.StoreStatisticNames.ACTION_HTTP_GET_REQUEST; import static org.apache.hadoop.fs.statistics.StoreStatisticNames.ACTION_HTTP_HEAD_REQUEST; @@ -64,102 +63,85 @@ public class AWSRequestAnalyzer { * read/write and path. * @param request request. * @return information about the request. - * @param type of request. */ - public RequestInfo analyze(T request) { + public RequestInfo analyze(SdkRequest request) { // this is where Scala's case statement would massively // simplify life. 
// Please Keep in Alphabetical Order. if (request instanceof AbortMultipartUploadRequest) { return writing(MULTIPART_UPLOAD_ABORTED, - ((AbortMultipartUploadRequest) request).getKey(), + ((AbortMultipartUploadRequest) request).key(), 0); } else if (request instanceof CompleteMultipartUploadRequest) { CompleteMultipartUploadRequest r = (CompleteMultipartUploadRequest) request; return writing(MULTIPART_UPLOAD_COMPLETED, - r.getKey(), - r.getPartETags().size()); + r.key(), + r.multipartUpload().parts().size()); + } else if (request instanceof CreateMultipartUploadRequest) { + return writing(MULTIPART_UPLOAD_STARTED, + ((CreateMultipartUploadRequest) request).key(), + 0); } else if (request instanceof DeleteObjectRequest) { // DeleteObject: single object return writing(OBJECT_DELETE_REQUEST, - ((DeleteObjectRequest) request).getKey(), + ((DeleteObjectRequest) request).key(), 1); } else if (request instanceof DeleteObjectsRequest) { // DeleteObjects: bulk delete // use first key as the path DeleteObjectsRequest r = (DeleteObjectsRequest) request; - List keys - = r.getKeys(); + List objectIdentifiers + = r.delete().objects(); return writing(OBJECT_BULK_DELETE_REQUEST, - keys.isEmpty() ? null : keys.get(0).getKey(), - keys.size()); + objectIdentifiers.isEmpty() ? null : objectIdentifiers.get(0).key(), + objectIdentifiers.size()); } else if (request instanceof GetBucketLocationRequest) { GetBucketLocationRequest r = (GetBucketLocationRequest) request; return reading(STORE_EXISTS_PROBE, - r.getBucketName(), + r.bucket(), 0); - } else if (request instanceof GetObjectMetadataRequest) { - return reading(ACTION_HTTP_HEAD_REQUEST, - ((GetObjectMetadataRequest) request).getKey(), 0); } else if (request instanceof GetObjectRequest) { GetObjectRequest r = (GetObjectRequest) request; - long[] range = r.getRange(); - long size = range == null - ? 
-1 - : range[1] - range[0]; return reading(ACTION_HTTP_GET_REQUEST, - r.getKey(), - size); - } else if (request instanceof InitiateMultipartUploadRequest) { - return writing(MULTIPART_UPLOAD_STARTED, - ((InitiateMultipartUploadRequest) request).getKey(), - 0); + r.key(), + sizeFromRangeHeader(r.range())); + } else if (request instanceof HeadObjectRequest) { + return reading(ACTION_HTTP_HEAD_REQUEST, + ((HeadObjectRequest) request).key(), 0); } else if (request instanceof ListMultipartUploadsRequest) { ListMultipartUploadsRequest r = (ListMultipartUploadsRequest) request; return reading(MULTIPART_UPLOAD_LIST, - r.getPrefix(), - r.getMaxUploads()); + r.prefix(), + r.maxUploads()); } else if (request instanceof ListObjectsRequest) { ListObjectsRequest r = (ListObjectsRequest) request; return reading(OBJECT_LIST_REQUEST, - r.getPrefix(), - r.getMaxKeys()); - } else if (request instanceof ListNextBatchOfObjectsRequest) { - ListNextBatchOfObjectsRequest r = (ListNextBatchOfObjectsRequest) request; - ObjectListing l = r.getPreviousObjectListing(); - String prefix = ""; - int size = 0; - if (l != null) { - prefix = l.getPrefix(); - size = l.getMaxKeys(); - } - return reading(OBJECT_LIST_REQUEST, - prefix, - size); + r.prefix(), + r.maxKeys()); } else if (request instanceof ListObjectsV2Request) { ListObjectsV2Request r = (ListObjectsV2Request) request; return reading(OBJECT_LIST_REQUEST, - r.getPrefix(), - r.getMaxKeys()); + r.prefix(), + r.maxKeys()); } else if (request instanceof PutObjectRequest) { PutObjectRequest r = (PutObjectRequest) request; return writing(OBJECT_PUT_REQUEST, - r.getKey(), + r.key(), 0); } else if (request instanceof SelectObjectContentRequest) { SelectObjectContentRequest r = (SelectObjectContentRequest) request; return reading(OBJECT_SELECT_REQUESTS, - r.getKey(), + r.key(), 1); } else if (request instanceof UploadPartRequest) { UploadPartRequest r = (UploadPartRequest) request; return writing(MULTIPART_UPLOAD_PART_PUT, - r.getKey(), - r.getPartSize()); + r.key(), + r.contentLength()); } // no explicit support, return classname return writing(request.getClass().getName(), null, 0); @@ -212,7 +194,7 @@ public class AWSRequestAnalyzer { */ public static boolean isRequestNotAlwaysInSpan(final Object request) { - return request instanceof CopyPartRequest + return request instanceof UploadPartCopyRequest || request instanceof CompleteMultipartUploadRequest || request instanceof GetBucketLocationRequest; } @@ -225,9 +207,9 @@ public class AWSRequestAnalyzer { * @return true if the transfer manager creates them. */ public static boolean isRequestMultipartIO(final Object request) { - return request instanceof CopyPartRequest + return request instanceof UploadPartCopyRequest || request instanceof CompleteMultipartUploadRequest - || request instanceof InitiateMultipartUploadRequest + || request instanceof CreateMultipartUploadRequest || request instanceof UploadPartRequest; } @@ -307,4 +289,23 @@ public class AWSRequestAnalyzer { private static long toSafeLong(final Number size) { return size != null ? 
size.longValue() : 0; } + + private static final String BYTES_PREFIX = "bytes="; + + private static Number sizeFromRangeHeader(String rangeHeader) { + if (rangeHeader != null && rangeHeader.startsWith(BYTES_PREFIX)) { + String[] values = rangeHeader + .substring(BYTES_PREFIX.length()) + .split("-"); + if (values.length == 2) { + try { + long start = Long.parseUnsignedLong(values[0]); + long end = Long.parseUnsignedLong(values[1]); + return end - start; + } catch(NumberFormatException e) { + } + } + } + return -1; + } } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/AuditIntegration.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/AuditIntegration.java index c66f45eb309..352acd56092 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/AuditIntegration.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/AuditIntegration.java @@ -23,7 +23,7 @@ import java.lang.reflect.Constructor; import java.lang.reflect.InvocationTargetException; import java.nio.file.AccessDeniedException; -import com.amazonaws.HandlerContextAware; +import software.amazon.awssdk.core.interceptor.ExecutionAttributes; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -34,10 +34,11 @@ import org.apache.hadoop.fs.s3a.audit.impl.LoggingAuditor; import org.apache.hadoop.fs.s3a.audit.impl.NoopAuditManagerS3A; import org.apache.hadoop.fs.statistics.impl.IOStatisticsStore; + import static java.util.Objects.requireNonNull; import static org.apache.hadoop.fs.s3a.audit.S3AAuditConstants.AUDIT_ENABLED; import static org.apache.hadoop.fs.s3a.audit.S3AAuditConstants.AUDIT_ENABLED_DEFAULT; -import static org.apache.hadoop.fs.s3a.audit.impl.S3AInternalAuditConstants.AUDIT_SPAN_HANDLER_CONTEXT; +import static org.apache.hadoop.fs.s3a.audit.impl.S3AInternalAuditConstants.AUDIT_SPAN_EXECUTION_ATTRIBUTE; /** * Support for integrating auditing within the S3A code. @@ -123,25 +124,24 @@ public final class AuditIntegration { } /** - * Get the span from a handler context. - * @param request request - * @param type of request. + * Get the span from the execution attributes. + * @param executionAttributes the execution attributes * @return the span callbacks or null */ - public static AWSAuditEventCallbacks - retrieveAttachedSpan(final T request) { - return request.getHandlerContext(AUDIT_SPAN_HANDLER_CONTEXT); + public static AuditSpanS3A + retrieveAttachedSpan(final ExecutionAttributes executionAttributes) { + return executionAttributes.getAttribute(AUDIT_SPAN_EXECUTION_ATTRIBUTE); } /** - * Attach a span to a handler context. - * @param request request + * Attach a span to the execution attributes. + * @param executionAttributes the execution attributes * @param span span to attach - * @param type of request.
*/ - public static void attachSpanToRequest( - final T request, final AWSAuditEventCallbacks span) { - request.addHandlerContext(AUDIT_SPAN_HANDLER_CONTEXT, span); + public static void attachSpanToRequest( + final ExecutionAttributes executionAttributes, + final AuditSpanS3A span) { + executionAttributes.putAttribute(AUDIT_SPAN_EXECUTION_ATTRIBUTE, span); } /** diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/AuditManagerS3A.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/AuditManagerS3A.java index c1302d57454..bb7f94cfc20 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/AuditManagerS3A.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/AuditManagerS3A.java @@ -21,8 +21,8 @@ package org.apache.hadoop.fs.s3a.audit; import java.io.IOException; import java.util.List; -import com.amazonaws.handlers.RequestHandler2; -import com.amazonaws.services.s3.transfer.internal.TransferStateChangeListener; +import software.amazon.awssdk.core.interceptor.ExecutionInterceptor; +import software.amazon.awssdk.transfer.s3.progress.TransferListener; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.fs.Path; @@ -32,6 +32,7 @@ import org.apache.hadoop.fs.store.audit.ActiveThreadSpanSource; import org.apache.hadoop.fs.store.audit.AuditSpanSource; import org.apache.hadoop.service.Service; + /** * Interface for Audit Managers auditing operations through the * AWS libraries. @@ -56,24 +57,24 @@ public interface AuditManagerS3A extends Service, OperationAuditor getAuditor(); /** - * Create the request handler(s) for this audit service. - * The list returned is mutable; new handlers may be added. - * @return list of handlers for the SDK. + * Create the execution interceptor(s) for this audit service. + * The list returned is mutable; new interceptors may be added. + * @return list of interceptors for the SDK. * @throws IOException failure. */ - List createRequestHandlers() throws IOException; + List createExecutionInterceptors() throws IOException; /** - * Return a transfer state change callback which + * Return a transfer callback which * fixes the active span context to be that in which - * the state change listener was created. + * the transfer listener was created. * This can be used to audit the creation of the multipart * upload initiation request which the transfer manager * makes when a file to be copied is split up. * This must be invoked/used within the active span. - * @return a state change listener. + * @return a transfer listener. */ - TransferStateChangeListener createStateChangeListener(); + TransferListener createTransferListener(); /** * Check for permission to access a path. diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/S3AAuditConstants.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/S3AAuditConstants.java index 1d76833f8ce..55deb0a1989 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/S3AAuditConstants.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/S3AAuditConstants.java @@ -66,13 +66,20 @@ public final class S3AAuditConstants { "org.apache.hadoop.fs.s3a.audit.impl.NoopAuditor"; /** - * List of extra AWS SDK request handlers: {@value}. - * These are added to the SDK request chain after - * any audit service. + * Deprecated list of extra AWS SDK request handlers: {@value}. 
+ * Use {@link #AUDIT_EXECUTION_INTERCEPTORS} instead. */ public static final String AUDIT_REQUEST_HANDLERS = "fs.s3a.audit.request.handlers"; + /** + * List of extra AWS SDK execution interceptors: {@value}. + * These are added to the SDK request chain after + * any audit service. + */ + public static final String AUDIT_EXECUTION_INTERCEPTORS = + "fs.s3a.audit.execution.interceptors"; + /** * Should operations outside spans be rejected? * This is for testing coverage of the span code; if used diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/impl/ActiveAuditManagerS3A.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/impl/ActiveAuditManagerS3A.java index ffd08d37e4a..0a30654df77 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/impl/ActiveAuditManagerS3A.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/impl/ActiveAuditManagerS3A.java @@ -25,16 +25,14 @@ import java.util.ArrayList; import java.util.List; import java.util.concurrent.atomic.AtomicInteger; -import com.amazonaws.AmazonWebServiceRequest; -import com.amazonaws.HandlerContextAware; -import com.amazonaws.Request; -import com.amazonaws.Response; -import com.amazonaws.SdkBaseException; -import com.amazonaws.handlers.HandlerAfterAttemptContext; -import com.amazonaws.handlers.HandlerBeforeAttemptContext; -import com.amazonaws.handlers.RequestHandler2; -import com.amazonaws.http.HttpResponse; -import com.amazonaws.services.s3.transfer.internal.TransferStateChangeListener; +import software.amazon.awssdk.core.SdkRequest; +import software.amazon.awssdk.core.SdkResponse; +import software.amazon.awssdk.core.interceptor.Context; +import software.amazon.awssdk.core.interceptor.ExecutionAttributes; +import software.amazon.awssdk.core.interceptor.ExecutionInterceptor; +import software.amazon.awssdk.http.SdkHttpRequest; +import software.amazon.awssdk.http.SdkHttpResponse; +import software.amazon.awssdk.transfer.s3.progress.TransferListener; import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -56,6 +54,7 @@ import org.apache.hadoop.fs.s3a.audit.AuditSpanS3A; import org.apache.hadoop.fs.s3a.audit.OperationAuditor; import org.apache.hadoop.fs.s3a.audit.OperationAuditorOptions; import org.apache.hadoop.fs.s3a.audit.S3AAuditConstants; +import org.apache.hadoop.fs.s3a.impl.V2Migration; import org.apache.hadoop.fs.store.LogExactlyOnce; import org.apache.hadoop.fs.statistics.impl.IOStatisticsStore; import org.apache.hadoop.service.CompositeService; @@ -66,6 +65,7 @@ import static org.apache.hadoop.fs.s3a.Statistic.AUDIT_FAILURE; import static org.apache.hadoop.fs.s3a.Statistic.AUDIT_REQUEST_EXECUTION; import static org.apache.hadoop.fs.s3a.audit.AuditIntegration.attachSpanToRequest; import static org.apache.hadoop.fs.s3a.audit.AuditIntegration.retrieveAttachedSpan; +import static org.apache.hadoop.fs.s3a.audit.S3AAuditConstants.AUDIT_EXECUTION_INTERCEPTORS; import static org.apache.hadoop.fs.s3a.audit.S3AAuditConstants.AUDIT_REQUEST_HANDLERS; /** @@ -82,10 +82,11 @@ import static org.apache.hadoop.fs.s3a.audit.S3AAuditConstants.AUDIT_REQUEST_HAN * will deactivate the wrapped span and then * switch the active span to the unbounded span. 
* - * The inner class {@link AWSAuditEventCallbacks} is returned - * as a request handler in {@link #createRequestHandlers()}; - * this forwards all requests to the outer {@code ActiveAuditManagerS3A}, - * which then locates the active span and forwards the request. + * This class also implements {@link ExecutionInterceptor} and + * returns itself in {@link #createExecutionInterceptors()}; + * once registered with the S3 client, the implemented methods + * will be called during different parts of an SDK request lifecycle, + * which then locate the active span and forward the request. * If any such invocation raises an {@link AuditFailureException} * then the IOStatistics counter for {@code AUDIT_FAILURE} * is incremented. @@ -390,25 +391,32 @@ public final class ActiveAuditManagerS3A } /** - * Return a request handler for the AWS SDK which + * Return a list of execution interceptors for the AWS SDK which * relays to this class. - * @return a request handler. + * @return a list of execution interceptors. */ @Override - public List createRequestHandlers() + public List createExecutionInterceptors() throws IOException { // wire up the AWS SDK To call back into this class when // preparing to make S3 calls. - List requestHandlers = new ArrayList<>(); - requestHandlers.add(new SdkRequestHandler()); - // now look for any more handlers - final Class[] handlers = getConfig().getClasses(AUDIT_REQUEST_HANDLERS); + List executionInterceptors = new ArrayList<>(); + executionInterceptors.add(this); + + final String handlers = getConfig().get(AUDIT_REQUEST_HANDLERS); if (handlers != null) { - for (Class handler : handlers) { + V2Migration.v1RequestHandlersUsed(); + } + + // TODO: should we remove this and use Global/Service interceptors, see: + // https://sdk.amazonaws.com/java/api/latest/software/amazon/awssdk/core/interceptor/ExecutionInterceptor.html + final Class[] interceptors = getConfig().getClasses(AUDIT_EXECUTION_INTERCEPTORS); + if (interceptors != null) { + for (Class handler : interceptors) { try { Constructor ctor = handler.getConstructor(); - requestHandlers.add((RequestHandler2)ctor.newInstance()); + executionInterceptors.add((ExecutionInterceptor) ctor.newInstance()); } catch (ExceptionInInitializerError e) { throw FutureIO.unwrapInnerException(e); } catch (Exception e) { @@ -416,13 +424,18 @@ public final class ActiveAuditManagerS3A } } } - return requestHandlers; + return executionInterceptors; } @Override - public TransferStateChangeListener createStateChangeListener() { + public TransferListener createTransferListener() { final WrappingAuditSpan span = activeSpan(); - return (transfer, state) -> switchToActiveSpan(span); + return new TransferListener() { + @Override + public void transferInitiated(Context.TransferInitiated context) { + switchToActiveSpan(span); + } + }; } @Override @@ -434,20 +447,18 @@ public final class ActiveAuditManagerS3A } /** - * Attach a reference to the active thread span, then - * invoke the same callback on that active thread. + * Audit the creation of a request and retrieve + * a reference to the active thread span. 
*/ @Override - public T requestCreated( - final T request) { + public void requestCreated(final SdkRequest.Builder builder) { AuditSpanS3A span = getActiveAuditSpan(); if (LOG.isTraceEnabled()) { LOG.trace("Created Request {} in span {}", - analyzer.analyze(request), span); + analyzer.analyze(builder.build()), span); } - attachSpanToRequest(request, span); try { - return span.requestCreated(request); + span.requestCreated(builder); } catch (AuditFailureException e) { ioStatisticsStore.incrementCounter(AUDIT_FAILURE.getSymbol()); throw e; @@ -463,14 +474,13 @@ public final class ActiveAuditManagerS3A * {@inheritDoc} */ @Override - public T beforeExecution( - final T request) { + public void beforeExecution(Context.BeforeExecution context, + ExecutionAttributes executionAttributes) { ioStatisticsStore.incrementCounter(AUDIT_REQUEST_EXECUTION.getSymbol()); - - // identify the span and invoke the callback + AuditSpanS3A span = getActiveAuditSpan(); + attachSpanToRequest(executionAttributes, span); try { - return extractAndActivateSpanFromRequest(request) - .beforeExecution(request); + span.beforeExecution(context, executionAttributes); } catch (AuditFailureException e) { ioStatisticsStore.incrementCounter(AUDIT_FAILURE.getSymbol()); throw e; @@ -479,16 +489,14 @@ public final class ActiveAuditManagerS3A /** * Forward to active span. - * @param request request - * @param response response. + * {@inheritDoc} */ @Override - public void afterResponse(final Request request, - final Response response) - throws AuditFailureException, SdkBaseException { + public void afterExecution(Context.AfterExecution context, + ExecutionAttributes executionAttributes) { try { - extractAndActivateSpanFromRequest(request) - .afterResponse(request, response); + extractAndActivateSpanFromRequest(context.request(), executionAttributes) + .afterExecution(context, executionAttributes); } catch (AuditFailureException e) { ioStatisticsStore.incrementCounter(AUDIT_FAILURE.getSymbol()); throw e; @@ -496,18 +504,19 @@ public final class ActiveAuditManagerS3A } /** - * Get the active span from the handler context, + * Get the active span from the execution attributes, * falling back to the active thread span if there - * is nothing in the context. - * Provided the span is a wrapped span, the + * is nothing in the attributes. + * Provided the span is a wrapped span, the span is + * activated. * @param request request - * @param type of request. - * @return the callbacks + * @param executionAttributes the execution attributes + * @return the active span */ - private AWSAuditEventCallbacks - extractAndActivateSpanFromRequest(final T request) { - AWSAuditEventCallbacks span; - span = retrieveAttachedSpan(request); + private AuditSpanS3A extractAndActivateSpanFromRequest( + final SdkRequest request, + final ExecutionAttributes executionAttributes) { + AuditSpanS3A span = retrieveAttachedSpan(executionAttributes); if (span == null) { // no span is attached. Not unusual for the copy operations, // or for calls to GetBucketLocation made by the AWS client @@ -530,18 +539,16 @@ public final class ActiveAuditManagerS3A /** * Forward to active span. - * @param request request - * @param response response. - * @param exception exception raised. 
+ * @param context execution context + * @param executionAttributes the execution attributes + * {@inheritDoc} */ @Override - public void afterError(final Request request, - final Response response, - final Exception exception) - throws AuditFailureException, SdkBaseException { + public void onExecutionFailure(Context.FailedExecution context, + ExecutionAttributes executionAttributes) { try { - extractAndActivateSpanFromRequest(request) - .afterError(request, response, exception); + extractAndActivateSpanFromRequest(context.request(), executionAttributes).onExecutionFailure( + context, executionAttributes); } catch (AuditFailureException e) { ioStatisticsStore.incrementCounter(AUDIT_FAILURE.getSymbol()); throw e; @@ -549,11 +556,12 @@ public final class ActiveAuditManagerS3A } @Override - public AmazonWebServiceRequest beforeMarshalling( - final AmazonWebServiceRequest request) { + public SdkRequest modifyRequest(Context.ModifyRequest context, + ExecutionAttributes executionAttributes) { try { - return extractAndActivateSpanFromRequest(request) - .beforeMarshalling(request); + return extractAndActivateSpanFromRequest(context.request(), + executionAttributes) + .modifyRequest(context, executionAttributes); } catch (AuditFailureException e) { ioStatisticsStore.incrementCounter(AUDIT_FAILURE.getSymbol()); throw e; @@ -561,10 +569,12 @@ public final class ActiveAuditManagerS3A } @Override - public void beforeRequest(final Request request) { + public void beforeMarshalling(Context.BeforeMarshalling context, + ExecutionAttributes executionAttributes) { try { - extractAndActivateSpanFromRequest(request) - .beforeRequest(request); + extractAndActivateSpanFromRequest(context.request(), + executionAttributes) + .beforeMarshalling(context, executionAttributes); } catch (AuditFailureException e) { ioStatisticsStore.incrementCounter(AUDIT_FAILURE.getSymbol()); throw e; @@ -572,10 +582,12 @@ public final class ActiveAuditManagerS3A } @Override - public void beforeAttempt(final HandlerBeforeAttemptContext context) { + public void afterMarshalling(Context.AfterMarshalling context, + ExecutionAttributes executionAttributes) { try { - extractAndActivateSpanFromRequest(context.getRequest()) - .beforeAttempt(context); + extractAndActivateSpanFromRequest(context.request(), + executionAttributes) + .afterMarshalling(context, executionAttributes); } catch (AuditFailureException e) { ioStatisticsStore.incrementCounter(AUDIT_FAILURE.getSymbol()); throw e; @@ -583,10 +595,12 @@ public final class ActiveAuditManagerS3A } @Override - public void afterAttempt(final HandlerAfterAttemptContext context) { + public SdkHttpRequest modifyHttpRequest(Context.ModifyHttpRequest context, + ExecutionAttributes executionAttributes) { try { - extractAndActivateSpanFromRequest(context.getRequest()) - .afterAttempt(context); + return extractAndActivateSpanFromRequest(context.request(), + executionAttributes) + .modifyHttpRequest(context, executionAttributes); } catch (AuditFailureException e) { ioStatisticsStore.incrementCounter(AUDIT_FAILURE.getSymbol()); throw e; @@ -594,73 +608,80 @@ public final class ActiveAuditManagerS3A } @Override - public HttpResponse beforeUnmarshalling(final Request request, - final HttpResponse httpResponse) { + public void beforeTransmission(Context.BeforeTransmission context, + ExecutionAttributes executionAttributes) { try { - extractAndActivateSpanFromRequest(request.getOriginalRequest()) - .beforeUnmarshalling(request, httpResponse); + extractAndActivateSpanFromRequest(context.request(), + 
executionAttributes) + .beforeTransmission(context, executionAttributes); } catch (AuditFailureException e) { ioStatisticsStore.incrementCounter(AUDIT_FAILURE.getSymbol()); throw e; } - return httpResponse; } - /** - * Callbacks from the AWS SDK; all forward to the ActiveAuditManagerS3A. - * We need a separate class because the SDK requires the handler list - * to be list of {@code RequestHandler2} instances. - */ - private class SdkRequestHandler extends RequestHandler2 { - - @Override - public AmazonWebServiceRequest beforeExecution( - final AmazonWebServiceRequest request) { - return ActiveAuditManagerS3A.this.beforeExecution(request); + @Override + public void afterTransmission(Context.AfterTransmission context, + ExecutionAttributes executionAttributes) { + try { + extractAndActivateSpanFromRequest(context.request(), + executionAttributes) + .afterTransmission(context, executionAttributes); + } catch (AuditFailureException e) { + ioStatisticsStore.incrementCounter(AUDIT_FAILURE.getSymbol()); + throw e; } + } - @Override - public void afterResponse(final Request request, - final Response response) { - ActiveAuditManagerS3A.this.afterResponse(request, response); + @Override + public SdkHttpResponse modifyHttpResponse(Context.ModifyHttpResponse context, + ExecutionAttributes executionAttributes) { + try { + return extractAndActivateSpanFromRequest(context.request(), + executionAttributes) + .modifyHttpResponse(context, executionAttributes); + } catch (AuditFailureException e) { + ioStatisticsStore.incrementCounter(AUDIT_FAILURE.getSymbol()); + throw e; } + } - @Override - public void afterError(final Request request, - final Response response, - final Exception e) { - ActiveAuditManagerS3A.this.afterError(request, response, e); + @Override + public void beforeUnmarshalling(Context.BeforeUnmarshalling context, + ExecutionAttributes executionAttributes) { + try { + extractAndActivateSpanFromRequest(context.request(), + executionAttributes) + .beforeUnmarshalling(context, executionAttributes); + } catch (AuditFailureException e) { + ioStatisticsStore.incrementCounter(AUDIT_FAILURE.getSymbol()); + throw e; } + } - @Override - public AmazonWebServiceRequest beforeMarshalling( - final AmazonWebServiceRequest request) { - return ActiveAuditManagerS3A.this.beforeMarshalling(request); + @Override + public void afterUnmarshalling(Context.AfterUnmarshalling context, + ExecutionAttributes executionAttributes) { + try { + extractAndActivateSpanFromRequest(context.request(), + executionAttributes) + .afterUnmarshalling(context, executionAttributes); + } catch (AuditFailureException e) { + ioStatisticsStore.incrementCounter(AUDIT_FAILURE.getSymbol()); + throw e; } + } - @Override - public void beforeRequest(final Request request) { - ActiveAuditManagerS3A.this.beforeRequest(request); - } - - @Override - public void beforeAttempt( - final HandlerBeforeAttemptContext context) { - ActiveAuditManagerS3A.this.beforeAttempt(context); - } - - @Override - public HttpResponse beforeUnmarshalling( - final Request request, - final HttpResponse httpResponse) { - return ActiveAuditManagerS3A.this.beforeUnmarshalling(request, - httpResponse); - } - - @Override - public void afterAttempt( - final HandlerAfterAttemptContext context) { - ActiveAuditManagerS3A.this.afterAttempt(context); + @Override + public SdkResponse modifyResponse(Context.ModifyResponse context, + ExecutionAttributes executionAttributes) { + try { + return extractAndActivateSpanFromRequest(context.request(), + executionAttributes) + 
.modifyResponse(context, executionAttributes); + } catch (AuditFailureException e) { + ioStatisticsStore.incrementCounter(AUDIT_FAILURE.getSymbol()); + throw e; } } @@ -748,9 +769,8 @@ public final class ActiveAuditManagerS3A * {@inheritDoc} */ @Override - public T requestCreated( - final T request) { - return span.requestCreated(request); + public void requestCreated(final SdkRequest.Builder builder) { + span.requestCreated(builder); } /** @@ -774,79 +794,132 @@ public final class ActiveAuditManagerS3A /** * Forward to the inner span. - * @param request request - * @param type of request - * @return an updated request. + * {@inheritDoc} */ @Override - public T beforeExecution( - final T request) { - return span.beforeExecution(request); + public void beforeExecution(Context.BeforeExecution context, + ExecutionAttributes executionAttributes) { + span.beforeExecution(context, executionAttributes); } /** * Forward to the inner span. - * @param request request - * @param response response. + * {@inheritDoc} */ @Override - public void afterResponse(final Request request, - final Response response) { - span.afterResponse(request, response); + public void afterExecution(Context.AfterExecution context, + ExecutionAttributes executionAttributes) { + span.afterExecution(context, executionAttributes); } /** * Forward to the inner span. - * @param request request - * @param response response. - * @param exception exception raised. + * {@inheritDoc} */ @Override - public void afterError(final Request request, - final Response response, - final Exception exception) { - span.afterError(request, response, exception); + public void onExecutionFailure(Context.FailedExecution context, + ExecutionAttributes executionAttributes) { + span.onExecutionFailure(context, executionAttributes); } /** * Forward to the inner span. - * @param request request - * @return request to marshall + * {@inheritDoc} */ @Override - public AmazonWebServiceRequest beforeMarshalling( - final AmazonWebServiceRequest request) { - return span.beforeMarshalling(request); + public void beforeMarshalling(Context.BeforeMarshalling context, + ExecutionAttributes executionAttributes) { + span.beforeMarshalling(context, executionAttributes); } /** * Forward to the inner span. - * @param request request + * {@inheritDoc} */ @Override - public void beforeRequest(final Request request) { - span.beforeRequest(request); + public SdkRequest modifyRequest(Context.ModifyRequest context, + ExecutionAttributes executionAttributes) { + return span.modifyRequest(context, executionAttributes); } /** * Forward to the inner span. - * @param context full context, including the request. + * {@inheritDoc} */ @Override - public void beforeAttempt( - final HandlerBeforeAttemptContext context) { - span.beforeAttempt(context); + public void afterMarshalling(Context.AfterMarshalling context, + ExecutionAttributes executionAttributes) { + span.afterMarshalling(context, executionAttributes); } /** * Forward to the inner span. - * - * @param context full context, including the request. + * {@inheritDoc} */ @Override - public void afterAttempt( - final HandlerAfterAttemptContext context) { - span.afterAttempt(context); + public SdkHttpRequest modifyHttpRequest(Context.ModifyHttpRequest context, + ExecutionAttributes executionAttributes) { + return span.modifyHttpRequest(context, executionAttributes); + } + + /** + * Forward to the inner span. 
+ * {@inheritDoc} + */ + @Override + public void beforeTransmission(Context.BeforeTransmission context, + ExecutionAttributes executionAttributes) { + span.beforeTransmission(context, executionAttributes); + } + + /** + * Forward to the inner span. + * {@inheritDoc} + */ + @Override + public void afterTransmission(Context.AfterTransmission context, + ExecutionAttributes executionAttributes) { + span.afterTransmission(context, executionAttributes); + } + + /** + * Forward to the inner span. + * {@inheritDoc} + */ + @Override + public SdkHttpResponse modifyHttpResponse(Context.ModifyHttpResponse context, + ExecutionAttributes executionAttributes) { + return span.modifyHttpResponse(context, executionAttributes); + } + + /** + * Forward to the inner span. + * {@inheritDoc} + */ + @Override + public void beforeUnmarshalling(Context.BeforeUnmarshalling context, + ExecutionAttributes executionAttributes) { + span.beforeUnmarshalling(context, executionAttributes); + } + + /** + * Forward to the inner span. + * {@inheritDoc} + */ + @Override + public void afterUnmarshalling(Context.AfterUnmarshalling context, + ExecutionAttributes executionAttributes) { + span.afterUnmarshalling(context, executionAttributes); + } + + /** + * Forward to the inner span. + * {@inheritDoc} + */ + @Override + public SdkResponse modifyResponse(Context.ModifyResponse context, + ExecutionAttributes executionAttributes) { + return span.modifyResponse(context, executionAttributes); } @Override @@ -859,5 +932,4 @@ public final class ActiveAuditManagerS3A return sb.toString(); } } - } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/impl/LoggingAuditor.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/impl/LoggingAuditor.java index fcf2015487c..3a2d9d7f823 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/impl/LoggingAuditor.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/impl/LoggingAuditor.java @@ -24,10 +24,14 @@ import java.util.Collection; import java.util.HashMap; import java.util.Map; -import com.amazonaws.AmazonWebServiceRequest; -import com.amazonaws.services.s3.model.DeleteObjectRequest; -import com.amazonaws.services.s3.model.DeleteObjectsRequest; -import com.amazonaws.services.s3.model.GetObjectRequest; +import software.amazon.awssdk.awscore.AwsExecutionAttribute; +import software.amazon.awssdk.core.SdkRequest; +import software.amazon.awssdk.core.interceptor.Context; +import software.amazon.awssdk.core.interceptor.ExecutionAttributes; +import software.amazon.awssdk.http.SdkHttpRequest; +import software.amazon.awssdk.services.s3.model.DeleteObjectRequest; +import software.amazon.awssdk.services.s3.model.DeleteObjectsRequest; + import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -258,21 +262,22 @@ public class LoggingAuditor /** * Attach Range of data for GetObject Request. 
- * @param request given get object request + * @param request the sdk request to be modified + * @param executionAttributes execution attributes for this request */ - private void attachRangeFromRequest(AmazonWebServiceRequest request) { - if (request instanceof GetObjectRequest) { - long[] rangeValue = ((GetObjectRequest) request).getRange(); - if (rangeValue == null || rangeValue.length == 0) { - return; + private void attachRangeFromRequest(SdkHttpRequest request, + ExecutionAttributes executionAttributes) { + + String operationName = executionAttributes.getAttribute(AwsExecutionAttribute.OPERATION_NAME); + + if (operationName != null && operationName.equals("GetObject")) { + if (request.headers() != null && request.headers().get("Range") != null) { + String[] rangeHeader = request.headers().get("Range").get(0).split("="); + // only set header if range unit is bytes + if (rangeHeader[0].equals("bytes")) { + referrer.set(AuditConstants.PARAM_RANGE, rangeHeader[1]); + } } - if (rangeValue.length != 2) { - WARN_INCORRECT_RANGE.warn("Expected range to contain 0 or 2 elements." - + " Got {} elements. Ignoring.", rangeValue.length); - return; - } - String combinedRangeValue = String.format("%d-%d", rangeValue[0], rangeValue[1]); - referrer.set(AuditConstants.PARAM_RANGE, combinedRangeValue); } } @@ -346,64 +351,78 @@ public class LoggingAuditor referrer.set(key, value); } + + /** - * Before execution, the logging auditor always builds - * the referrer header, saves to the outer class - * (where {@link #getLastHeader()} can retrieve it, + * Before transmitting a request, the logging auditor + * always builds the referrer header, saves to the outer + * class (where {@link #getLastHeader()} can retrieve it, * and logs at debug. * If configured to add the header to the S3 logs, it will * be set as the HTTP referrer. - * @param request request - * @param type of request. - * @return the request with any extra headers. + * @param context The current state of the execution, + * including the SDK and current HTTP request. + * @param executionAttributes A mutable set of attributes scoped + * to one specific request/response + * cycle that can be used to give data + * to future lifecycle methods. + * @return The potentially-modified HTTP request that should be + * sent to the service. Must not be null. */ @Override - public T beforeExecution( - final T request) { + public SdkHttpRequest modifyHttpRequest(Context.ModifyHttpRequest context, + ExecutionAttributes executionAttributes) { + SdkHttpRequest httpRequest = context.httpRequest(); + SdkRequest sdkRequest = context.request(); + // attach range for GetObject requests - attachRangeFromRequest(request); + attachRangeFromRequest(httpRequest, executionAttributes); + // for delete op, attach the number of files to delete - attachDeleteKeySizeAttribute(request); + attachDeleteKeySizeAttribute(sdkRequest); + // build the referrer header final String header = referrer.buildHttpReferrer(); // update the outer class's field. 
setLastHeader(header); if (headerEnabled) { // add the referrer header - request.putCustomRequestHeader(HEADER_REFERRER, - header); + httpRequest = httpRequest.toBuilder() + .appendHeader(HEADER_REFERRER, header) + .build(); } if (LOG.isDebugEnabled()) { LOG.debug("[{}] {} Executing {} with {}; {}", currentThreadID(), getSpanId(), getOperationName(), - analyzer.analyze(request), + analyzer.analyze(context.request()), header); } + // now see if the request is actually a blocked multipart request - if (!isMultipartUploadEnabled && isRequestMultipartIO(request)) { + if (!isMultipartUploadEnabled && isRequestMultipartIO(sdkRequest)) { throw new AuditOperationRejectedException("Multipart IO request " - + request + " rejected " + header); + + sdkRequest + " rejected " + header); } - return request; + return httpRequest; } /** * For delete requests, attach delete key size as a referrer attribute. * * @param request the request object. - * @param type of the request. */ - private void attachDeleteKeySizeAttribute(T request) { + private void attachDeleteKeySizeAttribute(SdkRequest request) { + if (request instanceof DeleteObjectsRequest) { - int keySize = ((DeleteObjectsRequest) request).getKeys().size(); - this.set(DELETE_KEYS_SIZE, String.valueOf(keySize)); + int keySize = ((DeleteObjectsRequest) request).delete().objects().size(); + referrer.set(DELETE_KEYS_SIZE, String.valueOf(keySize)); } else if (request instanceof DeleteObjectRequest) { - String key = ((DeleteObjectRequest) request).getKey(); + String key = ((DeleteObjectRequest) request).key(); if (key != null && key.length() > 0) { - this.set(DELETE_KEYS_SIZE, "1"); + referrer.set(DELETE_KEYS_SIZE, "1"); } } } @@ -460,15 +479,13 @@ public class LoggingAuditor } @Override - public T requestCreated( - final T request) { + public void requestCreated(final SdkRequest.Builder builder) { String error = "Creating a request outside an audit span " - + analyzer.analyze(request); + + analyzer.analyze(builder.build()); LOG.info(error); if (LOG.isDebugEnabled()) { LOG.debug(error, new AuditFailureException("unaudited")); } - return request; } /** @@ -476,20 +493,22 @@ public class LoggingAuditor * increment the failure count. * Some requests (e.g. copy part) are not expected in spans due * to how they are executed; these do not trigger failures. - * @param request request - * @param type of request - * @return an updated request. - * @throws AuditFailureException if failure is enabled. + * @param context The current state of the execution, including + * the unmodified SDK request from the service + * client call. + * @param executionAttributes A mutable set of attributes scoped + * to one specific request/response + * cycle that can be used to give data + * to future lifecycle methods. 
*/ @Override - public T beforeExecution( - final T request) { - + public void beforeExecution(Context.BeforeExecution context, + ExecutionAttributes executionAttributes) { String error = "executing a request outside an audit span " - + analyzer.analyze(request); + + analyzer.analyze(context.request()); final String unaudited = getSpanId() + " " + UNAUDITED_OPERATION + " " + error; - if (isRequestNotAlwaysInSpan(request)) { + if (isRequestNotAlwaysInSpan(context.request())) { // can get by auditing during a copy, so don't overreact LOG.debug(unaudited); } else { @@ -500,7 +519,7 @@ public class LoggingAuditor } } // now hand off to the superclass for its normal preparation - return super.beforeExecution(request); + super.beforeExecution(context, executionAttributes); } } } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/impl/NoopAuditManagerS3A.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/impl/NoopAuditManagerS3A.java index d1ebd922e07..e58c906460d 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/impl/NoopAuditManagerS3A.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/impl/NoopAuditManagerS3A.java @@ -24,9 +24,8 @@ import java.util.ArrayList; import java.util.List; import java.util.UUID; -import com.amazonaws.handlers.RequestHandler2; -import com.amazonaws.services.s3.transfer.Transfer; -import com.amazonaws.services.s3.transfer.internal.TransferStateChangeListener; +import software.amazon.awssdk.core.interceptor.ExecutionInterceptor; +import software.amazon.awssdk.transfer.s3.progress.TransferListener; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; @@ -39,6 +38,7 @@ import org.apache.hadoop.fs.s3a.audit.OperationAuditor; import org.apache.hadoop.fs.s3a.audit.OperationAuditorOptions; import org.apache.hadoop.service.CompositeService; + import static org.apache.hadoop.fs.statistics.impl.IOStatisticsBinding.iostatisticsStore; /** @@ -121,17 +121,13 @@ public class NoopAuditManagerS3A extends CompositeService } @Override - public List createRequestHandlers() throws IOException { + public List createExecutionInterceptors() throws IOException { return new ArrayList<>(); } @Override - public TransferStateChangeListener createStateChangeListener() { - return new TransferStateChangeListener() { - public void transferStateChanged(final Transfer transfer, - final Transfer.TransferState state) { - } - }; + public TransferListener createTransferListener() { + return new TransferListener() {}; } /** diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/impl/S3AInternalAuditConstants.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/impl/S3AInternalAuditConstants.java index f82e3d7f1e5..c170a2be661 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/impl/S3AInternalAuditConstants.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/impl/S3AInternalAuditConstants.java @@ -18,10 +18,10 @@ package org.apache.hadoop.fs.s3a.audit.impl; -import com.amazonaws.handlers.HandlerContextKey; +import software.amazon.awssdk.core.interceptor.ExecutionAttribute; import org.apache.hadoop.classification.InterfaceAudience; -import org.apache.hadoop.fs.s3a.audit.AWSAuditEventCallbacks; +import org.apache.hadoop.fs.s3a.audit.AuditSpanS3A; /** * Internal constants; not intended for public use, or @@ -34,11 +34,11 @@ public final 
class S3AInternalAuditConstants { } /** - * Handler key for audit span callbacks. - * This is used to bind the handler in the AWS code. + * Execution attribute for audit span callbacks. + * This is used to retrieve the span in the AWS code. */ - public static final HandlerContextKey - AUDIT_SPAN_HANDLER_CONTEXT = - new HandlerContextKey<>( - "org.apache.hadoop.fs.s3a.audit.AWSAuditEventCallbacks"); + public static final ExecutionAttribute + AUDIT_SPAN_EXECUTION_ATTRIBUTE = + new ExecutionAttribute<>( + "org.apache.hadoop.fs.s3a.audit.AuditSpanS3A"); } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/AbstractAWSCredentialProvider.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/AbstractAWSCredentialProvider.java index 1815285738b..4754427a4b1 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/AbstractAWSCredentialProvider.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/AbstractAWSCredentialProvider.java @@ -21,7 +21,7 @@ package org.apache.hadoop.fs.s3a.auth; import javax.annotation.Nullable; import java.net.URI; -import com.amazonaws.auth.AWSCredentialsProvider; +import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider; import org.apache.hadoop.conf.Configuration; @@ -29,12 +29,9 @@ import org.apache.hadoop.conf.Configuration; * Base class for AWS credential providers which * take a URI and config in their constructor. * - * @deprecated This class will be replaced by one that implements AWS SDK V2's AwsCredentialProvider - * as part of upgrading S3A to SDK V2. See HADOOP-18073. */ -@Deprecated public abstract class AbstractAWSCredentialProvider - implements AWSCredentialsProvider { + implements AwsCredentialsProvider { private final URI binding; @@ -65,10 +62,4 @@ public abstract class AbstractAWSCredentialProvider return binding; } - /** - * Refresh is a no-op by default. - */ - @Override - public void refresh() { - } } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/AbstractSessionCredentialsProvider.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/AbstractSessionCredentialsProvider.java index 2cdf0880aff..4ef9d49df1a 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/AbstractSessionCredentialsProvider.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/AbstractSessionCredentialsProvider.java @@ -23,8 +23,8 @@ import java.net.URI; import java.io.IOException; import java.util.concurrent.atomic.AtomicBoolean; -import com.amazonaws.SdkBaseException; -import com.amazonaws.auth.AWSCredentials; +import software.amazon.awssdk.core.exception.SdkException; +import software.amazon.awssdk.auth.credentials.AwsCredentials; import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import org.apache.hadoop.classification.InterfaceAudience; @@ -33,19 +33,17 @@ import org.apache.hadoop.fs.s3a.CredentialInitializationException; import org.apache.hadoop.fs.s3a.Invoker; import org.apache.hadoop.fs.s3a.Retries; + /** * Base class for session credential support. * - * @deprecated This class will be replaced by one that implements AWS SDK V2's AwsCredentialProvider - * as part of upgrading S3A to SDK V2. See HADOOP-18073. */ @InterfaceAudience.Private -@Deprecated public abstract class AbstractSessionCredentialsProvider extends AbstractAWSCredentialProvider { /** Credentials, created in {@link #init()}.
*/ - private volatile AWSCredentials awsCredentials; + private volatile AwsCredentials awsCredentials; /** Atomic flag for on-demand initialization. */ private final AtomicBoolean initialized = new AtomicBoolean(false); @@ -105,7 +103,7 @@ public abstract class AbstractSessionCredentialsProvider * @return the credentials * @throws IOException on any failure. */ - protected abstract AWSCredentials createCredentials(Configuration config) + protected abstract AwsCredentials createCredentials(Configuration config) throws IOException; /** @@ -115,10 +113,10 @@ public abstract class AbstractSessionCredentialsProvider * is thrown here before any attempt to return the credentials * is made. * @return credentials, if set. - * @throws SdkBaseException if one was raised during init + * @throws SdkException if one was raised during init * @throws CredentialInitializationException on other failures. */ - public AWSCredentials getCredentials() throws SdkBaseException { + public AwsCredentials resolveCredentials() throws SdkException { // do an on-demand init then raise an AWS SDK exception if // there was a failure. try { @@ -126,8 +124,8 @@ public abstract class AbstractSessionCredentialsProvider init(); } } catch (IOException e) { - if (e.getCause() instanceof SdkBaseException) { - throw (SdkBaseException) e.getCause(); + if (e.getCause() instanceof SdkException) { + throw (SdkException) e.getCause(); } else { throw new CredentialInitializationException(e.getMessage(), e); } @@ -165,15 +163,16 @@ public abstract class AbstractSessionCredentialsProvider * This will be interpreted as "this provider has no credentials to offer", * rather than an explicit error or anonymous access. */ - protected static final class NoCredentials implements AWSCredentials { + protected static final class NoCredentials implements AwsCredentials { @Override - public String getAWSAccessKeyId() { + public String accessKeyId() { return null; } @Override - public String getAWSSecretKey() { + public String secretAccessKey() { return null; } } + } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/AssumedRoleCredentialProvider.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/AssumedRoleCredentialProvider.java index fe816ecfcd9..52a23f857a5 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/AssumedRoleCredentialProvider.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/AssumedRoleCredentialProvider.java @@ -26,12 +26,14 @@ import java.util.Arrays; import java.util.Locale; import java.util.concurrent.TimeUnit; -import com.amazonaws.auth.AWSCredentials; -import com.amazonaws.auth.AWSCredentialsProvider; -import com.amazonaws.auth.EnvironmentVariableCredentialsProvider; -import com.amazonaws.auth.STSAssumeRoleSessionCredentialsProvider; -import com.amazonaws.services.securitytoken.AWSSecurityTokenServiceClientBuilder; -import com.amazonaws.services.securitytoken.model.AWSSecurityTokenServiceException; +import software.amazon.awssdk.auth.credentials.AwsCredentials; +import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider; +import software.amazon.awssdk.auth.credentials.EnvironmentVariableCredentialsProvider; +import software.amazon.awssdk.core.exception.SdkClientException; +import software.amazon.awssdk.services.sts.StsClient; +import software.amazon.awssdk.services.sts.auth.StsAssumeRoleCredentialsProvider; +import software.amazon.awssdk.services.sts.model.AssumeRoleRequest; +import 
software.amazon.awssdk.services.sts.model.StsException; import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import org.apache.hadoop.thirdparty.com.google.common.collect.Sets; import org.slf4j.Logger; @@ -51,7 +53,7 @@ import org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider; import org.apache.hadoop.security.UserGroupInformation; import static org.apache.hadoop.fs.s3a.Constants.*; -import static org.apache.hadoop.fs.s3a.S3AUtils.buildAWSProviderList; +import static org.apache.hadoop.fs.s3a.auth.AwsCredentialListProvider.buildAWSProviderList; /** * Support IAM Assumed roles by instantiating an instance of @@ -61,13 +63,10 @@ import static org.apache.hadoop.fs.s3a.S3AUtils.buildAWSProviderList; * * Classname is used in configuration files; do not move. * - * @deprecated This class will be replaced by one that implements AWS SDK V2's AwsCredentialProvider - * as part of upgrading S3A to SDK V2. See HADOOP-18073. */ @InterfaceAudience.Public @InterfaceStability.Evolving -@Deprecated -public class AssumedRoleCredentialProvider implements AWSCredentialsProvider, +public class AssumedRoleCredentialProvider implements AwsCredentialsProvider, Closeable { private static final Logger LOG = @@ -78,7 +77,7 @@ public class AssumedRoleCredentialProvider implements AWSCredentialsProvider, public static final String E_NO_ROLE = "Unset property " + ASSUMED_ROLE_ARN; - private final STSAssumeRoleSessionCredentialsProvider stsProvider; + private final StsAssumeRoleCredentialsProvider stsProvider; private final String sessionName; @@ -90,15 +89,17 @@ public class AssumedRoleCredentialProvider implements AWSCredentialsProvider, private final Invoker invoker; + private final StsClient stsClient; + /** * Instantiate. - * This calls {@link #getCredentials()} to fail fast on the inner + * This calls {@link #resolveCredentials()} to fail fast on the inner * role credential retrieval. * @param fsUri possibly null URI of the filesystem. * @param conf configuration * @throws IOException on IO problems and some parameter checking * @throws IllegalArgumentException invalid parameters - * @throws AWSSecurityTokenServiceException problems getting credentials + * @throws StsException problems getting credentials */ public AssumedRoleCredentialProvider(@Nullable URI fsUri, Configuration conf) throws IOException { @@ -125,29 +126,31 @@ public class AssumedRoleCredentialProvider implements AWSCredentialsProvider, String policy = conf.getTrimmed(ASSUMED_ROLE_POLICY, ""); LOG.debug("{}", this); - STSAssumeRoleSessionCredentialsProvider.Builder builder - = new STSAssumeRoleSessionCredentialsProvider.Builder(arn, sessionName); - builder.withRoleSessionDurationSeconds((int) duration); + + AssumeRoleRequest.Builder requestBuilder = + AssumeRoleRequest.builder().roleArn(arn).roleSessionName(sessionName) + .durationSeconds((int) duration); + if (StringUtils.isNotEmpty(policy)) { LOG.debug("Scope down policy {}", policy); - builder.withScopeDownPolicy(policy); + requestBuilder.policy(policy); } + String endpoint = conf.getTrimmed(ASSUMED_ROLE_STS_ENDPOINT, ""); String region = conf.getTrimmed(ASSUMED_ROLE_STS_ENDPOINT_REGION, ASSUMED_ROLE_STS_ENDPOINT_REGION_DEFAULT); - AWSSecurityTokenServiceClientBuilder stsbuilder = + stsClient = STSClientFactory.builder( conf, fsUri != null ? fsUri.getHost() : "", credentialsToSTS, endpoint, - region); - // the STS client is not tracked for a shutdown in close(), because it - // (currently) throws an UnsupportedOperationException in shutdown(). 
- builder.withStsClient(stsbuilder.build()); + region).build(); //now build the provider - stsProvider = builder.build(); + stsProvider = StsAssumeRoleCredentialsProvider.builder() + .refreshRequest(requestBuilder.build()) + .stsClient(stsClient).build(); // to handle STS throttling by the AWS account, we // need to retry @@ -155,21 +158,21 @@ public class AssumedRoleCredentialProvider implements AWSCredentialsProvider, // and force in a fail-fast check just to keep the stack traces less // convoluted - getCredentials(); + resolveCredentials(); } /** * Get credentials. * @return the credentials - * @throws AWSSecurityTokenServiceException if none could be obtained. + * @throws StsException if none could be obtained. */ @Override @Retries.RetryRaw - public AWSCredentials getCredentials() { + public AwsCredentials resolveCredentials() { try { return invoker.retryUntranslated("getCredentials", true, - stsProvider::getCredentials); + stsProvider::resolveCredentials); } catch (IOException e) { // this is in the signature of retryUntranslated; // its hard to see how this could be raised, but for @@ -178,24 +181,19 @@ public class AssumedRoleCredentialProvider implements AWSCredentialsProvider, throw new CredentialInitializationException( "getCredentials failed: " + e, e); - } catch (AWSSecurityTokenServiceException e) { + } catch (SdkClientException e) { LOG.error("Failed to get credentials for role {}", arn, e); throw e; } } - @Override - public void refresh() { - stsProvider.refresh(); - } - /** * Propagate the close() call to the inner stsProvider. */ @Override public void close() { - S3AUtils.closeAutocloseables(LOG, stsProvider, credentialsToSTS); + S3AUtils.closeAutocloseables(LOG, stsProvider, credentialsToSTS, stsClient); } @Override diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/AwsCredentialListProvider.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/AwsCredentialListProvider.java new file mode 100644 index 00000000000..d94f8c25bad --- /dev/null +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/AwsCredentialListProvider.java @@ -0,0 +1,283 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.fs.s3a.auth; + +import java.io.IOException; +import java.lang.reflect.Modifier; +import java.net.URI; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import javax.annotation.Nullable; + +import com.amazonaws.auth.AWSCredentialsProvider; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider; +import software.amazon.awssdk.auth.credentials.EnvironmentVariableCredentialsProvider; + +import org.apache.hadoop.classification.VisibleForTesting; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.s3a.AWSCredentialProviderList; +import org.apache.hadoop.fs.s3a.Constants; +import org.apache.hadoop.fs.s3a.S3AUtils; +import org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider; +import org.apache.hadoop.fs.s3a.TemporaryAWSCredentialsProvider; +import org.apache.hadoop.fs.s3native.S3xLoginHelper; + +import static org.apache.hadoop.fs.s3a.Constants.AWS_AUTH_CLASS_PREFIX; +import static org.apache.hadoop.fs.s3a.Constants.AWS_CREDENTIALS_PROVIDER; + +/** + * This class provides methods to create the list of AWS credential providers. + */ +public final class AwsCredentialListProvider { + + private AwsCredentialListProvider() { + } + + private static final Logger LOG = LoggerFactory.getLogger(AwsCredentialListProvider.class); + + public static final String NOT_AWS_PROVIDER = + "does not implement AWSCredentialsProvider"; + public static final String NOT_AWS_V2_PROVIDER = + "does not implement AwsCredentialsProvider"; + public static final String ABSTRACT_PROVIDER = + "is abstract and therefore cannot be created"; + + /** + * Error message when the AWS provider list built up contains a forbidden + * entry. + */ + @VisibleForTesting + public static final String E_FORBIDDEN_AWS_PROVIDER + = "AWS provider class cannot be used"; + + /** + * The standard AWS provider list for AWS connections. + */ + public static final List> + STANDARD_AWS_PROVIDERS = Collections.unmodifiableList( + Arrays.asList( + TemporaryAWSCredentialsProvider.class, + SimpleAWSCredentialsProvider.class, + EnvironmentVariableCredentialsProvider.class, + IAMInstanceCredentialsProvider.class)); + + /** + * Create the AWS credentials from the providers, the URI and + * the key {@link Constants#AWS_CREDENTIALS_PROVIDER} in the configuration. + * @param binding Binding URI -may be null + * @param conf filesystem configuration + * @return a credentials provider list + * @throws IOException Problems loading the providers (including reading + * secrets from credential files). + */ + public static AWSCredentialProviderList createAWSCredentialProviderSet( + @Nullable URI binding, + Configuration conf) throws IOException { + // this will reject any user:secret entries in the URI + S3xLoginHelper.rejectSecretsInURIs(binding); + AWSCredentialProviderList credentials = + buildAWSProviderList(binding, + conf, + AWS_CREDENTIALS_PROVIDER, + STANDARD_AWS_PROVIDERS, + new HashSet<>()); + // make sure the logging message strips out any auth details + LOG.debug("For URI {}, using credentials {}", + binding, credentials); + return credentials; + } + + /** + * Load list of AWS credential provider/credential provider factory classes. 
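      As a usage sketch of the public entry point defined earlier in this class: a caller hands in the filesystem URI and configuration and gets back the resolved provider chain. This is illustrative only; the URI is a placeholder, and it assumes the provider list exposes the V2 resolveCredentials() entry point, as elsewhere in this patch.

      ```java
      import java.io.IOException;
      import java.net.URI;

      import software.amazon.awssdk.auth.credentials.AwsCredentials;

      import org.apache.hadoop.conf.Configuration;
      import org.apache.hadoop.fs.s3a.AWSCredentialProviderList;

      public final class CredentialListSketch {
        public static AwsCredentials resolve(Configuration conf) throws IOException {
          // fs.s3a.aws.credentials.provider may name V1 providers, V2 providers or Hadoop ones;
          // V1 classes with known V2/Hadoop equivalents are remapped by buildAWSProviderList.
          AWSCredentialProviderList providers =
              AwsCredentialListProvider.createAWSCredentialProviderSet(
                  URI.create("s3a://example-bucket/"), conf);   // placeholder URI
          return providers.resolveCredentials();
        }
      }
      ```
      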
+ * @param conf configuration + * @param key key + * @param defaultValue list of default values + * @return the list of classes, possibly empty + * @throws IOException on a failure to load the list. + */ + private static List> loadAWSProviderClasses(Configuration conf, + String key, + Class... defaultValue) throws IOException { + try { + return Arrays.asList(conf.getClasses(key, defaultValue)); + } catch (RuntimeException e) { + Throwable c = e.getCause() != null ? e.getCause() : e; + throw new IOException("From option " + key + ' ' + c, c); + } + } + + /** + * Maps V1 credential providers to either their equivalent SDK V2 class or hadoop provider. + */ + private static Map initCredentialProvidersMap() { + Map v1v2CredentialProviderMap = new HashMap<>(); + + v1v2CredentialProviderMap.put("EnvironmentVariableCredentialsProvider", + EnvironmentVariableCredentialsProvider.class); + v1v2CredentialProviderMap.put("EC2ContainerCredentialsProviderWrapper", + IAMInstanceCredentialsProvider.class); + v1v2CredentialProviderMap.put("InstanceProfileCredentialsProvider", + IAMInstanceCredentialsProvider.class); + + return v1v2CredentialProviderMap; + } + + /** + * Load list of AWS credential provider/credential provider factory classes; + * support a forbidden list to prevent loops, mandate full secrets, etc. + * @param binding Binding URI -may be null + * @param conf configuration + * @param key key + * @param forbidden a possibly empty set of forbidden classes. + * @param defaultValues list of default providers. + * @return the list of classes, possibly empty + * @throws IOException on a failure to load the list. + */ + public static AWSCredentialProviderList buildAWSProviderList( + @Nullable final URI binding, + final Configuration conf, + final String key, + final List> defaultValues, + final Set> forbidden) throws IOException { + + // build up the base provider + List> awsClasses = loadAWSProviderClasses(conf, + key, + defaultValues.toArray(new Class[defaultValues.size()])); + + Map v1v2CredentialProviderMap = initCredentialProvidersMap(); + // and if the list is empty, switch back to the defaults. + // this is to address the issue that configuration.getClasses() + // doesn't return the default if the config value is just whitespace. + if (awsClasses.isEmpty()) { + awsClasses = defaultValues; + } + // iterate through, checking for blacklists and then instantiating + // each provider + AWSCredentialProviderList providers = new AWSCredentialProviderList(); + for (Class aClass : awsClasses) { + + if (forbidden.contains(aClass)) { + throw new IOException(E_FORBIDDEN_AWS_PROVIDER + + " in option " + key + ": " + aClass); + } + + if (v1v2CredentialProviderMap.containsKey(aClass.getSimpleName()) && + aClass.getName().contains(AWS_AUTH_CLASS_PREFIX)){ + providers.add(createAWSV2CredentialProvider(conf, + v1v2CredentialProviderMap.get(aClass.getSimpleName()), binding)); + } else if (AWSCredentialsProvider.class.isAssignableFrom(aClass)) { + providers.add(createAWSV1CredentialProvider(conf, + aClass, binding)); + } else { + providers.add(createAWSV2CredentialProvider(conf, aClass, binding)); + } + + } + return providers; + } + + /** + * Create an AWS credential provider from its class by using reflection. The + * class must implement one of the following means of construction, which are + * attempted in order: + * + *
      + *
      1. a public constructor accepting java.net.URI and
      + * org.apache.hadoop.conf.Configuration, or
      + * 2. a public constructor accepting
      + * org.apache.hadoop.conf.Configuration, or
      + * 3. a public static method named getInstance that accepts no
      + * arguments and returns an instance of
      + * com.amazonaws.auth.AWSCredentialsProvider, or
      + * 4. a public default constructor.
      
    + * + * @param conf configuration + * @param credClass credential class + * @param uri URI of the FS + * @return the instantiated class + * @throws IOException on any instantiation failure. + */ + private static AWSCredentialsProvider createAWSV1CredentialProvider(Configuration conf, + Class credClass, @Nullable URI uri) throws IOException { + AWSCredentialsProvider credentials = null; + String className = credClass.getName(); + if (!AWSCredentialsProvider.class.isAssignableFrom(credClass)) { + throw new IOException("Class " + credClass + " " + NOT_AWS_PROVIDER); + } + if (Modifier.isAbstract(credClass.getModifiers())) { + throw new IOException("Class " + credClass + " " + ABSTRACT_PROVIDER); + } + LOG.debug("Credential provider class is {}", className); + + credentials = + S3AUtils.getInstanceFromReflection(credClass, conf, uri, AWSCredentialsProvider.class, + "getInstance", AWS_CREDENTIALS_PROVIDER); + return credentials; + + } + + /** + * Create an AWS credential provider from its class by using reflection. The + * class must implement one of the following means of construction, which are + * attempted in order: + * + *
      + *
      1. a public constructor accepting java.net.URI and
      + * org.apache.hadoop.conf.Configuration, or
      + * 2. a public constructor accepting
      + * org.apache.hadoop.conf.Configuration, or
      + * 3. a public static method named create that accepts no
      + * arguments and returns an instance of
      + * software.amazon.awssdk.auth.credentials.AwsCredentialsProvider, or
      + * 4. a public default constructor.
      
    + * + * @param conf configuration + * @param credClass credential class + * @param uri URI of the FS + * @return the instantiated class + * @throws IOException on any instantiation failure. + */ + private static AwsCredentialsProvider createAWSV2CredentialProvider(Configuration conf, + Class credClass, @Nullable URI uri) throws IOException { + AwsCredentialsProvider credentials = null; + String className = credClass.getName(); + if (!AwsCredentialsProvider.class.isAssignableFrom(credClass)) { + throw new IOException("Class " + credClass + " " + NOT_AWS_V2_PROVIDER); + } + if (Modifier.isAbstract(credClass.getModifiers())) { + throw new IOException("Class " + credClass + " " + ABSTRACT_PROVIDER); + } + LOG.debug("Credential provider class is {}", className); + credentials = + S3AUtils.getInstanceFromReflection(credClass, conf, uri, AwsCredentialsProvider.class, + "create", AWS_CREDENTIALS_PROVIDER); + return credentials; + } + +} diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/IAMInstanceCredentialsProvider.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/IAMInstanceCredentialsProvider.java index ca9c518d300..f505cfcab5d 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/IAMInstanceCredentialsProvider.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/IAMInstanceCredentialsProvider.java @@ -21,10 +21,11 @@ package org.apache.hadoop.fs.s3a.auth; import java.io.Closeable; import java.io.IOException; -import com.amazonaws.AmazonClientException; -import com.amazonaws.auth.AWSCredentials; -import com.amazonaws.auth.AWSCredentialsProvider; -import com.amazonaws.auth.EC2ContainerCredentialsProviderWrapper; +import software.amazon.awssdk.auth.credentials.AwsCredentials; +import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider; +import software.amazon.awssdk.auth.credentials.ContainerCredentialsProvider; +import software.amazon.awssdk.auth.credentials.InstanceProfileCredentialsProvider; +import software.amazon.awssdk.core.exception.SdkClientException; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; @@ -41,17 +42,14 @@ import org.apache.hadoop.classification.InterfaceStability; *

    * It is implicitly public; marked evolving as we can change its semantics. * - * @deprecated This class will be replaced by one that implements AWS SDK V2's AwsCredentialProvider - * as part of upgrading S3A to SDK V2. See HADOOP-18073. */ @InterfaceAudience.Public @InterfaceStability.Evolving -@Deprecated public class IAMInstanceCredentialsProvider - implements AWSCredentialsProvider, Closeable { + implements AwsCredentialsProvider, Closeable { - private final AWSCredentialsProvider provider = - new EC2ContainerCredentialsProviderWrapper(); + private final AwsCredentialsProvider containerCredentialsProvider = + ContainerCredentialsProvider.builder().build(); public IAMInstanceCredentialsProvider() { } @@ -63,19 +61,29 @@ public class IAMInstanceCredentialsProvider * @throws NoAwsCredentialsException on auth failure to indicate non-recoverable. */ @Override - public AWSCredentials getCredentials() { + public AwsCredentials resolveCredentials() { try { - return provider.getCredentials(); - } catch (AmazonClientException e) { + return getCredentials(); + } catch (SdkClientException e) { throw new NoAwsCredentialsException("IAMInstanceCredentialsProvider", e.getMessage(), e); } } - @Override - public void refresh() { - provider.refresh(); + /** + * First try {@link ContainerCredentialsProvider}, which will throw an exception if credentials + * cannot be retrieved from the container. Then resolve credentials + * using {@link InstanceProfileCredentialsProvider}. + * + * @return credentials + */ + private AwsCredentials getCredentials() { + try { + return containerCredentialsProvider.resolveCredentials(); + } catch (SdkClientException e) { + return InstanceProfileCredentialsProvider.create().resolveCredentials(); + } } @Override diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/MarshalledCredentialBinding.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/MarshalledCredentialBinding.java index 72d29df3fe0..fe5b4753253 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/MarshalledCredentialBinding.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/MarshalledCredentialBinding.java @@ -24,15 +24,13 @@ import java.util.Date; import java.util.Map; import java.util.concurrent.TimeUnit; -import com.amazonaws.ClientConfiguration; -import com.amazonaws.SdkClientException; -import com.amazonaws.auth.AWSCredentials; -import com.amazonaws.auth.AWSCredentialsProvider; -import com.amazonaws.auth.AWSSessionCredentials; -import com.amazonaws.auth.BasicAWSCredentials; -import com.amazonaws.auth.BasicSessionCredentials; -import com.amazonaws.services.securitytoken.AWSSecurityTokenService; -import com.amazonaws.services.securitytoken.model.Credentials; +import software.amazon.awssdk.auth.credentials.AwsBasicCredentials; +import software.amazon.awssdk.auth.credentials.AwsCredentials; +import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider; +import software.amazon.awssdk.auth.credentials.AwsSessionCredentials; +import software.amazon.awssdk.core.exception.SdkException; +import software.amazon.awssdk.services.sts.StsClient; +import software.amazon.awssdk.services.sts.model.Credentials; import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -77,10 +75,10 @@ public final class MarshalledCredentialBinding { public static MarshalledCredentials fromSTSCredentials( final Credentials credentials) { 
MarshalledCredentials marshalled = new MarshalledCredentials( - credentials.getAccessKeyId(), - credentials.getSecretAccessKey(), - credentials.getSessionToken()); - Date date = credentials.getExpiration(); + credentials.accessKeyId(), + credentials.secretAccessKey(), + credentials.sessionToken()); + Date date = Date.from(credentials.expiration()); marshalled.setExpiration(date != null ? date.getTime() : 0); return marshalled; } @@ -91,11 +89,11 @@ public final class MarshalledCredentialBinding { * @return a set of marshalled credentials. */ public static MarshalledCredentials fromAWSCredentials( - final AWSSessionCredentials credentials) { + final AwsSessionCredentials credentials) { return new MarshalledCredentials( - credentials.getAWSAccessKeyId(), - credentials.getAWSSecretKey(), - credentials.getSessionToken()); + credentials.accessKeyId(), + credentials.secretAccessKey(), + credentials.sessionToken()); } /** @@ -156,7 +154,7 @@ public final class MarshalledCredentialBinding { * @throws NoAuthWithAWSException validation failure * @throws NoAwsCredentialsException the credentials are actually empty. */ - public static AWSCredentials toAWSCredentials( + public static AwsCredentials toAWSCredentials( final MarshalledCredentials marshalled, final MarshalledCredentials.CredentialTypeRequired typeRequired, final String component) @@ -173,46 +171,49 @@ public final class MarshalledCredentialBinding { final String secretKey = marshalled.getSecretKey(); if (marshalled.hasSessionToken()) { // a session token was supplied, so return session credentials - return new BasicSessionCredentials(accessKey, secretKey, + return AwsSessionCredentials.create(accessKey, secretKey, marshalled.getSessionToken()); } else { // these are full credentials - return new BasicAWSCredentials(accessKey, secretKey); + return AwsBasicCredentials.create(accessKey, secretKey); } } /** * Request a set of credentials from an STS endpoint. * @param parentCredentials the parent credentials needed to talk to STS - * @param awsConf AWS client configuration + * @param configuration AWS client configuration * @param stsEndpoint an endpoint, use "" for none * @param stsRegion region; use if the endpoint isn't the AWS default. * @param duration duration of the credentials in seconds. Minimum value: 900. * @param invoker invoker to use for retrying the call. + * @param bucket bucket name. * @return the credentials * @throws IOException on a failure of the request */ @Retries.RetryTranslated public static MarshalledCredentials requestSessionCredentials( - final AWSCredentialsProvider parentCredentials, - final ClientConfiguration awsConf, + final AwsCredentialsProvider parentCredentials, + final Configuration configuration, final String stsEndpoint, final String stsRegion, final int duration, - final Invoker invoker) throws IOException { + final Invoker invoker, + final String bucket) throws IOException { try { - final AWSSecurityTokenService tokenService = + final StsClient tokenService = STSClientFactory.builder(parentCredentials, - awsConf, + configuration, stsEndpoint.isEmpty() ? 
null : stsEndpoint, - stsRegion) + stsRegion, + bucket) .build(); try (STSClientFactory.STSClient stsClient = STSClientFactory.createClientConnection( tokenService, invoker)) { return fromSTSCredentials(stsClient.requestSessionCredentials(duration, TimeUnit.SECONDS)); } - } catch (SdkClientException e) { + } catch (SdkException e) { if (stsRegion.isEmpty()) { LOG.error("Region must be provided when requesting session credentials.", e); diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/MarshalledCredentialProvider.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/MarshalledCredentialProvider.java index cde4cc94b78..7dffa157c04 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/MarshalledCredentialProvider.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/MarshalledCredentialProvider.java @@ -21,7 +21,7 @@ package org.apache.hadoop.fs.s3a.auth; import java.io.IOException; import java.net.URI; -import com.amazonaws.auth.AWSCredentials; +import software.amazon.awssdk.auth.credentials.AwsCredentials; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; @@ -40,7 +40,6 @@ import static org.apache.hadoop.fs.s3a.auth.MarshalledCredentialBinding.toAWSCre */ @InterfaceAudience.Private @InterfaceStability.Unstable -@SuppressWarnings("deprecation") public class MarshalledCredentialProvider extends AbstractSessionCredentialsProvider { @@ -85,7 +84,7 @@ public class MarshalledCredentialProvider extends * @throws IOException on a failure */ @Override - protected AWSCredentials createCredentials(final Configuration config) + protected AwsCredentials createCredentials(final Configuration config) throws IOException { return toAWSCredentials(credentials, typeRequired, component); } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/NoAuthWithAWSException.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/NoAuthWithAWSException.java index 7ec13b092c9..8f92153b2e1 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/NoAuthWithAWSException.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/NoAuthWithAWSException.java @@ -21,7 +21,7 @@ package org.apache.hadoop.fs.s3a.auth; import org.apache.hadoop.fs.s3a.CredentialInitializationException; /** - * A specific subclass of {@code AmazonClientException} which is + * A specific subclass of {@code SdkException} which is * used in the S3A retry policy to fail fast when there is any * authentication problem. 
*/ diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/STSClientFactory.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/STSClientFactory.java index 4779f3c1cb0..c2a28afec60 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/STSClientFactory.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/STSClientFactory.java @@ -20,16 +20,23 @@ package org.apache.hadoop.fs.s3a.auth; import java.io.Closeable; import java.io.IOException; +import java.net.URI; +import java.net.URISyntaxException; import java.util.concurrent.TimeUnit; -import com.amazonaws.ClientConfiguration; -import com.amazonaws.auth.AWSCredentialsProvider; -import com.amazonaws.client.builder.AwsClientBuilder; -import com.amazonaws.services.securitytoken.AWSSecurityTokenService; -import com.amazonaws.services.securitytoken.AWSSecurityTokenServiceClientBuilder; -import com.amazonaws.services.securitytoken.model.AssumeRoleRequest; -import com.amazonaws.services.securitytoken.model.Credentials; -import com.amazonaws.services.securitytoken.model.GetSessionTokenRequest; +import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider; +import software.amazon.awssdk.core.client.config.ClientOverrideConfiguration; +import software.amazon.awssdk.core.retry.RetryPolicy; +import software.amazon.awssdk.http.apache.ApacheHttpClient; +import software.amazon.awssdk.http.apache.ProxyConfiguration; +import software.amazon.awssdk.regions.Region; +import software.amazon.awssdk.services.sts.StsClient; +import software.amazon.awssdk.services.sts.StsClientBuilder; +import software.amazon.awssdk.services.sts.model.AssumeRoleRequest; +import software.amazon.awssdk.services.sts.model.Credentials; +import software.amazon.awssdk.services.sts.model.GetSessionTokenRequest; +import software.amazon.awssdk.thirdparty.org.apache.http.client.utils.URIBuilder; +import org.apache.hadoop.fs.s3a.impl.AWSClientConfig; import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.slf4j.Logger; @@ -38,13 +45,12 @@ import org.slf4j.LoggerFactory; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.s3a.Constants; import org.apache.hadoop.fs.s3a.Invoker; import org.apache.hadoop.fs.s3a.Retries; -import org.apache.hadoop.fs.s3a.S3AUtils; import static org.apache.commons.lang3.StringUtils.isEmpty; import static org.apache.commons.lang3.StringUtils.isNotEmpty; +import static org.apache.hadoop.fs.s3a.Constants.AWS_SERVICE_IDENTIFIER_STS; import static org.apache.hadoop.fs.s3a.auth.delegation.DelegationConstants.*; /** @@ -71,17 +77,15 @@ public class STSClientFactory { * @return the builder to call {@code build()} * @throws IOException problem reading proxy secrets */ - public static AWSSecurityTokenServiceClientBuilder builder( + public static StsClientBuilder builder( final Configuration conf, final String bucket, - final AWSCredentialsProvider credentials) throws IOException { - final ClientConfiguration awsConf = S3AUtils.createAwsConf(conf, bucket, - Constants.AWS_SERVICE_IDENTIFIER_STS); + final AwsCredentialsProvider credentials) throws IOException { String endpoint = conf.getTrimmed(DELEGATION_TOKEN_ENDPOINT, DEFAULT_DELEGATION_TOKEN_ENDPOINT); String region = conf.getTrimmed(DELEGATION_TOKEN_REGION, DEFAULT_DELEGATION_TOKEN_REGION); - return builder(credentials, awsConf, endpoint, region); + 
return builder(credentials, conf, endpoint, region, bucket); } /** @@ -96,64 +100,89 @@ public class STSClientFactory { * @return the builder to call {@code build()} * @throws IOException problem reading proxy secrets */ - public static AWSSecurityTokenServiceClientBuilder builder( + public static StsClientBuilder builder( final Configuration conf, final String bucket, - final AWSCredentialsProvider credentials, + final AwsCredentialsProvider credentials, final String stsEndpoint, final String stsRegion) throws IOException { - final ClientConfiguration awsConf = S3AUtils.createAwsConf(conf, bucket, - Constants.AWS_SERVICE_IDENTIFIER_STS); - return builder(credentials, awsConf, stsEndpoint, stsRegion); + return builder(credentials, conf, stsEndpoint, stsRegion, bucket); } /** * Create the builder ready for any final configuration options. * Picks up connection settings from the Hadoop configuration, including * proxy secrets. - * @param awsConf AWS configuration. + * @param conf AWS configuration. * @param credentials AWS credential chain to use * @param stsEndpoint optional endpoint "https://sns.us-west-1.amazonaws.com" * @param stsRegion the region, e.g "us-west-1". Must be set if endpoint is. + * @param bucket bucket name * @return the builder to call {@code build()} + * @throws IOException problem reading proxy secrets */ - public static AWSSecurityTokenServiceClientBuilder builder( - final AWSCredentialsProvider credentials, - final ClientConfiguration awsConf, - final String stsEndpoint, - final String stsRegion) { - final AWSSecurityTokenServiceClientBuilder builder - = AWSSecurityTokenServiceClientBuilder.standard(); + public static StsClientBuilder builder(final AwsCredentialsProvider credentials, + final Configuration conf, final String stsEndpoint, final String stsRegion, + final String bucket) throws IOException { + final StsClientBuilder stsClientBuilder = StsClient.builder(); + Preconditions.checkArgument(credentials != null, "No credentials"); - builder.withClientConfiguration(awsConf); - builder.withCredentials(credentials); + + final ClientOverrideConfiguration.Builder clientOverrideConfigBuilder = + AWSClientConfig.createClientConfigBuilder(conf, AWS_SERVICE_IDENTIFIER_STS); + + final ApacheHttpClient.Builder httpClientBuilder = + AWSClientConfig.createHttpClientBuilder(conf); + + final RetryPolicy.Builder retryPolicyBuilder = AWSClientConfig.createRetryPolicyBuilder(conf); + + final ProxyConfiguration proxyConfig = AWSClientConfig.createProxyConfiguration(conf, bucket); + + clientOverrideConfigBuilder.retryPolicy(retryPolicyBuilder.build()); + httpClientBuilder.proxyConfiguration(proxyConfig); + + stsClientBuilder.httpClientBuilder(httpClientBuilder) + .overrideConfiguration(clientOverrideConfigBuilder.build()) + .credentialsProvider(credentials); + boolean destIsStandardEndpoint = STS_STANDARD.equals(stsEndpoint); if (isNotEmpty(stsEndpoint) && !destIsStandardEndpoint) { - Preconditions.checkArgument( - isNotEmpty(stsRegion), - "STS endpoint is set to %s but no signing region was provided", - stsEndpoint); + Preconditions.checkArgument(isNotEmpty(stsRegion), + "STS endpoint is set to %s but no signing region was provided", stsEndpoint); LOG.debug("STS Endpoint={}; region='{}'", stsEndpoint, stsRegion); - builder.withEndpointConfiguration( - new AwsClientBuilder.EndpointConfiguration(stsEndpoint, stsRegion)); + stsClientBuilder.endpointOverride(getSTSEndpoint(stsEndpoint)).region(Region.of(stsRegion)); } else { Preconditions.checkArgument(isEmpty(stsRegion), - "STS 
signing region set set to %s but no STS endpoint specified", - stsRegion); + "STS signing region set set to %s but no STS endpoint specified", stsRegion); } - return builder; + return stsClientBuilder; } + /** + * Given a endpoint string, create the endpoint URI. + * + * @param endpoint possibly null endpoint. + * @return an endpoint uri + */ + private static URI getSTSEndpoint(String endpoint) { + try { + return new URIBuilder().setScheme("https").setHost(endpoint).build(); + } catch (URISyntaxException e) { + throw new IllegalArgumentException(e); + } + } + + /** * Create an STS Client instance. - * @param tokenService STS instance + * @param stsClient STS instance * @param invoker invoker to use * @return an STS client bonded to that interface. */ public static STSClient createClientConnection( - final AWSSecurityTokenService tokenService, + final StsClient stsClient, final Invoker invoker) { - return new STSClient(tokenService, invoker); + return new STSClient(stsClient, invoker); } /** @@ -161,21 +190,19 @@ public class STSClientFactory { */ public static final class STSClient implements Closeable { - private final AWSSecurityTokenService tokenService; + private final StsClient stsClient; private final Invoker invoker; - private STSClient(final AWSSecurityTokenService tokenService, + private STSClient(final StsClient stsClient, final Invoker invoker) { - this.tokenService = tokenService; + this.stsClient = stsClient; this.invoker = invoker; } @Override public void close() throws IOException { - // Since we are not using AbstractAWSSecurityTokenService, we - // don't need to worry about catching UnsupportedOperationException. - tokenService.shutdown(); + stsClient.close(); } /** @@ -192,13 +219,13 @@ public class STSClientFactory { final TimeUnit timeUnit) throws IOException { int durationSeconds = (int) timeUnit.toSeconds(duration); LOG.debug("Requesting session token of duration {}", duration); - final GetSessionTokenRequest request = new GetSessionTokenRequest(); - request.setDurationSeconds(durationSeconds); + final GetSessionTokenRequest request = + GetSessionTokenRequest.builder().durationSeconds(durationSeconds).build(); return invoker.retry("request session credentials", "", true, () ->{ LOG.info("Requesting Amazon STS Session credentials"); - return tokenService.getSessionToken(request).getCredentials(); + return stsClient.getSessionToken(request).credentials(); }); } @@ -222,15 +249,14 @@ public class STSClientFactory { final TimeUnit timeUnit) throws IOException { LOG.debug("Requesting role {} with duration {}; policy = {}", roleARN, duration, policy); - AssumeRoleRequest request = new AssumeRoleRequest(); - request.setDurationSeconds((int) timeUnit.toSeconds(duration)); - request.setRoleArn(roleARN); - request.setRoleSessionName(sessionName); + AssumeRoleRequest.Builder requestBuilder = + AssumeRoleRequest.builder().durationSeconds((int) timeUnit.toSeconds(duration)) + .roleArn(roleARN).roleSessionName(sessionName); if (isNotEmpty(policy)) { - request.setPolicy(policy); + requestBuilder.policy(policy); } return invoker.retry("request role credentials", "", true, - () -> tokenService.assumeRole(request).getCredentials()); + () -> stsClient.assumeRole(requestBuilder.build()).credentials()); } } } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/SignerFactory.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/SignerFactory.java new file mode 100644 index 00000000000..7beabb9fa3c --- /dev/null +++ 
b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/SignerFactory.java @@ -0,0 +1,114 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.s3a.auth; + +import java.io.IOException; +import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import software.amazon.awssdk.auth.signer.Aws4Signer; +import software.amazon.awssdk.auth.signer.Aws4UnsignedPayloadSigner; +import software.amazon.awssdk.auth.signer.AwsS3V4Signer; +import software.amazon.awssdk.core.signer.NoOpSigner; +import software.amazon.awssdk.core.signer.Signer; + +import org.apache.hadoop.fs.s3a.S3AUtils; + + +/** + * Signer factory used to register and create signers. + */ +public final class SignerFactory { + + private static final Logger LOG = LoggerFactory.getLogger(SignerFactory.class); + public static final String VERSION_FOUR_SIGNER = "AWS4SignerType"; + public static final String VERSION_FOUR_UNSIGNED_PAYLOAD_SIGNER = "AWS4UnsignedPayloadSignerType"; + public static final String NO_OP_SIGNER = "NoOpSignerType"; + private static final String S3_V4_SIGNER = "AWSS3V4SignerType"; + + private static final Map> SIGNERS + = new ConcurrentHashMap<>(); + + static { + // Register the standard signer types. + SIGNERS.put(VERSION_FOUR_SIGNER, Aws4Signer.class); + SIGNERS.put(VERSION_FOUR_UNSIGNED_PAYLOAD_SIGNER, Aws4UnsignedPayloadSigner.class); + SIGNERS.put(NO_OP_SIGNER, NoOpSigner.class); + SIGNERS.put(S3_V4_SIGNER, AwsS3V4Signer.class); + } + + + private SignerFactory() { + } + + /** + * Register an implementation class for the given signer type. + * + * @param signerType The name of the signer type to register. + * @param signerClass The class implementing the given signature protocol. + */ + public static void registerSigner( + final String signerType, + final Class signerClass) { + + if (signerType == null) { + throw new IllegalArgumentException("signerType cannot be null"); + } + if (signerClass == null) { + throw new IllegalArgumentException("signerClass cannot be null"); + } + + SIGNERS.put(signerType, signerClass); + } + + /** + * Check if the signer has already been registered. + * @param signerType signer to get + */ + public static void verifySignerRegistered(String signerType) { + Class signerClass = SIGNERS.get(signerType); + if (signerClass == null) { + throw new IllegalArgumentException("unknown signer type: " + signerType); + } + } + + + /** + * Create an instance of the given signer. + * + * @param signerType The signer type. + * @param configKey Config key used to configure the signer. + * @return The new signer instance. + * @throws IOException on any problem. 
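      To illustrate how this factory is meant to be used, a sketch of registering and then instantiating a custom signer. ExampleSigner is a hypothetical class implementing the SDK V2 Signer interface, and the config key is only a label passed through for diagnostics.

      ```java
      import java.io.IOException;

      import software.amazon.awssdk.core.signer.Signer;

      public final class CustomSignerSketch {
        public static Signer load() throws IOException {
          // Hypothetical signer implementation supplied by the deployment.
          SignerFactory.registerSigner("ExampleSignerType", ExampleSigner.class);
          // Throws IllegalArgumentException if the type was never registered.
          SignerFactory.verifySignerRegistered("ExampleSignerType");
          return SignerFactory.createSigner("ExampleSignerType", "fs.s3a.signing-algorithm");
        }
      }
      ```
      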
+ */ + public static Signer createSigner(String signerType, String configKey) throws IOException { + Class signerClass = SIGNERS.get(signerType); + String className = signerClass.getName(); + + LOG.debug("Signer class is {}", className); + + Signer signer = + S3AUtils.getInstanceFromReflection(signerClass, null, null, Signer.class, "create", + configKey); + + return signer; + } +} diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/SignerManager.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/SignerManager.java index e162428787c..198bc66133a 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/SignerManager.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/SignerManager.java @@ -22,14 +22,12 @@ import java.io.IOException; import java.util.LinkedList; import java.util.List; -import com.amazonaws.auth.Signer; -import com.amazonaws.auth.SignerFactory; +import software.amazon.awssdk.core.signer.Signer; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.s3a.auth.delegation.DelegationTokenProvider; -import org.apache.hadoop.fs.s3a.impl.V2Migration; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.util.ReflectionUtils; @@ -71,8 +69,6 @@ public class SignerManager implements Closeable { return; } - V2Migration.v1CustomSignerUsed(); - for (String customSigner : customSigners) { String[] parts = customSigner.split(":"); if (!(parts.length == 1 || parts.length == 2 || parts.length == 3)) { @@ -119,7 +115,7 @@ public class SignerManager implements Closeable { private static void maybeRegisterSigner(String signerName, String signerClassName, Configuration conf) { try { - SignerFactory.getSignerByTypeAndService(signerName, null); + SignerFactory.verifySignerRegistered(signerName); } catch (IllegalArgumentException e) { // Signer is not registered with the AWS SDK. // Load the class and register the signer. diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/delegation/EncryptionSecretOperations.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/delegation/EncryptionSecretOperations.java index 6526f9a9478..bcd358e2d16 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/delegation/EncryptionSecretOperations.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/delegation/EncryptionSecretOperations.java @@ -20,9 +20,6 @@ package org.apache.hadoop.fs.s3a.auth.delegation; import java.util.Optional; -import com.amazonaws.services.s3.model.SSEAwsKeyManagementParams; -import com.amazonaws.services.s3.model.SSECustomerKey; - import org.apache.hadoop.fs.s3a.S3AEncryptionMethods; /** @@ -35,37 +32,30 @@ public final class EncryptionSecretOperations { private EncryptionSecretOperations() { } - /** - * Create SSE-C client side key encryption options on demand. - * @return an optional key to attach to a request. + /*** + * Gets the SSE-C client side key if present. + * * @param secrets source of the encryption secrets. + * @return an optional key to attach to a request. 
*/ - public static Optional createSSECustomerKey( - final EncryptionSecrets secrets) { - if (secrets.hasEncryptionKey() && - secrets.getEncryptionMethod() == S3AEncryptionMethods.SSE_C) { - return Optional.of(new SSECustomerKey(secrets.getEncryptionKey())); + public static Optional getSSECustomerKey(final EncryptionSecrets secrets) { + if (secrets.hasEncryptionKey() && secrets.getEncryptionMethod() == S3AEncryptionMethods.SSE_C) { + return Optional.of(secrets.getEncryptionKey()); } else { return Optional.empty(); } } /** - * Create SSE-KMS options for a request, iff the encryption is SSE-KMS. - * @return an optional SSE-KMS param to attach to a request. + * Gets the SSE-KMS key if present, else let S3 use AWS managed key. + * * @param secrets source of the encryption secrets. + * @return an optional key to attach to a request. */ - public static Optional createSSEAwsKeyManagementParams( - final EncryptionSecrets secrets) { - - //Use specified key, otherwise default to default master aws/s3 key by AWS - if (secrets.getEncryptionMethod() == S3AEncryptionMethods.SSE_KMS) { - if (secrets.hasEncryptionKey()) { - return Optional.of(new SSEAwsKeyManagementParams( - secrets.getEncryptionKey())); - } else { - return Optional.of(new SSEAwsKeyManagementParams()); - } + public static Optional getSSEAwsKMSKey(final EncryptionSecrets secrets) { + if (secrets.getEncryptionMethod() == S3AEncryptionMethods.SSE_KMS + && secrets.hasEncryptionKey()) { + return Optional.of(secrets.getEncryptionKey()); } else { return Optional.empty(); } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/delegation/RoleTokenBinding.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/delegation/RoleTokenBinding.java index 8a9ee30f64e..75d907aac81 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/delegation/RoleTokenBinding.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/delegation/RoleTokenBinding.java @@ -23,7 +23,7 @@ import java.util.Optional; import java.util.UUID; import java.util.concurrent.TimeUnit; -import com.amazonaws.services.securitytoken.model.Credentials; +import software.amazon.awssdk.services.sts.model.Credentials; import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.slf4j.Logger; diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/delegation/SessionTokenBinding.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/delegation/SessionTokenBinding.java index 87fbdf53ccb..780c1317788 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/delegation/SessionTokenBinding.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/delegation/SessionTokenBinding.java @@ -26,21 +26,18 @@ import java.util.Optional; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; -import com.amazonaws.ClientConfiguration; -import com.amazonaws.auth.AWSCredentials; -import com.amazonaws.auth.AWSSessionCredentials; -import com.amazonaws.services.securitytoken.AWSSecurityTokenService; +import software.amazon.awssdk.auth.credentials.AwsCredentials; +import software.amazon.awssdk.auth.credentials.AwsSessionCredentials; +import software.amazon.awssdk.services.sts.StsClient; import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import org.slf4j.Logger; 
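      Looking back at the EncryptionSecretOperations change above: the helpers now return plain key strings, which the request factory is expected to apply to the V2 request builders. A minimal sketch under that assumption (the bucket and key names are placeholders, and the exact wiring lives in RequestFactoryImpl rather than in this hunk):

      ```java
      import software.amazon.awssdk.services.s3.model.PutObjectRequest;
      import software.amazon.awssdk.services.s3.model.ServerSideEncryption;

      public final class SseKmsSketch {
        public static PutObjectRequest newPut(EncryptionSecrets secrets) {
          PutObjectRequest.Builder builder = PutObjectRequest.builder()
              .bucket("example-bucket")      // placeholder
              .key("example/object");        // placeholder
          // If an SSE-KMS key is configured, attach it; otherwise S3 uses the aws/s3 managed key.
          EncryptionSecretOperations.getSSEAwsKMSKey(secrets).ifPresent(kmsKey ->
              builder.serverSideEncryption(ServerSideEncryption.AWS_KMS)
                  .ssekmsKeyId(kmsKey));
          return builder.build();
        }
      }
      ```
      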
import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.s3a.AWSCredentialProviderList; -import org.apache.hadoop.fs.s3a.Constants; import org.apache.hadoop.fs.s3a.Invoker; import org.apache.hadoop.fs.s3a.Retries; import org.apache.hadoop.fs.s3a.S3ARetryPolicy; -import org.apache.hadoop.fs.s3a.S3AUtils; import org.apache.hadoop.fs.s3a.auth.MarshalledCredentialProvider; import org.apache.hadoop.fs.s3a.auth.MarshalledCredentials; import org.apache.hadoop.fs.s3a.auth.RoleModel; @@ -50,8 +47,8 @@ import org.apache.hadoop.io.Text; import static org.apache.hadoop.fs.s3a.Constants.AWS_CREDENTIALS_PROVIDER; import static org.apache.hadoop.fs.s3a.Invoker.once; -import static org.apache.hadoop.fs.s3a.S3AUtils.STANDARD_AWS_PROVIDERS; -import static org.apache.hadoop.fs.s3a.S3AUtils.buildAWSProviderList; +import static org.apache.hadoop.fs.s3a.auth.AwsCredentialListProvider.STANDARD_AWS_PROVIDERS; +import static org.apache.hadoop.fs.s3a.auth.AwsCredentialListProvider.buildAWSProviderList; import static org.apache.hadoop.fs.s3a.auth.MarshalledCredentialBinding.fromAWSCredentials; import static org.apache.hadoop.fs.s3a.auth.MarshalledCredentialBinding.fromSTSCredentials; import static org.apache.hadoop.fs.s3a.auth.delegation.DelegationConstants.*; @@ -292,23 +289,22 @@ public class SessionTokenBinding extends AbstractDelegationTokenBinding { // chain. // As no codepath (session propagation, STS creation) will work, // throw this. - final AWSCredentials parentCredentials = once("get credentials", + final AwsCredentials parentCredentials = once("get credentials", "", - () -> parentAuthChain.getCredentials()); - hasSessionCreds = parentCredentials instanceof AWSSessionCredentials; + () -> parentAuthChain.resolveCredentials()); + hasSessionCreds = parentCredentials instanceof AwsSessionCredentials; if (!hasSessionCreds) { LOG.debug("Creating STS client for {}", getDescription()); invoker = new Invoker(new S3ARetryPolicy(conf), LOG_EVENT); - ClientConfiguration awsConf = - S3AUtils.createAwsConf(conf, uri.getHost(), - Constants.AWS_SERVICE_IDENTIFIER_STS); - AWSSecurityTokenService tokenService = + + StsClient tokenService = STSClientFactory.builder(parentAuthChain, - awsConf, + conf, endpoint, - region) + region, + uri.getHost()) .build(); stsClient = Optional.of( STSClientFactory.createClientConnection(tokenService, invoker)); @@ -374,11 +370,11 @@ public class SessionTokenBinding extends AbstractDelegationTokenBinding { + " -duration unknown", getCanonicalUri()); } origin += " " + CREDENTIALS_CONVERTED_TO_DELEGATION_TOKEN; - final AWSCredentials awsCredentials - = parentAuthChain.getCredentials(); - if (awsCredentials instanceof AWSSessionCredentials) { + final AwsCredentials awsCredentials + = parentAuthChain.resolveCredentials(); + if (awsCredentials instanceof AwsSessionCredentials) { marshalledCredentials = fromAWSCredentials( - (AWSSessionCredentials) awsCredentials); + (AwsSessionCredentials) awsCredentials); } else { throw new DelegationTokenIOException( "AWS Authentication chain is no longer supplying session secrets"); diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/commit/AbstractS3ACommitter.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/commit/AbstractS3ACommitter.java index e53c690431e..09664a6dbdf 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/commit/AbstractS3ACommitter.java +++ 
b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/commit/AbstractS3ACommitter.java @@ -27,10 +27,11 @@ import java.util.Date; import java.util.List; import java.util.UUID; -import com.amazonaws.services.s3.model.MultipartUpload; +import software.amazon.awssdk.services.s3.model.MultipartUpload; import org.slf4j.Logger; import org.slf4j.LoggerFactory; + import org.apache.commons.lang3.tuple.Pair; import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.classification.VisibleForTesting; @@ -975,7 +976,7 @@ public abstract class AbstractS3ACommitter extends PathOutputCommitter .executeWith(commitContext.getOuterSubmitter()) .suppressExceptions(suppressExceptions) .run(u -> commitContext.abortMultipartCommit( - u.getKey(), u.getUploadId())); + u.key(), u.uploadId())); } else { LOG.info("No pending uploads were found"); } @@ -1300,8 +1301,8 @@ public abstract class AbstractS3ACommitter extends PathOutputCommitter DateFormat df = DateFormat.getDateTimeInstance(); pending.forEach(u -> LOG.info("[{}] {}", - df.format(u.getInitiated()), - u.getKey())); + df.format(Date.from(u.initiated())), + u.key())); if (shouldAbortUploadsInCleanup()) { LOG.warn("This committer will abort these uploads in job cleanup"); } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/commit/PutTracker.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/commit/PutTracker.java index 10440f77e72..6c3cf3942d5 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/commit/PutTracker.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/commit/PutTracker.java @@ -21,7 +21,7 @@ package org.apache.hadoop.fs.s3a.commit; import java.io.IOException; import java.util.List; -import com.amazonaws.services.s3.model.PartETag; +import software.amazon.awssdk.services.s3.model.CompletedPart; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.fs.statistics.IOStatistics; @@ -76,7 +76,7 @@ public class PutTracker { * @throws IOException I/O problem or validation failure. 
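      Since the V2 SDK replaces PartETag with CompletedPart throughout the commit path, here is a short sketch of how a collected list of etags becomes the structure a complete-multipart-upload call expects (the etag values are placeholders):

      ```java
      import java.util.ArrayList;
      import java.util.Arrays;
      import java.util.List;

      import software.amazon.awssdk.services.s3.model.CompletedMultipartUpload;
      import software.amazon.awssdk.services.s3.model.CompletedPart;

      public final class CompletedPartSketch {
        public static CompletedMultipartUpload fromEtags() {
          List<String> etags = Arrays.asList("etag-1", "etag-2");   // placeholder etags
          List<CompletedPart> parts = new ArrayList<>();
          for (int i = 0; i < etags.size(); i++) {
            parts.add(CompletedPart.builder()
                .partNumber(i + 1)     // part numbers are 1-based
                .eTag(etags.get(i))
                .build());
          }
          return CompletedMultipartUpload.builder().parts(parts).build();
        }
      }
      ```
      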
*/ public boolean aboutToComplete(String uploadId, - List parts, + List parts, long bytesWritten, final IOStatistics iostatistics) throws IOException { diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/commit/files/SinglePendingCommit.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/commit/files/SinglePendingCommit.java index 77c3fed11fb..e4541ba4da3 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/commit/files/SinglePendingCommit.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/commit/files/SinglePendingCommit.java @@ -31,9 +31,10 @@ import java.util.Iterator; import java.util.List; import java.util.Map; -import com.amazonaws.services.s3.model.PartETag; +import software.amazon.awssdk.services.s3.model.CompletedPart; import com.fasterxml.jackson.annotation.JsonProperty; + import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; @@ -215,13 +216,13 @@ public class SinglePendingCommit extends PersistentCommitData parts) throws ValidationFailure { + public void bindCommitData(List parts) throws ValidationFailure { etags = new ArrayList<>(parts.size()); int counter = 1; - for (PartETag part : parts) { - verify(part.getPartNumber() == counter, - "Expected part number %s but got %s", counter, part.getPartNumber()); - etags.add(part.getETag()); + for (CompletedPart part : parts) { + verify(part.partNumber() == counter, + "Expected part number %s but got %s", counter, part.partNumber()); + etags.add(part.eTag()); counter++; } } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/commit/impl/CommitOperations.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/commit/impl/CommitOperations.java index ef56d829781..d1943fa4777 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/commit/impl/CommitOperations.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/commit/impl/CommitOperations.java @@ -21,6 +21,8 @@ package org.apache.hadoop.fs.s3a.commit.impl; import java.io.File; import java.io.FileNotFoundException; import java.io.IOException; +import java.io.InputStream; +import java.nio.file.Files; import java.util.ArrayList; import java.util.Collections; import java.util.List; @@ -32,13 +34,15 @@ import java.util.stream.IntStream; import javax.annotation.Nullable; -import com.amazonaws.services.s3.model.MultipartUpload; -import com.amazonaws.services.s3.model.PartETag; -import com.amazonaws.services.s3.model.UploadPartRequest; -import com.amazonaws.services.s3.model.UploadPartResult; +import software.amazon.awssdk.core.sync.RequestBody; +import software.amazon.awssdk.services.s3.model.CompletedPart; +import software.amazon.awssdk.services.s3.model.MultipartUpload; +import software.amazon.awssdk.services.s3.model.UploadPartRequest; +import software.amazon.awssdk.services.s3.model.UploadPartResponse; import org.slf4j.Logger; import org.slf4j.LoggerFactory; + import org.apache.commons.lang3.tuple.Pair; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.LocatedFileStatus; @@ -155,9 +159,9 @@ public class CommitOperations extends AbstractStoreOperation * @param tagIds list of tags * @return same list, now in numbered tuples */ - public static List toPartEtags(List tagIds) { + public static List toPartEtags(List tagIds) { return IntStream.range(0, tagIds.size()) - .mapToObj(i -> new PartETag(i + 1, 
tagIds.get(i))) + .mapToObj(i -> CompletedPart.builder().partNumber(i + 1).eTag(tagIds.get(i)).build()) .collect(Collectors.toList()); } @@ -566,26 +570,30 @@ public class CommitOperations extends AbstractStoreOperation numParts, length)); } - List parts = new ArrayList<>((int) numParts); + List parts = new ArrayList<>((int) numParts); LOG.debug("File size is {}, number of parts to upload = {}", length, numParts); - for (int partNumber = 1; partNumber <= numParts; partNumber += 1) { - progress.progress(); - long size = Math.min(length - offset, uploadPartSize); - UploadPartRequest part; - part = writeOperations.newUploadPartRequest( - destKey, - uploadId, - partNumber, - (int) size, - null, - localFile, - offset); - part.setLastPart(partNumber == numParts); - UploadPartResult partResult = writeOperations.uploadPart(part, statistics); - offset += uploadPartSize; - parts.add(partResult.getPartETag()); + + // Open the file to upload. + try (InputStream fileStream = Files.newInputStream(localFile.toPath())) { + for (int partNumber = 1; partNumber <= numParts; partNumber += 1) { + progress.progress(); + long size = Math.min(length - offset, uploadPartSize); + UploadPartRequest part = writeOperations.newUploadPartRequestBuilder( + destKey, + uploadId, + partNumber, + size).build(); + // Read from the file input stream at current position. + RequestBody body = RequestBody.fromInputStream(fileStream, size); + UploadPartResponse response = writeOperations.uploadPart(part, body, statistics); + offset += uploadPartSize; + parts.add(CompletedPart.builder() + .partNumber(partNumber) + .eTag(response.eTag()) + .build()); + } } commitData.bindCommitData(parts); diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/commit/magic/MagicCommitTracker.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/commit/magic/MagicCommitTracker.java index 1a5451df801..b2e703e1b08 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/commit/magic/MagicCommitTracker.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/commit/magic/MagicCommitTracker.java @@ -20,19 +20,22 @@ package org.apache.hadoop.fs.s3a.commit.magic; import java.io.ByteArrayInputStream; import java.io.IOException; +import java.io.InputStream; import java.util.HashMap; import java.util.List; import java.util.Map; -import com.amazonaws.services.s3.model.PartETag; -import com.amazonaws.services.s3.model.PutObjectRequest; +import software.amazon.awssdk.services.s3.model.CompletedPart; +import software.amazon.awssdk.services.s3.model.PutObjectRequest; import org.slf4j.Logger; import org.slf4j.LoggerFactory; + import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.s3a.Retries; +import org.apache.hadoop.fs.s3a.S3ADataBlocks; import org.apache.hadoop.fs.s3a.WriteOperationHelper; import org.apache.hadoop.fs.s3a.commit.PutTracker; import org.apache.hadoop.fs.s3a.commit.files.SinglePendingCommit; @@ -125,7 +128,7 @@ public class MagicCommitTracker extends PutTracker { */ @Override public boolean aboutToComplete(String uploadId, - List parts, + List parts, long bytesWritten, final IOStatistics iostatistics) throws IOException { @@ -144,10 +147,9 @@ public class MagicCommitTracker extends PutTracker { headers.put(X_HEADER_MAGIC_MARKER, Long.toString(bytesWritten)); PutObjectRequest originalDestPut = writer.createPutObjectRequest( originalDestKey, - new 
ByteArrayInputStream(EMPTY), 0, - new PutObjectOptions(true, null, headers)); - upload(originalDestPut); + new PutObjectOptions(true, null, headers), false); + upload(originalDestPut, new ByteArrayInputStream(EMPTY)); // build the commit summary SinglePendingCommit commitData = new SinglePendingCommit(); @@ -170,22 +172,22 @@ public class MagicCommitTracker extends PutTracker { path, pendingPartKey, commitData); PutObjectRequest put = writer.createPutObjectRequest( pendingPartKey, - new ByteArrayInputStream(bytes), - bytes.length, null); - upload(put); + bytes.length, null, false); + upload(put, new ByteArrayInputStream(bytes)); return false; } /** * PUT an object. * @param request the request + * @param inputStream input stream of data to be uploaded * @throws IOException on problems */ @Retries.RetryTranslated - private void upload(PutObjectRequest request) throws IOException { - trackDurationOfInvocation(trackerStatistics, - COMMITTER_MAGIC_MARKER_PUT.getSymbol(), () -> - writer.putObject(request, PutObjectOptions.keepingDirs(), null)); + private void upload(PutObjectRequest request, InputStream inputStream) throws IOException { + trackDurationOfInvocation(trackerStatistics, COMMITTER_MAGIC_MARKER_PUT.getSymbol(), + () -> writer.putObject(request, PutObjectOptions.keepingDirs(), + new S3ADataBlocks.BlockUploadData(inputStream), false, null)); } @Override diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/AWSCannedACL.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/AWSCannedACL.java new file mode 100644 index 00000000000..12a89d50f6b --- /dev/null +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/AWSCannedACL.java @@ -0,0 +1,43 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.s3a.impl; + +/** + * Enum to map AWS SDK V1 Acl values to SDK V2. 
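      A sketch of how the string carried by this enum can be mapped onto the V2 request model; it assumes the standard ObjectCannedACL enum from the SDK, and the bucket and key names are placeholders.

      ```java
      import software.amazon.awssdk.services.s3.model.ObjectCannedACL;
      import software.amazon.awssdk.services.s3.model.PutObjectRequest;

      public final class CannedAclSketch {
        public static PutObjectRequest withAcl() {
          // "bucket-owner-full-control", as carried by the enum below.
          String configuredAcl = AWSCannedACL.BucketOwnerFullControl.toString();
          return PutObjectRequest.builder()
              .bucket("example-bucket")                       // placeholder
              .key("example/object")                          // placeholder
              .acl(ObjectCannedACL.fromValue(configuredAcl))  // map the string onto the V2 enum
              .build();
        }
      }
      ```
      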
+ */ +public enum AWSCannedACL { + Private("private"), + PublicRead("public-read"), + PublicReadWrite("public-read-write"), + AuthenticatedRead("authenticated-read"), + AwsExecRead("aws-exec-read"), + BucketOwnerRead("bucket-owner-read"), + BucketOwnerFullControl("bucket-owner-full-control"), + LogDeliveryWrite("log-delivery-write"); + + private final String value; + + AWSCannedACL(String value){ + this.value = value; + } + + public String toString() { + return this.value; + } +} diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/AWSClientConfig.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/AWSClientConfig.java new file mode 100644 index 00000000000..a69907755ee --- /dev/null +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/AWSClientConfig.java @@ -0,0 +1,379 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.s3a.impl; + +import java.io.IOException; +import java.net.URI; +import java.net.URISyntaxException; +import java.time.Duration; +import java.util.concurrent.TimeUnit; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import software.amazon.awssdk.core.client.config.ClientOverrideConfiguration; +import software.amazon.awssdk.core.client.config.SdkAdvancedClientOption; +import software.amazon.awssdk.core.retry.RetryPolicy; +import software.amazon.awssdk.http.apache.ApacheHttpClient; +import software.amazon.awssdk.http.apache.ProxyConfiguration; +import software.amazon.awssdk.http.nio.netty.NettyNioAsyncHttpClient; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.s3a.S3AUtils; +import org.apache.hadoop.fs.s3a.auth.SignerFactory; +import org.apache.hadoop.util.VersionInfo; +import org.apache.http.client.utils.URIBuilder; + +import static org.apache.hadoop.fs.s3a.Constants.AWS_SERVICE_IDENTIFIER_S3; +import static org.apache.hadoop.fs.s3a.Constants.AWS_SERVICE_IDENTIFIER_STS; +import static org.apache.hadoop.fs.s3a.Constants.DEFAULT_ESTABLISH_TIMEOUT; +import static org.apache.hadoop.fs.s3a.Constants.DEFAULT_MAXIMUM_CONNECTIONS; +import static org.apache.hadoop.fs.s3a.Constants.DEFAULT_MAX_ERROR_RETRIES; +import static org.apache.hadoop.fs.s3a.Constants.DEFAULT_REQUEST_TIMEOUT; +import static org.apache.hadoop.fs.s3a.Constants.DEFAULT_SOCKET_TIMEOUT; +import static org.apache.hadoop.fs.s3a.Constants.ESTABLISH_TIMEOUT; +import static org.apache.hadoop.fs.s3a.Constants.MAXIMUM_CONNECTIONS; +import static org.apache.hadoop.fs.s3a.Constants.MAX_ERROR_RETRIES; +import static org.apache.hadoop.fs.s3a.Constants.PROXY_DOMAIN; +import static org.apache.hadoop.fs.s3a.Constants.PROXY_HOST; +import static org.apache.hadoop.fs.s3a.Constants.PROXY_PASSWORD; +import static 
org.apache.hadoop.fs.s3a.Constants.PROXY_PORT; +import static org.apache.hadoop.fs.s3a.Constants.PROXY_SECURED; +import static org.apache.hadoop.fs.s3a.Constants.PROXY_USERNAME; +import static org.apache.hadoop.fs.s3a.Constants.PROXY_WORKSTATION; +import static org.apache.hadoop.fs.s3a.Constants.REQUEST_TIMEOUT; +import static org.apache.hadoop.fs.s3a.Constants.SIGNING_ALGORITHM; +import static org.apache.hadoop.fs.s3a.Constants.SIGNING_ALGORITHM_S3; +import static org.apache.hadoop.fs.s3a.Constants.SIGNING_ALGORITHM_STS; +import static org.apache.hadoop.fs.s3a.Constants.SOCKET_TIMEOUT; +import static org.apache.hadoop.fs.s3a.Constants.USER_AGENT_PREFIX; + +/** + * Methods for configuring the S3 client. + * These methods are used when creating and configuring + * {@link software.amazon.awssdk.services.s3.S3Client} which communicates with the S3 service. + */ +public final class AWSClientConfig { + private static final Logger LOG = LoggerFactory.getLogger(AWSClientConfig.class); + + private AWSClientConfig() { + } + + public static ClientOverrideConfiguration.Builder createClientConfigBuilder(Configuration conf, + String awsServiceIdentifier) throws IOException { + ClientOverrideConfiguration.Builder overrideConfigBuilder = + ClientOverrideConfiguration.builder(); + + initRequestTimeout(conf, overrideConfigBuilder); + + initUserAgent(conf, overrideConfigBuilder); + + String signer = conf.getTrimmed(SIGNING_ALGORITHM, ""); + if (!signer.isEmpty()) { + LOG.debug("Signer override = {}", signer); + overrideConfigBuilder.putAdvancedOption(SdkAdvancedClientOption.SIGNER, + SignerFactory.createSigner(signer, SIGNING_ALGORITHM)); + } + + initSigner(conf, overrideConfigBuilder, awsServiceIdentifier); + + return overrideConfigBuilder; + } + + /** + * Configures the http client. + * + * @param conf The Hadoop configuration + * @return Http client builder + * @throws IOException on any problem + */ + public static ApacheHttpClient.Builder createHttpClientBuilder(Configuration conf) + throws IOException { + ApacheHttpClient.Builder httpClientBuilder = + ApacheHttpClient.builder(); + + httpClientBuilder.maxConnections(S3AUtils.intOption(conf, MAXIMUM_CONNECTIONS, + DEFAULT_MAXIMUM_CONNECTIONS, 1)); + + int connectionEstablishTimeout = + S3AUtils.intOption(conf, ESTABLISH_TIMEOUT, DEFAULT_ESTABLISH_TIMEOUT, 0); + int socketTimeout = S3AUtils.intOption(conf, SOCKET_TIMEOUT, DEFAULT_SOCKET_TIMEOUT, 0); + + httpClientBuilder.connectionTimeout(Duration.ofSeconds(connectionEstablishTimeout)); + httpClientBuilder.socketTimeout(Duration.ofSeconds(socketTimeout)); + + NetworkBinding.bindSSLChannelMode(conf, httpClientBuilder); + + return httpClientBuilder; + } + + /** + * Configures the async http client. 
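As a hedged sketch of how these builders are expected to be consumed when the client is assembled (the actual wiring is done by the client factory elsewhere in this patch; conf and bucket are assumed to be in scope, and the checked IOExceptions are omitted):

    S3Client s3 = S3Client.builder()
        .overrideConfiguration(
            AWSClientConfig.createClientConfigBuilder(conf, AWS_SERVICE_IDENTIFIER_S3).build())
        .httpClientBuilder(
            AWSClientConfig.createHttpClientBuilder(conf)
                .proxyConfiguration(AWSClientConfig.createProxyConfiguration(conf, bucket)))
        .build();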
+ * + * @param conf The Hadoop configuration + * @return Http client builder + */ + public static NettyNioAsyncHttpClient.Builder createAsyncHttpClientBuilder(Configuration conf) { + NettyNioAsyncHttpClient.Builder httpClientBuilder = + NettyNioAsyncHttpClient.builder(); + + httpClientBuilder.maxConcurrency(S3AUtils.intOption(conf, MAXIMUM_CONNECTIONS, + DEFAULT_MAXIMUM_CONNECTIONS, 1)); + + int connectionEstablishTimeout = + S3AUtils.intOption(conf, ESTABLISH_TIMEOUT, DEFAULT_ESTABLISH_TIMEOUT, 0); + int socketTimeout = S3AUtils.intOption(conf, SOCKET_TIMEOUT, DEFAULT_SOCKET_TIMEOUT, 0); + + httpClientBuilder.connectionTimeout(Duration.ofSeconds(connectionEstablishTimeout)); + httpClientBuilder.readTimeout(Duration.ofSeconds(socketTimeout)); + httpClientBuilder.writeTimeout(Duration.ofSeconds(socketTimeout)); + + // TODO: Don't think you can set a socket factory for the netty client. + // NetworkBinding.bindSSLChannelMode(conf, awsConf); + + return httpClientBuilder; + } + + /** + * Configures the retry policy. + * + * @param conf The Hadoop configuration + * @return Retry policy builder + */ + public static RetryPolicy.Builder createRetryPolicyBuilder(Configuration conf) { + + RetryPolicy.Builder retryPolicyBuilder = RetryPolicy.builder(); + + retryPolicyBuilder.numRetries(S3AUtils.intOption(conf, MAX_ERROR_RETRIES, + DEFAULT_MAX_ERROR_RETRIES, 0)); + + return retryPolicyBuilder; + } + + /** + * Configures the proxy. + * + * @param conf The Hadoop configuration + * @param bucket Optional bucket to use to look up per-bucket proxy secrets + * @return Proxy configuration + * @throws IOException on any IO problem + */ + public static ProxyConfiguration createProxyConfiguration(Configuration conf, + String bucket) throws IOException { + + ProxyConfiguration.Builder proxyConfigBuilder = ProxyConfiguration.builder(); + + String proxyHost = conf.getTrimmed(PROXY_HOST, ""); + int proxyPort = conf.getInt(PROXY_PORT, -1); + + if (!proxyHost.isEmpty()) { + if (proxyPort >= 0) { + String scheme = conf.getBoolean(PROXY_SECURED, false) ? "https" : "http"; + proxyConfigBuilder.endpoint(buildURI(scheme, proxyHost, proxyPort)); + } else { + if (conf.getBoolean(PROXY_SECURED, false)) { + LOG.warn("Proxy host set without port. Using HTTPS default 443"); + proxyConfigBuilder.endpoint(buildURI("https", proxyHost, 443)); + } else { + LOG.warn("Proxy host set without port. 
Using HTTP default 80"); + proxyConfigBuilder.endpoint(buildURI("http", proxyHost, 80)); + } + } + final String proxyUsername = S3AUtils.lookupPassword(bucket, conf, PROXY_USERNAME, + null, null); + final String proxyPassword = S3AUtils.lookupPassword(bucket, conf, PROXY_PASSWORD, + null, null); + if ((proxyUsername == null) != (proxyPassword == null)) { + String msg = "Proxy error: " + PROXY_USERNAME + " or " + + PROXY_PASSWORD + " set without the other."; + LOG.error(msg); + throw new IllegalArgumentException(msg); + } + proxyConfigBuilder.username(proxyUsername); + proxyConfigBuilder.password(proxyPassword); + proxyConfigBuilder.ntlmDomain(conf.getTrimmed(PROXY_DOMAIN)); + proxyConfigBuilder.ntlmWorkstation(conf.getTrimmed(PROXY_WORKSTATION)); + if (LOG.isDebugEnabled()) { + LOG.debug("Using proxy server {}:{} as user {} with password {} on " + + "domain {} as workstation {}", proxyHost, proxyPort, proxyUsername, proxyPassword, + PROXY_DOMAIN, PROXY_WORKSTATION); + } + } else if (proxyPort >= 0) { + String msg = + "Proxy error: " + PROXY_PORT + " set without " + PROXY_HOST; + LOG.error(msg); + throw new IllegalArgumentException(msg); + } + + return proxyConfigBuilder.build(); + } + + /** + * Configures the proxy for the async http client. + * + * @param conf The Hadoop configuration + * @param bucket Optional bucket to use to look up per-bucket proxy secrets + * @return Proxy configuration + * @throws IOException on any IO problem + */ + public static software.amazon.awssdk.http.nio.netty.ProxyConfiguration + createAsyncProxyConfiguration(Configuration conf, + String bucket) throws IOException { + + software.amazon.awssdk.http.nio.netty.ProxyConfiguration.Builder proxyConfigBuilder = + software.amazon.awssdk.http.nio.netty.ProxyConfiguration.builder(); + + String proxyHost = conf.getTrimmed(PROXY_HOST, ""); + int proxyPort = conf.getInt(PROXY_PORT, -1); + + if (!proxyHost.isEmpty()) { + if (proxyPort >= 0) { + String scheme = conf.getBoolean(PROXY_SECURED, false) ? "https" : "http"; + proxyConfigBuilder.host(proxyHost); + proxyConfigBuilder.port(proxyPort); + proxyConfigBuilder.scheme(scheme); + } else { + if (conf.getBoolean(PROXY_SECURED, false)) { + LOG.warn("Proxy host set without port. Using HTTPS default 443"); + proxyConfigBuilder.host(proxyHost); + proxyConfigBuilder.port(443); + proxyConfigBuilder.scheme("https"); + } else { + LOG.warn("Proxy host set without port. 
Using HTTP default 80"); + proxyConfigBuilder.host(proxyHost); + proxyConfigBuilder.port(80); + proxyConfigBuilder.scheme("http"); + } + } + final String proxyUsername = S3AUtils.lookupPassword(bucket, conf, PROXY_USERNAME, + null, null); + final String proxyPassword = S3AUtils.lookupPassword(bucket, conf, PROXY_PASSWORD, + null, null); + if ((proxyUsername == null) != (proxyPassword == null)) { + String msg = "Proxy error: " + PROXY_USERNAME + " or " + + PROXY_PASSWORD + " set without the other."; + LOG.error(msg); + throw new IllegalArgumentException(msg); + } + proxyConfigBuilder.username(proxyUsername); + proxyConfigBuilder.password(proxyPassword); + // TODO: check NTLM support + // proxyConfigBuilder.ntlmDomain(conf.getTrimmed(PROXY_DOMAIN)); + // proxyConfigBuilder.ntlmWorkstation(conf.getTrimmed(PROXY_WORKSTATION)); + if (LOG.isDebugEnabled()) { + LOG.debug("Using proxy server {}:{} as user {} with password {} on " + + "domain {} as workstation {}", proxyHost, proxyPort, proxyUsername, proxyPassword, + PROXY_DOMAIN, PROXY_WORKSTATION); + } + } else if (proxyPort >= 0) { + String msg = + "Proxy error: " + PROXY_PORT + " set without " + PROXY_HOST; + LOG.error(msg); + throw new IllegalArgumentException(msg); + } else { + return null; + } + + return proxyConfigBuilder.build(); + } + + /*** + * Builds a URI, throws an IllegalArgumentException in case of errors. + * + * @param host proxy host + * @param port proxy port + * @return uri with host and port + */ + private static URI buildURI(String scheme, String host, int port) { + try { + return new URIBuilder().setScheme(scheme).setHost(host).setPort(port).build(); + } catch (URISyntaxException e) { + String msg = + "Proxy error: incorrect " + PROXY_HOST + " or " + PROXY_PORT; + LOG.error(msg); + throw new IllegalArgumentException(msg); + } + } + + /** + * Initializes the User-Agent header to send in HTTP requests to AWS + * services. We always include the Hadoop version number. The user also + * may set an optional custom prefix to put in front of the Hadoop version + * number. The AWS SDK internally appends its own information, which seems + * to include the AWS SDK version, OS and JVM version. + * + * @param conf Hadoop configuration + * @param clientConfig AWS SDK configuration to update + */ + private static void initUserAgent(Configuration conf, + ClientOverrideConfiguration.Builder clientConfig) { + String userAgent = "Hadoop " + VersionInfo.getVersion(); + String userAgentPrefix = conf.getTrimmed(USER_AGENT_PREFIX, ""); + if (!userAgentPrefix.isEmpty()) { + userAgent = userAgentPrefix + ", " + userAgent; + } + LOG.debug("Using User-Agent: {}", userAgent); + clientConfig.putAdvancedOption(SdkAdvancedClientOption.USER_AGENT_PREFIX, userAgent); + } + + private static void initSigner(Configuration conf, + ClientOverrideConfiguration.Builder clientConfig, String awsServiceIdentifier) + throws IOException { + String configKey = null; + switch (awsServiceIdentifier) { + case AWS_SERVICE_IDENTIFIER_S3: + configKey = SIGNING_ALGORITHM_S3; + break; + case AWS_SERVICE_IDENTIFIER_STS: + configKey = SIGNING_ALGORITHM_STS; + break; + default: + // Nothing to do. 
The original signer override is already setup + } + if (configKey != null) { + String signerOverride = conf.getTrimmed(configKey, ""); + if (!signerOverride.isEmpty()) { + LOG.debug("Signer override for {} = {}", awsServiceIdentifier, signerOverride); + clientConfig.putAdvancedOption(SdkAdvancedClientOption.SIGNER, + SignerFactory.createSigner(signerOverride, configKey)); + } + } + } + + /** + * Configures request timeout. + * + * @param conf Hadoop configuration + * @param clientConfig AWS SDK configuration to update + */ + private static void initRequestTimeout(Configuration conf, + ClientOverrideConfiguration.Builder clientConfig) { + long requestTimeoutMillis = conf.getTimeDuration(REQUEST_TIMEOUT, + DEFAULT_REQUEST_TIMEOUT, TimeUnit.SECONDS, TimeUnit.MILLISECONDS); + + if (requestTimeoutMillis > Integer.MAX_VALUE) { + LOG.debug("Request timeout is too high ({} ms). Setting to {} ms instead", + requestTimeoutMillis, Integer.MAX_VALUE); + requestTimeoutMillis = Integer.MAX_VALUE; + } + + if (requestTimeoutMillis > 0) { + clientConfig.apiCallAttemptTimeout(Duration.ofMillis(requestTimeoutMillis)); + } + } +} diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/AWSHeaders.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/AWSHeaders.java new file mode 100644 index 00000000000..3cb714588bd --- /dev/null +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/AWSHeaders.java @@ -0,0 +1,98 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.s3a.impl; + +/** + * Common S3 HTTP header values used throughout the Amazon Web Services S3 Java client. 
+ */ +public interface AWSHeaders { + + /* + * Standard HTTP Headers + */ + + String CACHE_CONTROL = "Cache-Control"; + String CONTENT_DISPOSITION = "Content-Disposition"; + String CONTENT_ENCODING = "Content-Encoding"; + String CONTENT_LENGTH = "Content-Length"; + String CONTENT_RANGE = "Content-Range"; + String CONTENT_MD5 = "Content-MD5"; + String CONTENT_TYPE = "Content-Type"; + String CONTENT_LANGUAGE = "Content-Language"; + String DATE = "Date"; + String ETAG = "ETag"; + String LAST_MODIFIED = "Last-Modified"; + + /* + * Amazon HTTP Headers used by S3A + */ + + /** S3's version ID header */ + String S3_VERSION_ID = "x-amz-version-id"; + + /** Header describing what class of storage a user wants */ + String STORAGE_CLASS = "x-amz-storage-class"; + + /** Header describing what archive tier the object is in, if any */ + String ARCHIVE_STATUS = "x-amz-archive-status"; + + /** Header for optional server-side encryption algorithm */ + String SERVER_SIDE_ENCRYPTION = "x-amz-server-side-encryption"; + + /** Range header for the get object request */ + String RANGE = "Range"; + + /** + * Encrypted symmetric key header that is used in the Encryption Only (EO) envelope + * encryption mechanism. + */ + @Deprecated + String CRYPTO_KEY = "x-amz-key"; + + /** JSON-encoded description of encryption materials used during encryption */ + String MATERIALS_DESCRIPTION = "x-amz-matdesc"; + + /** Header for the optional restore information of an object */ + String RESTORE = "x-amz-restore"; + + /** + * Key wrapping algorithm such as "AESWrap" and "RSA/ECB/OAEPWithSHA-256AndMGF1Padding". + */ + String CRYPTO_KEYWRAP_ALGORITHM = "x-amz-wrap-alg"; + /** + * Content encryption algorithm, such as "AES/GCM/NoPadding". + */ + String CRYPTO_CEK_ALGORITHM = "x-amz-cek-alg"; + + /** + * Headers in request indicating that the requester must be charged for data + * transfer. 
+ */ + String REQUESTER_PAYS_HEADER = "x-amz-request-payer"; + + /** Header for the replication status of an Amazon S3 Object.*/ + String OBJECT_REPLICATION_STATUS = "x-amz-replication-status"; + + String OBJECT_LOCK_MODE = "x-amz-object-lock-mode"; + + String OBJECT_LOCK_RETAIN_UNTIL_DATE = "x-amz-object-lock-retain-until-date"; + + String OBJECT_LOCK_LEGAL_HOLD_STATUS = "x-amz-object-lock-legal-hold"; + +} \ No newline at end of file diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/BulkDeleteRetryHandler.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/BulkDeleteRetryHandler.java index 4169a9899cb..5808607762e 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/BulkDeleteRetryHandler.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/BulkDeleteRetryHandler.java @@ -20,8 +20,9 @@ package org.apache.hadoop.fs.s3a.impl; import java.util.List; -import com.amazonaws.SdkClientException; -import com.amazonaws.services.s3.model.DeleteObjectsRequest; +import software.amazon.awssdk.core.exception.SdkClientException; +import software.amazon.awssdk.services.s3.model.DeleteObjectsRequest; +import software.amazon.awssdk.services.s3.model.ObjectIdentifier; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -30,6 +31,7 @@ import org.apache.hadoop.fs.s3a.S3AStorageStatistics; import org.apache.hadoop.fs.s3a.Statistic; import org.apache.hadoop.fs.s3a.statistics.S3AStatisticsContext; + import static org.apache.hadoop.fs.s3a.S3AUtils.isThrottleException; import static org.apache.hadoop.fs.s3a.Statistic.IGNORED_ERRORS; import static org.apache.hadoop.fs.s3a.Statistic.STORE_IO_THROTTLED; @@ -113,15 +115,15 @@ public class BulkDeleteRetryHandler extends AbstractStoreOperation { * @param deleteRequest request which failed. 
*/ private void onDeleteThrottled(final DeleteObjectsRequest deleteRequest) { - final List keys = deleteRequest.getKeys(); + final List keys = deleteRequest.delete().objects(); final int size = keys.size(); incrementStatistic(STORE_IO_THROTTLED, size); instrumentation.addValueToQuantiles(STORE_IO_THROTTLE_RATE, size); THROTTLE_LOG.info( "Bulk delete {} keys throttled -first key = {}; last = {}", size, - keys.get(0).getKey(), - keys.get(size - 1).getKey()); + keys.get(0).key(), + keys.get(size - 1).key()); } /** diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/ChangeDetectionPolicy.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/ChangeDetectionPolicy.java index 205826dadae..65291aa7b26 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/ChangeDetectionPolicy.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/ChangeDetectionPolicy.java @@ -20,11 +20,12 @@ package org.apache.hadoop.fs.s3a.impl; import java.util.Locale; -import com.amazonaws.services.s3.model.CopyObjectRequest; -import com.amazonaws.services.s3.model.GetObjectMetadataRequest; -import com.amazonaws.services.s3.model.GetObjectRequest; -import com.amazonaws.services.s3.model.ObjectMetadata; -import com.amazonaws.services.s3.transfer.model.CopyResult; +import software.amazon.awssdk.services.s3.model.CopyObjectRequest; +import software.amazon.awssdk.services.s3.model.CopyObjectResponse; +import software.amazon.awssdk.services.s3.model.GetObjectRequest; +import software.amazon.awssdk.services.s3.model.GetObjectResponse; +import software.amazon.awssdk.services.s3.model.HeadObjectRequest; +import software.amazon.awssdk.services.s3.model.HeadObjectResponse; import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -211,11 +212,24 @@ public abstract class ChangeDetectionPolicy { * null if the attribute is unavailable (such as when the policy says to use * versionId but object versioning is not enabled for the bucket). */ - public abstract String getRevisionId(ObjectMetadata objectMetadata, + public abstract String getRevisionId(HeadObjectResponse objectMetadata, String uri); /** - * Like {{@link #getRevisionId(ObjectMetadata, String)}}, but retrieves the + * Like {{@link #getRevisionId(HeadObjectResponse, String)}}, but retrieves the + * * revision identifier from {@link GetObjectResponse}. + * + * @param getObjectResponse the response instance + * @param uri the URI of the object + * @return the revisionId string as interpreted by this policy, or potentially + * null if the attribute is unavailable (such as when the policy says to use + * versionId but object versioning is not enabled for the bucket). + */ + public abstract String getRevisionId(GetObjectResponse getObjectResponse, + String uri); + + /** + * Like {{@link #getRevisionId(HeadObjectResponse, String)}}, but retrieves the * revision identifier from {@link S3ObjectAttributes}. * * @param s3Attributes the object attributes @@ -226,44 +240,44 @@ public abstract class ChangeDetectionPolicy { public abstract String getRevisionId(S3ObjectAttributes s3Attributes); /** - * Like {{@link #getRevisionId(ObjectMetadata, String)}}, but retrieves the - * revision identifier from {@link CopyResult}. + * Like {{@link #getRevisionId(HeadObjectResponse, String)}}, but retrieves the + * revision identifier from {@link CopyObjectResponse}. 
* - * @param copyResult the copy result + * @param copyObjectResponse the copy response * @return the revisionId string as interpreted by this policy, or potentially * null if the attribute is unavailable (such as when the policy says to use * versionId but object versioning is not enabled for the bucket). */ - public abstract String getRevisionId(CopyResult copyResult); + public abstract String getRevisionId(CopyObjectResponse copyObjectResponse); /** - * Applies the given {@link #getRevisionId(ObjectMetadata, String) revisionId} - * as a server-side qualification on the {@code GetObjectRequest}. + * Applies the given {@link #getRevisionId(HeadObjectResponse, String) revisionId} + * as a server-side qualification on the {@code GetObjectRequest.Builder}. * * @param request the request * @param revisionId the revision id */ - public abstract void applyRevisionConstraint(GetObjectRequest request, + public abstract void applyRevisionConstraint(GetObjectRequest.Builder request, String revisionId); /** - * Applies the given {@link #getRevisionId(ObjectMetadata, String) revisionId} + * Applies the given {@link #getRevisionId(HeadObjectResponse, String) revisionId} * as a server-side qualification on the {@code CopyObjectRequest}. * - * @param request the request + * @param requestBuilder the copy object request builder * @param revisionId the revision id */ - public abstract void applyRevisionConstraint(CopyObjectRequest request, + public abstract void applyRevisionConstraint(CopyObjectRequest.Builder requestBuilder, String revisionId); /** - * Applies the given {@link #getRevisionId(ObjectMetadata, String) revisionId} + * Applies the given {@link #getRevisionId(HeadObjectResponse, String) revisionId} * as a server-side qualification on the {@code GetObjectMetadataRequest}. * - * @param request the request + * @param requestBuilder the head object request builder * @param revisionId the revision id */ - public abstract void applyRevisionConstraint(GetObjectMetadataRequest request, + public abstract void applyRevisionConstraint(HeadObjectRequest.Builder requestBuilder, String revisionId); /** @@ -323,7 +337,7 @@ public abstract class ChangeDetectionPolicy { } /** - * Change detection policy based on {@link ObjectMetadata#getETag() eTag}. + * Change detection policy based on {@link HeadObjectResponse#eTag() eTag}. 
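A minimal usage sketch of the server-side eTag constraint with the V2 builder (bucket, key and revisionId are assumed to be in scope; a mismatch surfaces as a 412 Precondition Failed):

    GetObjectRequest.Builder get = GetObjectRequest.builder()
        .bucket(bucket)
        .key(key);
    // only return the object if its eTag still matches the recorded revision
    get.ifMatch(revisionId);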
*/ static class ETagChangeDetectionPolicy extends ChangeDetectionPolicy { @@ -332,8 +346,13 @@ } @Override - public String getRevisionId(ObjectMetadata objectMetadata, String uri) { - return objectMetadata.getETag(); + public String getRevisionId(GetObjectResponse objectMetadata, String uri) { + return objectMetadata.eTag(); + } + + @Override + public String getRevisionId(HeadObjectResponse objectMetadata, String uri) { + return objectMetadata.eTag(); } @Override @@ -342,34 +361,34 @@ } @Override - public String getRevisionId(CopyResult copyResult) { - return copyResult.getETag(); + public String getRevisionId(CopyObjectResponse copyObjectResponse) { + return copyObjectResponse.copyObjectResult().eTag(); } @Override - public void applyRevisionConstraint(GetObjectRequest request, + public void applyRevisionConstraint(GetObjectRequest.Builder builder, String revisionId) { if (revisionId != null) { LOG.debug("Restricting get request to etag {}", revisionId); - request.withMatchingETagConstraint(revisionId); + builder.ifMatch(revisionId); } else { LOG.debug("No etag revision ID to use as a constraint"); } } @Override - public void applyRevisionConstraint(CopyObjectRequest request, + public void applyRevisionConstraint(CopyObjectRequest.Builder requestBuilder, String revisionId) { if (revisionId != null) { LOG.debug("Restricting copy request to etag {}", revisionId); - request.withMatchingETagConstraint(revisionId); + requestBuilder.copySourceIfMatch(revisionId); } else { LOG.debug("No etag revision ID to use as a constraint"); } } @Override - public void applyRevisionConstraint(GetObjectMetadataRequest request, + public void applyRevisionConstraint(HeadObjectRequest.Builder requestBuilder, String revisionId) { LOG.debug("Unable to restrict HEAD request to etag; will check later"); } @@ -388,7 +407,7 @@ /** * Change detection policy based on - * {@link ObjectMetadata#getVersionId() versionId}. + * {@link HeadObjectResponse#versionId() versionId}. 
*/ static class VersionIdChangeDetectionPolicy extends ChangeDetectionPolicy { @@ -398,8 +417,16 @@ public abstract class ChangeDetectionPolicy { } @Override - public String getRevisionId(ObjectMetadata objectMetadata, String uri) { - String versionId = objectMetadata.getVersionId(); + public String getRevisionId(HeadObjectResponse objectMetadata, String uri) { + return logIfNull(objectMetadata.versionId(), uri); + } + + @Override + public String getRevisionId(GetObjectResponse getObjectResponse, String uri) { + return logIfNull(getObjectResponse.versionId(), uri); + } + + private String logIfNull(String versionId, String uri) { if (versionId == null) { // this policy doesn't work if the bucket doesn't have object versioning // enabled (which isn't by default) @@ -419,38 +446,38 @@ public abstract class ChangeDetectionPolicy { } @Override - public String getRevisionId(CopyResult copyResult) { - return copyResult.getVersionId(); + public String getRevisionId(CopyObjectResponse copyObjectResponse) { + return copyObjectResponse.versionId(); } @Override - public void applyRevisionConstraint(GetObjectRequest request, + public void applyRevisionConstraint(GetObjectRequest.Builder builder, String revisionId) { if (revisionId != null) { LOG.debug("Restricting get request to version {}", revisionId); - request.withVersionId(revisionId); + builder.versionId(revisionId); } else { LOG.debug("No version ID to use as a constraint"); } } @Override - public void applyRevisionConstraint(CopyObjectRequest request, + public void applyRevisionConstraint(CopyObjectRequest.Builder requestBuilder, String revisionId) { if (revisionId != null) { LOG.debug("Restricting copy request to version {}", revisionId); - request.withSourceVersionId(revisionId); + requestBuilder.sourceVersionId(revisionId); } else { LOG.debug("No version ID to use as a constraint"); } } @Override - public void applyRevisionConstraint(GetObjectMetadataRequest request, + public void applyRevisionConstraint(HeadObjectRequest.Builder requestBuilder, String revisionId) { if (revisionId != null) { LOG.debug("Restricting metadata request to version {}", revisionId); - request.withVersionId(revisionId); + requestBuilder.versionId(revisionId); } else { LOG.debug("No version ID to use as a constraint"); } @@ -482,7 +509,13 @@ public abstract class ChangeDetectionPolicy { } @Override - public String getRevisionId(final ObjectMetadata objectMetadata, + public String getRevisionId(final GetObjectResponse objectMetadata, + final String uri) { + return null; + } + + @Override + public String getRevisionId(final HeadObjectResponse objectMetadata, final String uri) { return null; } @@ -493,24 +526,24 @@ public abstract class ChangeDetectionPolicy { } @Override - public String getRevisionId(CopyResult copyResult) { + public String getRevisionId(CopyObjectResponse copyObjectResponse) { return null; } @Override - public void applyRevisionConstraint(final GetObjectRequest request, + public void applyRevisionConstraint(final GetObjectRequest.Builder builder, final String revisionId) { } @Override - public void applyRevisionConstraint(CopyObjectRequest request, + public void applyRevisionConstraint(CopyObjectRequest.Builder requestBuilder, String revisionId) { } @Override - public void applyRevisionConstraint(GetObjectMetadataRequest request, + public void applyRevisionConstraint(HeadObjectRequest.Builder requestBuilder, String revisionId) { } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/ChangeTracker.java 
b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/ChangeTracker.java index 2e144bbfcd0..c7e9139f32d 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/ChangeTracker.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/ChangeTracker.java @@ -18,14 +18,14 @@ package org.apache.hadoop.fs.s3a.impl; -import com.amazonaws.AmazonServiceException; -import com.amazonaws.SdkBaseException; -import com.amazonaws.services.s3.model.CopyObjectRequest; -import com.amazonaws.services.s3.model.GetObjectMetadataRequest; -import com.amazonaws.services.s3.model.GetObjectRequest; -import com.amazonaws.services.s3.model.ObjectMetadata; -import com.amazonaws.services.s3.model.S3Object; -import com.amazonaws.services.s3.transfer.model.CopyResult; +import software.amazon.awssdk.awscore.exception.AwsServiceException; +import software.amazon.awssdk.core.exception.SdkException; +import software.amazon.awssdk.services.s3.model.CopyObjectRequest; +import software.amazon.awssdk.services.s3.model.CopyObjectResponse; +import software.amazon.awssdk.services.s3.model.GetObjectRequest; +import software.amazon.awssdk.services.s3.model.GetObjectResponse; +import software.amazon.awssdk.services.s3.model.HeadObjectRequest; +import software.amazon.awssdk.services.s3.model.HeadObjectResponse; import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -39,6 +39,7 @@ import org.apache.hadoop.fs.s3a.RemoteFileChangedException; import org.apache.hadoop.fs.s3a.S3ObjectAttributes; import org.apache.hadoop.fs.s3a.statistics.ChangeTrackerStatistics; +import static org.apache.hadoop.fs.s3a.impl.InternalConstants.SC_412_PRECONDITION_FAILED; import static org.apache.hadoop.thirdparty.com.google.common.base.Preconditions.checkNotNull; /** @@ -55,8 +56,6 @@ public class ChangeTracker { private static final Logger LOG = LoggerFactory.getLogger(ChangeTracker.class); - /** {@code 412 Precondition Failed} (HTTP/1.1 - RFC 2616) */ - public static final int SC_PRECONDITION_FAILED = 412; public static final String CHANGE_REPORTED_BY_S3 = "Change reported by S3"; /** Policy to use. */ @@ -117,15 +116,15 @@ public class ChangeTracker { /** * Apply any revision control set by the policy if it is to be * enforced on the server. - * @param request request to modify + * @param builder request builder to modify * @return true iff a constraint was added. */ public boolean maybeApplyConstraint( - final GetObjectRequest request) { + final GetObjectRequest.Builder builder) { if (policy.getMode() == ChangeDetectionPolicy.Mode.Server && revisionId != null) { - policy.applyRevisionConstraint(request, revisionId); + policy.applyRevisionConstraint(builder, revisionId); return true; } return false; @@ -134,26 +133,26 @@ public class ChangeTracker { /** * Apply any revision control set by the policy if it is to be * enforced on the server. - * @param request request to modify + * @param requestBuilder copy object request builder to modify * @return true iff a constraint was added. 
*/ public boolean maybeApplyConstraint( - final CopyObjectRequest request) { + final CopyObjectRequest.Builder requestBuilder) { if (policy.getMode() == ChangeDetectionPolicy.Mode.Server && revisionId != null) { - policy.applyRevisionConstraint(request, revisionId); + policy.applyRevisionConstraint(requestBuilder, revisionId); return true; } return false; } public boolean maybeApplyConstraint( - final GetObjectMetadataRequest request) { + final HeadObjectRequest.Builder requestBuilder) { if (policy.getMode() == ChangeDetectionPolicy.Mode.Server && revisionId != null) { - policy.applyRevisionConstraint(request, revisionId); + policy.applyRevisionConstraint(requestBuilder, revisionId); return true; } return false; @@ -168,7 +167,7 @@ public class ChangeTracker { * @throws PathIOException raised on failure * @throws RemoteFileChangedException if the remote file has changed. */ - public void processResponse(final S3Object object, + public void processResponse(final GetObjectResponse object, final String operation, final long pos) throws PathIOException { if (object == null) { @@ -191,24 +190,24 @@ public class ChangeTracker { } } - processMetadata(object.getObjectMetadata(), operation); + processMetadata(object, operation); } /** * Process the response from the server for validation against the * change policy. - * @param copyResult result of a copy operation + * @param copyObjectResponse response of a copy operation * @throws PathIOException raised on failure * @throws RemoteFileChangedException if the remote file has changed. */ - public void processResponse(final CopyResult copyResult) + public void processResponse(final CopyObjectResponse copyObjectResponse) throws PathIOException { // ETag (sometimes, depending on encryption and/or multipart) is not the // same on the copied object as the original. Version Id seems to never // be the same on the copy. As such, there isn't really anything that // can be verified on the response, except that a revision ID is present // if required. - String newRevisionId = policy.getRevisionId(copyResult); + String newRevisionId = policy.getRevisionId(copyObjectResponse); LOG.debug("Copy result {}: {}", policy.getSource(), newRevisionId); if (newRevisionId == null && policy.isRequireVersion()) { throw new NoVersionAttributeException(uri, String.format( @@ -227,13 +226,11 @@ public class ChangeTracker { * generated (e.g. "copy", "read", "select"). * @throws RemoteFileChangedException if the remote file has changed. */ - public void processException(SdkBaseException e, String operation) throws + public void processException(SdkException e, String operation) throws RemoteFileChangedException { - if (e instanceof AmazonServiceException) { - AmazonServiceException serviceException = (AmazonServiceException) e; - // This isn't really going to be hit due to - // https://github.com/aws/aws-sdk-java/issues/1644 - if (serviceException.getStatusCode() == SC_PRECONDITION_FAILED) { + if (e instanceof AwsServiceException) { + AwsServiceException serviceException = (AwsServiceException)e; + if (serviceException.statusCode() == SC_412_PRECONDITION_FAILED) { versionMismatches.versionMismatchError(); throw new RemoteFileChangedException(uri, operation, String.format( RemoteFileChangedException.PRECONDITIONS_FAILED @@ -254,12 +251,26 @@ public class ChangeTracker { * @throws PathIOException raised on failure * @throws RemoteFileChangedException if the remote file has changed. 
*/ - public void processMetadata(final ObjectMetadata metadata, + public void processMetadata(final HeadObjectResponse metadata, final String operation) throws PathIOException { final String newRevisionId = policy.getRevisionId(metadata, uri); processNewRevision(newRevisionId, operation, -1); } + /** + * Process the response from server for validation against the change + * policy. + * @param getObjectResponse response returned from server + * @param operation operation in progress + * @throws PathIOException raised on failure + * @throws RemoteFileChangedException if the remote file has changed. + */ + public void processMetadata(final GetObjectResponse getObjectResponse, + final String operation) throws PathIOException { + final String newRevisionId = policy.getRevisionId(getObjectResponse, uri); + processNewRevision(newRevisionId, operation, -1); + } + /** * Validate a revision from the server against our expectations. * @param newRevisionId new revision. diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/ConfigureShadedAWSSocketFactory.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/ConfigureShadedAWSSocketFactory.java index 8205668e8f3..ba21f6028a5 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/ConfigureShadedAWSSocketFactory.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/ConfigureShadedAWSSocketFactory.java @@ -21,8 +21,8 @@ package org.apache.hadoop.fs.s3a.impl; import javax.net.ssl.HostnameVerifier; import java.io.IOException; -import com.amazonaws.ClientConfiguration; -import com.amazonaws.thirdparty.apache.http.conn.ssl.SSLConnectionSocketFactory; +import software.amazon.awssdk.http.apache.ApacheHttpClient; +import software.amazon.awssdk.thirdparty.org.apache.http.conn.ssl.SSLConnectionSocketFactory; import org.apache.hadoop.security.ssl.DelegatingSSLSocketFactory; @@ -35,13 +35,12 @@ public class ConfigureShadedAWSSocketFactory implements NetworkBinding.ConfigureAWSSocketFactory { @Override - public void configureSocketFactory(final ClientConfiguration awsConf, + public void configureSocketFactory(final ApacheHttpClient.Builder httpClientBuilder, final DelegatingSSLSocketFactory.SSLChannelMode channelMode) throws IOException { DelegatingSSLSocketFactory.initializeDefaultFactory(channelMode); - awsConf.getApacheHttpClientConfig().setSslSocketFactory( - new SSLConnectionSocketFactory( - DelegatingSSLSocketFactory.getDefaultFactory(), - (HostnameVerifier) null)); + httpClientBuilder.socketFactory(new SSLConnectionSocketFactory( + DelegatingSSLSocketFactory.getDefaultFactory(), + (HostnameVerifier) null)); } } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/CopyOutcome.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/CopyOutcome.java deleted file mode 100644 index 16459ac45b8..00000000000 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/CopyOutcome.java +++ /dev/null @@ -1,80 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.s3a.impl; - -import com.amazonaws.SdkBaseException; -import com.amazonaws.services.s3.transfer.Copy; -import com.amazonaws.services.s3.transfer.model.CopyResult; - -/** - * Extracts the outcome of a TransferManager-executed copy operation. - */ -public final class CopyOutcome { - - /** - * Result of a successful copy. - */ - private final CopyResult copyResult; - - /** the copy was interrupted. */ - private final InterruptedException interruptedException; - - /** - * The copy raised an AWS Exception of some form. - */ - private final SdkBaseException awsException; - - public CopyOutcome(CopyResult copyResult, - InterruptedException interruptedException, - SdkBaseException awsException) { - this.copyResult = copyResult; - this.interruptedException = interruptedException; - this.awsException = awsException; - } - - public CopyResult getCopyResult() { - return copyResult; - } - - public InterruptedException getInterruptedException() { - return interruptedException; - } - - public SdkBaseException getAwsException() { - return awsException; - } - - /** - * Calls {@code Copy.waitForCopyResult()} to await the result, converts - * it to a copy outcome. - * Exceptions caught and - * @param copy the copy operation. - * @return the outcome. - */ - public static CopyOutcome waitForCopy(Copy copy) { - try { - CopyResult result = copy.waitForCopyResult(); - return new CopyOutcome(result, null, null); - } catch (SdkBaseException e) { - return new CopyOutcome(null, null, e); - } catch (InterruptedException e) { - return new CopyOutcome(null, e, null); - } - } -} diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/DeleteOperation.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/DeleteOperation.java index 0797c36c529..44469358906 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/DeleteOperation.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/DeleteOperation.java @@ -24,7 +24,7 @@ import java.util.List; import java.util.concurrent.CompletableFuture; import java.util.stream.Collectors; -import com.amazonaws.services.s3.model.DeleteObjectsRequest; +import software.amazon.awssdk.services.s3.model.ObjectIdentifier; import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.ListeningExecutorService; import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.MoreExecutors; import org.slf4j.Logger; @@ -40,6 +40,7 @@ import org.apache.hadoop.fs.s3a.S3ALocatedFileStatus; import org.apache.hadoop.fs.s3a.Tristate; import org.apache.hadoop.util.DurationInfo; + import static org.apache.hadoop.fs.store.audit.AuditingFunctions.callableWithinAuditSpan; import static org.apache.hadoop.thirdparty.com.google.common.base.Preconditions.checkArgument; import static org.apache.hadoop.fs.s3a.impl.CallableSupplier.maybeAwaitCompletion; @@ -386,9 +387,9 @@ public class DeleteOperation extends ExecutingStoreOperation { "Delete page of %d keys", keyList.size())) { if (!keyList.isEmpty()) { // first delete the files. 
- List files = keyList.stream() + List files = keyList.stream() .filter(e -> !e.isDirMarker) - .map(e -> e.keyVersion) + .map(e -> e.objectIdentifier) .collect(Collectors.toList()); LOG.debug("Deleting of {} file objects", files.size()); Invoker.once("Remove S3 Files", @@ -398,9 +399,9 @@ public class DeleteOperation extends ExecutingStoreOperation { false )); // now the dirs - List dirs = keyList.stream() + List dirs = keyList.stream() .filter(e -> e.isDirMarker) - .map(e -> e.keyVersion) + .map(e -> e.objectIdentifier) .collect(Collectors.toList()); LOG.debug("Deleting of {} directory markers", dirs.size()); // This is invoked with deleteFakeDir. @@ -422,17 +423,17 @@ public class DeleteOperation extends ExecutingStoreOperation { * to choose which statistics to update. */ private static final class DeleteEntry { - private final DeleteObjectsRequest.KeyVersion keyVersion; + private final ObjectIdentifier objectIdentifier; private final boolean isDirMarker; private DeleteEntry(final String key, final boolean isDirMarker) { - this.keyVersion = new DeleteObjectsRequest.KeyVersion(key); + this.objectIdentifier = ObjectIdentifier.builder().key(key).build(); this.isDirMarker = isDirMarker; } public String getKey() { - return keyVersion.getKey(); + return objectIdentifier.key(); } @Override diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/ErrorTranslation.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/ErrorTranslation.java index f7e06413a37..54a91323bc2 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/ErrorTranslation.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/ErrorTranslation.java @@ -18,9 +18,9 @@ package org.apache.hadoop.fs.s3a.impl; -import com.amazonaws.AmazonServiceException; +import software.amazon.awssdk.awscore.exception.AwsServiceException; -import static org.apache.hadoop.fs.s3a.impl.InternalConstants.SC_404; +import static org.apache.hadoop.fs.s3a.impl.InternalConstants.SC_404_NOT_FOUND; /** * Translate from AWS SDK-wrapped exceptions into IOExceptions with @@ -49,9 +49,9 @@ public class ErrorTranslation { * @return true if the status code and error code mean that the * remote bucket is unknown. */ - public static boolean isUnknownBucket(AmazonServiceException e) { - return e.getStatusCode() == SC_404 - && AwsErrorCodes.E_NO_SUCH_BUCKET.equals(e.getErrorCode()); + public static boolean isUnknownBucket(AwsServiceException e) { + return e.statusCode() == SC_404_NOT_FOUND + && AwsErrorCodes.E_NO_SUCH_BUCKET.equals(e.awsErrorDetails().errorCode()); } /** @@ -62,8 +62,8 @@ public class ErrorTranslation { * @return true if the status code and error code mean that the * HEAD request returned 404 but the bucket was there. 
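A hedged sketch of the intended use; the try/catch wiring and the FileNotFoundException mapping are illustrative, not code from this patch:

    try {
      s3Client.headObject(HeadObjectRequest.builder().bucket(bucket).key(key).build());
    } catch (AwsServiceException e) {
      if (ErrorTranslation.isObjectNotFound(e)) {
        // 404 on the key while the bucket itself exists
        throw new FileNotFoundException("No such object: " + key);
      }
      throw e;
    }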
*/ - public static boolean isObjectNotFound(AmazonServiceException e) { - return e.getStatusCode() == SC_404 && !isUnknownBucket(e); + public static boolean isObjectNotFound(AwsServiceException e) { + return e.statusCode() == SC_404_NOT_FOUND && !isUnknownBucket(e); } /** diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/HeaderProcessing.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/HeaderProcessing.java index f75066e049d..d42dda59caa 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/HeaderProcessing.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/HeaderProcessing.java @@ -23,13 +23,15 @@ import java.io.FileNotFoundException; import java.io.IOException; import java.nio.charset.StandardCharsets; import java.util.ArrayList; +import java.util.Date; import java.util.List; import java.util.Map; import java.util.Optional; import java.util.TreeMap; -import com.amazonaws.services.s3.Headers; -import com.amazonaws.services.s3.model.ObjectMetadata; +import software.amazon.awssdk.services.s3.model.CopyObjectRequest; +import software.amazon.awssdk.services.s3.model.HeadBucketResponse; +import software.amazon.awssdk.services.s3.model.HeadObjectResponse; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -75,50 +77,50 @@ public class HeaderProcessing extends AbstractStoreOperation { * Standard HTTP header found on some S3 objects: {@value}. */ public static final String XA_CACHE_CONTROL = - XA_HEADER_PREFIX + Headers.CACHE_CONTROL; + XA_HEADER_PREFIX + AWSHeaders.CACHE_CONTROL; /** * Standard HTTP header found on some S3 objects: {@value}. */ public static final String XA_CONTENT_DISPOSITION = - XA_HEADER_PREFIX + Headers.CONTENT_DISPOSITION; + XA_HEADER_PREFIX + AWSHeaders.CONTENT_DISPOSITION; /** * Content encoding; can be configured: {@value}. */ public static final String XA_CONTENT_ENCODING = - XA_HEADER_PREFIX + Headers.CONTENT_ENCODING; + XA_HEADER_PREFIX + AWSHeaders.CONTENT_ENCODING; /** * Standard HTTP header found on some S3 objects: {@value}. */ public static final String XA_CONTENT_LANGUAGE = - XA_HEADER_PREFIX + Headers.CONTENT_LANGUAGE; + XA_HEADER_PREFIX + AWSHeaders.CONTENT_LANGUAGE; /** * Length XAttr: {@value}. */ public static final String XA_CONTENT_LENGTH = - XA_HEADER_PREFIX + Headers.CONTENT_LENGTH; + XA_HEADER_PREFIX + AWSHeaders.CONTENT_LENGTH; /** * Standard HTTP header found on some S3 objects: {@value}. */ public static final String XA_CONTENT_MD5 = - XA_HEADER_PREFIX + Headers.CONTENT_MD5; + XA_HEADER_PREFIX + AWSHeaders.CONTENT_MD5; /** * Content range: {@value}. * This is returned on GET requests with ranges. */ public static final String XA_CONTENT_RANGE = - XA_HEADER_PREFIX + Headers.CONTENT_RANGE; + XA_HEADER_PREFIX + AWSHeaders.CONTENT_RANGE; /** * Content type: may be set when uploading. * {@value}. */ public static final String XA_CONTENT_TYPE = - XA_HEADER_PREFIX + Headers.CONTENT_TYPE; + XA_HEADER_PREFIX + AWSHeaders.CONTENT_TYPE; /** * Etag Header {@value}. @@ -126,14 +128,14 @@ public class HeaderProcessing extends AbstractStoreOperation { * it can be retrieved via {@code getFileChecksum(path)} if * the S3A connector is enabled. */ - public static final String XA_ETAG = XA_HEADER_PREFIX + Headers.ETAG; + public static final String XA_ETAG = XA_HEADER_PREFIX + AWSHeaders.ETAG; /** * last modified XAttr: {@value}. 
*/ public static final String XA_LAST_MODIFIED = - XA_HEADER_PREFIX + Headers.LAST_MODIFIED; + XA_HEADER_PREFIX + AWSHeaders.LAST_MODIFIED; /* AWS Specific Headers. May not be found on other S3 endpoints. */ @@ -144,50 +146,50 @@ public class HeaderProcessing extends AbstractStoreOperation { * Value {@value}. */ public static final String XA_ARCHIVE_STATUS = - XA_HEADER_PREFIX + Headers.ARCHIVE_STATUS; + XA_HEADER_PREFIX + AWSHeaders.ARCHIVE_STATUS; /** * Object legal hold status. {@value}. */ public static final String XA_OBJECT_LOCK_LEGAL_HOLD_STATUS = - XA_HEADER_PREFIX + Headers.OBJECT_LOCK_LEGAL_HOLD_STATUS; + XA_HEADER_PREFIX + AWSHeaders.OBJECT_LOCK_LEGAL_HOLD_STATUS; /** * Object lock mode. {@value}. */ public static final String XA_OBJECT_LOCK_MODE = - XA_HEADER_PREFIX + Headers.OBJECT_LOCK_MODE; + XA_HEADER_PREFIX + AWSHeaders.OBJECT_LOCK_MODE; /** * ISO8601 expiry date of object lock hold. {@value}. */ public static final String XA_OBJECT_LOCK_RETAIN_UNTIL_DATE = - XA_HEADER_PREFIX + Headers.OBJECT_LOCK_RETAIN_UNTIL_DATE; + XA_HEADER_PREFIX + AWSHeaders.OBJECT_LOCK_RETAIN_UNTIL_DATE; /** * Replication status for cross-region replicated objects. {@value}. */ public static final String XA_OBJECT_REPLICATION_STATUS = - XA_HEADER_PREFIX + Headers.OBJECT_REPLICATION_STATUS; + XA_HEADER_PREFIX + AWSHeaders.OBJECT_REPLICATION_STATUS; /** * Version ID; empty for non-versioned buckets/data. {@value}. */ public static final String XA_S3_VERSION_ID = - XA_HEADER_PREFIX + Headers.S3_VERSION_ID; + XA_HEADER_PREFIX + AWSHeaders.S3_VERSION_ID; /** * The server-side encryption algorithm to use * with AWS-managed keys: {@value}. */ public static final String XA_SERVER_SIDE_ENCRYPTION = - XA_HEADER_PREFIX + Headers.SERVER_SIDE_ENCRYPTION; + XA_HEADER_PREFIX + AWSHeaders.SERVER_SIDE_ENCRYPTION; /** * Storage Class XAttr: {@value}. */ public static final String XA_STORAGE_CLASS = - XA_HEADER_PREFIX + Headers.STORAGE_CLASS; + XA_HEADER_PREFIX + AWSHeaders.STORAGE_CLASS; /** * HTTP Referrer for logs: {@value}. @@ -275,9 +277,28 @@ public class HeaderProcessing extends AbstractStoreOperation { final Statistic statistic) throws IOException { StoreContext context = getStoreContext(); String objectKey = context.pathToKey(path); - ObjectMetadata md; String symbol = statistic.getSymbol(); S3AStatisticsContext instrumentation = context.getInstrumentation(); + Map headers = new TreeMap<>(); + HeadObjectResponse md; + + // Attempting to get metadata for the root, so use head bucket. 
+ if (objectKey.isEmpty()) { + HeadBucketResponse headBucketResponse = + trackDuration(instrumentation, symbol, () -> callbacks.getBucketMetadata()); + + if (headBucketResponse.sdkHttpResponse() != null + && headBucketResponse.sdkHttpResponse().headers() != null + && headBucketResponse.sdkHttpResponse().headers().get(AWSHeaders.CONTENT_TYPE) != null) { + maybeSetHeader(headers, XA_CONTENT_TYPE, + headBucketResponse.sdkHttpResponse().headers().get(AWSHeaders.CONTENT_TYPE).get(0)); + } + + maybeSetHeader(headers, XA_CONTENT_LENGTH, 0); + + return headers; + } + try { md = trackDuration(instrumentation, symbol, () -> callbacks.getObjectMetadata(objectKey)); @@ -287,59 +308,64 @@ public class HeaderProcessing extends AbstractStoreOperation { callbacks.getObjectMetadata(objectKey + "/")); } // all user metadata - Map rawHeaders = md.getUserMetadata(); - Map headers = new TreeMap<>(); + Map rawHeaders = md.metadata(); rawHeaders.forEach((key, value) -> headers.put(XA_HEADER_PREFIX + key, encodeBytes(value))); // and add the usual content length &c, if set maybeSetHeader(headers, XA_CACHE_CONTROL, - md.getCacheControl()); + md.cacheControl()); maybeSetHeader(headers, XA_CONTENT_DISPOSITION, - md.getContentDisposition()); + md.contentDisposition()); maybeSetHeader(headers, XA_CONTENT_ENCODING, - md.getContentEncoding()); + md.contentEncoding()); maybeSetHeader(headers, XA_CONTENT_LANGUAGE, - md.getContentLanguage()); + md.contentLanguage()); // If CSE is enabled, use the unencrypted content length. - if (md.getUserMetaDataOf(Headers.CRYPTO_CEK_ALGORITHM) != null - && md.getUserMetaDataOf(Headers.UNENCRYPTED_CONTENT_LENGTH) != null) { - maybeSetHeader(headers, XA_CONTENT_LENGTH, - md.getUserMetaDataOf(Headers.UNENCRYPTED_CONTENT_LENGTH)); - } else { - maybeSetHeader(headers, XA_CONTENT_LENGTH, - md.getContentLength()); + // TODO: CSE is not supported yet, add these headers in during CSE work. +// if (md.getUserMetaDataOf(Headers.CRYPTO_CEK_ALGORITHM) != null +// && md.getUserMetaDataOf(Headers.UNENCRYPTED_CONTENT_LENGTH) != null) { +// maybeSetHeader(headers, XA_CONTENT_LENGTH, +// md.getUserMetaDataOf(Headers.UNENCRYPTED_CONTENT_LENGTH)); +// } else { +// maybeSetHeader(headers, XA_CONTENT_LENGTH, +// md.contentLength()); +// } +// maybeSetHeader(headers, XA_CONTENT_MD5, +// md.getContentMD5()); + // TODO: Add back in else block during CSE work. 
+ maybeSetHeader(headers, XA_CONTENT_LENGTH, + md.contentLength()); + if (md.sdkHttpResponse() != null && md.sdkHttpResponse().headers() != null + && md.sdkHttpResponse().headers().get("Content-Range") != null) { + maybeSetHeader(headers, XA_CONTENT_RANGE, + md.sdkHttpResponse().headers().get("Content-Range").get(0)); } - maybeSetHeader(headers, XA_CONTENT_MD5, - md.getContentMD5()); - maybeSetHeader(headers, XA_CONTENT_RANGE, - md.getContentRange()); maybeSetHeader(headers, XA_CONTENT_TYPE, - md.getContentType()); + md.contentType()); maybeSetHeader(headers, XA_ETAG, - md.getETag()); + md.eTag()); maybeSetHeader(headers, XA_LAST_MODIFIED, - md.getLastModified()); + Date.from(md.lastModified())); // AWS custom headers maybeSetHeader(headers, XA_ARCHIVE_STATUS, - md.getArchiveStatus()); + md.archiveStatus()); maybeSetHeader(headers, XA_OBJECT_LOCK_LEGAL_HOLD_STATUS, - md.getObjectLockLegalHoldStatus()); + md.objectLockLegalHoldStatus()); maybeSetHeader(headers, XA_OBJECT_LOCK_MODE, - md.getObjectLockMode()); + md.objectLockMode()); maybeSetHeader(headers, XA_OBJECT_LOCK_RETAIN_UNTIL_DATE, - md.getObjectLockRetainUntilDate()); + md.objectLockRetainUntilDate()); maybeSetHeader(headers, XA_OBJECT_REPLICATION_STATUS, - md.getReplicationStatus()); + md.replicationStatus()); maybeSetHeader(headers, XA_S3_VERSION_ID, - md.getVersionId()); + md.versionId()); maybeSetHeader(headers, XA_SERVER_SIDE_ENCRYPTION, - md.getSSEAlgorithm()); + md.serverSideEncryptionAsString()); maybeSetHeader(headers, XA_STORAGE_CLASS, - md.getStorageClass()); - maybeSetHeader(headers, XA_STORAGE_CLASS, - md.getReplicationStatus()); + md.storageClassAsString()); + return headers; } @@ -458,70 +484,51 @@ public class HeaderProcessing extends AbstractStoreOperation { } /** - * Creates a copy of the passed {@link ObjectMetadata}. - * Does so without using the {@link ObjectMetadata#clone()} method, - * to avoid copying unnecessary headers. + * Creates a copy of the passed metadata. * This operation does not copy the {@code X_HEADER_MAGIC_MARKER} * header to avoid confusion. If a marker file is renamed, * it loses information about any remapped file. * If new fields are added to ObjectMetadata which are not * present in the user metadata headers, they will not be picked * up or cloned unless this operation is updated. - * @param source the {@link ObjectMetadata} to copy + * @param source the source metadata to copy * @param dest the metadata to update; this is the return value. 
+ * @param copyObjectRequestBuilder CopyObjectRequest builder */ - public static void cloneObjectMetadata(ObjectMetadata source, - ObjectMetadata dest) { + public static void cloneObjectMetadata(HeadObjectResponse source, + Map dest, CopyObjectRequest.Builder copyObjectRequestBuilder) { // Possibly null attributes // Allowing nulls to pass breaks it during later use - if (source.getCacheControl() != null) { - dest.setCacheControl(source.getCacheControl()); + if (source.cacheControl() != null) { + copyObjectRequestBuilder.cacheControl(source.cacheControl()); } - if (source.getContentDisposition() != null) { - dest.setContentDisposition(source.getContentDisposition()); + if (source.contentDisposition() != null) { + copyObjectRequestBuilder.contentDisposition(source.contentDisposition()); } - if (source.getContentEncoding() != null) { - dest.setContentEncoding(source.getContentEncoding()); + if (source.contentEncoding() != null) { + copyObjectRequestBuilder.contentEncoding(source.contentEncoding()); } - if (source.getContentMD5() != null) { - dest.setContentMD5(source.getContentMD5()); + + if (source.contentType() != null) { + copyObjectRequestBuilder.contentType(source.contentType()); } - if (source.getContentType() != null) { - dest.setContentType(source.getContentType()); + + if (source.serverSideEncryption() != null) { + copyObjectRequestBuilder.serverSideEncryption(source.serverSideEncryption()); } - if (source.getExpirationTime() != null) { - dest.setExpirationTime(source.getExpirationTime()); + + if (source.sseCustomerAlgorithm() != null) { + copyObjectRequestBuilder.copySourceSSECustomerAlgorithm(source.sseCustomerAlgorithm()); } - if (source.getExpirationTimeRuleId() != null) { - dest.setExpirationTimeRuleId(source.getExpirationTimeRuleId()); - } - if (source.getHttpExpiresDate() != null) { - dest.setHttpExpiresDate(source.getHttpExpiresDate()); - } - if (source.getLastModified() != null) { - dest.setLastModified(source.getLastModified()); - } - if (source.getOngoingRestore() != null) { - dest.setOngoingRestore(source.getOngoingRestore()); - } - if (source.getRestoreExpirationTime() != null) { - dest.setRestoreExpirationTime(source.getRestoreExpirationTime()); - } - if (source.getSSEAlgorithm() != null) { - dest.setSSEAlgorithm(source.getSSEAlgorithm()); - } - if (source.getSSECustomerAlgorithm() != null) { - dest.setSSECustomerAlgorithm(source.getSSECustomerAlgorithm()); - } - if (source.getSSECustomerKeyMd5() != null) { - dest.setSSECustomerKeyMd5(source.getSSECustomerKeyMd5()); + if (source.sseCustomerKeyMD5() != null) { + copyObjectRequestBuilder.copySourceSSECustomerKeyMD5(source.sseCustomerKeyMD5()); } // copy user metadata except the magic marker header. - source.getUserMetadata().entrySet().stream() + source.metadata().entrySet().stream() .filter(e -> !e.getKey().equals(X_HEADER_MAGIC_MARKER)) - .forEach(e -> dest.addUserMetadata(e.getKey(), e.getValue())); + .forEach(e -> dest.put(e.getKey(), e.getValue())); } public interface HeaderProcessingCallbacks { @@ -534,6 +541,15 @@ public class HeaderProcessing extends AbstractStoreOperation { * @throws IOException IO and object access problems. */ @Retries.RetryTranslated - ObjectMetadata getObjectMetadata(String key) throws IOException; + HeadObjectResponse getObjectMetadata(String key) throws IOException; + + /** + * Retrieve the bucket metadata. + * + * @return metadata + * @throws IOException IO and object access problems. 
+ */ + @Retries.RetryTranslated + HeadBucketResponse getBucketMetadata() throws IOException; } } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/InternalConstants.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/InternalConstants.java index f8944796469..7bf4e619e0a 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/InternalConstants.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/InternalConstants.java @@ -110,11 +110,58 @@ public final class InternalConstants { S3A_OPENFILE_KEYS = Collections.unmodifiableSet(keys); } + /** 200 status code: OK. */ + public static final int SC_200_OK = 200; + + /** 301 status code: Moved Permanently. */ + public static final int SC_301_MOVED_PERMANENTLY = 301; + + /** 307 status code: Temporary Redirect. */ + public static final int SC_307_TEMPORARY_REDIRECT = 307; + + /** 400 status code: Bad Request. */ + public static final int SC_400_BAD_REQUEST = 400; + + /** 401 status code: Unauthorized. */ + public static final int SC_401_UNAUTHORIZED = 401; + + /** 403 status code: Forbidden. */ + public static final int SC_403_FORBIDDEN = 403; + /** 403 error code. */ - public static final int SC_403 = 403; + @Deprecated + public static final int SC_403 = SC_403_FORBIDDEN; + + /** 404 status code: Not Found. */ + public static final int SC_404_NOT_FOUND = 404; /** 404 error code. */ - public static final int SC_404 = 404; + @Deprecated + public static final int SC_404 = SC_404_NOT_FOUND; + + /** 405 status code: Method Not Allowed. */ + public static final int SC_405_METHOD_NOT_ALLOWED = 405; + + /** 410 status code: Gone. */ + public static final int SC_410_GONE = 410; + + /** 412 status code: Precondition Failed. */ + public static final int SC_412_PRECONDITION_FAILED = 412; + + /** 416 status code: Range Not Satisfiable. */ + public static final int SC_416_RANGE_NOT_SATISFIABLE = 416; + + /** 443 status code: No Response (unofficial). */ + public static final int SC_443_NO_RESPONSE = 443; + + /** 444 status code: No Response (unofficial). */ + public static final int SC_444_NO_RESPONSE = 444; + + /** 500 status code: Internal Server Error. */ + public static final int SC_500_INTERNAL_SERVER_ERROR = 500; + + /** 503 status code: Service Unavailable. */ + public static final int SC_503_SERVICE_UNAVAILABLE = 503; /** Name of the log for throttling events. Value: {@value}. 
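A hedged illustration (not in the patch, and the helper below is hypothetical): the new SC_* constants can be compared against the status code surfaced by an SDK v2 service exception when classifying failures.

import org.apache.hadoop.fs.s3a.impl.InternalConstants;
import software.amazon.awssdk.awscore.exception.AwsServiceException;

final class StatusCodeSketch {
  // Hypothetical helper: classify a service failure using the constants added above.
  static boolean isServerSideFailure(AwsServiceException e) {
    int status = e.statusCode();
    return status == InternalConstants.SC_500_INTERNAL_SERVER_ERROR
        || status == InternalConstants.SC_503_SERVICE_UNAVAILABLE;
  }
}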
*/ public static final String THROTTLE_LOG_NAME = diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/MultiObjectDeleteSupport.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/MultiObjectDeleteException.java similarity index 62% rename from hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/MultiObjectDeleteSupport.java rename to hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/MultiObjectDeleteException.java index 96e32f362df..6082c2f08da 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/MultiObjectDeleteSupport.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/MultiObjectDeleteException.java @@ -22,26 +22,32 @@ import java.io.IOException; import java.nio.file.AccessDeniedException; import java.util.List; -import com.amazonaws.services.s3.model.MultiObjectDeleteException; +import software.amazon.awssdk.services.s3.model.S3Error; +import software.amazon.awssdk.services.s3.model.S3Exception; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.fs.s3a.AWSS3IOException; +import org.apache.hadoop.fs.s3a.S3AFileSystem; + +import static org.apache.hadoop.fs.s3a.impl.InternalConstants.SC_200_OK; /** - * Support for Multi Object Deletion. - * This is used to be a complex piece of code as it was required to - * update s3guard. - * Now all that is left is the exception extraction for better - * reporting, + * Exception raised in {@link S3AFileSystem#deleteObjects} when + * one or more of the keys could not be deleted. + * + * Used to reproduce the behaviour of SDK v1 for partial failures + * on DeleteObjects. In SDK v2, the errors are returned as part of + * the response objects. */ -public final class MultiObjectDeleteSupport { +@InterfaceAudience.Public +@InterfaceStability.Unstable +public class MultiObjectDeleteException extends S3Exception { private static final Logger LOG = LoggerFactory.getLogger( - MultiObjectDeleteSupport.class); - - private MultiObjectDeleteSupport() { - } + MultiObjectDeleteException.class); /** * This is the exception exit code if access was denied on a delete. @@ -49,6 +55,17 @@ public final class MultiObjectDeleteSupport { */ public static final String ACCESS_DENIED = "AccessDenied"; + private final List<S3Error> errors; + + public MultiObjectDeleteException(List<S3Error> errors) { + super(builder().message(errors.toString()).statusCode(SC_200_OK)); + this.errors = errors; + } + + public List<S3Error> errors() { + return errors; + } + /** * A {@code MultiObjectDeleteException} is raised if one or more * paths listed in a bulk DELETE operation failed. @@ -58,29 +75,23 @@ public final class MultiObjectDeleteSupport { * the causes, otherwise grabs the status code and uses it in the * returned exception. * @param message text for the exception - * @param deleteException the delete exception. to translate * @return an IOE with more detail.
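A minimal usage sketch (not part of the patch; it assumes an S3Client named s3 and a prebuilt request) of how this class can reproduce the v1 partial-failure behaviour on top of a v2 DeleteObjectsResponse:

import software.amazon.awssdk.services.s3.S3Client;
import software.amazon.awssdk.services.s3.model.DeleteObjectsRequest;
import software.amazon.awssdk.services.s3.model.DeleteObjectsResponse;
import org.apache.hadoop.fs.s3a.impl.MultiObjectDeleteException;

final class BulkDeleteSketch {
  static DeleteObjectsResponse deleteOrThrow(S3Client s3, DeleteObjectsRequest request) {
    DeleteObjectsResponse response = s3.deleteObjects(request);
    // SDK v2 reports per-key failures in the response instead of throwing.
    if (!response.errors().isEmpty()) {
      throw new MultiObjectDeleteException(response.errors());
    }
    return response;
  }
}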
*/ - public static IOException translateDeleteException( - final String message, - final MultiObjectDeleteException deleteException) { - List errors - = deleteException.getErrors(); + public IOException translateException(final String message) { LOG.info("Bulk delete operation failed to delete all objects;" + " failure count = {}", - errors.size()); + errors().size()); final StringBuilder result = new StringBuilder( - errors.size() * 256); + errors().size() * 256); result.append(message).append(": "); String exitCode = ""; - for (MultiObjectDeleteException.DeleteError error : - deleteException.getErrors()) { - String code = error.getCode(); - String item = String.format("%s: %s%s: %s%n", code, error.getKey(), - (error.getVersionId() != null - ? (" (" + error.getVersionId() + ")") + for (S3Error error : errors()) { + String code = error.code(); + String item = String.format("%s: %s%s: %s%n", code, error.key(), + (error.versionId() != null + ? (" (" + error.versionId() + ")") : ""), - error.getMessage()); + error.message()); LOG.info(item); result.append(item); if (exitCode == null || exitCode.isEmpty() || ACCESS_DENIED.equals(code)) { @@ -89,9 +100,9 @@ public final class MultiObjectDeleteSupport { } if (ACCESS_DENIED.equals(exitCode)) { return (IOException) new AccessDeniedException(result.toString()) - .initCause(deleteException); + .initCause(this); } else { - return new AWSS3IOException(result.toString(), deleteException); + return new AWSS3IOException(result.toString(), this); } } } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/NetworkBinding.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/NetworkBinding.java index 575a3d1b2de..34b4049b061 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/NetworkBinding.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/NetworkBinding.java @@ -23,9 +23,9 @@ import java.lang.reflect.InvocationTargetException; import java.net.URI; import java.net.URISyntaxException; -import com.amazonaws.ClientConfiguration; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import software.amazon.awssdk.http.apache.ApacheHttpClient; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.net.NetUtils; @@ -56,13 +56,12 @@ public final class NetworkBinding { * so as to avoid * @param conf the {@link Configuration} used to get the client specified * value of {@code SSL_CHANNEL_MODE} - * @param awsConf the {@code ClientConfiguration} to set the - * SSLConnectionSocketFactory for. + * @param httpClientBuilder the http client builder. * @throws IOException if there is an error while initializing the * {@code SSLSocketFactory} other than classloader problems. */ public static void bindSSLChannelMode(Configuration conf, - ClientConfiguration awsConf) throws IOException { + ApacheHttpClient.Builder httpClientBuilder) throws IOException { // Validate that SSL_CHANNEL_MODE is set to a valid value. 
String channelModeString = conf.getTrimmed( @@ -89,7 +88,7 @@ public final class NetworkBinding { (Class) Class.forName(BINDING_CLASSNAME); clazz.getConstructor() .newInstance() - .configureSocketFactory(awsConf, channelMode); + .configureSocketFactory(httpClientBuilder, channelMode); } catch (ClassNotFoundException | NoSuchMethodException | IllegalAccessException | InstantiationException | InvocationTargetException | LinkageError e) { @@ -103,7 +102,7 @@ public final class NetworkBinding { * works with the shaded AWS libraries to exist in their own class. */ interface ConfigureAWSSocketFactory { - void configureSocketFactory(ClientConfiguration awsConf, + void configureSocketFactory(ApacheHttpClient.Builder httpClientBuilder, DelegatingSSLSocketFactory.SSLChannelMode channelMode) throws IOException; } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/OperationCallbacks.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/OperationCallbacks.java index 5d17ae91b81..e0d9c7c6aad 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/OperationCallbacks.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/OperationCallbacks.java @@ -22,10 +22,9 @@ import java.io.IOException; import java.io.InterruptedIOException; import java.util.List; -import com.amazonaws.AmazonClientException; -import com.amazonaws.services.s3.model.DeleteObjectsRequest; -import com.amazonaws.services.s3.model.MultiObjectDeleteException; -import com.amazonaws.services.s3.transfer.model.CopyResult; +import software.amazon.awssdk.awscore.exception.AwsServiceException; +import software.amazon.awssdk.services.s3.model.CopyObjectResponse; +import software.amazon.awssdk.services.s3.model.ObjectIdentifier; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.InvalidRequestException; @@ -127,7 +126,7 @@ public interface OperationCallbacks { * @throws IOException Other IO problems */ @Retries.RetryTranslated - CopyResult copyFile(String srcKey, + CopyObjectResponse copyFile(String srcKey, String destKey, S3ObjectAttributes srcAttributes, S3AReadOpContext readContext) @@ -142,14 +141,14 @@ public interface OperationCallbacks { * a mistaken attempt to delete the root directory. * @throws MultiObjectDeleteException one or more of the keys could not * be deleted in a multiple object delete operation. - * @throws AmazonClientException amazon-layer failure. + * @throws AwsServiceException amazon-layer failure. * @throws IOException other IO Exception. */ @Retries.RetryRaw void removeKeys( - List keysToDelete, + List keysToDelete, boolean deleteFakeDir) - throws MultiObjectDeleteException, AmazonClientException, + throws MultiObjectDeleteException, AwsServiceException, IOException; /** diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/ProgressListener.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/ProgressListener.java new file mode 100644 index 00000000000..5e4c3cf37e5 --- /dev/null +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/ProgressListener.java @@ -0,0 +1,26 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.s3a.impl; + +/** + * Interface for progress listeners to implement. + */ +public interface ProgressListener { + default void progressChanged(ProgressListenerEvent eventType, long bytesTransferred) {}; +} diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/ProgressListenerEvent.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/ProgressListenerEvent.java new file mode 100644 index 00000000000..f3f9fb61e43 --- /dev/null +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/ProgressListenerEvent.java @@ -0,0 +1,29 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.s3a.impl; + +/** + * Enum for progress listener events. + */ +public enum ProgressListenerEvent { + REQUEST_BYTE_TRANSFER_EVENT, + TRANSFER_PART_STARTED_EVENT, + TRANSFER_PART_COMPLETED_EVENT, + TRANSFER_PART_FAILED_EVENT; +} diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/RenameOperation.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/RenameOperation.java index ae4d2fe7a34..4bb15f74965 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/RenameOperation.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/RenameOperation.java @@ -25,9 +25,8 @@ import java.util.Map; import java.util.concurrent.CompletableFuture; import java.util.concurrent.atomic.AtomicLong; -import com.amazonaws.AmazonClientException; -import com.amazonaws.SdkBaseException; -import com.amazonaws.services.s3.model.DeleteObjectsRequest; +import software.amazon.awssdk.core.exception.SdkException; +import software.amazon.awssdk.services.s3.model.ObjectIdentifier; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -122,7 +121,7 @@ public class RenameOperation extends ExecutingStoreOperation { /** * list of keys to delete on the next (bulk) delete call. 
*/ - private final List keysToDelete = + private final List keysToDelete = new ArrayList<>(); /** @@ -199,7 +198,7 @@ public class RenameOperation extends ExecutingStoreOperation { */ private void queueToDelete(Path path, String key) { LOG.debug("Queueing to delete {}", path); - keysToDelete.add(new DeleteObjectsRequest.KeyVersion(key)); + keysToDelete.add(ObjectIdentifier.builder().key(key).build()); } /** @@ -268,7 +267,7 @@ public class RenameOperation extends ExecutingStoreOperation { } else { recursiveDirectoryRename(); } - } catch (AmazonClientException | IOException ex) { + } catch (SdkException | IOException ex) { // rename failed. // block for all ongoing copies to complete, successfully or not try { @@ -572,7 +571,7 @@ public class RenameOperation extends ExecutingStoreOperation { */ @Retries.RetryTranslated private void removeSourceObjects( - final List keys) + final List keys) throws IOException { // remove the keys @@ -580,9 +579,9 @@ public class RenameOperation extends ExecutingStoreOperation { // who is trying to debug why objects are no longer there. if (LOG.isDebugEnabled()) { LOG.debug("Initiating delete operation for {} objects", keys.size()); - for (DeleteObjectsRequest.KeyVersion key : keys) { - LOG.debug(" {} {}", key.getKey(), - key.getVersion() != null ? key.getVersion() : ""); + for (ObjectIdentifier objectIdentifier : keys) { + LOG.debug(" {} {}", objectIdentifier.key(), + objectIdentifier.versionId() != null ? objectIdentifier.versionId() : ""); } } @@ -619,10 +618,10 @@ public class RenameOperation extends ExecutingStoreOperation { protected IOException convertToIOException(final Exception ex) { if (ex instanceof IOException) { return (IOException) ex; - } else if (ex instanceof SdkBaseException) { + } else if (ex instanceof SdkException) { return translateException("rename " + sourcePath + " to " + destPath, sourcePath.toString(), - (SdkBaseException) ex); + (SdkException) ex); } else { // should never happen, but for completeness return new IOException(ex); diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/RequestFactoryImpl.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/RequestFactoryImpl.java index 614ed32cc75..170bd2b3345 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/RequestFactoryImpl.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/RequestFactoryImpl.java @@ -18,38 +18,36 @@ package org.apache.hadoop.fs.s3a.impl; -import java.io.File; -import java.io.IOException; -import java.io.InputStream; -import java.util.ArrayList; +import java.util.Base64; +import java.util.HashMap; import java.util.List; import java.util.Map; -import java.util.Optional; import javax.annotation.Nullable; -import com.amazonaws.AmazonWebServiceRequest; -import com.amazonaws.services.s3.model.AbortMultipartUploadRequest; -import com.amazonaws.services.s3.model.CannedAccessControlList; -import com.amazonaws.services.s3.model.CompleteMultipartUploadRequest; -import com.amazonaws.services.s3.model.CopyObjectRequest; -import com.amazonaws.services.s3.model.DeleteObjectRequest; -import com.amazonaws.services.s3.model.DeleteObjectsRequest; -import com.amazonaws.services.s3.model.GetObjectMetadataRequest; -import com.amazonaws.services.s3.model.GetObjectRequest; -import com.amazonaws.services.s3.model.InitiateMultipartUploadRequest; -import com.amazonaws.services.s3.model.ListMultipartUploadsRequest; -import com.amazonaws.services.s3.model.ListNextBatchOfObjectsRequest; 
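Not part of the patch: a small sketch of the key-queuing change shown above, converting plain keys to the v2 ObjectIdentifier type (a versioned delete would also set versionId()).

import java.util.List;
import java.util.stream.Collectors;
import software.amazon.awssdk.services.s3.model.ObjectIdentifier;

final class KeyConversionSketch {
  // v1 queued DeleteObjectsRequest.KeyVersion entries; v2 queues ObjectIdentifier instances.
  static List<ObjectIdentifier> toObjectIdentifiers(List<String> keys) {
    return keys.stream()
        .map(key -> ObjectIdentifier.builder().key(key).build())
        .collect(Collectors.toList());
  }
}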
-import com.amazonaws.services.s3.model.ListObjectsRequest; -import com.amazonaws.services.s3.model.ListObjectsV2Request; -import com.amazonaws.services.s3.model.ObjectListing; -import com.amazonaws.services.s3.model.ObjectMetadata; -import com.amazonaws.services.s3.model.PartETag; -import com.amazonaws.services.s3.model.PutObjectRequest; -import com.amazonaws.services.s3.model.SSEAwsKeyManagementParams; -import com.amazonaws.services.s3.model.SSECustomerKey; -import com.amazonaws.services.s3.model.SelectObjectContentRequest; -import com.amazonaws.services.s3.model.StorageClass; -import com.amazonaws.services.s3.model.UploadPartRequest; +import software.amazon.awssdk.core.SdkRequest; +import software.amazon.awssdk.services.s3.model.AbortMultipartUploadRequest; +import software.amazon.awssdk.services.s3.model.CompleteMultipartUploadRequest; +import software.amazon.awssdk.services.s3.model.CompletedMultipartUpload; +import software.amazon.awssdk.services.s3.model.CompletedPart; +import software.amazon.awssdk.services.s3.model.CopyObjectRequest; +import software.amazon.awssdk.services.s3.model.DeleteObjectRequest; +import software.amazon.awssdk.services.s3.model.DeleteObjectsRequest; +import software.amazon.awssdk.services.s3.model.GetObjectRequest; +import software.amazon.awssdk.services.s3.model.CreateMultipartUploadRequest; +import software.amazon.awssdk.services.s3.model.HeadBucketRequest; +import software.amazon.awssdk.services.s3.model.HeadObjectRequest; +import software.amazon.awssdk.services.s3.model.HeadObjectResponse; +import software.amazon.awssdk.services.s3.model.ListMultipartUploadsRequest; +import software.amazon.awssdk.services.s3.model.ListObjectsRequest; +import software.amazon.awssdk.services.s3.model.ListObjectsV2Request; +import software.amazon.awssdk.services.s3.model.MetadataDirective; +import software.amazon.awssdk.services.s3.model.ObjectIdentifier; +import software.amazon.awssdk.services.s3.model.PutObjectRequest; +import software.amazon.awssdk.services.s3.model.SelectObjectContentRequest; +import software.amazon.awssdk.services.s3.model.ServerSideEncryption; +import software.amazon.awssdk.services.s3.model.StorageClass; +import software.amazon.awssdk.services.s3.model.UploadPartRequest; +import software.amazon.awssdk.utils.Md5Utils; import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -79,8 +77,8 @@ import static org.apache.hadoop.thirdparty.com.google.common.base.Preconditions. * This is where audit span information is added to the requests, * until it is done in the AWS SDK itself. * - * All created requests will be passed through - * {@link PrepareRequest#prepareRequest(AmazonWebServiceRequest)} before + * All created request builders will be passed to + * {@link PrepareRequest#prepareRequest(SdkRequest.Builder)} before * being returned to the caller. */ public class RequestFactoryImpl implements RequestFactory { @@ -101,7 +99,7 @@ public class RequestFactoryImpl implements RequestFactory { /** * ACL For new objects. */ - private final CannedAccessControlList cannedACL; + private final String cannedACL; /** * Max number of multipart entries allowed in a large @@ -147,14 +145,15 @@ public class RequestFactoryImpl implements RequestFactory { /** * Preflight preparation of AWS request. - * @param web service request - * @return prepared entry. + * @param web service request builder + * @return prepared builder. 
*/ @Retries.OnceRaw - private T prepareRequest(T t) { - return requestPreparer != null - ? requestPreparer.prepareRequest(t) - : t; + private T prepareRequest(T t) { + if (requestPreparer != null) { + requestPreparer.prepareRequest(t); + } + return t; } /** @@ -162,7 +161,7 @@ public class RequestFactoryImpl implements RequestFactory { * @return an ACL, if any */ @Override - public CannedAccessControlList getCannedACL() { + public String getCannedACL() { return cannedACL; } @@ -174,29 +173,6 @@ public class RequestFactoryImpl implements RequestFactory { return bucket; } - /** - * Create the AWS SDK structure used to configure SSE, - * if the encryption secrets contain the information/settings for this. - * @return an optional set of KMS Key settings - */ - @Override - public Optional generateSSEAwsKeyParams() { - return EncryptionSecretOperations.createSSEAwsKeyManagementParams( - encryptionSecrets); - } - - /** - * Create the SSE-C structure for the AWS SDK, if the encryption secrets - * contain the information/settings for this. - * This will contain a secret extracted from the bucket/configuration. - * @return an optional customer key. - */ - @Override - public Optional generateSSECustomerKey() { - return EncryptionSecretOperations.createSSECustomerKey( - encryptionSecrets); - } - /** * Get the encryption algorithm of this endpoint. * @return the encryption algorithm. @@ -227,309 +203,323 @@ public class RequestFactoryImpl implements RequestFactory { /** * Sets server side encryption parameters to the part upload * request when encryption is enabled. - * @param request upload part request + * @param builder upload part request builder */ - protected void setOptionalUploadPartRequestParameters( - UploadPartRequest request) { - generateSSECustomerKey().ifPresent(request::setSSECustomerKey); + protected void uploadPartEncryptionParameters( + UploadPartRequest.Builder builder) { + // need to set key to get objects encrypted with SSE_C + EncryptionSecretOperations.getSSECustomerKey(encryptionSecrets).ifPresent(base64customerKey -> { + builder.sseCustomerAlgorithm(ServerSideEncryption.AES256.name()) + .sseCustomerKey(base64customerKey) + .sseCustomerKeyMD5(Md5Utils.md5AsBase64(Base64.getDecoder().decode(base64customerKey))); + }); } - /** - * Sets server side encryption parameters to the GET reuquest. - * request when encryption is enabled. - * @param request upload part request - */ - protected void setOptionalGetObjectMetadataParameters( - GetObjectMetadataRequest request) { - generateSSECustomerKey().ifPresent(request::setSSECustomerKey); - } + private CopyObjectRequest.Builder buildCopyObjectRequest() { - /** - * Set the optional parameters when initiating the request (encryption, - * headers, storage, etc). - * @param request request to patch. - */ - protected void setOptionalMultipartUploadRequestParameters( - InitiateMultipartUploadRequest request) { - generateSSEAwsKeyParams().ifPresent(request::setSSEAwsKeyManagementParams); - generateSSECustomerKey().ifPresent(request::setSSECustomerKey); - } + CopyObjectRequest.Builder copyObjectRequestBuilder = CopyObjectRequest.builder(); - /** - * Set the optional parameters for a PUT request. - * @param request request to patch. - */ - protected void setOptionalPutRequestParameters(PutObjectRequest request) { - generateSSEAwsKeyParams().ifPresent(request::setSSEAwsKeyManagementParams); - generateSSECustomerKey().ifPresent(request::setSSECustomerKey); - } - - /** - * Set the optional metadata for an object being created or copied. 
- * @param metadata to update. - * @param isDirectoryMarker is this for a directory marker? - */ - protected void setOptionalObjectMetadata(ObjectMetadata metadata, - boolean isDirectoryMarker) { - final S3AEncryptionMethods algorithm - = getServerSideEncryptionAlgorithm(); - if (S3AEncryptionMethods.SSE_S3 == algorithm) { - metadata.setSSEAlgorithm(algorithm.getMethod()); + if (contentEncoding != null) { + copyObjectRequestBuilder.contentEncoding(contentEncoding); } - if (contentEncoding != null && !isDirectoryMarker) { - metadata.setContentEncoding(contentEncoding); - } - } - /** - * Create a new object metadata instance. - * Any standard metadata headers are added here, for example: - * encryption. - * - * @param length length of data to set in header; Ignored if negative - * @return a new metadata instance - */ - @Override - public ObjectMetadata newObjectMetadata(long length) { - return createObjectMetadata(length, false); - } - - /** - * Create a new object metadata instance. - * Any standard metadata headers are added here, for example: - * encryption. - * - * @param length length of data to set in header; Ignored if negative - * @param isDirectoryMarker is this for a directory marker? - * @return a new metadata instance - */ - private ObjectMetadata createObjectMetadata(long length, boolean isDirectoryMarker) { - final ObjectMetadata om = new ObjectMetadata(); - setOptionalObjectMetadata(om, isDirectoryMarker); - if (length >= 0) { - om.setContentLength(length); - } - return om; + return copyObjectRequestBuilder; } @Override - public CopyObjectRequest newCopyObjectRequest(String srcKey, + public CopyObjectRequest.Builder newCopyObjectRequestBuilder(String srcKey, String dstKey, - ObjectMetadata srcom) { - CopyObjectRequest copyObjectRequest = - new CopyObjectRequest(getBucket(), srcKey, getBucket(), dstKey); - ObjectMetadata dstom = newObjectMetadata(srcom.getContentLength()); - HeaderProcessing.cloneObjectMetadata(srcom, dstom); - setOptionalObjectMetadata(dstom, false); - copyEncryptionParameters(srcom, copyObjectRequest); - copyObjectRequest.setCannedAccessControlList(cannedACL); - copyObjectRequest.setNewObjectMetadata(dstom); - Optional.ofNullable(srcom.getStorageClass()) - .ifPresent(copyObjectRequest::setStorageClass); - return prepareRequest(copyObjectRequest); + HeadObjectResponse srcom) { + + CopyObjectRequest.Builder copyObjectRequestBuilder = buildCopyObjectRequest(); + + Map dstom = new HashMap<>(); + HeaderProcessing.cloneObjectMetadata(srcom, dstom, copyObjectRequestBuilder); + copyEncryptionParameters(srcom, copyObjectRequestBuilder); + + copyObjectRequestBuilder + .metadata(dstom) + .metadataDirective(MetadataDirective.REPLACE) + .acl(cannedACL); + + if (srcom.storageClass() != null) { + copyObjectRequestBuilder.storageClass(srcom.storageClass()); + } + + copyObjectRequestBuilder.destinationBucket(getBucket()) + .destinationKey(dstKey).sourceBucket(getBucket()).sourceKey(srcKey); + + return prepareRequest(copyObjectRequestBuilder); } /** * Propagate encryption parameters from source file if set else use the * current filesystem encryption settings. + * @param copyObjectRequestBuilder copy object request builder. * @param srcom source object metadata. - * @param copyObjectRequest copy object request body. 
*/ - protected void copyEncryptionParameters( - ObjectMetadata srcom, - CopyObjectRequest copyObjectRequest) { - String sourceKMSId = srcom.getSSEAwsKmsKeyId(); + protected void copyEncryptionParameters(HeadObjectResponse srcom, + CopyObjectRequest.Builder copyObjectRequestBuilder) { + + final S3AEncryptionMethods algorithm = getServerSideEncryptionAlgorithm(); + + String sourceKMSId = srcom.ssekmsKeyId(); if (isNotEmpty(sourceKMSId)) { // source KMS ID is propagated LOG.debug("Propagating SSE-KMS settings from source {}", sourceKMSId); - copyObjectRequest.setSSEAwsKeyManagementParams( - new SSEAwsKeyManagementParams(sourceKMSId)); + copyObjectRequestBuilder.ssekmsKeyId(sourceKMSId); + return; } - switch (getServerSideEncryptionAlgorithm()) { - case SSE_S3: - /* no-op; this is set in destination object metadata */ - break; - case SSE_C: - generateSSECustomerKey().ifPresent(customerKey -> { - copyObjectRequest.setSourceSSECustomerKey(customerKey); - copyObjectRequest.setDestinationSSECustomerKey(customerKey); - }); - break; - - case SSE_KMS: - generateSSEAwsKeyParams().ifPresent( - copyObjectRequest::setSSEAwsKeyManagementParams); - break; - default: + if (S3AEncryptionMethods.SSE_S3 == algorithm) { + copyObjectRequestBuilder.serverSideEncryption(algorithm.getMethod()); + } else if (S3AEncryptionMethods.SSE_KMS == algorithm) { + copyObjectRequestBuilder.serverSideEncryption(ServerSideEncryption.AWS_KMS); + // Set the KMS key if present, else S3 uses AWS managed key. + EncryptionSecretOperations.getSSEAwsKMSKey(encryptionSecrets) + .ifPresent(kmsKey -> copyObjectRequestBuilder.ssekmsKeyId(kmsKey)); + } else if (S3AEncryptionMethods.SSE_C == algorithm) { + EncryptionSecretOperations.getSSECustomerKey(encryptionSecrets) + .ifPresent(base64customerKey -> { + copyObjectRequestBuilder.copySourceSSECustomerAlgorithm( + ServerSideEncryption.AES256.name()).copySourceSSECustomerKey(base64customerKey) + .copySourceSSECustomerKeyMD5( + Md5Utils.md5AsBase64(Base64.getDecoder().decode(base64customerKey))) + .sseCustomerAlgorithm(ServerSideEncryption.AES256.name()) + .sseCustomerKey(base64customerKey).sseCustomerKeyMD5( + Md5Utils.md5AsBase64(Base64.getDecoder().decode(base64customerKey))); + }); } } /** * Create a putObject request. * Adds the ACL, storage class and metadata * @param key key of object - * @param metadata metadata header * @param options options for the request, including headers - * @param srcfile source file - * @return the request + * @param length length of object to be uploaded + * @param isDirectoryMarker true if object to be uploaded is a directory marker + * @return the request builder */ @Override - public PutObjectRequest newPutObjectRequest(String key, - ObjectMetadata metadata, + public PutObjectRequest.Builder newPutObjectRequestBuilder(String key, final PutObjectOptions options, - File srcfile) { - Preconditions.checkNotNull(srcfile); - PutObjectRequest putObjectRequest = new PutObjectRequest(getBucket(), key, - srcfile); - maybeSetMetadata(options, metadata); - setOptionalPutRequestParameters(putObjectRequest); - putObjectRequest.setCannedAcl(cannedACL); - if (storageClass != null) { - putObjectRequest.setStorageClass(storageClass); - } - putObjectRequest.setMetadata(metadata); - return prepareRequest(putObjectRequest); - } + long length, + boolean isDirectoryMarker) { - /** - * Create a {@link PutObjectRequest} request. - * The metadata is assumed to have been configured with the size of the - * operation. 
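Illustrative only (bucket, keys and KMS key id are placeholders): the kind of v2 copy request that copyEncryptionParameters() above produces for the SSE-KMS case.

import software.amazon.awssdk.services.s3.model.CopyObjectRequest;
import software.amazon.awssdk.services.s3.model.ServerSideEncryption;

final class SseKmsCopySketch {
  static CopyObjectRequest sseKmsCopy(String bucket, String srcKey, String dstKey, String kmsKeyId) {
    return CopyObjectRequest.builder()
        .sourceBucket(bucket).sourceKey(srcKey)
        .destinationBucket(bucket).destinationKey(dstKey)
        // the destination object is re-encrypted with the given KMS key
        .serverSideEncryption(ServerSideEncryption.AWS_KMS)
        .ssekmsKeyId(kmsKeyId)
        .build();
  }
}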
- * @param key key of object - * @param metadata metadata header - * @param options options for the request - * @param inputStream source data. - * @return the request - */ - @Override - public PutObjectRequest newPutObjectRequest(String key, - ObjectMetadata metadata, - @Nullable final PutObjectOptions options, - InputStream inputStream) { - Preconditions.checkNotNull(inputStream); Preconditions.checkArgument(isNotEmpty(key), "Null/empty key"); - maybeSetMetadata(options, metadata); - PutObjectRequest putObjectRequest = new PutObjectRequest(getBucket(), key, - inputStream, metadata); - setOptionalPutRequestParameters(putObjectRequest); - putObjectRequest.setCannedAcl(cannedACL); - if (storageClass != null) { - putObjectRequest.setStorageClass(storageClass); + + PutObjectRequest.Builder putObjectRequestBuilder = + buildPutObjectRequest(length, isDirectoryMarker); + putObjectRequestBuilder.bucket(getBucket()).key(key); + + if (options != null) { + putObjectRequestBuilder.metadata(options.getHeaders()); + } + + putEncryptionParameters(putObjectRequestBuilder); + + if (storageClass != null) { + putObjectRequestBuilder.storageClass(storageClass); + } + + return prepareRequest(putObjectRequestBuilder); + } + + private PutObjectRequest.Builder buildPutObjectRequest(long length, boolean isDirectoryMarker) { + + PutObjectRequest.Builder putObjectRequestBuilder = PutObjectRequest.builder(); + + putObjectRequestBuilder.acl(cannedACL); + + if (length >= 0) { + putObjectRequestBuilder.contentLength(length); + } + + if (contentEncoding != null && !isDirectoryMarker) { + putObjectRequestBuilder.contentEncoding(contentEncoding); + } + + return putObjectRequestBuilder; + } + + private void putEncryptionParameters(PutObjectRequest.Builder putObjectRequestBuilder) { + final S3AEncryptionMethods algorithm + = getServerSideEncryptionAlgorithm(); + + if (S3AEncryptionMethods.SSE_S3 == algorithm) { + putObjectRequestBuilder.serverSideEncryption(algorithm.getMethod()); + } else if (S3AEncryptionMethods.SSE_KMS == algorithm) { + putObjectRequestBuilder.serverSideEncryption(ServerSideEncryption.AWS_KMS); + // Set the KMS key if present, else S3 uses AWS managed key. + EncryptionSecretOperations.getSSEAwsKMSKey(encryptionSecrets) + .ifPresent(kmsKey -> putObjectRequestBuilder.ssekmsKeyId(kmsKey)); + } else if (S3AEncryptionMethods.SSE_C == algorithm) { + EncryptionSecretOperations.getSSECustomerKey(encryptionSecrets) + .ifPresent(base64customerKey -> { + putObjectRequestBuilder.sseCustomerAlgorithm(ServerSideEncryption.AES256.name()) + .sseCustomerKey(base64customerKey).sseCustomerKeyMD5( + Md5Utils.md5AsBase64(Base64.getDecoder().decode(base64customerKey))); + }); } - return prepareRequest(putObjectRequest); } @Override - public PutObjectRequest newDirectoryMarkerRequest(String directory) { + public PutObjectRequest.Builder newDirectoryMarkerRequest(String directory) { String key = directory.endsWith("/") ? 
directory : (directory + "/"); - // an input stream which is always empty - final InputStream inputStream = new InputStream() { - @Override - public int read() throws IOException { - return -1; - } - }; + // preparation happens in here - final ObjectMetadata metadata = createObjectMetadata(0L, true); - metadata.setContentType(HeaderProcessing.CONTENT_TYPE_X_DIRECTORY); + PutObjectRequest.Builder putObjectRequestBuilder = buildPutObjectRequest(0L, true); - PutObjectRequest putObjectRequest = new PutObjectRequest(getBucket(), key, - inputStream, metadata); - setOptionalPutRequestParameters(putObjectRequest); - putObjectRequest.setCannedAcl(cannedACL); - return prepareRequest(putObjectRequest); + putObjectRequestBuilder.bucket(getBucket()).key(key) + .contentType(HeaderProcessing.CONTENT_TYPE_X_DIRECTORY); + + putEncryptionParameters(putObjectRequestBuilder); + + return prepareRequest(putObjectRequestBuilder); } @Override - public ListMultipartUploadsRequest - newListMultipartUploadsRequest(String prefix) { - ListMultipartUploadsRequest request = new ListMultipartUploadsRequest( - getBucket()); + public ListMultipartUploadsRequest.Builder + newListMultipartUploadsRequestBuilder(String prefix) { + + ListMultipartUploadsRequest.Builder requestBuilder = ListMultipartUploadsRequest.builder(); + + requestBuilder.bucket(getBucket()); if (prefix != null) { - request.setPrefix(prefix); + requestBuilder.prefix(prefix); } - return prepareRequest(request); + return prepareRequest(requestBuilder); } @Override - public AbortMultipartUploadRequest newAbortMultipartUploadRequest( + public AbortMultipartUploadRequest.Builder newAbortMultipartUploadRequestBuilder( String destKey, String uploadId) { - return prepareRequest(new AbortMultipartUploadRequest(getBucket(), - destKey, - uploadId)); + AbortMultipartUploadRequest.Builder requestBuilder = + AbortMultipartUploadRequest.builder().bucket(getBucket()).key(destKey).uploadId(uploadId); + + return prepareRequest(requestBuilder); + } + + private void multipartUploadEncryptionParameters( + CreateMultipartUploadRequest.Builder mpuRequestBuilder) { + final S3AEncryptionMethods algorithm = getServerSideEncryptionAlgorithm(); + + if (S3AEncryptionMethods.SSE_S3 == algorithm) { + mpuRequestBuilder.serverSideEncryption(algorithm.getMethod()); + } else if (S3AEncryptionMethods.SSE_KMS == algorithm) { + mpuRequestBuilder.serverSideEncryption(ServerSideEncryption.AWS_KMS); + // Set the KMS key if present, else S3 uses AWS managed key. 
+ EncryptionSecretOperations.getSSEAwsKMSKey(encryptionSecrets) + .ifPresent(kmsKey -> mpuRequestBuilder.ssekmsKeyId(kmsKey)); + } else if (S3AEncryptionMethods.SSE_C == algorithm) { + EncryptionSecretOperations.getSSECustomerKey(encryptionSecrets) + .ifPresent(base64customerKey -> { + mpuRequestBuilder.sseCustomerAlgorithm(ServerSideEncryption.AES256.name()) + .sseCustomerKey(base64customerKey).sseCustomerKeyMD5( + Md5Utils.md5AsBase64(Base64.getDecoder().decode(base64customerKey))); + }); + } } @Override - public InitiateMultipartUploadRequest newMultipartUploadRequest( + public CreateMultipartUploadRequest.Builder newMultipartUploadRequestBuilder( final String destKey, @Nullable final PutObjectOptions options) throws PathIOException { if (!isMultipartUploadEnabled) { throw new PathIOException(destKey, "Multipart uploads are disabled."); } - final ObjectMetadata objectMetadata = newObjectMetadata(-1); - maybeSetMetadata(options, objectMetadata); - final InitiateMultipartUploadRequest initiateMPURequest = - new InitiateMultipartUploadRequest(getBucket(), - destKey, - objectMetadata); - initiateMPURequest.setCannedACL(getCannedACL()); - if (getStorageClass() != null) { - initiateMPURequest.withStorageClass(getStorageClass()); + + CreateMultipartUploadRequest.Builder requestBuilder = CreateMultipartUploadRequest.builder(); + + if (contentEncoding != null) { + requestBuilder.contentEncoding(contentEncoding); } - setOptionalMultipartUploadRequestParameters(initiateMPURequest); - return prepareRequest(initiateMPURequest); + + if (options != null) { + requestBuilder.metadata(options.getHeaders()); + } + + requestBuilder.bucket(getBucket()).key(destKey).acl(cannedACL); + + multipartUploadEncryptionParameters(requestBuilder); + + if (storageClass != null) { + requestBuilder.storageClass(storageClass); + } + + return prepareRequest(requestBuilder); } @Override - public CompleteMultipartUploadRequest newCompleteMultipartUploadRequest( + public CompleteMultipartUploadRequest.Builder newCompleteMultipartUploadRequestBuilder( String destKey, String uploadId, - List partETags) { + List partETags) { // a copy of the list is required, so that the AWS SDK doesn't // attempt to sort an unmodifiable list. 
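A hedged sketch (names invented) of the v2 complete-multipart-upload shape being adopted around this hunk: per-part eTags become CompletedPart entries wrapped in a CompletedMultipartUpload.

import java.util.List;
import software.amazon.awssdk.services.s3.model.CompleteMultipartUploadRequest;
import software.amazon.awssdk.services.s3.model.CompletedMultipartUpload;
import software.amazon.awssdk.services.s3.model.CompletedPart;

final class CompleteUploadSketch {
  static CompletedPart part(int partNumber, String eTag) {
    return CompletedPart.builder().partNumber(partNumber).eTag(eTag).build();
  }

  static CompleteMultipartUploadRequest complete(
      String bucket, String key, String uploadId, List<CompletedPart> parts) {
    return CompleteMultipartUploadRequest.builder()
        .bucket(bucket).key(key).uploadId(uploadId)
        .multipartUpload(CompletedMultipartUpload.builder().parts(parts).build())
        .build();
  }
}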
- return prepareRequest(new CompleteMultipartUploadRequest(bucket, - destKey, uploadId, new ArrayList<>(partETags))); + CompleteMultipartUploadRequest.Builder requestBuilder = + CompleteMultipartUploadRequest.builder().bucket(bucket).key(destKey).uploadId(uploadId) + .multipartUpload(CompletedMultipartUpload.builder().parts(partETags).build()); + return prepareRequest(requestBuilder); } @Override - public GetObjectMetadataRequest newGetObjectMetadataRequest(String key) { - GetObjectMetadataRequest request = - new GetObjectMetadataRequest(getBucket(), key); - //SSE-C requires to be filled in if enabled for object metadata - setOptionalGetObjectMetadataParameters(request); - return prepareRequest(request); + public HeadObjectRequest.Builder newHeadObjectRequestBuilder(String key) { + + HeadObjectRequest.Builder headObjectRequestBuilder = + HeadObjectRequest.builder().bucket(getBucket()).key(key); + + // need to set key to get metadata for objects encrypted with SSE_C + EncryptionSecretOperations.getSSECustomerKey(encryptionSecrets).ifPresent(base64customerKey -> { + headObjectRequestBuilder.sseCustomerAlgorithm(ServerSideEncryption.AES256.name()) + .sseCustomerKey(base64customerKey) + .sseCustomerKeyMD5(Md5Utils.md5AsBase64(Base64.getDecoder().decode(base64customerKey))); + }); + + return prepareRequest(headObjectRequestBuilder); } @Override - public GetObjectRequest newGetObjectRequest(String key) { - GetObjectRequest request = new GetObjectRequest(bucket, key); - generateSSECustomerKey().ifPresent(request::setSSECustomerKey); + public HeadBucketRequest.Builder newHeadBucketRequestBuilder(String bucketName) { - return prepareRequest(request); + HeadBucketRequest.Builder headBucketRequestBuilder = + HeadBucketRequest.builder().bucket(bucketName); + + return prepareRequest(headBucketRequestBuilder); } @Override - public UploadPartRequest newUploadPartRequest( + public GetObjectRequest.Builder newGetObjectRequestBuilder(String key) { + GetObjectRequest.Builder builder = GetObjectRequest.builder() + .bucket(bucket) + .key(key); + + // need to set key to get objects encrypted with SSE_C + EncryptionSecretOperations.getSSECustomerKey(encryptionSecrets).ifPresent(base64customerKey -> { + builder.sseCustomerAlgorithm(ServerSideEncryption.AES256.name()) + .sseCustomerKey(base64customerKey) + .sseCustomerKeyMD5(Md5Utils.md5AsBase64(Base64.getDecoder().decode(base64customerKey))); + }); + + return prepareRequest(builder); + } + + @Override + public UploadPartRequest.Builder newUploadPartRequestBuilder( String destKey, String uploadId, int partNumber, - long size, - InputStream uploadStream, - File sourceFile, - long offset) throws PathIOException { + long size) throws PathIOException { checkNotNull(uploadId); - // exactly one source must be set; xor verifies this - checkArgument((uploadStream != null) ^ (sourceFile != null), - "Data source"); checkArgument(size >= 0, "Invalid partition size %s", size); checkArgument(partNumber > 0, "partNumber must be between 1 and %s inclusive, but is %s", - DEFAULT_UPLOAD_PART_COUNT_LIMIT, partNumber); + multipartPartCountLimit, partNumber); LOG.debug("Creating part upload request for {} #{} size {}", uploadId, partNumber, size); @@ -539,88 +529,76 @@ public class RequestFactoryImpl implements RequestFactory { throw new PathIOException(destKey, String.format(pathErrorMsg, partNumber, multipartPartCountLimit)); } - UploadPartRequest request = new UploadPartRequest() - .withBucketName(getBucket()) - .withKey(destKey) - .withUploadId(uploadId) - 
.withPartNumber(partNumber) - .withPartSize(size); - if (uploadStream != null) { - // there's an upload stream. Bind to it. - request.setInputStream(uploadStream); - } else { - checkArgument(sourceFile.exists(), - "Source file does not exist: %s", sourceFile); - checkArgument(sourceFile.isFile(), - "Source is not a file: %s", sourceFile); - checkArgument(offset >= 0, "Invalid offset %s", offset); - long length = sourceFile.length(); - checkArgument(offset == 0 || offset < length, - "Offset %s beyond length of file %s", offset, length); - request.setFile(sourceFile); - request.setFileOffset(offset); - } - setOptionalUploadPartRequestParameters(request); - return prepareRequest(request); + UploadPartRequest.Builder builder = UploadPartRequest.builder() + .bucket(getBucket()) + .key(destKey) + .uploadId(uploadId) + .partNumber(partNumber) + .contentLength(size); + uploadPartEncryptionParameters(builder); + return prepareRequest(builder); } @Override - public SelectObjectContentRequest newSelectRequest(String key) { - SelectObjectContentRequest request = new SelectObjectContentRequest(); - request.setBucketName(bucket); - request.setKey(key); - generateSSECustomerKey().ifPresent(request::setSSECustomerKey); - return prepareRequest(request); + public SelectObjectContentRequest.Builder newSelectRequestBuilder(String key) { + SelectObjectContentRequest.Builder requestBuilder = + SelectObjectContentRequest.builder().bucket(bucket).key(key); + + EncryptionSecretOperations.getSSECustomerKey(encryptionSecrets).ifPresent(base64customerKey -> { + requestBuilder.sseCustomerAlgorithm(ServerSideEncryption.AES256.name()) + .sseCustomerKey(base64customerKey) + .sseCustomerKeyMD5(Md5Utils.md5AsBase64(Base64.getDecoder().decode(base64customerKey))); + }); + + return prepareRequest(requestBuilder); } @Override - public ListObjectsRequest newListObjectsV1Request( + public ListObjectsRequest.Builder newListObjectsV1RequestBuilder( final String key, final String delimiter, final int maxKeys) { - ListObjectsRequest request = new ListObjectsRequest() - .withBucketName(bucket) - .withMaxKeys(maxKeys) - .withPrefix(key); + + ListObjectsRequest.Builder requestBuilder = + ListObjectsRequest.builder().bucket(bucket).maxKeys(maxKeys).prefix(key); + if (delimiter != null) { - request.setDelimiter(delimiter); + requestBuilder.delimiter(delimiter); } - return prepareRequest(request); + + return prepareRequest(requestBuilder); } @Override - public ListNextBatchOfObjectsRequest newListNextBatchOfObjectsRequest( - ObjectListing prev) { - return prepareRequest(new ListNextBatchOfObjectsRequest(prev)); - } - - @Override - public ListObjectsV2Request newListObjectsV2Request( + public ListObjectsV2Request.Builder newListObjectsV2RequestBuilder( final String key, final String delimiter, final int maxKeys) { - final ListObjectsV2Request request = new ListObjectsV2Request() - .withBucketName(bucket) - .withMaxKeys(maxKeys) - .withPrefix(key); + + final ListObjectsV2Request.Builder requestBuilder = ListObjectsV2Request.builder() + .bucket(bucket) + .maxKeys(maxKeys) + .prefix(key); + if (delimiter != null) { - request.setDelimiter(delimiter); + requestBuilder.delimiter(delimiter); } - return prepareRequest(request); + + return prepareRequest(requestBuilder); } @Override - public DeleteObjectRequest newDeleteObjectRequest(String key) { - return prepareRequest(new DeleteObjectRequest(bucket, key)); + public DeleteObjectRequest.Builder newDeleteObjectRequestBuilder(String key) { + return 
prepareRequest(DeleteObjectRequest.builder().bucket(bucket).key(key)); } @Override - public DeleteObjectsRequest newBulkDeleteRequest( - List keysToDelete) { - return prepareRequest( - new DeleteObjectsRequest(bucket) - .withKeys(keysToDelete) - .withQuiet(true)); + public DeleteObjectsRequest.Builder newBulkDeleteRequestBuilder( + List keysToDelete) { + return prepareRequest(DeleteObjectsRequest + .builder() + .bucket(bucket) + .delete(d -> d.objects(keysToDelete).quiet(true))); } @Override @@ -628,23 +606,6 @@ public class RequestFactoryImpl implements RequestFactory { encryptionSecrets = secrets; } - /** - * Set the metadata from the options if the options are not - * null and the metadata contains headers. - * @param options options for the request - * @param objectMetadata metadata to patch - */ - private void maybeSetMetadata( - @Nullable PutObjectOptions options, - final ObjectMetadata objectMetadata) { - if (options != null) { - Map headers = options.getHeaders(); - if (headers != null) { - objectMetadata.setUserMetadata(headers); - } - } - } - /** * Create a builder. * @return new builder. @@ -671,7 +632,7 @@ public class RequestFactoryImpl implements RequestFactory { /** * ACL For new objects. */ - private CannedAccessControlList cannedACL = null; + private String cannedACL = null; /** Content Encoding. */ private String contentEncoding; @@ -754,7 +715,7 @@ public class RequestFactoryImpl implements RequestFactory { * @return the builder */ public RequestFactoryBuilder withCannedACL( - final CannedAccessControlList value) { + final String value) { cannedACL = value; return this; } @@ -806,11 +767,9 @@ public class RequestFactoryImpl implements RequestFactory { /** * Post-creation preparation of AWS request. - * @param t request - * @param request type. - * @return prepared entry. 
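Not from the patch: a compact illustration of the v2 consumer-builder shorthand used by newBulkDeleteRequestBuilder above (the bucket name is a placeholder).

import java.util.List;
import software.amazon.awssdk.services.s3.model.DeleteObjectsRequest;
import software.amazon.awssdk.services.s3.model.ObjectIdentifier;

final class QuietDeleteSketch {
  static DeleteObjectsRequest quietBulkDelete(String bucket, List<ObjectIdentifier> keys) {
    // quiet mode: the response only reports keys that failed to delete.
    return DeleteObjectsRequest.builder()
        .bucket(bucket)
        .delete(d -> d.objects(keys).quiet(true))
        .build();
  }
}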
+ * @param t request builder */ @Retries.OnceRaw - T prepareRequest(T t); + void prepareRequest(SdkRequest.Builder t); } } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/S3AMultipartUploader.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/S3AMultipartUploader.java index 4ab5bc6a992..b7eae8ead70 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/S3AMultipartUploader.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/S3AMultipartUploader.java @@ -34,10 +34,12 @@ import java.util.Objects; import java.util.Set; import java.util.concurrent.CompletableFuture; -import com.amazonaws.services.s3.model.CompleteMultipartUploadResult; -import com.amazonaws.services.s3.model.PartETag; -import com.amazonaws.services.s3.model.UploadPartRequest; -import com.amazonaws.services.s3.model.UploadPartResult; +import software.amazon.awssdk.core.sync.RequestBody; +import software.amazon.awssdk.services.s3.model.CompleteMultipartUploadResponse; +import software.amazon.awssdk.services.s3.model.CompletedPart; +import software.amazon.awssdk.services.s3.model.UploadPartRequest; +import software.amazon.awssdk.services.s3.model.UploadPartResponse; + import org.apache.hadoop.thirdparty.com.google.common.base.Charsets; import org.apache.commons.lang3.StringUtils; @@ -152,18 +154,18 @@ class S3AMultipartUploader extends AbstractMultipartUploader { Charsets.UTF_8); return context.submit(new CompletableFuture<>(), () -> { - UploadPartRequest request = writeOperations.newUploadPartRequest(key, - uploadIdString, partNumber, (int) lengthInBytes, inputStream, - null, 0L); - UploadPartResult result = writeOperations.uploadPart(request, statistics); + UploadPartRequest request = writeOperations.newUploadPartRequestBuilder(key, + uploadIdString, partNumber, lengthInBytes).build(); + RequestBody body = RequestBody.fromInputStream(inputStream, lengthInBytes); + UploadPartResponse response = writeOperations.uploadPart(request, body, statistics); statistics.partPut(lengthInBytes); - String eTag = result.getETag(); + String eTag = response.eTag(); return BBPartHandle.from( ByteBuffer.wrap( buildPartHandlePayload( filePath.toUri().toString(), uploadIdString, - result.getPartNumber(), + partNumber, eTag, lengthInBytes))); }); @@ -188,7 +190,7 @@ class S3AMultipartUploader extends AbstractMultipartUploader { String uploadIdStr = new String(uploadIdBytes, 0, uploadIdBytes.length, Charsets.UTF_8); - ArrayList eTags = new ArrayList<>(); + ArrayList eTags = new ArrayList<>(); eTags.ensureCapacity(handles.size()); long totalLength = 0; // built up to identify duplicates -if the size of this set is @@ -201,7 +203,8 @@ class S3AMultipartUploader extends AbstractMultipartUploader { payload.validate(uploadIdStr, filePath); ids.add(payload.getPartNumber()); totalLength += payload.getLen(); - eTags.add(new PartETag(handle.getKey(), payload.getEtag())); + eTags.add( + CompletedPart.builder().partNumber(handle.getKey()).eTag(payload.getEtag()).build()); } Preconditions.checkArgument(ids.size() == count, "Duplicate PartHandles"); @@ -210,7 +213,7 @@ class S3AMultipartUploader extends AbstractMultipartUploader { long finalLen = totalLength; return context.submit(new CompletableFuture<>(), trackDurationOfCallable(statistics, MULTIPART_UPLOAD_COMPLETED.getSymbol(), () -> { - CompleteMultipartUploadResult result = + CompleteMultipartUploadResponse result = writeOperations.commitUpload( key, uploadIdStr, @@ -218,7 +221,7 @@ class 
S3AMultipartUploader extends AbstractMultipartUploader { finalLen ); - byte[] eTag = result.getETag().getBytes(Charsets.UTF_8); + byte[] eTag = result.eTag().getBytes(Charsets.UTF_8); statistics.uploadCompleted(); return (PathHandle) () -> ByteBuffer.wrap(eTag); })); diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/SDKStreamDrainer.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/SDKStreamDrainer.java index b566f9ad427..49c2fb8947d 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/SDKStreamDrainer.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/SDKStreamDrainer.java @@ -18,12 +18,10 @@ package org.apache.hadoop.fs.s3a.impl; -import java.io.Closeable; +import java.io.InputStream; import java.util.concurrent.atomic.AtomicBoolean; -import javax.annotation.Nullable; - -import com.amazonaws.internal.SdkFilterInputStream; +import software.amazon.awssdk.http.Abortable; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -31,23 +29,18 @@ import org.apache.hadoop.classification.VisibleForTesting; import org.apache.hadoop.fs.s3a.statistics.S3AInputStreamStatistics; import org.apache.hadoop.util.functional.CallableRaisingIOE; + import static java.util.Objects.requireNonNull; import static org.apache.hadoop.fs.s3a.impl.InternalConstants.DRAIN_BUFFER_SIZE; import static org.apache.hadoop.fs.statistics.impl.IOStatisticsBinding.invokeTrackingDuration; -import static org.apache.hadoop.io.IOUtils.cleanupWithLogger; /** * Drains/aborts s3 or other AWS SDK streams. * It is callable so can be passed directly to a submitter * for async invocation. - * A request object may be passed in; it will be implicitly - * cached until this object is GCd. - * This is because in some versions of the AWS SDK, the S3Object - * has a finalize() method which releases the http connection, - * even when the stream is still open. - * See HADOOP-17338 for details. */ -public class SDKStreamDrainer implements CallableRaisingIOE { +public class SDKStreamDrainer + implements CallableRaisingIOE { private static final Logger LOG = LoggerFactory.getLogger( SDKStreamDrainer.class); @@ -58,17 +51,9 @@ public class SDKStreamDrainer implements CallableRaisingIOE { private final String uri; /** - * Request object; usually S3Object - * Never used, but needed to keep the http connection - * open long enough for draining to take place. + * Stream from the getObject response for draining and closing. */ - @Nullable - private final Closeable requestObject; - - /** - * Stream from the {@link #requestObject} for draining and closing. - */ - private final SdkFilterInputStream sdkStream; + private final TStream sdkStream; /** * Should the request be aborted? @@ -118,7 +103,6 @@ public class SDKStreamDrainer implements CallableRaisingIOE { /** * Prepare to drain the stream. * @param uri URI for messages - * @param requestObject http request object; needed to avoid GC issues. * @param sdkStream stream to close. * @param shouldAbort force an abort; used if explicitly requested. 
* @param streamStatistics stats to update @@ -126,14 +110,12 @@ public class SDKStreamDrainer implements CallableRaisingIOE { * @param remaining remaining bytes */ public SDKStreamDrainer(final String uri, - @Nullable final Closeable requestObject, - final SdkFilterInputStream sdkStream, + final TStream sdkStream, final boolean shouldAbort, final int remaining, final S3AInputStreamStatistics streamStatistics, final String reason) { this.uri = uri; - this.requestObject = requestObject; this.sdkStream = requireNonNull(sdkStream); this.shouldAbort = shouldAbort; this.remaining = remaining; @@ -233,7 +215,6 @@ public class SDKStreamDrainer implements CallableRaisingIOE { LOG.debug("Closing stream"); sdkStream.close(); - cleanupWithLogger(LOG, requestObject); // this MUST come after the close, so that if the IO operations fail // and an abort is triggered, the initial attempt's statistics // aren't collected. @@ -255,8 +236,6 @@ public class SDKStreamDrainer implements CallableRaisingIOE { LOG.warn("When aborting {} stream after failing to close it for {}", uri, reason, e); thrown = e; - } finally { - cleanupWithLogger(LOG, requestObject); } streamStatistics.streamClose(true, remaining); @@ -269,11 +248,7 @@ public class SDKStreamDrainer implements CallableRaisingIOE { return uri; } - public Object getRequestObject() { - return requestObject; - } - - public SdkFilterInputStream getSdkStream() { + public TStream getSdkStream() { return sdkStream; } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/V2Migration.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/V2Migration.java index 3aa8ad270ee..c9156f42047 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/V2Migration.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/V2Migration.java @@ -47,6 +47,9 @@ public final class V2Migration { private static final LogExactlyOnce WARN_OF_CUSTOM_SIGNER = new LogExactlyOnce(SDK_V2_UPGRADE_LOG); + private static final LogExactlyOnce WARN_OF_REQUEST_HANDLERS = + new LogExactlyOnce(SDK_V2_UPGRADE_LOG); + private static final LogExactlyOnce WARN_ON_GET_OBJECT_METADATA = new LogExactlyOnce(SDK_V2_UPGRADE_LOG); @@ -87,6 +90,15 @@ public final class V2Migration { + "once S3A is upgraded to SDK V2"); } + /** + * Warns on use of request handlers. + */ + public static void v1RequestHandlersUsed() { + WARN_OF_REQUEST_HANDLERS.warn( + "The request handler interface has changed in AWS SDK V2, use exception interceptors " + + "once S3A is upgraded to SDK V2"); + } + /** * Warns on use of getObjectMetadata. 
*/ diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/prefetch/S3ARemoteObject.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/prefetch/S3ARemoteObject.java index 3ab0022bb08..ec6e3700226 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/prefetch/S3ARemoteObject.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/prefetch/S3ARemoteObject.java @@ -19,15 +19,11 @@ package org.apache.hadoop.fs.s3a.prefetch; - import java.io.IOException; -import java.io.InputStream; -import java.util.IdentityHashMap; -import java.util.Map; -import com.amazonaws.services.s3.model.GetObjectRequest; -import com.amazonaws.services.s3.model.S3Object; -import com.amazonaws.services.s3.model.S3ObjectInputStream; +import software.amazon.awssdk.core.ResponseInputStream; +import software.amazon.awssdk.services.s3.model.GetObjectRequest; +import software.amazon.awssdk.services.s3.model.GetObjectResponse; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -35,12 +31,14 @@ import org.apache.hadoop.fs.impl.prefetch.Validate; import org.apache.hadoop.fs.s3a.Invoker; import org.apache.hadoop.fs.s3a.S3AInputStream; import org.apache.hadoop.fs.s3a.S3AReadOpContext; +import org.apache.hadoop.fs.s3a.S3AUtils; import org.apache.hadoop.fs.s3a.S3ObjectAttributes; import org.apache.hadoop.fs.s3a.impl.ChangeTracker; import org.apache.hadoop.fs.s3a.impl.SDKStreamDrainer; import org.apache.hadoop.fs.s3a.statistics.S3AInputStreamStatistics; import org.apache.hadoop.fs.statistics.DurationTracker; + /** * Encapsulates low level interactions with S3 object on AWS. */ @@ -74,12 +72,6 @@ public class S3ARemoteObject { */ private final ChangeTracker changeTracker; - /** - * Maps a stream returned by openForRead() to the associated S3 object. - * That allows us to close the object when closing the stream. - */ - private final Map s3Objects; - /** * uri of the object being read. */ @@ -123,7 +115,6 @@ public class S3ARemoteObject { this.client = client; this.streamStatistics = streamStatistics; this.changeTracker = changeTracker; - this.s3Objects = new IdentityHashMap<>(); this.uri = this.getPath(); } @@ -187,21 +178,23 @@ public class S3ARemoteObject { * @throws IllegalArgumentException if offset is greater than or equal to file size. * @throws IllegalArgumentException if size is greater than the remaining bytes. 
*/ - public InputStream openForRead(long offset, int size) throws IOException { + public ResponseInputStream openForRead(long offset, int size) + throws IOException { Validate.checkNotNegative(offset, "offset"); Validate.checkLessOrEqual(offset, "offset", size(), "size()"); Validate.checkLessOrEqual(size, "size", size() - offset, "size() - offset"); streamStatistics.streamOpened(); - final GetObjectRequest request = - client.newGetRequest(s3Attributes.getKey()) - .withRange(offset, offset + size - 1); - changeTracker.maybeApplyConstraint(request); + final GetObjectRequest request = client + .newGetRequestBuilder(s3Attributes.getKey()) + .range(S3AUtils.formatRange(offset, offset + size - 1)) + .applyMutation(changeTracker::maybeApplyConstraint) + .build(); String operation = String.format( "%s %s at %d", S3AInputStream.OPERATION_OPEN, uri, offset); DurationTracker tracker = streamStatistics.initiateGetRequest(); - S3Object object = null; + ResponseInputStream object = null; try { object = Invoker.once(operation, uri, () -> client.getObject(request)); @@ -212,27 +205,14 @@ public class S3ARemoteObject { tracker.close(); } - changeTracker.processResponse(object, operation, offset); - InputStream stream = object.getObjectContent(); - synchronized (s3Objects) { - s3Objects.put(stream, object); - } - - return stream; + changeTracker.processResponse(object.response(), operation, offset); + return object; } - void close(InputStream inputStream, int numRemainingBytes) { - S3Object obj; - synchronized (s3Objects) { - obj = s3Objects.remove(inputStream); - if (obj == null) { - throw new IllegalArgumentException("inputStream not found"); - } - } + void close(ResponseInputStream inputStream, int numRemainingBytes) { SDKStreamDrainer drainer = new SDKStreamDrainer( uri, - obj, - (S3ObjectInputStream)inputStream, + inputStream, false, numRemainingBytes, streamStatistics, diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/prefetch/S3ARemoteObjectReader.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/prefetch/S3ARemoteObjectReader.java index 89ea77d6d0e..b49b2699f91 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/prefetch/S3ARemoteObjectReader.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/prefetch/S3ARemoteObjectReader.java @@ -22,7 +22,6 @@ package org.apache.hadoop.fs.s3a.prefetch; import java.io.Closeable; import java.io.EOFException; import java.io.IOException; -import java.io.InputStream; import java.net.SocketTimeoutException; import java.nio.ByteBuffer; @@ -33,6 +32,9 @@ import org.apache.hadoop.fs.impl.prefetch.Validate; import org.apache.hadoop.fs.s3a.Invoker; import org.apache.hadoop.fs.s3a.statistics.S3AInputStreamStatistics; +import software.amazon.awssdk.core.ResponseInputStream; +import software.amazon.awssdk.services.s3.model.GetObjectResponse; + import static org.apache.hadoop.fs.statistics.StreamStatisticNames.STREAM_READ_REMOTE_BLOCK_READ; import static org.apache.hadoop.fs.statistics.impl.IOStatisticsBinding.trackDurationOfOperation; @@ -144,7 +146,8 @@ public class S3ARemoteObjectReader implements Closeable { return; } - InputStream inputStream = remoteObject.openForRead(offset, readSize); + ResponseInputStream inputStream = + remoteObject.openForRead(offset, readSize); int numRemainingBytes = readSize; byte[] bytes = new byte[READ_BUFFER_SIZE]; diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3GuardTool.java 
b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3GuardTool.java index 608f9168c24..ec68168bd0f 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3GuardTool.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3GuardTool.java @@ -33,7 +33,7 @@ import java.util.Scanner; import java.util.concurrent.TimeUnit; import java.util.stream.Collectors; -import com.amazonaws.services.s3.model.MultipartUpload; +import software.amazon.awssdk.services.s3.model.MultipartUpload; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -694,11 +694,11 @@ public abstract class S3GuardTool extends Configured implements Tool, count++; if (mode == Mode.ABORT || mode == Mode.LIST || verbose) { println(out, "%s%s %s", mode == Mode.ABORT ? "Deleting: " : "", - upload.getKey(), upload.getUploadId()); + upload.key(), upload.uploadId()); } if (mode == Mode.ABORT) { writeOperationHelper - .abortMultipartUpload(upload.getKey(), upload.getUploadId(), + .abortMultipartUpload(upload.key(), upload.uploadId(), true, LOG_EVENT); } } @@ -726,7 +726,7 @@ public abstract class S3GuardTool extends Configured implements Tool, return true; } Date ageDate = new Date(System.currentTimeMillis() - msec); - return ageDate.compareTo(u.getInitiated()) >= 0; + return ageDate.compareTo(Date.from(u.initiated())) >= 0; } private void processArgs(List args, PrintStream out) diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/select/BlockingEnumeration.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/select/BlockingEnumeration.java new file mode 100644 index 00000000000..bc47db47c76 --- /dev/null +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/select/BlockingEnumeration.java @@ -0,0 +1,156 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.s3a.select; + +import java.util.Enumeration; +import java.util.NoSuchElementException; +import java.util.concurrent.BlockingQueue; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.LinkedBlockingQueue; + +import org.reactivestreams.Subscriber; +import org.reactivestreams.Subscription; + +import software.amazon.awssdk.core.async.SdkPublisher; +import software.amazon.awssdk.core.exception.SdkException; + +/** + * Implements the {@link Enumeration} interface by subscribing to a + * {@link SdkPublisher} instance. The enumeration will buffer a fixed + * number of elements and only request new ones from the publisher + * when they are consumed. Calls to {@link #hasMoreElements()} and + * {@link #nextElement()} may block while waiting for new elements. + * @param the type of element. 
+ */ +public final class BlockingEnumeration implements Enumeration { + private static final class Signal { + private final T element; + private final Throwable error; + + Signal(T element) { + this.element = element; + this.error = null; + } + + Signal(Throwable error) { + this.element = null; + this.error = error; + } + } + + private final Signal END_SIGNAL = new Signal<>((Throwable)null); + private final CompletableFuture subscription = new CompletableFuture<>(); + private final BlockingQueue> signalQueue; + private final int bufferSize; + private Signal current = null; + + /** + * Create an enumeration with a fixed buffer size and an + * optional injected first element. + * @param publisher the publisher feeding the enumeration. + * @param bufferSize the buffer size. + * @param firstElement (optional) first element the enumeration will return. + */ + public BlockingEnumeration(SdkPublisher publisher, + final int bufferSize, + final T firstElement) { + this.signalQueue = new LinkedBlockingQueue<>(); + this.bufferSize = bufferSize; + if (firstElement != null) { + this.current = new Signal<>(firstElement); + } + publisher.subscribe(new EnumerationSubscriber()); + } + + /** + * Create an enumeration with a fixed buffer size. + * @param publisher the publisher feeding the enumeration. + * @param bufferSize the buffer size. + */ + public BlockingEnumeration(SdkPublisher publisher, + final int bufferSize) { + this(publisher, bufferSize, null); + } + + @Override + public boolean hasMoreElements() { + if (current == null) { + try { + current = signalQueue.take(); + } catch (InterruptedException e) { + current = new Signal<>(e); + subscription.thenAccept(Subscription::cancel); + Thread.currentThread().interrupt(); + } + } + if (current.error != null) { + Throwable error = current.error; + current = END_SIGNAL; + if (error instanceof Error) { + throw (Error)error; + } else if (error instanceof SdkException) { + throw (SdkException)error; + } else { + throw SdkException.create("Unexpected error", error); + } + } + return current != END_SIGNAL; + } + + @Override + public T nextElement() { + if (!hasMoreElements()) { + throw new NoSuchElementException(); + } + T element = current.element; + current = null; + subscription.thenAccept(s -> s.request(1)); + return element; + } + + private final class EnumerationSubscriber implements Subscriber { + + @Override + public void onSubscribe(Subscription s) { + long request = bufferSize; + if (current != null) { + request--; + } + if (request > 0) { + s.request(request); + } + subscription.complete(s); + } + + @Override + public void onNext(T t) { + signalQueue.add(new Signal<>(t)); + } + + @Override + public void onError(Throwable t) { + signalQueue.add(new Signal<>(t)); + } + + @Override + public void onComplete() { + signalQueue.add(END_SIGNAL); + } + } +} diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/select/SelectBinding.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/select/SelectBinding.java index 150043aea9f..e626fb11afd 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/select/SelectBinding.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/select/SelectBinding.java @@ -21,13 +21,13 @@ package org.apache.hadoop.fs.s3a.select; import java.io.IOException; import java.util.Locale; -import com.amazonaws.services.s3.model.CSVInput; -import com.amazonaws.services.s3.model.CSVOutput; -import com.amazonaws.services.s3.model.ExpressionType; -import 
com.amazonaws.services.s3.model.InputSerialization; -import com.amazonaws.services.s3.model.OutputSerialization; -import com.amazonaws.services.s3.model.QuoteFields; -import com.amazonaws.services.s3.model.SelectObjectContentRequest; +import software.amazon.awssdk.services.s3.model.CSVInput; +import software.amazon.awssdk.services.s3.model.CSVOutput; +import software.amazon.awssdk.services.s3.model.ExpressionType; +import software.amazon.awssdk.services.s3.model.InputSerialization; +import software.amazon.awssdk.services.s3.model.OutputSerialization; +import software.amazon.awssdk.services.s3.model.QuoteFields; +import software.amazon.awssdk.services.s3.model.SelectObjectContentRequest; import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -145,9 +145,9 @@ public class SelectBinding { Preconditions.checkState(isEnabled(), "S3 Select is not enabled for %s", path); - SelectObjectContentRequest request = operations.newSelectRequest(path); + SelectObjectContentRequest.Builder request = operations.newSelectRequestBuilder(path); buildRequest(request, expression, builderOptions); - return request; + return request.build(); } /** @@ -175,14 +175,14 @@ public class SelectBinding { } boolean sqlInErrors = builderOptions.getBoolean(SELECT_ERRORS_INCLUDE_SQL, errorsIncludeSql); - String expression = request.getExpression(); + String expression = request.expression(); final String errorText = sqlInErrors ? expression : "Select"; if (sqlInErrors) { LOG.info("Issuing SQL request {}", expression); } + SelectEventStreamPublisher selectPublisher = operations.select(path, request, errorText); return new SelectInputStream(readContext, - objectAttributes, - operations.select(path, request, errorText)); + objectAttributes, selectPublisher); } /** @@ -197,14 +197,14 @@ public class SelectBinding { *

  • The default values in {@link SelectConstants}
  • * * - * @param request request to build up + * @param requestBuilder request to build up * @param expression SQL expression * @param builderOptions the options which came in from the openFile builder. * @throws IllegalArgumentException if an option is somehow invalid. * @throws IOException if an option is somehow invalid. */ void buildRequest( - final SelectObjectContentRequest request, + final SelectObjectContentRequest.Builder requestBuilder, final String expression, final Configuration builderOptions) throws IllegalArgumentException, IOException { @@ -213,7 +213,6 @@ public class SelectBinding { final Configuration ownerConf = operations.getConf(); - String inputFormat = builderOptions.get(SELECT_INPUT_FORMAT, SELECT_FORMAT_CSV).toLowerCase(Locale.ENGLISH); Preconditions.checkArgument(SELECT_FORMAT_CSV.equals(inputFormat), @@ -224,34 +223,24 @@ public class SelectBinding { Preconditions.checkArgument(SELECT_FORMAT_CSV.equals(outputFormat), "Unsupported output format %s", outputFormat); - request.setExpressionType(ExpressionType.SQL); - request.setExpression(expandBackslashChars(expression)); - - InputSerialization inputSerialization = buildCsvInputRequest(ownerConf, - builderOptions); - String compression = opt(builderOptions, - ownerConf, - SELECT_INPUT_COMPRESSION, - COMPRESSION_OPT_NONE, - true).toUpperCase(Locale.ENGLISH); - if (isNotEmpty(compression)) { - inputSerialization.setCompressionType(compression); - } - request.setInputSerialization(inputSerialization); - - request.setOutputSerialization(buildCSVOutput(ownerConf, builderOptions)); + requestBuilder.expressionType(ExpressionType.SQL); + requestBuilder.expression(expandBackslashChars(expression)); + requestBuilder.inputSerialization( + buildCsvInput(ownerConf, builderOptions)); + requestBuilder.outputSerialization( + buildCSVOutput(ownerConf, builderOptions)); } /** - * Build the CSV input request. + * Build the CSV input format for a request. 
* @param ownerConf FS owner configuration * @param builderOptions options on the specific request - * @return the constructed request + * @return the input format * @throws IllegalArgumentException argument failure * @throws IOException validation failure */ - public InputSerialization buildCsvInputRequest( + public InputSerialization buildCsvInput( final Configuration ownerConf, final Configuration builderOptions) throws IllegalArgumentException, IOException { @@ -283,28 +272,35 @@ public class SelectBinding { CSV_INPUT_QUOTE_ESCAPE_CHARACTER_DEFAULT); // CSV input - CSVInput csv = new CSVInput(); - csv.setFieldDelimiter(fieldDelimiter); - csv.setRecordDelimiter(recordDelimiter); - csv.setComments(commentMarker); - csv.setQuoteCharacter(quoteCharacter); + CSVInput.Builder csvBuilder = CSVInput.builder() + .fieldDelimiter(fieldDelimiter) + .recordDelimiter(recordDelimiter) + .comments(commentMarker) + .quoteCharacter(quoteCharacter); if (StringUtils.isNotEmpty(quoteEscapeCharacter)) { - csv.setQuoteEscapeCharacter(quoteEscapeCharacter); + csvBuilder.quoteEscapeCharacter(quoteEscapeCharacter); } - csv.setFileHeaderInfo(headerInfo); - - InputSerialization inputSerialization = new InputSerialization(); - inputSerialization.setCsv(csv); - - return inputSerialization; + csvBuilder.fileHeaderInfo(headerInfo); + InputSerialization.Builder inputSerialization = + InputSerialization.builder() + .csv(csvBuilder.build()); + String compression = opt(builderOptions, + ownerConf, + SELECT_INPUT_COMPRESSION, + COMPRESSION_OPT_NONE, + true).toUpperCase(Locale.ENGLISH); + if (isNotEmpty(compression)) { + inputSerialization.compressionType(compression); + } + return inputSerialization.build(); } /** - * Build CSV output for a request. + * Build CSV output format for a request. 
* @param ownerConf FS owner configuration * @param builderOptions options on the specific request - * @return the constructed request + * @return the output format * @throws IllegalArgumentException argument failure * @throws IOException validation failure */ @@ -333,21 +329,19 @@ public class SelectBinding { CSV_OUTPUT_QUOTE_FIELDS, CSV_OUTPUT_QUOTE_FIELDS_ALWAYS).toUpperCase(Locale.ENGLISH); - // output is CSV, always - OutputSerialization outputSerialization - = new OutputSerialization(); - CSVOutput csvOut = new CSVOutput(); - csvOut.setQuoteCharacter(quoteCharacter); - csvOut.setQuoteFields( - QuoteFields.fromValue(quoteFields)); - csvOut.setFieldDelimiter(fieldDelimiter); - csvOut.setRecordDelimiter(recordDelimiter); + CSVOutput.Builder csvOutputBuilder = CSVOutput.builder() + .quoteCharacter(quoteCharacter) + .quoteFields(QuoteFields.fromValue(quoteFields)) + .fieldDelimiter(fieldDelimiter) + .recordDelimiter(recordDelimiter); if (!quoteEscapeCharacter.isEmpty()) { - csvOut.setQuoteEscapeCharacter(quoteEscapeCharacter); + csvOutputBuilder.quoteEscapeCharacter(quoteEscapeCharacter); } - outputSerialization.setCsv(csvOut); - return outputSerialization; + // output is CSV, always + return OutputSerialization.builder() + .csv(csvOutputBuilder.build()) + .build(); } /** @@ -359,18 +353,18 @@ public class SelectBinding { public static String toString(final SelectObjectContentRequest request) { StringBuilder sb = new StringBuilder(); sb.append("SelectObjectContentRequest{") - .append("bucket name=").append(request.getBucketName()) - .append("; key=").append(request.getKey()) - .append("; expressionType=").append(request.getExpressionType()) - .append("; expression=").append(request.getExpression()); - InputSerialization input = request.getInputSerialization(); + .append("bucket name=").append(request.bucket()) + .append("; key=").append(request.key()) + .append("; expressionType=").append(request.expressionType()) + .append("; expression=").append(request.expression()); + InputSerialization input = request.inputSerialization(); if (input != null) { sb.append("; Input") .append(input.toString()); } else { sb.append("; Input Serialization: none"); } - OutputSerialization out = request.getOutputSerialization(); + OutputSerialization out = request.outputSerialization(); if (out != null) { sb.append("; Output") .append(out.toString()); diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/select/SelectEventStreamPublisher.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/select/SelectEventStreamPublisher.java new file mode 100644 index 00000000000..c71ea5f1623 --- /dev/null +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/select/SelectEventStreamPublisher.java @@ -0,0 +1,124 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.s3a.select; + +import java.io.ByteArrayInputStream; +import java.io.InputStream; +import java.io.SequenceInputStream; +import java.util.concurrent.CompletableFuture; +import java.util.function.Consumer; + +import org.reactivestreams.Subscriber; + +import software.amazon.awssdk.core.async.SdkPublisher; +import software.amazon.awssdk.http.AbortableInputStream; +import software.amazon.awssdk.services.s3.model.EndEvent; +import software.amazon.awssdk.services.s3.model.RecordsEvent; +import software.amazon.awssdk.services.s3.model.SelectObjectContentEventStream; +import software.amazon.awssdk.services.s3.model.SelectObjectContentResponse; +import software.amazon.awssdk.utils.ToString; + +/** + * Async publisher of {@link SelectObjectContentEventStream}s returned + * from a SelectObjectContent call. + */ +public final class SelectEventStreamPublisher implements + SdkPublisher { + + private final CompletableFuture selectOperationFuture; + private final SelectObjectContentResponse response; + private final SdkPublisher publisher; + + /** + * Create the publisher. + * @param selectOperationFuture SelectObjectContent future + * @param response SelectObjectContent response + * @param publisher SelectObjectContentEventStream publisher to wrap + */ + public SelectEventStreamPublisher( + CompletableFuture selectOperationFuture, + SelectObjectContentResponse response, + SdkPublisher publisher) { + this.selectOperationFuture = selectOperationFuture; + this.response = response; + this.publisher = publisher; + } + + /** + * Retrieve an input stream to the subset of the S3 object that matched the select query. + * This is equivalent to loading the content of all RecordsEvents into an InputStream. + * This will lazily-load the content from S3, minimizing the amount of memory used. + * @param onEndEvent callback on the end event + * @return the input stream + */ + public AbortableInputStream toRecordsInputStream(Consumer onEndEvent) { + SdkPublisher recordInputStreams = this.publisher + .filter(e -> { + if (e instanceof RecordsEvent) { + return true; + } else if (e instanceof EndEvent) { + onEndEvent.accept((EndEvent) e); + } + return false; + }) + .map(e -> ((RecordsEvent) e).payload().asInputStream()); + + // Subscribe to the async publisher using an enumeration that will + // buffer a single chunk (RecordsEvent's payload) at a time and + // block until it is consumed. + // Also inject an empty stream as the first element that + // SequenceInputStream will request on construction. + BlockingEnumeration enumeration = + new BlockingEnumeration(recordInputStreams, 1, EMPTY_STREAM); + return AbortableInputStream.create( + new SequenceInputStream(enumeration), + this::cancel); + } + + /** + * The response from the SelectObjectContent call. + * @return the response object + */ + public SelectObjectContentResponse response() { + return response; + } + + @Override + public void subscribe(Subscriber subscriber) { + publisher.subscribe(subscriber); + } + + /** + * Cancel the operation. 
+ */ + public void cancel() { + selectOperationFuture.cancel(true); + } + + @Override + public String toString() { + return ToString.builder("SelectObjectContentEventStream") + .add("response", response) + .add("publisher", publisher) + .build(); + } + + private static final InputStream EMPTY_STREAM = + new ByteArrayInputStream(new byte[0]); +} diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/select/SelectInputStream.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/select/SelectInputStream.java index e13aa23db3c..ada46d2d305 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/select/SelectInputStream.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/select/SelectInputStream.java @@ -23,11 +23,8 @@ import java.io.IOException; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicLong; -import com.amazonaws.AbortedException; -import com.amazonaws.services.s3.model.SelectObjectContentEvent; -import com.amazonaws.services.s3.model.SelectObjectContentEventVisitor; -import com.amazonaws.services.s3.model.SelectObjectContentResult; -import com.amazonaws.services.s3.model.SelectRecordsInputStream; +import software.amazon.awssdk.core.exception.AbortedException; +import software.amazon.awssdk.http.AbortableInputStream; import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -93,7 +90,7 @@ public class SelectInputStream extends FSInputStream implements * Abortable response stream. * This is guaranteed to never be null. */ - private final SelectRecordsInputStream wrappedStream; + private final AbortableInputStream wrappedStream; private final String bucket; @@ -112,14 +109,14 @@ public class SelectInputStream extends FSInputStream implements * The read attempt is initiated immediately. * @param readContext read context * @param objectAttributes object attributes from a HEAD request - * @param selectResponse response from the already executed call + * @param selectPublisher event stream publisher from the already executed call * @throws IOException failure */ @Retries.OnceTranslated public SelectInputStream( final S3AReadOpContext readContext, final S3ObjectAttributes objectAttributes, - final SelectObjectContentResult selectResponse) throws IOException { + final SelectEventStreamPublisher selectPublisher) throws IOException { Preconditions.checkArgument(isNotEmpty(objectAttributes.getBucket()), "No Bucket"); Preconditions.checkArgument(isNotEmpty(objectAttributes.getKey()), @@ -132,17 +129,17 @@ public class SelectInputStream extends FSInputStream implements this.readahead = readContext.getReadahead(); this.streamStatistics = readContext.getS3AStatisticsContext() .newInputStreamStatistics(); - SelectRecordsInputStream stream = once( + + AbortableInputStream stream = once( "S3 Select", uri, - () -> selectResponse.getPayload() - .getRecordsInputStream(new SelectObjectContentEventVisitor() { - @Override - public void visit(final SelectObjectContentEvent.EndEvent event) { - LOG.debug("Completed successful S3 select read from {}", uri); - completedSuccessfully.set(true); - } - })); + () -> { + return selectPublisher.toRecordsInputStream(e -> { + LOG.debug("Completed successful S3 select read from {}", uri); + completedSuccessfully.set(true); + }); + }); + this.wrappedStream = checkNotNull(stream); // this stream is already opened, so mark as such in the statistics. 
streamStatistics.streamOpened(); diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/select/SelectObjectContentHelper.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/select/SelectObjectContentHelper.java new file mode 100644 index 00000000000..8233e67eea0 --- /dev/null +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/select/SelectObjectContentHelper.java @@ -0,0 +1,114 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.s3a.select; + +import java.io.IOException; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.CompletionException; + +import software.amazon.awssdk.core.async.SdkPublisher; +import software.amazon.awssdk.core.exception.SdkException; +import software.amazon.awssdk.services.s3.model.SelectObjectContentEventStream; +import software.amazon.awssdk.services.s3.model.SelectObjectContentRequest; +import software.amazon.awssdk.services.s3.model.SelectObjectContentResponse; +import software.amazon.awssdk.services.s3.model.SelectObjectContentResponseHandler; + +import org.apache.commons.lang3.tuple.Pair; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.s3a.S3AUtils; + +import static org.apache.hadoop.fs.s3a.WriteOperationHelper.WriteOperationHelperCallbacks; + +/** + * Helper for SelectObjectContent queries against an S3 Bucket. + */ +public final class SelectObjectContentHelper { + + private SelectObjectContentHelper() { + } + + /** + * Execute an S3 Select operation. + * @param writeOperationHelperCallbacks helper callbacks + * @param source source for selection + * @param request Select request to issue. 
+ * @param action the action for use in exception creation + * @return the select response event stream publisher + * @throws IOException on failure + */ + public static SelectEventStreamPublisher select( + WriteOperationHelperCallbacks writeOperationHelperCallbacks, + Path source, + SelectObjectContentRequest request, + String action) + throws IOException { + try { + Handler handler = new Handler(); + CompletableFuture selectOperationFuture = + writeOperationHelperCallbacks.selectObjectContent(request, handler); + return handler.eventPublisher(selectOperationFuture).join(); + } catch (Throwable e) { + if (e instanceof CompletionException) { + e = e.getCause(); + } + IOException translated; + if (e instanceof SdkException) { + translated = S3AUtils.translateException(action, source, + (SdkException)e); + } else { + translated = new IOException(e); + } + throw translated; + } + } + + private static class Handler implements SelectObjectContentResponseHandler { + private volatile CompletableFuture>> responseAndPublisherFuture = + new CompletableFuture<>(); + + private volatile SelectObjectContentResponse response; + + public CompletableFuture eventPublisher( + CompletableFuture selectOperationFuture) { + return responseAndPublisherFuture.thenApply(p -> + new SelectEventStreamPublisher(selectOperationFuture, + p.getLeft(), p.getRight())); + } + + @Override + public void responseReceived(SelectObjectContentResponse selectObjectContentResponse) { + this.response = selectObjectContentResponse; + } + + @Override + public void onEventStream(SdkPublisher publisher) { + responseAndPublisherFuture.complete(Pair.of(response, publisher)); + } + + @Override + public void exceptionOccurred(Throwable error) { + responseAndPublisherFuture.completeExceptionally(error); + } + + @Override + public void complete() { + } + } +} diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/statistics/impl/AwsStatisticsCollector.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/statistics/impl/AwsStatisticsCollector.java index c002a4a6dee..711b5823002 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/statistics/impl/AwsStatisticsCollector.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/statistics/impl/AwsStatisticsCollector.java @@ -21,23 +21,18 @@ package org.apache.hadoop.fs.s3a.statistics.impl; import java.time.Duration; import java.util.function.Consumer; import java.util.function.LongConsumer; +import java.util.stream.Collectors; +import java.util.stream.Stream; -import com.amazonaws.Request; -import com.amazonaws.Response; -import com.amazonaws.metrics.RequestMetricCollector; -import com.amazonaws.util.TimingInfo; +import software.amazon.awssdk.core.metrics.CoreMetric; +import software.amazon.awssdk.http.HttpMetric; +import software.amazon.awssdk.http.HttpStatusCode; +import software.amazon.awssdk.metrics.MetricCollection; +import software.amazon.awssdk.metrics.MetricPublisher; +import software.amazon.awssdk.metrics.SdkMetric; import org.apache.hadoop.fs.s3a.statistics.StatisticsFromAwsSdk; -import static com.amazonaws.util.AWSRequestMetrics.Field.ClientExecuteTime; -import static com.amazonaws.util.AWSRequestMetrics.Field.HttpClientRetryCount; -import static com.amazonaws.util.AWSRequestMetrics.Field.HttpRequestTime; -import static com.amazonaws.util.AWSRequestMetrics.Field.RequestCount; -import static com.amazonaws.util.AWSRequestMetrics.Field.RequestMarshallTime; -import static 
com.amazonaws.util.AWSRequestMetrics.Field.RequestSigningTime; -import static com.amazonaws.util.AWSRequestMetrics.Field.ResponseProcessingTime; -import static com.amazonaws.util.AWSRequestMetrics.Field.ThrottleException; - /** * Collect statistics from the AWS SDK and forward to an instance of * {@link StatisticsFromAwsSdk} and thence into the S3A statistics. @@ -45,9 +40,9 @@ import static com.amazonaws.util.AWSRequestMetrics.Field.ThrottleException; * See {@code com.facebook.presto.hive.s3.PrestoS3FileSystemMetricCollector} * for the inspiration for this. *

    - * See {@code com.amazonaws.util.AWSRequestMetrics} for metric names. + * See {@code software.amazon.awssdk.core.metrics.CoreMetric} for metric names. */ -public class AwsStatisticsCollector extends RequestMetricCollector { +public class AwsStatisticsCollector implements MetricPublisher { /** * final destination of updates. @@ -65,65 +60,122 @@ public class AwsStatisticsCollector extends RequestMetricCollector { /** * This is the callback from the AWS SDK where metrics * can be collected. - * @param request AWS request - * @param response AWS response + * @param metricCollection metrics collection */ @Override - public void collectMetrics( - final Request request, - final Response response) { + public void publish(MetricCollection metricCollection) { + // MetricCollections are nested, so we need to traverse through their + // "children" to collect the desired metrics. E.g.: + // + // ApiCall + // ┌─────────────────────────────────────────┐ + // │ MarshallingDuration=PT0.002808333S │ + // │ RetryCount=0 │ + // │ ApiCallSuccessful=true │ + // │ OperationName=DeleteObject │ + // │ ApiCallDuration=PT0.079801458S │ + // │ CredentialsFetchDuration=PT0.000007083S │ + // │ ServiceId=S3 │ + // └─────────────────────────────────────────┘ + // ApiCallAttempt + // ┌─────────────────────────────────────────────────────────────────┐ + // │ SigningDuration=PT0.000319375S │ + // │ ServiceCallDuration=PT0.078908584S │ + // │ AwsExtendedRequestId=Kmvb2Sz8NuDgIFJPKzLLBhuHgQGmpAjVYBMrSHDvy= │ + // │ HttpStatusCode=204 │ + // │ BackoffDelayDuration=PT0S │ + // │ AwsRequestId=KR0XZCSX │ + // └─────────────────────────────────────────────────────────────────┘ + // HttpClient + // ┌─────────────────────────────────┐ + // │ AvailableConcurrency=1 │ + // │ LeasedConcurrency=0 │ + // │ ConcurrencyAcquireDuration=PT0S │ + // │ PendingConcurrencyAcquires=0 │ + // │ MaxConcurrency=96 │ + // │ HttpClientName=Apache │ + // └─────────────────────────────────┘ - TimingInfo timingInfo = request.getAWSRequestMetrics().getTimingInfo(); + final long[] throttling = {0}; + recurseThroughChildren(metricCollection) + .collect(Collectors.toList()) + .forEach(m -> { + counter(m, CoreMetric.RETRY_COUNT, retries -> { + collector.updateAwsRetryCount(retries); + collector.updateAwsRequestCount(retries + 1); + }); - counter(timingInfo, HttpClientRetryCount.name(), - collector::updateAwsRetryCount); - counter(timingInfo, RequestCount.name(), - collector::updateAwsRequestCount); - counter(timingInfo, ThrottleException.name(), - collector::updateAwsThrottleExceptionsCount); + counter(m, HttpMetric.HTTP_STATUS_CODE, statusCode -> { + if (statusCode == HttpStatusCode.THROTTLING) { + throttling[0] += 1; + } + }); + + timing(m, CoreMetric.API_CALL_DURATION, + collector::noteAwsClientExecuteTime); + + timing(m, CoreMetric.SERVICE_CALL_DURATION, + collector::noteAwsRequestTime); + + timing(m, CoreMetric.MARSHALLING_DURATION, + collector::noteRequestMarshallTime); + + timing(m, CoreMetric.SIGNING_DURATION, + collector::noteRequestSigningTime); + + timing(m, CoreMetric.UNMARSHALLING_DURATION, + collector::noteResponseProcessingTime); + }); + + collector.updateAwsThrottleExceptionsCount(throttling[0]); + } + + @Override + public void close() { - timing(timingInfo, ClientExecuteTime.name(), - collector::noteAwsClientExecuteTime); - timing(timingInfo, HttpRequestTime.name(), - collector::noteAwsRequestTime); - timing(timingInfo, RequestMarshallTime.name(), - collector::noteRequestMarshallTime); - timing(timingInfo, RequestSigningTime.name(), - 
collector::noteRequestSigningTime); - timing(timingInfo, ResponseProcessingTime.name(), - collector::noteResponseProcessingTime); } /** * Process a timing. - * @param timingInfo timing info - * @param subMeasurementName sub measurement + * @param collection metric collection + * @param metric metric * @param durationConsumer consumer */ private void timing( - TimingInfo timingInfo, - String subMeasurementName, + MetricCollection collection, + SdkMetric metric, Consumer durationConsumer) { - TimingInfo t1 = timingInfo.getSubMeasurement(subMeasurementName); - if (t1 != null && t1.getTimeTakenMillisIfKnown() != null) { - durationConsumer.accept(Duration.ofMillis( - t1.getTimeTakenMillisIfKnown().longValue())); - } + collection + .metricValues(metric) + .forEach(v -> durationConsumer.accept(v)); } /** * Process a counter. - * @param timingInfo timing info - * @param subMeasurementName sub measurement + * @param collection metric collection + * @param metric metric * @param consumer consumer */ private void counter( - TimingInfo timingInfo, - String subMeasurementName, + MetricCollection collection, + SdkMetric metric, LongConsumer consumer) { - Number n = timingInfo.getCounter(subMeasurementName); - if (n != null) { - consumer.accept(n.longValue()); - } + collection + .metricValues(metric) + .forEach(v -> consumer.accept(v.longValue())); + } + + /** + * Metric collections can be nested. Exposes a stream of the given + * collection and its nested children. + * @param metrics initial collection + * @return a stream of all nested metric collections + */ + private static Stream recurseThroughChildren( + MetricCollection metrics) { + return Stream.concat( + Stream.of(metrics), + metrics.children().stream() + .flatMap(c -> recurseThroughChildren(c))); } } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/tools/MarkerTool.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/tools/MarkerTool.java index b09a3d3fdef..2ce08bb2552 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/tools/MarkerTool.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/tools/MarkerTool.java @@ -32,9 +32,8 @@ import java.util.List; import java.util.Map; import java.util.stream.Collectors; -import com.amazonaws.AmazonClientException; -import com.amazonaws.services.s3.model.DeleteObjectsRequest; -import com.amazonaws.services.s3.model.MultiObjectDeleteException; +import software.amazon.awssdk.awscore.exception.AwsServiceException; +import software.amazon.awssdk.services.s3.model.ObjectIdentifier; import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.slf4j.Logger; @@ -55,12 +54,14 @@ import org.apache.hadoop.fs.s3a.UnknownStoreException; import org.apache.hadoop.fs.s3a.impl.DirMarkerTracker; import org.apache.hadoop.fs.s3a.impl.DirectoryPolicy; import org.apache.hadoop.fs.s3a.impl.DirectoryPolicyImpl; +import org.apache.hadoop.fs.s3a.impl.MultiObjectDeleteException; import org.apache.hadoop.fs.s3a.impl.StoreContext; import org.apache.hadoop.fs.s3a.s3guard.S3GuardTool; import org.apache.hadoop.fs.shell.CommandFormat; import org.apache.hadoop.util.DurationInfo; import org.apache.hadoop.util.ExitUtil; + import static org.apache.hadoop.fs.s3a.Constants.AUTHORITATIVE_PATH; import static org.apache.hadoop.fs.s3a.Constants.BULK_DELETE_PAGE_SIZE; import static org.apache.hadoop.fs.s3a.Constants.BULK_DELETE_PAGE_SIZE_DEFAULT; @@ -784,7 
+785,7 @@ public final class MarkerTool extends S3GuardTool { private MarkerPurgeSummary purgeMarkers( final DirMarkerTracker tracker, final int deletePageSize) - throws MultiObjectDeleteException, AmazonClientException, IOException { + throws MultiObjectDeleteException, AwsServiceException, IOException { MarkerPurgeSummary summary = new MarkerPurgeSummary(); // we get a map of surplus markers to delete. @@ -792,13 +793,13 @@ public final class MarkerTool extends S3GuardTool { = tracker.getSurplusMarkers(); int size = markers.size(); // build a list from the strings in the map - List collect = + List collect = markers.values().stream() - .map(p -> new DeleteObjectsRequest.KeyVersion(p.getKey())) + .map(p -> ObjectIdentifier.builder().key(p.getKey()).build()) .collect(Collectors.toList()); // build an array list for ease of creating the lists of // keys in each page through the subList() method. - List markerKeys = + List markerKeys = new ArrayList<>(collect); // now randomize. Why so? if the list spans multiple S3 partitions, @@ -819,7 +820,7 @@ public final class MarkerTool extends S3GuardTool { while (start < size) { // end is one past the end of the page int end = Math.min(start + deletePageSize, size); - List page = markerKeys.subList(start, + List page = markerKeys.subList(start, end); once("Remove S3 Keys", tracker.getBasePath().toString(), () -> diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/tools/MarkerToolOperations.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/tools/MarkerToolOperations.java index a701f86f7b0..7aaec40a868 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/tools/MarkerToolOperations.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/tools/MarkerToolOperations.java @@ -21,15 +21,16 @@ package org.apache.hadoop.fs.s3a.tools; import java.io.IOException; import java.util.List; -import com.amazonaws.AmazonClientException; -import com.amazonaws.services.s3.model.DeleteObjectsRequest; -import com.amazonaws.services.s3.model.MultiObjectDeleteException; +import software.amazon.awssdk.awscore.exception.AwsServiceException; +import software.amazon.awssdk.services.s3.model.ObjectIdentifier; import org.apache.hadoop.fs.InvalidRequestException; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.RemoteIterator; import org.apache.hadoop.fs.s3a.Retries; import org.apache.hadoop.fs.s3a.S3AFileStatus; +import org.apache.hadoop.fs.s3a.impl.MultiObjectDeleteException; + /** * Operations which must be offered by the store for {@link MarkerTool}. @@ -62,14 +63,14 @@ public interface MarkerToolOperations { * a mistaken attempt to delete the root directory. * @throws MultiObjectDeleteException one or more of the keys could not * be deleted in a multiple object delete operation. - * @throws AmazonClientException amazon-layer failure. + * @throws AwsServiceException amazon-layer failure. * @throws IOException other IO Exception. 
*/ @Retries.RetryMixed void removeKeys( - List keysToDelete, + List keysToDelete, boolean deleteFakeDir) - throws MultiObjectDeleteException, AmazonClientException, + throws MultiObjectDeleteException, AwsServiceException, IOException; } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/tools/MarkerToolOperationsImpl.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/tools/MarkerToolOperationsImpl.java index ccf80e1dde0..d7c77feed10 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/tools/MarkerToolOperationsImpl.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/tools/MarkerToolOperationsImpl.java @@ -21,15 +21,16 @@ package org.apache.hadoop.fs.s3a.tools; import java.io.IOException; import java.util.List; -import com.amazonaws.AmazonClientException; -import com.amazonaws.services.s3.model.DeleteObjectsRequest; -import com.amazonaws.services.s3.model.MultiObjectDeleteException; +import software.amazon.awssdk.awscore.exception.AwsServiceException; +import software.amazon.awssdk.services.s3.model.ObjectIdentifier; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.RemoteIterator; import org.apache.hadoop.fs.s3a.S3AFileStatus; +import org.apache.hadoop.fs.s3a.impl.MultiObjectDeleteException; import org.apache.hadoop.fs.s3a.impl.OperationCallbacks; + /** * Implement the marker tool operations by forwarding to the * {@link OperationCallbacks} instance provided in the constructor. @@ -55,9 +56,9 @@ public class MarkerToolOperationsImpl implements MarkerToolOperations { @Override public void removeKeys( - final List keysToDelete, + final List keysToDelete, final boolean deleteFakeDir) - throws MultiObjectDeleteException, AmazonClientException, IOException { + throws MultiObjectDeleteException, AwsServiceException, IOException { operationCallbacks.removeKeys(keysToDelete, deleteFakeDir ); } diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/aws_sdk_v2_changelog.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/aws_sdk_v2_changelog.md new file mode 100644 index 00000000000..162f15951f5 --- /dev/null +++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/aws_sdk_v2_changelog.md @@ -0,0 +1,340 @@ + + +# Upgrade S3A to AWS SDK V2: Changelog + +Note: This document is not meant to be committed as part of the final merge, and instead just serves +as a guide to help with reviewing the PR. + +This document tracks changes to S3A during the upgrade to AWS SDK V2. Once the upgrade +is complete, some of its content will be added to the existing document +[Upcoming upgrade to AWS Java SDK V2](./aws_sdk_upgrade.html). + +This work is tracked in [HADOOP-18073](https://issues.apache.org/jira/browse/HADOOP-18073). + +## Contents + +* [Client Configuration](#client-configuration) +* [Endpoint and region configuration](#endpoint-and-region-configuration) +* [List Object](#list-object) +* [EncryptionSecretOperations](#encryptionsecretoperations) +* [GetObjectMetadata](#getobjectmetadata) +* [PutObject](#putobject) +* [CopyObject](#copyobject) +* [MultipartUpload](#multipartupload) +* [GetObject](#getObject) +* [DeleteObject](#deleteobject) +* [Select](#select) +* [CredentialsProvider](#credentialsprovider) +* [Auditing](#auditing) +* [Metric Collection](#metric-collection) +* [Exception Handling](#exception-handling) +* [Failure Injection](#failure-injection) + +### Client Configuration: + +* We now have two clients, a sync S3 Client and an async S3 Client. 
The async S3 client is required
+  as the select operation is currently only supported on the async client. Once we are confident in
+  the current set of changes, we will also explore moving other operations to the async client,
+  as this could provide potential performance benefits. However, those changes are not in the scope
+  of this PR, and will be done separately.
+* The [createAwsConf](https://github.com/apache/hadoop/blob/trunk/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java#L1190)
+method is now split into:
+  ```
+  createClientConfigBuilder // sets request timeout, user agent*
+  createHttpClientBuilder* // sets max connections, connection timeout, socket timeout
+  createProxyConfigurationBuilder // sets proxy config, defined in table below
+  ```
+
+The table below lists the configurations S3A was using and what they now map to.
+
+|SDK V1 |SDK V2 |
+|---|---|
+|setMaxConnections |httpClientBuilder.maxConnections |
+|setProtocol |The protocol is now HTTPS by default, and can only be modified by setting an HTTP endpoint on the client builder. This is done when setting the endpoint in getS3Endpoint() |
+|setMaxErrorRetry |createRetryPolicyBuilder |
+|setConnectionTimeout |httpClientBuilder.connectionTimeout |
+|setSocketTimeout |httpClientBuilder.socketTimeout |
+|setRequestTimeout |overrideConfigBuilder.apiCallAttemptTimeout |
+|setSocketBufferSizeHints |Not supported |
+|setSignerOverride |Not done yet |
+|setProxyHost |proxyConfigBuilder.endpoint |
+|setProxyPort |set when setting proxy host with .endpoint |
+|setProxyUsername |proxyConfigBuilder.username |
+|setProxyPassword |proxyConfigBuilder.password |
+|setProxyDomain |proxyConfigBuilder.ntlmDomain, not supported in async client |
+|setProxyWorkstation |proxyConfigBuilder.ntlmWorkstation, not supported in async client |
+|setUserAgentPrefix |overrideConfigBuilder.putAdvancedOption(SdkAdvancedClientOption.USER_AGENT_PREFIX, userAgent); |
+|addHeader |overrideConfigBuilder.putHeader |
+|setUseThrottleRetries |not supported |
+
+### Endpoint and region configuration
+
+Previously, if no endpoint or region was configured, S3A fell back to us-east-1 and set
+withForceGlobalBucketAccessEnabled(true), which allowed access to buckets outside that region too.
+Since SDK V2 no longer supports cross-region access, we need to set the region and endpoint of
+the bucket. The behaviour has now been changed to:
+
+* If no endpoint is specified, use s3.amazonaws.com.
+* When setting the endpoint, also set the protocol (HTTP or HTTPS).
+* When setting the region, first instantiate a default S3 client with region eu-west-2 and call headBucket
+  using this client. If the bucket is also in eu-west-2, this will return a successful
+  response. Otherwise it will throw an error with status code 301 (permanently moved). This error
+  contains the region of the bucket in its header, which we can then use to configure the client.
+
+### List Object:
+
+There is no way to paginate the listObject V1 result, so we are
+doing [this](https://github.com/ahmarsuhail/hadoop/pull/23/files#diff-4050f95b7e3912145415b6e2f9cd3b0760fcf2ce96bf0980c6c30a6edad2d0fbR2745)
+instead. We are trying to get pagination added to listObject V1 in the SDK, but will have to use this
+workaround for now.
+
+### EncryptionSecretOperations:
+
+Two new methods have been added, `getSSECustomerKey` and `getSSEAwsKMSKey`. Previously, SDK V1 had
+specific classes for these keys: `SSECustomerKey` and `SSEAwsKeyManagementParams`.
There are no such
+classes with V2, and things need to be set manually. For this reason, we now simply return the keys as
+strings, and calculate and set MD5s ourselves when building the request.
+
+### GetObjectMetadata:
+
+* `RequestFactory.newGetObjectMetadataRequest` is now `RequestFactory.newHeadObjectRequestBuilder`.
+* In `HeaderProcessing.retrieveHeaders()`, called by `getXAttrs()`,
+  removed `maybeSetHeader(headers, XA_CONTENT_MD5, md.getContentMD5())`, as S3 doesn't ever actually
+  return an MD5 header, regardless of whether you set it during a putObject. It does return
+  an `etag`, which may or may not be an MD5 depending on certain conditions. `getContentMD5()` is
+  always empty, so there does not seem to be a need to set this header.
+* `RequestFactoryImpl.setOptionalGetObjectMetadataParameters`: The method has been removed and its
+  logic has been moved to `RequestFactoryImpl.newHeadObjectRequestBuilder()`.
+* `RequestFactoryImpl.generateSSECustomerKey()` has been removed; instead,
+  `EncryptionSecretOperations.createSSECustomerKey` is called directly in `newHeadObjectRequestBuilder()`.
+
+### PutObject
+
+* Previously, when creating the `putObjectRequest`, you would also give it the data to be uploaded,
+  so it would be of the form `PutObjectRequest(bucket, key, file/inputstream)`. This is no longer
+  the case. Instead, the data now needs to be passed in while making the `s3Client.putObject()`
+  call. For this reason, the data is now part of
+  the `S3AFileSystem.putObject(putObjectRequest, file, listener)`
+  and `S3AFileSystem.putObjectDirect(putObjectRequest, putOptions, uploadData, isFile)` calls.
+* `S3ADataBlocks`: This class has been made public, as it's now used to pass in data
+  to `putObjectDirect()`, sometimes from outside the package (`MagicCommitTracker`,
+  `ITestS3AMiscOperations`).
+* `ProgressableProgressListener`: You can no longer pass in the `Upload` while initialising the
+  listener
+  as `ProgressableProgressListener listener = new ProgressableProgressListener(this, key, upload, progress);`
+  The upload is now only available after initialising the listener, since the listener needs to be
+  initialised during creation of the Transfer Manager upload. Previously, you could create the
+  listener after starting the TM upload, and attach it.
+* The `Upload` is now passed into the progress listener later,
+  in `listener.uploadCompleted(uploadInfo.getFileUpload());`.
+* `UploadInfo`: Previously, since the data to be uploaded was part of `putObjectRequest`, the
+  transfer manager only returned a single `Upload` type, which could be used to track the upload.
+  Now, depending on the upload type (e.g. File or InputStream), it returns different types. This
+  class has been updated to return FileUpload info, as it's only ever used for file uploads
+  currently. It can be extended to store different transfer types in the future.
+* `WriteOperationHelper.createPutObjectRequest()`: Previously the data to be uploaded was part
+  of `PutObjectRequest`, and so we required two methods to create the request: one for input streams
+  and one for files. Since the data to be uploaded is no longer part of the request, but instead an
+  argument to `putObject`, we only need one method now.
+
+### CopyObject
+
+* `RequestFactoryImpl.buildPutObjectRequest`: destination metadata is no longer built
+  using `newObjectMetadata()` and is instead set on the request builder. The logic has a couple of
+  differences:
+  * content encoding is set in `buildCopyObjectRequest`;
+    the `if (contentEncoding != null && !isDirectoryMarker)` can just
+    be `if (contentEncoding != null)` for copy, as `isDirectoryMarker` was always false here.
+  * contentLength is not set, as this is a system-defined header and is copied over automatically by
+    S3 during the copy.
+* `HeaderProcessing.cloneObjectMetadata`: this was previously also setting a lot of system-defined
+  metadata, eg: `setHttpExpiresDate` and `setLastModified`. These have been removed as they are set
+  by S3 during the copy. We have tested this and can see they are set automatically regardless of the
+  metadataDirective (copy or replace).
+* `RequestFactoryImpl.copyEncryptionParameters()`: due to the changes
+  in `EncryptionSecretOperations`, source and destination encryption params have to be set manually.
+
+### MultipartUpload
+
+* `RequestFactoryImpl.newObjectMetadata()`: metadata is now set on the request builder. For MPU, only
+  content encoding needs to be set, as per previous behaviour. Encryption params are set
+  in `multipartUploadEncryptionParameters`.
+
+### GetObject
+
+* Previously, GetObject returned an `S3Object` response which exposed its content in a
+  `S3ObjectInputStream` through the `getObjectContent()` method. In SDK v2, the response is
+  directly a `ResponseInputStream` with the content, while the
+  `GetObjectResponse` instance can be retrieved by calling `response()` on it. A sketch of the new
+  read pattern is shown after this list.
+* The above change simplifies managing the lifetime of the response input stream. In v1,
+  `S3AInputStream` had to keep a reference to the `S3Object` while holding the wrapped
+  `S3ObjectInputStream`. When upgraded to SDK v2, it can simply wrap the new
+  `ResponseInputStream`, which handles lifetime correctly. The same applies
+  to `SDKStreamDrainer`. Furthermore, the map in `S3ARemoteObject` associating input streams and
+  `S3Object` instances is no longer needed.
+* The range header on a `GetObject` request is now specified as a string, rather than a
+  `start`-`end` pair. `S3AUtils.formatRange` was introduced to format it.
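+
+A minimal sketch of the v2 read pattern (not the exact logic in `S3AInputStream`; the class name,
+bucket, key and range are placeholders):
+
+```java
+import java.io.IOException;
+import software.amazon.awssdk.core.ResponseInputStream;
+import software.amazon.awssdk.services.s3.S3Client;
+import software.amazon.awssdk.services.s3.model.GetObjectRequest;
+import software.amazon.awssdk.services.s3.model.GetObjectResponse;
+
+public final class GetObjectExample {
+  /** The returned stream is the object body; response metadata is reached via response(). */
+  public static byte[] readFirstKilobyte(S3Client s3, String bucket, String key) throws IOException {
+    GetObjectRequest request = GetObjectRequest.builder()
+        .bucket(bucket)
+        .key(key)
+        .range("bytes=0-1023")  // the range is now a single string
+        .build();
+    try (ResponseInputStream<GetObjectResponse> stream = s3.getObject(request)) {
+      GetObjectResponse response = stream.response();
+      System.out.println("etag: " + response.eTag());
+      return stream.readAllBytes();
+    }
+  }
+}
+```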
+
+### DeleteObject
+
+In SDK v1, bulk delete would throw a `com.amazonaws.services.s3.model.MultiObjectDeleteException`
+in case of partial failure. In v2, instead, it returns a `DeleteObjectsResponse` containing a
+list of errors. A new `MultiObjectDeleteException` class was introduced in
+`org.apache.hadoop.fs.s3a` and is thrown when appropriate to reproduce the previous behaviour.
+* `MultiObjectDeleteSupport.translateDeleteException` was moved into `MultiObjectDeleteException`.
+* `ObjectIdentifier` replaces `DeleteObjectsRequest.KeyVersion`.
+
+### Select
+
+In SDK v2, handling of select requests has changed significantly, since SelectObjectContent is
+only supported on the new async S3 client. In previous versions, the response to a
+SelectObjectContent request exposed the results in a `SelectRecordsInputStream`, which S3A
+could wrap in `SelectInputStream`. In v2, instead, the response needs to be handled by an object
+implementing `SelectObjectContentResponseHandler`, which can receive an async publisher of
+the "events" returned by the service (`SdkPublisher`).
+
+In order to adapt the new API in S3A, three new classes have been introduced in
+`org.apache.hadoop.fs.s3a.select`:
+
+* `SelectObjectContentHelper`: wraps the `selectObjectContent()` call, provides a custom
+  response handler to receive the response, and exposes a `SelectEventStreamPublisher`.
+* `SelectEventStreamPublisher`: a publisher of select event stream events, which handles the
+  future returned by the select call and wraps the original publisher. This class provides
+  a `toRecordsInputStream()` method which returns an input stream containing the results,
+  reproducing the behaviour of the old `SelectRecordsInputStream`.
+* `BlockingEnumeration`: an adapter which lazily requests new elements from the publisher and
+  exposes them through an `Enumeration` interface. Used in
+  `SelectEventStreamPublisher.toRecordsInputStream()` to adapt the event publisher into
+  an enumeration of input streams, eventually passed to a `SequenceInputStream`.
+  Note that the "lazy" behaviour means that new elements are requested only on `read()` calls on
+  the input stream.
+
+### CredentialsProvider
+
+* All credential provider classes implemented in Hadoop now implement V2's `AwsCredentialsProvider`;
+  a sketch of a V2-style provider is shown after the mapping table below.
+* A new adapter class, `org.apache.hadoop.fs.s3a.adapter.V1ToV2AwsCredentialProviderAdapter`, has been
+  added. It converts SDK V1 credential providers to SDK V2 ones, which
+  implement `AwsCredentialsProvider`.
+* `AWSCredentialProviderList` also implements `AwsCredentialsProvider`, but keeps its existing
+  constructors and add methods for V1 credential providers, wrapping V1 credential providers in the
+  adapter. This means that custom binding classes in delegation tokens, as well as any custom
+  credential providers, will continue to work.
+* A new `getCredentials()` method was added to `AWSCredentialProviderList`, which ensures that custom
+  binding classes calling `AWSCredentialProviderList.getCredentials()` continue to work.
+* The following `fs.s3a.aws.credentials.provider` values are mapped to their V2 equivalents:
+
+|`fs.s3a.aws.credentials.provider` value |Mapped to |
+|---|---|
+|`com.amazonaws.auth.EnvironmentVariableCredentialsProvider` |`software.amazon.awssdk.auth.credentials.EnvironmentVariableCredentialsProvider` |
+|`com.amazonaws.auth.EC2ContainerCredentialsProviderWrapper` |`org.apache.hadoop.fs.s3a.auth.IAMInstanceCredentialsProvider` |
+|`com.amazonaws.auth.InstanceProfileCredentialsProvider` |`org.apache.hadoop.fs.s3a.auth.IAMInstanceCredentialsProvider` |
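+
+As a rough sketch of what a V2-style provider looks like (a hypothetical example, not one of the
+providers shipped in this change):
+
+```java
+import software.amazon.awssdk.auth.credentials.AwsBasicCredentials;
+import software.amazon.awssdk.auth.credentials.AwsCredentials;
+import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider;
+
+/** Returns fixed credentials; a real provider would resolve them dynamically. */
+public class StaticCredentialsProviderExample implements AwsCredentialsProvider {
+
+  @Override
+  public AwsCredentials resolveCredentials() {
+    // V2 has no refresh() method; resolveCredentials() is invoked whenever credentials are needed.
+    return AwsBasicCredentials.create("access-key", "secret-key");
+  }
+}
+```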
+
+### Auditing
+
+The SDK v2 offers a new `ExecutionInterceptor`
+[interface](https://sdk.amazonaws.com/java/api/latest/software/amazon/awssdk/core/interceptor/ExecutionInterceptor.html)
+which broadly replaces the `RequestHandler2` abstract class from v1.
+Switching to the new mechanism in S3A brings:
+
+* Simplification in `AWSAuditEventCallbacks` (and implementors), which can now extend
+  `ExecutionInterceptor`
+* "Registering" a span with a request has moved from `requestCreated` to `beforeExecution`
+  (where an `ExecutionAttributes` instance is first available)
+* The referrer header is built and added to the HTTP request in `modifyHttpRequest`,
+  rather than in `beforeExecution`, where no HTTP request is yet available
+* Dynamic loading of interceptors has been implemented to reproduce the previous behaviour
+  with `RequestHandler2`s. The AWS SDK v2 offers an alternative mechanism, described
+  [here](https://sdk.amazonaws.com/java/api/latest/software/amazon/awssdk/core/interceptor/ExecutionInterceptor.html)
+  under "Interceptor Registration", which could make it redundant.
+
+In the Transfer Manager, `TransferListener` replaces `TransferStateChangeListener`. S3A code
+has been updated and `AuditManagerS3A` implementations now provide an instance of the former to
+switch to the active span, but registration of the new listeners is currently commented out because
+it causes an incompatibility issue with the internal logger, resulting in `NoSuchMethodError`s,
+at least in the current TransferManager Preview release.
+
+### Metric Collection
+
+`AwsStatisticsCollector` has been updated to implement the new `MetricPublisher` interface
+and collect the metrics from a `MetricCollection` object.
+The following table maps SDK v2 metrics to their equivalent in v1:
+
+| v2 Metrics | com.amazonaws.util.AWSRequestMetrics.Field | Comment |
+|---|---|---|
+| CoreMetric.RETRY_COUNT | HttpClientRetryCount | |
+| CoreMetric.RETRY_COUNT | RequestCount | always HttpClientRetryCount+1 |
+| HttpMetric.HTTP_STATUS_CODE with HttpStatusCode.THROTTLING | ThrottleException | to be confirmed |
+| CoreMetric.API_CALL_DURATION | ClientExecuteTime | |
+| CoreMetric.SERVICE_CALL_DURATION | HttpRequestTime | |
+| CoreMetric.MARSHALLING_DURATION | RequestMarshallTime | |
+| CoreMetric.SIGNING_DURATION | RequestSigningTime | |
+| CoreMetric.UNMARSHALLING_DURATION | ResponseProcessingTime | to be confirmed |
+
+Note that none of the timing metrics (`*_DURATION`) are currently collected in S3A.
+
+### Exception Handling
+
+The code to handle exceptions thrown by the SDK has been updated to reflect the changes in v2:
+
+* `com.amazonaws.SdkBaseException` and `com.amazonaws.AmazonClientException` changes:
+  * These classes have been combined and replaced with
+    `software.amazon.awssdk.core.exception.SdkException`.
+* `com.amazonaws.SdkClientException` changes: + * This class has been replaced with `software.amazon.awssdk.core.exception.SdkClientException`. + * This class now extends `software.amazon.awssdk.core.exception.SdkException`. +* `com.amazonaws.AmazonServiceException` changes: + * This class has been replaced with + `software.amazon.awssdk.awscore.exception.AwsServiceException`. + * This class now extends `software.amazon.awssdk.core.exception.SdkServiceException`, + a new exception type that extends `software.amazon.awssdk.core.exception.SdkException`. + +See also the +[SDK changelog](https://github.com/aws/aws-sdk-java-v2/blob/master/docs/LaunchChangelog.md#3-exception-changes). + + +### Failure Injection + +While using the SDK v1, failure injection was implemented in `InconsistentAmazonS3CClient`, +which extended the S3 client. In SDK v2, reproducing this approach would not be straightforward, +since the default S3 client is an internal final class. Instead, the same fault injection strategy +is now performed by a `FailureInjectionInterceptor` (see +[ExecutionInterceptor](https://sdk.amazonaws.com/java/api/latest/software/amazon/awssdk/core/interceptor/ExecutionInterceptor.html)) +registered on the default client by `InconsistentS3CClientFactory`. +`InconsistentAmazonS3CClient` has been removed. No changes to the user configuration are required. + diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/testing.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/testing.md index d2ed9ede017..bc5bc64f863 100644 --- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/testing.md +++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/testing.md @@ -1004,20 +1004,18 @@ using an absolute XInclude reference to it. **Warning do not enable any type of failure injection in production. The following settings are for testing only.** -One of the challenges with S3A integration tests was the fact that S3 was an -eventually-consistent storage system. To simulate inconsistencies more -frequently than they would normally surface, S3A supports a shim layer on top of the `AmazonS3Client` -class which artificially delays certain paths from appearing in listings. -This is implemented in the class `InconsistentAmazonS3Client`. +S3A provides an "Inconsistent S3 Client Factory" that can be used to +simulate throttling by injecting random failures on S3 client requests. -Now that S3 is consistent, injecting inconsistency is no longer needed -during testing. -However, it is stil useful to use the other feature of the client: -throttling simulation. -## Simulating List Inconsistencies +**Note** -### Enabling the InconsistentAmazonS3CClient +In previous releases, this factory could also be used to simulate +inconsistencies during testing of S3Guard. Now that S3 is consistent, +injecting inconsistency is no longer needed during testing. + + +### Enabling the InconsistentS3CClientFactory To enable the fault-injecting client via configuration, switch the @@ -1047,7 +1045,7 @@ These exceptions are returned to S3; they do not test the AWS SDK retry logic. -### Using the `InconsistentAmazonS3CClient` in downstream integration tests +### Using the `InconsistentS3CClientFactory` in downstream integration tests The inconsistent client is shipped in the `hadoop-aws` JAR, so it can be used in integration tests. 
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/AbstractS3AMockTest.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/AbstractS3AMockTest.java index a46303f3396..d233081ee68 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/AbstractS3AMockTest.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/AbstractS3AMockTest.java @@ -20,8 +20,9 @@ package org.apache.hadoop.fs.s3a; import static org.apache.hadoop.fs.s3a.Constants.*; -import com.amazonaws.AmazonServiceException; -import com.amazonaws.services.s3.AmazonS3; +import software.amazon.awssdk.awscore.exception.AwsErrorDetails; +import software.amazon.awssdk.awscore.exception.AwsServiceException; +import software.amazon.awssdk.services.s3.S3Client; import java.net.URI; @@ -32,6 +33,7 @@ import org.junit.Before; import org.junit.Rule; import org.junit.rules.ExpectedException; + /** * Abstract base class for S3A unit tests using a mock S3 client and a null * metadata store. @@ -39,17 +41,20 @@ import org.junit.rules.ExpectedException; public abstract class AbstractS3AMockTest { protected static final String BUCKET = "mock-bucket"; - protected static final AmazonServiceException NOT_FOUND; - static { - NOT_FOUND = new AmazonServiceException("Not Found"); - NOT_FOUND.setStatusCode(404); - } + protected static final AwsServiceException NOT_FOUND = + AwsServiceException.builder() + .message("Not Found") + .statusCode(404) + .awsErrorDetails(AwsErrorDetails.builder() + .errorCode("") + .build()) + .build(); @Rule public ExpectedException exception = ExpectedException.none(); protected S3AFileSystem fs; - protected AmazonS3 s3; + protected S3Client s3; @Before public void setup() throws Exception { @@ -62,7 +67,6 @@ public abstract class AbstractS3AMockTest { s3 = fs.getAmazonS3ClientForTesting("mocking"); } - @SuppressWarnings("deprecation") public Configuration createConfiguration() { Configuration conf = new Configuration(); conf.setClass(S3_CLIENT_FACTORY_IMPL, MockS3ClientFactory.class, @@ -75,9 +79,15 @@ public abstract class AbstractS3AMockTest { // assertions to be safely made without worrying // about any race conditions conf.setInt(ASYNC_DRAIN_THRESHOLD, Integer.MAX_VALUE); + // set the region to avoid the getBucketLocation on FS init. 
+ conf.set(AWS_REGION, "eu-west-1"); return conf; } + public S3Client getS3Client() { + return s3; + } + @After public void teardown() throws Exception { if (fs != null) { diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/EncryptionTestUtils.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/EncryptionTestUtils.java index 4013e9db29a..794480d4409 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/EncryptionTestUtils.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/EncryptionTestUtils.java @@ -20,7 +20,7 @@ package org.apache.hadoop.fs.s3a; import java.io.IOException; -import com.amazonaws.services.s3.model.ObjectMetadata; +import software.amazon.awssdk.services.s3.model.HeadObjectResponse; import org.apache.commons.codec.digest.DigestUtils; import org.apache.commons.net.util.Base64; @@ -69,33 +69,33 @@ public final class EncryptionTestUtils { final S3AEncryptionMethods algorithm, final String kmsKeyArn) throws IOException { - ObjectMetadata md = fs.getObjectMetadata(path); + HeadObjectResponse md = fs.getObjectMetadata(path); String details = String.format( "file %s with encryption algorithm %s and key %s", path, - md.getSSEAlgorithm(), - md.getSSEAwsKmsKeyId()); + md.serverSideEncryptionAsString(), + md.ssekmsKeyId()); switch(algorithm) { case SSE_C: assertNull("Metadata algorithm should have been null in " + details, - md.getSSEAlgorithm()); + md.serverSideEncryptionAsString()); assertEquals("Wrong SSE-C algorithm in " + details, - SSE_C_ALGORITHM, md.getSSECustomerAlgorithm()); + SSE_C_ALGORITHM, md.sseCustomerAlgorithm()); String md5Key = convertKeyToMd5(fs); assertEquals("getSSECustomerKeyMd5() wrong in " + details, - md5Key, md.getSSECustomerKeyMd5()); + md5Key, md.sseCustomerKeyMD5()); break; case SSE_KMS: assertEquals("Wrong algorithm in " + details, - AWS_KMS_SSE_ALGORITHM, md.getSSEAlgorithm()); + AWS_KMS_SSE_ALGORITHM, md.serverSideEncryptionAsString()); assertEquals("Wrong KMS key in " + details, kmsKeyArn, - md.getSSEAwsKmsKeyId()); + md.ssekmsKeyId()); break; default: - assertEquals("AES256", md.getSSEAlgorithm()); + assertEquals("AES256", md.serverSideEncryptionAsString()); } } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AAWSCredentialsProvider.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AAWSCredentialsProvider.java index c13c3f48b84..6d1b10954e7 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AAWSCredentialsProvider.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AAWSCredentialsProvider.java @@ -32,9 +32,9 @@ import org.junit.Rule; import org.junit.Test; import org.junit.rules.Timeout; -import com.amazonaws.auth.AWSCredentials; -import com.amazonaws.auth.AWSCredentialsProvider; -import com.amazonaws.auth.BasicAWSCredentials; +import software.amazon.awssdk.auth.credentials.AwsBasicCredentials; +import software.amazon.awssdk.auth.credentials.AwsCredentials; +import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -77,20 +77,17 @@ public class ITestS3AAWSCredentialsProvider { * or a public default constructor. 
*/ static class BadCredentialsProviderConstructor - implements AWSCredentialsProvider { + implements AwsCredentialsProvider { @SuppressWarnings("unused") public BadCredentialsProviderConstructor(String fsUri, Configuration conf) { } @Override - public AWSCredentials getCredentials() { - return new BasicAWSCredentials("dummy_key", "dummy_secret"); + public AwsCredentials resolveCredentials() { + return AwsBasicCredentials.create("dummy_key", "dummy_secret"); } - @Override - public void refresh() { - } } @Test @@ -125,20 +122,17 @@ public class ITestS3AAWSCredentialsProvider { fail("Expected exception - got " + fs); } - static class BadCredentialsProvider implements AWSCredentialsProvider { + static class BadCredentialsProvider implements AwsCredentialsProvider { @SuppressWarnings("unused") public BadCredentialsProvider(Configuration conf) { } @Override - public AWSCredentials getCredentials() { - return new BasicAWSCredentials("bad_key", "bad_secret"); + public AwsCredentials resolveCredentials() { + return AwsBasicCredentials.create("bad_key", "bad_secret"); } - @Override - public void refresh() { - } } @Test @@ -157,7 +151,6 @@ public class ITestS3AAWSCredentialsProvider { } @Test - @SuppressWarnings("deprecation") public void testAnonymousProvider() throws Exception { Configuration conf = new Configuration(); conf.set(AWS_CREDENTIALS_PROVIDER, diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ABucketExistence.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ABucketExistence.java index 9485202f64c..2507ae2f510 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ABucketExistence.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ABucketExistence.java @@ -125,11 +125,12 @@ public class ITestS3ABucketExistence extends AbstractS3ATestBase { Configuration conf = new Configuration(getFileSystem().getConf()); S3ATestUtils.disableFilesystemCaching(conf); conf.setInt(S3A_BUCKET_PROBE, probe); + conf.set(AWS_REGION, "eu-west-1"); return conf; } @Test - public void testBucketProbingV1() throws Exception { + public void testBucketProbing() throws Exception { describe("Test the V1 bucket probe"); Configuration configuration = createConfigurationWithProbe(1); expectUnknownStore( @@ -137,18 +138,24 @@ public class ITestS3ABucketExistence extends AbstractS3ATestBase { } @Test - public void testBucketProbingV2() throws Exception { - describe("Test the V2 bucket probe"); + public void testBucketProbing2() throws Exception { + describe("Test the bucket probe with probe value set to 2"); Configuration configuration = createConfigurationWithProbe(2); + expectUnknownStore( () -> FileSystem.get(uri, configuration)); - /* - * Bucket probing should also be done when value of - * S3A_BUCKET_PROBE is greater than 2. 
- */ - configuration.setInt(S3A_BUCKET_PROBE, 3); - expectUnknownStore( - () -> FileSystem.get(uri, configuration)); + } + + @Test + public void testBucketProbing3() throws Exception { + describe("Test the bucket probe with probe value set to 3"); + Configuration configuration = createConfigurationWithProbe(3); + fs = FileSystem.get(uri, configuration); + Path root = new Path(uri); + + assertTrue("root path should always exist", fs.exists(root)); + assertTrue("getFileStatus on root should always return a directory", + fs.getFileStatus(root).isDirectory()); } @Test @@ -162,8 +169,8 @@ public class ITestS3ABucketExistence extends AbstractS3ATestBase { } @Test - public void testAccessPointProbingV2() throws Exception { - describe("Test V2 bucket probing using an AccessPoint ARN"); + public void testAccessPointProbing2() throws Exception { + describe("Test bucket probing using probe value 2, and an AccessPoint ARN"); Configuration configuration = createArnConfiguration(); String accessPointArn = "arn:aws:s3:eu-west-1:123456789012:accesspoint/" + randomBucket; configuration.set(String.format(InternalConstants.ARN_BUCKET_OPTION, randomBucket), @@ -175,7 +182,7 @@ public class ITestS3ABucketExistence extends AbstractS3ATestBase { @Test public void testAccessPointRequired() throws Exception { - describe("Test V2 bucket probing with 'fs.s3a.accesspoint.required' property."); + describe("Test bucket probing with 'fs.s3a.accesspoint.required' property."); Configuration configuration = createArnConfiguration(); configuration.set(AWS_S3_ACCESSPOINT_REQUIRED, "true"); intercept(PathIOException.class, diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ACannedACLs.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ACannedACLs.java index 1071582cc67..7ffb16833e0 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ACannedACLs.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ACannedACLs.java @@ -20,11 +20,13 @@ package org.apache.hadoop.fs.s3a; import java.util.List; -import com.amazonaws.services.s3.AmazonS3; -import com.amazonaws.services.s3.model.AccessControlList; -import com.amazonaws.services.s3.model.Grant; -import com.amazonaws.services.s3.model.GroupGrantee; -import com.amazonaws.services.s3.model.Permission; +import software.amazon.awssdk.services.s3.S3Client; +import software.amazon.awssdk.services.s3.model.GetObjectAclRequest; +import software.amazon.awssdk.services.s3.model.GetObjectAclResponse; +import software.amazon.awssdk.services.s3.model.Grant; +import software.amazon.awssdk.services.s3.model.Grantee; +import software.amazon.awssdk.services.s3.model.Permission; +import software.amazon.awssdk.services.s3.model.Type; import org.assertj.core.api.Assertions; import org.junit.Test; import org.slf4j.Logger; @@ -55,7 +57,6 @@ public class ITestS3ACannedACLs extends AbstractS3ATestBase { Configuration conf = super.createConfiguration(); removeBaseAndBucketOverrides(conf, CANNED_ACL); - conf.set(CANNED_ACL, LOG_DELIVERY_WRITE); // needed because of direct calls made conf.setBoolean(S3AAuditConstants.REJECT_OUT_OF_SPAN_OPERATIONS, false); @@ -89,18 +90,26 @@ public class ITestS3ACannedACLs extends AbstractS3ATestBase { S3AFileSystem fs = getFileSystem(); StoreContext storeContext = fs.createStoreContext(); - AmazonS3 s3 = fs.getAmazonS3ClientForTesting("acls"); + S3Client s3 = fs.getAmazonS3ClientForTesting("acls"); String key = storeContext.pathToKey(path); if (!isFile) { key = 
key + "/"; } - AccessControlList acl = s3.getObjectAcl(storeContext.getBucket(), - key); - List grants = acl.getGrantsAsList(); + GetObjectAclResponse acl = s3.getObjectAcl(GetObjectAclRequest.builder() + .bucket(storeContext.getBucket()) + .key(key) + .build()); + List grants = acl.grants(); for (Grant grant : grants) { LOG.info("{}", grant.toString()); } - Grant loggingGrant = new Grant(GroupGrantee.LogDelivery, Permission.Write); + Grant loggingGrant = Grant.builder() + .grantee(Grantee.builder() + .type(Type.GROUP) + .uri("http://acs.amazonaws.com/groups/s3/LogDelivery") + .build()) + .permission(Permission.WRITE) + .build(); Assertions.assertThat(grants) .describedAs("ACL grants of object %s", path) .contains(loggingGrant); diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AClientSideEncryptionKms.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AClientSideEncryptionKms.java index bcc37c8bfbb..4f1dcdfd523 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AClientSideEncryptionKms.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AClientSideEncryptionKms.java @@ -21,11 +21,11 @@ package org.apache.hadoop.fs.s3a; import java.io.IOException; import java.util.Map; -import com.amazonaws.services.s3.Headers; import org.assertj.core.api.Assertions; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.s3a.impl.AWSHeaders; import org.apache.hadoop.fs.s3a.impl.HeaderProcessing; import static org.apache.hadoop.fs.s3a.S3ATestUtils.getTestBucketName; @@ -69,14 +69,14 @@ public class ITestS3AClientSideEncryptionKms // Assert KeyWrap Algo assertEquals("Key wrap algo isn't same as expected", KMS_KEY_WRAP_ALGO, processHeader(fsXAttrs, - xAttrPrefix + Headers.CRYPTO_KEYWRAP_ALGORITHM)); + xAttrPrefix + AWSHeaders.CRYPTO_KEYWRAP_ALGORITHM)); // Assert content encryption algo for KMS, is present in the // materials description and KMS key ID isn't. 
String keyId = getS3EncryptionKey(getTestBucketName(getConfiguration()), getConfiguration()); Assertions.assertThat(processHeader(fsXAttrs, - xAttrPrefix + Headers.MATERIALS_DESCRIPTION)) + xAttrPrefix + AWSHeaders.MATERIALS_DESCRIPTION)) .describedAs("Materials Description should contain the content " + "encryption algo and should not contain the KMS keyID.") .contains(KMS_CONTENT_ENCRYPTION_ALGO) diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AConfiguration.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AConfiguration.java index 26d00bc7d35..f7bdaa62422 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AConfiguration.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AConfiguration.java @@ -18,10 +18,17 @@ package org.apache.hadoop.fs.s3a; -import com.amazonaws.ClientConfiguration; -import com.amazonaws.services.s3.AmazonS3; -import com.amazonaws.services.s3.S3ClientOptions; - +import software.amazon.awssdk.core.client.config.SdkClientConfiguration; +import software.amazon.awssdk.core.client.config.SdkClientOption; +import software.amazon.awssdk.core.interceptor.ExecutionAttributes; +import software.amazon.awssdk.core.signer.Signer; +import software.amazon.awssdk.http.SdkHttpFullRequest; +import software.amazon.awssdk.services.s3.S3Client; +import software.amazon.awssdk.services.s3.S3Configuration; +import software.amazon.awssdk.services.s3.model.HeadBucketRequest; +import software.amazon.awssdk.services.s3.model.S3Exception; +import software.amazon.awssdk.services.sts.StsClient; +import software.amazon.awssdk.services.sts.model.StsException; import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.reflect.FieldUtils; @@ -29,9 +36,11 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.contract.ContractTestUtils; +import org.apache.hadoop.fs.s3a.auth.STSClientFactory; import org.apache.hadoop.fs.s3native.S3xLoginHelper; import org.apache.hadoop.test.GenericTestUtils; -import org.junit.Assert; + +import org.assertj.core.api.Assertions; import org.junit.Rule; import org.junit.Test; import org.junit.rules.Timeout; @@ -109,7 +118,7 @@ public class ITestS3AConfiguration { } else { conf.set(Constants.ENDPOINT, endpoint); fs = S3ATestUtils.createTestFileSystem(conf); - AmazonS3 s3 = fs.getAmazonS3ClientForTesting("test endpoint"); + S3Client s3 = fs.getAmazonS3ClientForTesting("test endpoint"); String endPointRegion = ""; // Differentiate handling of "s3-" and "s3." based endpoint identifiers String[] endpointParts = StringUtils.split(endpoint, '.'); @@ -120,8 +129,11 @@ public class ITestS3AConfiguration { } else { fail("Unexpected endpoint"); } + // TODO: review way to get the bucket region. 
+ String region = s3.getBucketLocation(b -> b.bucket(fs.getUri().getHost())) + .locationConstraintAsString(); assertEquals("Endpoint config setting and bucket location differ: ", - endPointRegion, s3.getBucketLocation(fs.getUri().getHost())); + endPointRegion, region); } } @@ -346,22 +358,25 @@ public class ITestS3AConfiguration { try { fs = S3ATestUtils.createTestFileSystem(conf); assertNotNull(fs); - AmazonS3 s3 = fs.getAmazonS3ClientForTesting("configuration"); + S3Client s3 = fs.getAmazonS3ClientForTesting("configuration"); assertNotNull(s3); - S3ClientOptions clientOptions = getField(s3, S3ClientOptions.class, - "clientOptions"); + + SdkClientConfiguration clientConfiguration = getField(s3, SdkClientConfiguration.class, + "clientConfiguration"); + S3Configuration s3Configuration = + (S3Configuration)clientConfiguration.option(SdkClientOption.SERVICE_CONFIGURATION); assertTrue("Expected to find path style access to be switched on!", - clientOptions.isPathStyleAccess()); + s3Configuration.pathStyleAccessEnabled()); byte[] file = ContractTestUtils.toAsciiByteArray("test file"); ContractTestUtils.writeAndRead(fs, new Path("/path/style/access/testFile"), file, file.length, (int) conf.getLongBytes(Constants.FS_S3A_BLOCK_SIZE, file.length), false, true); - } catch (final AWSS3IOException e) { + } catch (final AWSRedirectException e) { LOG.error("Caught exception: ", e); // Catch/pass standard path style access behaviour when live bucket // isn't in the same region as the s3 client default. See // http://docs.aws.amazon.com/AmazonS3/latest/dev/VirtualHosting.html - assertEquals(HttpStatus.SC_MOVED_PERMANENTLY, e.getStatusCode()); + assertEquals(HttpStatus.SC_MOVED_PERMANENTLY, e.statusCode()); } catch (final IllegalArgumentException e) { // Path style addressing does not work with AP ARNs if (!fs.getBucket().contains("arn:")) { @@ -378,12 +393,13 @@ public class ITestS3AConfiguration { conf = new Configuration(); fs = S3ATestUtils.createTestFileSystem(conf); assertNotNull(fs); - AmazonS3 s3 = fs.getAmazonS3ClientForTesting("User Agent"); + S3Client s3 = fs.getAmazonS3ClientForTesting("User Agent"); assertNotNull(s3); - ClientConfiguration awsConf = getField(s3, ClientConfiguration.class, + SdkClientConfiguration clientConfiguration = getField(s3, SdkClientConfiguration.class, "clientConfiguration"); - assertEquals("Hadoop " + VersionInfo.getVersion(), - awsConf.getUserAgentPrefix()); + Assertions.assertThat(clientConfiguration.option(SdkClientOption.CLIENT_USER_AGENT)) + .describedAs("User Agent prefix") + .startsWith("Hadoop " + VersionInfo.getVersion()); } @Test @@ -392,12 +408,13 @@ public class ITestS3AConfiguration { conf.set(Constants.USER_AGENT_PREFIX, "MyApp"); fs = S3ATestUtils.createTestFileSystem(conf); assertNotNull(fs); - AmazonS3 s3 = fs.getAmazonS3ClientForTesting("User agent"); + S3Client s3 = fs.getAmazonS3ClientForTesting("User agent"); assertNotNull(s3); - ClientConfiguration awsConf = getField(s3, ClientConfiguration.class, + SdkClientConfiguration clientConfiguration = getField(s3, SdkClientConfiguration.class, "clientConfiguration"); - assertEquals("MyApp, Hadoop " + VersionInfo.getVersion(), - awsConf.getUserAgentPrefix()); + Assertions.assertThat(clientConfiguration.option(SdkClientOption.CLIENT_USER_AGENT)) + .describedAs("User Agent prefix") + .startsWith("MyApp, Hadoop " + VersionInfo.getVersion()); } @Test @@ -405,16 +422,16 @@ public class ITestS3AConfiguration { conf = new Configuration(); conf.set(REQUEST_TIMEOUT, "120"); fs = 
S3ATestUtils.createTestFileSystem(conf); - AmazonS3 s3 = fs.getAmazonS3ClientForTesting("Request timeout (ms)"); - ClientConfiguration awsConf = getField(s3, ClientConfiguration.class, + S3Client s3 = fs.getAmazonS3ClientForTesting("Request timeout (ms)"); + SdkClientConfiguration clientConfiguration = getField(s3, SdkClientConfiguration.class, "clientConfiguration"); assertEquals("Configured " + REQUEST_TIMEOUT + " is different than what AWS sdk configuration uses internally", - 120000, awsConf.getRequestTimeout()); + 120000, + clientConfiguration.option(SdkClientOption.API_CALL_ATTEMPT_TIMEOUT).toMillis()); } @Test - @SuppressWarnings("deprecation") public void testCloseIdempotent() throws Throwable { conf = new Configuration(); fs = S3ATestUtils.createTestFileSystem(conf); @@ -513,35 +530,74 @@ public class ITestS3AConfiguration { @Test(timeout = 10_000L) public void testS3SpecificSignerOverride() throws IOException { - ClientConfiguration clientConfiguration = null; - Configuration config; + Configuration config = new Configuration(); - String signerOverride = "testSigner"; - String s3SignerOverride = "testS3Signer"; + config.set(CUSTOM_SIGNERS, + "CustomS3Signer:" + CustomS3Signer.class.getName() + ",CustomSTSSigner:" + + CustomSTSSigner.class.getName()); - // Default SIGNING_ALGORITHM, overridden for S3 only - config = new Configuration(); - config.set(SIGNING_ALGORITHM_S3, s3SignerOverride); - clientConfiguration = S3AUtils - .createAwsConf(config, "dontcare", AWS_SERVICE_IDENTIFIER_S3); - Assert.assertEquals(s3SignerOverride, - clientConfiguration.getSignerOverride()); - clientConfiguration = S3AUtils - .createAwsConf(config, "dontcare", AWS_SERVICE_IDENTIFIER_STS); - Assert.assertNull(clientConfiguration.getSignerOverride()); + config.set(SIGNING_ALGORITHM_S3, "CustomS3Signer"); + config.set(SIGNING_ALGORITHM_STS, "CustomSTSSigner"); - // Configured base SIGNING_ALGORITHM, overridden for S3 only - config = new Configuration(); - config.set(SIGNING_ALGORITHM, signerOverride); - config.set(SIGNING_ALGORITHM_S3, s3SignerOverride); - clientConfiguration = S3AUtils - .createAwsConf(config, "dontcare", AWS_SERVICE_IDENTIFIER_S3); - Assert.assertEquals(s3SignerOverride, - clientConfiguration.getSignerOverride()); - clientConfiguration = S3AUtils - .createAwsConf(config, "dontcare", AWS_SERVICE_IDENTIFIER_STS); - Assert - .assertEquals(signerOverride, clientConfiguration.getSignerOverride()); + config.set(AWS_REGION, "eu-west-1"); + fs = S3ATestUtils.createTestFileSystem(config); + + S3Client s3Client = fs.getAmazonS3ClientForTesting("testS3SpecificSignerOverride"); + + StsClient stsClient = + STSClientFactory.builder(config, fs.getBucket(), new AnonymousAWSCredentialsProvider(), "", + "").build(); + + try { + stsClient.getSessionToken(); + } catch (StsException exception) { + // Expected 403, as credentials are not provided. + } + + try { + s3Client.headBucket(HeadBucketRequest.builder().bucket(fs.getBucket()).build()); + } catch (S3Exception exception) { + // Expected 403, as credentials are not provided. 
+ } + + Assertions.assertThat(CustomS3Signer.isS3SignerCalled()) + .describedAs("Custom S3 signer not called").isTrue(); + + Assertions.assertThat(CustomSTSSigner.isSTSSignerCalled()) + .describedAs("Custom STS signer not called").isTrue(); } + public static final class CustomS3Signer implements Signer { + + private static boolean s3SignerCalled = false; + + @Override + public SdkHttpFullRequest sign(SdkHttpFullRequest request, + ExecutionAttributes executionAttributes) { + LOG.debug("Custom S3 signer called"); + s3SignerCalled = true; + return request; + } + + public static boolean isS3SignerCalled() { + return s3SignerCalled; + } + } + + public static final class CustomSTSSigner implements Signer { + + private static boolean stsSignerCalled = false; + + @Override + public SdkHttpFullRequest sign(SdkHttpFullRequest request, + ExecutionAttributes executionAttributes) { + LOG.debug("Custom STS signer called"); + stsSignerCalled = true; + return request; + } + + public static boolean isSTSSignerCalled() { + return stsSignerCalled; + } + } } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEncryptionSSEC.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEncryptionSSEC.java index 64e37bf832b..45b0c6c206f 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEncryptionSSEC.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEncryptionSSEC.java @@ -63,7 +63,7 @@ import static org.apache.hadoop.test.LambdaTestUtils.intercept; public class ITestS3AEncryptionSSEC extends AbstractTestS3AEncryption { private static final String SERVICE_AMAZON_S3_STATUS_CODE_403 - = "Service: Amazon S3; Status Code: 403;"; + = "Service: S3, Status Code: 403"; private static final String KEY_1 = "4niV/jPK5VFRHY+KNb6wtqYd4xXyMgdJ9XQJpcQUVbs="; private static final String KEY_2 diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEncryptionSSEKMSDefaultKey.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEncryptionSSEKMSDefaultKey.java index 68ab5bd9e8c..3a722066414 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEncryptionSSEKMSDefaultKey.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEncryptionSSEKMSDefaultKey.java @@ -20,7 +20,7 @@ package org.apache.hadoop.fs.s3a; import java.io.IOException; -import com.amazonaws.services.s3.model.ObjectMetadata; +import software.amazon.awssdk.services.s3.model.HeadObjectResponse; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; @@ -51,9 +51,9 @@ public class ITestS3AEncryptionSSEKMSDefaultKey @Override protected void assertEncrypted(Path path) throws IOException { - ObjectMetadata md = getFileSystem().getObjectMetadata(path); + HeadObjectResponse md = getFileSystem().getObjectMetadata(path); assertEquals("SSE Algorithm", EncryptionTestUtils.AWS_KMS_SSE_ALGORITHM, - md.getSSEAlgorithm()); - assertThat(md.getSSEAwsKmsKeyId(), containsString("arn:aws:kms:")); + md.serverSideEncryptionAsString()); + assertThat(md.ssekmsKeyId(), containsString("arn:aws:kms:")); } } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEncryptionWithDefaultS3Settings.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEncryptionWithDefaultS3Settings.java index a0fb762d899..41dd820b69b 100644 --- 
a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEncryptionWithDefaultS3Settings.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEncryptionWithDefaultS3Settings.java @@ -118,7 +118,7 @@ public class ITestS3AEncryptionWithDefaultS3Settings extends S3AFileSystem fs = getFileSystem(); Path path = path(getMethodName() + "find-encryption-algo"); ContractTestUtils.touch(fs, path); - String sseAlgorithm = fs.getObjectMetadata(path).getSSEAlgorithm(); + String sseAlgorithm = fs.getObjectMetadata(path).serverSideEncryptionAsString(); if(StringUtils.isBlank(sseAlgorithm) || !sseAlgorithm.equals(AWS_KMS_SSE_ALGORITHM)) { skip("Test bucket is not configured with " + AWS_KMS_SSE_ALGORITHM); diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEndpointRegion.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEndpointRegion.java index add6502d7da..c957ab7b6a4 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEndpointRegion.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEndpointRegion.java @@ -21,21 +21,27 @@ package org.apache.hadoop.fs.s3a; import java.io.IOException; import java.net.URI; import java.net.URISyntaxException; +import java.util.ArrayList; +import java.util.List; -import com.amazonaws.ClientConfiguration; -import com.amazonaws.client.builder.AwsClientBuilder; -import com.amazonaws.services.s3.AmazonS3; -import com.amazonaws.util.AwsHostNameUtils; import org.assertj.core.api.Assertions; +import org.junit.Assert; import org.junit.Test; +import software.amazon.awssdk.awscore.AwsExecutionAttribute; +import software.amazon.awssdk.awscore.exception.AwsServiceException; +import software.amazon.awssdk.core.interceptor.Context; +import software.amazon.awssdk.core.interceptor.ExecutionAttributes; +import software.amazon.awssdk.core.interceptor.ExecutionInterceptor; +import software.amazon.awssdk.services.s3.S3Client; +import software.amazon.awssdk.services.s3.model.HeadBucketRequest; +import software.amazon.awssdk.services.s3.model.S3Exception; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.s3a.statistics.impl.EmptyS3AStatisticsContext; import static org.apache.hadoop.fs.s3a.Constants.AWS_REGION; -import static org.apache.hadoop.fs.s3a.Constants.AWS_S3_CENTRAL_REGION; -import static org.apache.hadoop.fs.s3a.Constants.DEFAULT_ENDPOINT; -import static org.apache.hadoop.fs.s3a.impl.InternalConstants.AWS_REGION_SYSPROP; +import static org.apache.hadoop.fs.s3a.Statistic.STORE_REGION_PROBE; +import static org.apache.hadoop.fs.s3a.impl.InternalConstants.SC_301_MOVED_PERMANENTLY; import static org.apache.hadoop.test.LambdaTestUtils.intercept; /** @@ -44,114 +50,104 @@ import static org.apache.hadoop.test.LambdaTestUtils.intercept; */ public class ITestS3AEndpointRegion extends AbstractS3ATestBase { - private static final String AWS_REGION_TEST = "test-region"; private static final String AWS_ENDPOINT_TEST = "test-endpoint"; - private static final String AWS_ENDPOINT_TEST_WITH_REGION = - "test-endpoint.some-region.amazonaws.com"; - public static final String MARS_NORTH_2 = "mars-north-2"; + /** - * Test to verify that setting a region with the config would bypass the - * construction of region from endpoint. + * Test to verify that not setting the region config, will lead to the client factory making + * a HEAD bucket call to configure the correct region. 
If an incorrect region is set, the HEAD + * bucket call in this test will raise an exception. */ @Test - public void testWithRegionConfig() { - getFileSystem().getConf().set(AWS_REGION, AWS_REGION_TEST); - - //Creating an endpoint config with a custom endpoint. - AwsClientBuilder.EndpointConfiguration epr = createEpr(AWS_ENDPOINT_TEST, - getFileSystem().getConf().getTrimmed(AWS_REGION)); - //Checking if setting region config bypasses the endpoint region. - Assertions.assertThat(epr.getSigningRegion()) - .describedAs("There is a region mismatch") - .isEqualTo(getFileSystem().getConf().get(AWS_REGION)); - } - - /** - * Test to verify that not setting the region config, would lead to using - * endpoint to construct the region. - */ - @Test - public void testWithoutRegionConfig() { - getFileSystem().getConf().unset(AWS_REGION); - - //Creating an endpoint config with a custom endpoint containing a region. - AwsClientBuilder.EndpointConfiguration eprRandom = - createEpr(AWS_ENDPOINT_TEST_WITH_REGION, - getFileSystem().getConf().getTrimmed(AWS_REGION)); - String regionFromEndpoint = - AwsHostNameUtils - .parseRegionFromAwsPartitionPattern(AWS_ENDPOINT_TEST_WITH_REGION); - //Checking if not setting region config leads to constructing the region - // from endpoint. - Assertions.assertThat(eprRandom.getSigningRegion()) - .describedAs("There is a region mismatch") - .isNotEqualTo(getFileSystem().getConf().get(AWS_REGION)) - .isEqualTo(regionFromEndpoint); - } - - /** - * Method to create EndpointConfiguration using an endpoint. - * - * @param endpoint the endpoint to be used for EndpointConfiguration creation. - * @return an instance of EndpointConfiguration. - */ - private AwsClientBuilder.EndpointConfiguration createEpr(String endpoint, - String awsRegion) { - return DefaultS3ClientFactory.createEndpointConfiguration(endpoint, - new ClientConfiguration(), awsRegion); - } - - - @Test - public void testInvalidRegionDefaultEndpoint() throws Throwable { - describe("Create a client with an invalid region and the default endpoint"); + public void testWithoutRegionConfig() throws IOException { Configuration conf = getConfiguration(); - // we are making a big assumption about the timetable for AWS - // region rollout. - // if this test ever fails because this region now exists - // -congratulations! 
- conf.set(AWS_REGION, MARS_NORTH_2); - createMarsNorth2Client(conf); + String bucket = getFileSystem().getBucket(); + conf.unset(String.format("fs.s3a.bucket.%s.endpoint.region", bucket)); + conf.unset(AWS_REGION); + + S3AFileSystem fs = new S3AFileSystem(); + fs.initialize(getFileSystem().getUri(), conf); + + try { + fs.getBucketMetadata(); + } catch (S3Exception exception) { + if (exception.statusCode() == SC_301_MOVED_PERMANENTLY) { + Assert.fail(exception.toString()); + } + } + + Assertions.assertThat(fs.getInstrumentation().getCounterValue(STORE_REGION_PROBE)) + .describedAs("Region is not configured, region probe should have been made").isEqualTo(1); + } + + + @Test + public void testWithRegionConfig() throws IOException, URISyntaxException { + Configuration conf = getConfiguration(); + conf.set(AWS_REGION, "us-east-2"); + + S3AFileSystem fs = new S3AFileSystem(); + fs.initialize(new URI("s3a://landsat-pds"), conf); + + Assertions.assertThat(fs.getInstrumentation().getCounterValue(STORE_REGION_PROBE)) + .describedAs("Region is configured, region probe should not have been made").isEqualTo(0); } @Test - public void testUnsetRegionDefaultEndpoint() throws Throwable { - describe("Create a client with no region and the default endpoint"); + public void testRegionCache() throws IOException, URISyntaxException { Configuration conf = getConfiguration(); conf.unset(AWS_REGION); - createS3Client(conf, DEFAULT_ENDPOINT, AWS_S3_CENTRAL_REGION); + conf.unset("fs.s3a.bucket.landsat-pds.endpoint.region"); + S3AFileSystem fs = new S3AFileSystem(); + + fs.initialize(new URI("s3a://landsat-pds"), conf); + + Assertions.assertThat(fs.getInstrumentation().getCounterValue(STORE_REGION_PROBE)) + .describedAs("Incorrect number of calls made to get bucket region").isEqualTo(1); + + fs.initialize(new URI("s3a://landsat-pds"), conf); + + // value should already be cached. + Assertions.assertThat(fs.getInstrumentation().getCounterValue(STORE_REGION_PROBE)) + .describedAs("Incorrect number of calls made to get bucket region").isEqualTo(0); } - /** - * By setting the system property {@code "aws.region"} we can - * guarantee that the SDK region resolution chain will always succeed - * (and fast). - * Clearly there is no validation of the region during the build process. - */ @Test - public void testBlankRegionTriggersSDKResolution() throws Throwable { - describe("Create a client with a blank region and the default endpoint." - + " This will trigger the SDK Resolution chain"); + public void testEndpointOverride() throws Throwable { + describe("Create a client with no region and the default endpoint"); Configuration conf = getConfiguration(); - conf.set(AWS_REGION, ""); - System.setProperty(AWS_REGION_SYSPROP, MARS_NORTH_2); - try { - createMarsNorth2Client(conf); - } finally { - System.clearProperty(AWS_REGION_SYSPROP); - } + + S3Client client = createS3Client(conf, AWS_ENDPOINT_TEST); + + intercept(AwsServiceException.class, "Exception thrown by interceptor", () -> client.headBucket( + HeadBucketRequest.builder().bucket(getFileSystem().getBucket()).build())); } - /** - * Create an S3 client bonded to an invalid region; - * verify that calling {@code getRegion()} triggers - * a failure. - * @param conf configuration to use in the building. 
- */ - private void createMarsNorth2Client(Configuration conf) throws Exception { - AmazonS3 client = createS3Client(conf, DEFAULT_ENDPOINT, MARS_NORTH_2); - intercept(IllegalArgumentException.class, MARS_NORTH_2, client::getRegion); + + class RegionInterceptor implements ExecutionInterceptor { + private boolean endpointOverridden; + + RegionInterceptor(boolean endpointOverridden) { + this.endpointOverridden = endpointOverridden; + } + + @Override + public void beforeExecution(Context.BeforeExecution context, + ExecutionAttributes executionAttributes) { + + if (endpointOverridden) { + Assertions.assertThat( + executionAttributes.getAttribute(AwsExecutionAttribute.ENDPOINT_OVERRIDDEN)) + .describedAs("Endpoint not overridden").isTrue(); + + Assertions.assertThat( + executionAttributes.getAttribute(AwsExecutionAttribute.CLIENT_ENDPOINT).toString()) + .describedAs("There is an endpoint mismatch").isEqualTo("https://" + AWS_ENDPOINT_TEST); + } + + // We don't actually want to make a request, so exit early. + throw AwsServiceException.builder().message("Exception thrown by interceptor").build(); + } } /** @@ -160,16 +156,23 @@ public class ITestS3AEndpointRegion extends AbstractS3ATestBase { * value. * @param conf configuration to use. * @param endpoint endpoint. - * @param expectedRegion expected region * @return the client. * @throws URISyntaxException parse problems. * @throws IOException IO problems */ @SuppressWarnings("deprecation") - private AmazonS3 createS3Client(Configuration conf, - String endpoint, - String expectedRegion) - throws URISyntaxException, IOException { + private S3Client createS3Client(Configuration conf, + String endpoint) + throws IOException { + + boolean endpointOverridden = false; + + if (endpoint != null && !endpoint.isEmpty()) { + endpointOverridden = true; + } + + List interceptors = new ArrayList<>(); + interceptors.add(new RegionInterceptor(endpointOverridden)); DefaultS3ClientFactory factory = new DefaultS3ClientFactory(); @@ -177,16 +180,14 @@ public class ITestS3AEndpointRegion extends AbstractS3ATestBase { S3ClientFactory.S3ClientCreationParameters parameters = new S3ClientFactory.S3ClientCreationParameters() .withCredentialSet(new AnonymousAWSCredentialsProvider()) - .withPathUri(new URI("s3a://localhost/")) .withEndpoint(endpoint) .withMetrics(new EmptyS3AStatisticsContext() - .newStatisticsFromAwsSdk()); - AmazonS3 client = factory.createS3Client( - new URI("s3a://localhost/"), + .newStatisticsFromAwsSdk()) + .withExecutionInterceptors(interceptors); + + S3Client client = factory.createS3Client( + getFileSystem().getUri(), parameters); - Assertions.assertThat(client.getRegionName()) - .describedAs("Client region name") - .isEqualTo(expectedRegion); return client; } } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AFailureHandling.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AFailureHandling.java index c0f6a4b2322..a741b11b0ce 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AFailureHandling.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AFailureHandling.java @@ -18,7 +18,8 @@ package org.apache.hadoop.fs.s3a; -import com.amazonaws.services.s3.model.DeleteObjectsRequest; +import software.amazon.awssdk.services.s3.model.ObjectIdentifier; +import software.amazon.awssdk.services.s3.model.S3Error; import org.assertj.core.api.Assertions; import org.junit.Assume; @@ -27,6 +28,7 @@ import org.apache.hadoop.conf.Configuration; 
import org.apache.hadoop.fs.LocatedFileStatus; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.RemoteIterator; +import org.apache.hadoop.fs.s3a.impl.MultiObjectDeleteException; import org.apache.hadoop.fs.statistics.StoreStatisticNames; import org.apache.hadoop.fs.store.audit.AuditSpan; @@ -34,10 +36,12 @@ import org.junit.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; + import java.io.IOException; import java.util.ArrayList; import java.util.List; import java.nio.file.AccessDeniedException; +import java.util.stream.Collectors; import static org.apache.hadoop.fs.contract.ContractTestUtils.*; import static org.apache.hadoop.fs.s3a.S3ATestUtils.createFiles; @@ -115,12 +119,12 @@ public class ITestS3AFailureHandling extends AbstractS3ATestBase { } } - private List buildDeleteRequest( + private List buildDeleteRequest( final String[] keys) { - List request = new ArrayList<>( + List request = new ArrayList<>( keys.length); for (String key : keys) { - request.add(new DeleteObjectsRequest.KeyVersion(key)); + request.add(ObjectIdentifier.builder().key(key).build()); } return request; } @@ -156,12 +160,26 @@ public class ITestS3AFailureHandling extends AbstractS3ATestBase { // create a span, expect it to be activated. fs.getAuditSpanSource().createSpan(StoreStatisticNames.OP_DELETE, csvPath.toString(), null); - List keys + List keys = buildDeleteRequest( new String[]{ fs.pathToKey(csvPath), "missing-key.csv" }); + MultiObjectDeleteException ex = intercept( + MultiObjectDeleteException.class, + () -> fs.removeKeys(keys, false)); + final List undeleted = ex.errors().stream() + .map(S3Error::key) + .map(fs::keyToQualifiedPath) + .collect(Collectors.toList()); + final String undeletedFiles = undeleted.stream() + .map(Path::toString) + .collect(Collectors.joining(", ")); + failIf(undeleted.size() != 2, + "undeleted list size wrong: " + undeletedFiles, + ex); + assertTrue("no CSV in " +undeletedFiles, undeleted.contains(csvPath)); } /** diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AMiscOperations.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AMiscOperations.java index 1a944ec2996..6e85f6bc783 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AMiscOperations.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AMiscOperations.java @@ -25,19 +25,22 @@ import java.net.URI; import java.nio.charset.StandardCharsets; import java.nio.file.AccessDeniedException; -import com.amazonaws.services.s3.AmazonS3; -import com.amazonaws.services.s3.model.GetBucketEncryptionResult; -import com.amazonaws.services.s3.model.ObjectMetadata; -import com.amazonaws.services.s3.model.PutObjectRequest; +import software.amazon.awssdk.services.s3.S3Client; +import software.amazon.awssdk.services.s3.model.GetBucketEncryptionRequest; +import software.amazon.awssdk.services.s3.model.GetBucketEncryptionResponse; +import software.amazon.awssdk.services.s3.model.PutObjectRequest; import org.assertj.core.api.Assertions; import org.junit.Assume; import org.junit.Test; + import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.CommonPathCapabilities; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.s3a.api.RequestFactory; import org.apache.hadoop.fs.s3a.impl.PutObjectOptions; +import org.apache.hadoop.fs.s3a.impl.RequestFactoryImpl; import org.apache.hadoop.fs.store.audit.AuditSpan; import 
org.apache.hadoop.fs.store.EtagChecksum; import org.apache.hadoop.test.LambdaTestUtils; @@ -106,15 +109,15 @@ public class ITestS3AMiscOperations extends AbstractS3ATestBase { public void testPutObjectDirect() throws Throwable { final S3AFileSystem fs = getFileSystem(); try (AuditSpan span = span()) { - ObjectMetadata metadata = fs.newObjectMetadata(-1); - metadata.setContentLength(-1); + RequestFactory factory = RequestFactoryImpl.builder().withBucket(fs.getBucket()).build(); Path path = path("putDirect"); - final PutObjectRequest put = new PutObjectRequest(fs.getBucket(), - path.toUri().getPath(), - new ByteArrayInputStream("PUT".getBytes()), - metadata); + PutObjectRequest.Builder putObjectRequestBuilder = + factory.newPutObjectRequestBuilder(path.toUri().getPath(), null, -1, false); + putObjectRequestBuilder.contentLength(-1L); LambdaTestUtils.intercept(IllegalStateException.class, - () -> fs.putObjectDirect(put, PutObjectOptions.keepingDirs(), null)); + () -> fs.putObjectDirect(putObjectRequestBuilder.build(), PutObjectOptions.keepingDirs(), + new S3ADataBlocks.BlockUploadData(new ByteArrayInputStream("PUT".getBytes())), + false, null)); assertPathDoesNotExist("put object was created", path); } } @@ -406,13 +409,15 @@ public class ITestS3AMiscOperations extends AbstractS3ATestBase { * Gets default encryption settings for the bucket or returns null if default * encryption is disabled. */ - private GetBucketEncryptionResult getDefaultEncryption() throws IOException { + private GetBucketEncryptionResponse getDefaultEncryption() throws IOException { S3AFileSystem fs = getFileSystem(); - AmazonS3 s3 = fs.getAmazonS3ClientForTesting("check default encryption"); + S3Client s3 = fs.getAmazonS3ClientForTesting("check default encryption"); try { return Invoker.once("getBucketEncryption()", fs.getBucket(), - () -> s3.getBucketEncryption(fs.getBucket())); + () -> s3.getBucketEncryption(GetBucketEncryptionRequest.builder() + .bucket(fs.getBucket()) + .build())); } catch (FileNotFoundException e) { return null; } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AMultipartUtils.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AMultipartUtils.java index 818d2fc889c..197811f39fb 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AMultipartUtils.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AMultipartUtils.java @@ -18,13 +18,14 @@ package org.apache.hadoop.fs.s3a; -import com.amazonaws.services.s3.model.MultipartUpload; +import software.amazon.awssdk.services.s3.model.MultipartUpload; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.store.audit.AuditSpan; import org.junit.Test; + import java.io.IOException; import java.util.HashSet; import java.util.Set; @@ -114,7 +115,7 @@ public class ITestS3AMultipartUtils extends AbstractS3ATestBase { } private MultipartTestUtils.IdKey toIdKey(MultipartUpload mu) { - return new MultipartTestUtils.IdKey(mu.getKey(), mu.getUploadId()); + return new MultipartTestUtils.IdKey(mu.key(), mu.uploadId()); } private Path getPartFilename(int index) throws IOException { diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ARequesterPays.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ARequesterPays.java index d3925d35a99..5b6ea46cd8a 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ARequesterPays.java 
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ARequesterPays.java @@ -107,7 +107,7 @@ public class ITestS3ARequesterPays extends AbstractS3ATestBase { try (FileSystem fs = requesterPaysPath.getFileSystem(conf)) { intercept( AccessDeniedException.class, - "403 Forbidden", + "403", "Expected requester pays bucket to fail without header set", () -> fs.open(requesterPaysPath).close() ); diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AStorageClass.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AStorageClass.java index 7c56f8d2ea0..6ccb7ac2604 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AStorageClass.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AStorageClass.java @@ -37,7 +37,6 @@ import org.apache.hadoop.fs.contract.s3a.S3AContract; import static org.apache.hadoop.fs.s3a.Constants.FAST_UPLOAD_BUFFER; import static org.apache.hadoop.fs.s3a.Constants.FAST_UPLOAD_BUFFER_ARRAY; import static org.apache.hadoop.fs.s3a.Constants.FAST_UPLOAD_BUFFER_DISK; -import static org.apache.hadoop.fs.s3a.Constants.FAST_UPLOAD_BYTEBUFFER; import static org.apache.hadoop.fs.s3a.Constants.STORAGE_CLASS; import static org.apache.hadoop.fs.s3a.Constants.STORAGE_CLASS_GLACIER; import static org.apache.hadoop.fs.s3a.Constants.STORAGE_CLASS_REDUCED_REDUNDANCY; diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ATemporaryCredentials.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ATemporaryCredentials.java index 0778662542d..35bb709f659 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ATemporaryCredentials.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ATemporaryCredentials.java @@ -25,10 +25,9 @@ import java.time.Duration; import java.time.OffsetDateTime; import java.util.concurrent.TimeUnit; -import com.amazonaws.ClientConfiguration; -import com.amazonaws.services.securitytoken.AWSSecurityTokenService; -import com.amazonaws.services.securitytoken.AWSSecurityTokenServiceClientBuilder; -import com.amazonaws.services.securitytoken.model.Credentials; +import software.amazon.awssdk.services.sts.StsClient; +import software.amazon.awssdk.services.sts.StsClientBuilder; +import software.amazon.awssdk.services.sts.model.Credentials; import org.hamcrest.Matchers; import org.junit.Test; import org.slf4j.Logger; @@ -71,7 +70,6 @@ public class ITestS3ATemporaryCredentials extends AbstractS3ATestBase { private static final Logger LOG = LoggerFactory.getLogger(ITestS3ATemporaryCredentials.class); - @SuppressWarnings("deprecation") private static final String TEMPORARY_AWS_CREDENTIALS = TemporaryAWSCredentialsProvider.NAME; @@ -120,7 +118,7 @@ public class ITestS3ATemporaryCredentials extends AbstractS3ATestBase { credentials = testFS.shareCredentials("testSTS"); String bucket = testFS.getBucket(); - AWSSecurityTokenServiceClientBuilder builder = STSClientFactory.builder( + StsClientBuilder builder = STSClientFactory.builder( conf, bucket, credentials, @@ -154,7 +152,7 @@ public class ITestS3ATemporaryCredentials extends AbstractS3ATestBase { // now create an invalid set of credentials by changing the session // token - conf2.set(SESSION_TOKEN, "invalid-" + sessionCreds.getSessionToken()); + conf2.set(SESSION_TOKEN, "invalid-" + sessionCreds.sessionToken()); try (S3AFileSystem fs = S3ATestUtils.createTestFileSystem(conf2)) { 
createAndVerifyFile(fs, path("testSTSInvalidToken"), TEST_FILE_SIZE); fail("Expected an access exception, but file access to " @@ -176,14 +174,13 @@ public class ITestS3ATemporaryCredentials extends AbstractS3ATestBase { } @Test - @SuppressWarnings("deprecation") public void testTemporaryCredentialValidation() throws Throwable { Configuration conf = new Configuration(); conf.set(ACCESS_KEY, "accesskey"); conf.set(SECRET_KEY, "secretkey"); conf.set(SESSION_TOKEN, ""); LambdaTestUtils.intercept(CredentialInitializationException.class, - () -> new TemporaryAWSCredentialsProvider(conf).getCredentials()); + () -> new TemporaryAWSCredentialsProvider(conf).resolveCredentials()); } /** @@ -360,7 +357,6 @@ public class ITestS3ATemporaryCredentials extends AbstractS3ATestBase { * @return the caught exception. * @throws Exception any unexpected exception. */ - @SuppressWarnings("deprecation") public E expectedSessionRequestFailure( final Class clazz, final String endpoint, @@ -370,15 +366,15 @@ public class ITestS3ATemporaryCredentials extends AbstractS3ATestBase { getFileSystem().shareCredentials("test"); DurationInfo ignored = new DurationInfo(LOG, "requesting credentials")) { Configuration conf = new Configuration(getContract().getConf()); - ClientConfiguration awsConf = - S3AUtils.createAwsConf(conf, null, AWS_SERVICE_IDENTIFIER_STS); + return intercept(clazz, exceptionText, () -> { - AWSSecurityTokenService tokenService = + StsClient tokenService = STSClientFactory.builder(parentCreds, - awsConf, + conf, endpoint, - region) + region, + getFileSystem().getBucket()) .build(); Invoker invoker = new Invoker(new S3ARetryPolicy(conf), LOG_AT_ERROR); diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/MockS3AFileSystem.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/MockS3AFileSystem.java index 658ceb49173..adc4ef4c7c8 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/MockS3AFileSystem.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/MockS3AFileSystem.java @@ -21,9 +21,9 @@ package org.apache.hadoop.fs.s3a; import java.io.IOException; import java.net.URI; -import com.amazonaws.AmazonClientException; -import com.amazonaws.AmazonWebServiceRequest; -import com.amazonaws.services.s3.AmazonS3; +import software.amazon.awssdk.core.SdkRequest; +import software.amazon.awssdk.core.exception.SdkException; +import software.amazon.awssdk.services.s3.S3Client; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -51,6 +51,7 @@ import org.apache.hadoop.fs.s3a.test.MinimalWriteOperationHelperCallbacks; import org.apache.hadoop.fs.statistics.DurationTrackerFactory; import org.apache.hadoop.util.Progressable; + import static org.apache.hadoop.fs.s3a.audit.AuditTestSupport.noopAuditor; import static org.apache.hadoop.fs.statistics.IOStatisticsSupport.stubDurationTrackerFactory; import static org.apache.hadoop.thirdparty.com.google.common.base.Preconditions.checkNotNull; @@ -116,9 +117,7 @@ public class MockS3AFileSystem extends S3AFileSystem { root = new Path(FS_URI.toString()); } - private static T prepareRequest(T t) { - return t; - } + private static void prepareRequest(SdkRequest.Builder t) {} @Override public RequestFactory getRequestFactory() { @@ -210,7 +209,7 @@ public class MockS3AFileSystem extends S3AFileSystem { * @param client client. 
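For context on the STS changes above: with the v2 SDK, session credentials are requested through StsClient and read back through fluent accessors such as sessionToken(). A minimal, self-contained sketch of that flow (the region, duration and printed field are illustrative; the production path goes through STSClientFactory as the diff shows):

import software.amazon.awssdk.regions.Region;
import software.amazon.awssdk.services.sts.StsClient;
import software.amazon.awssdk.services.sts.model.Credentials;
import software.amazon.awssdk.services.sts.model.GetSessionTokenRequest;

public class StsSessionTokenSketch {
  public static void main(String[] args) {
    // Region is a placeholder; credentials come from the default provider chain.
    try (StsClient sts = StsClient.builder().region(Region.US_WEST_2).build()) {
      Credentials creds = sts.getSessionToken(
          GetSessionTokenRequest.builder().durationSeconds(900).build())
          .credentials();
      // v2 accessors: sessionToken() rather than getSessionToken().
      System.out.println(creds.sessionToken());
    }
  }
}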
*/ @Override - public void setAmazonS3Client(AmazonS3 client) { + public void setAmazonS3Client(S3Client client) { LOG.debug("Setting S3 client to {}", client); super.setAmazonS3Client(client); } @@ -353,13 +352,13 @@ public class MockS3AFileSystem extends S3AFileSystem { void deleteObjectAtPath(Path f, String key, boolean isFile) - throws AmazonClientException, IOException { + throws SdkException, IOException { deleteObject(key); } @Override protected void maybeCreateFakeParentDirectory(Path path) - throws IOException, AmazonClientException { + throws IOException, SdkException { // no-op } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/MockS3ClientFactory.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/MockS3ClientFactory.java index 3240309aef9..0c61caacd05 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/MockS3ClientFactory.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/MockS3ClientFactory.java @@ -23,32 +23,47 @@ import static org.mockito.Mockito.*; import java.net.URI; import java.util.ArrayList; -import com.amazonaws.services.s3.AmazonS3; -import com.amazonaws.services.s3.model.MultipartUploadListing; -import com.amazonaws.services.s3.model.Region; +import software.amazon.awssdk.regions.Region; +import software.amazon.awssdk.services.s3.S3AsyncClient; +import software.amazon.awssdk.services.s3.S3Client; +import software.amazon.awssdk.services.s3.model.GetBucketLocationRequest; +import software.amazon.awssdk.services.s3.model.GetBucketLocationResponse; +import software.amazon.awssdk.services.s3.model.ListMultipartUploadsRequest; +import software.amazon.awssdk.services.s3.model.ListMultipartUploadsResponse; +import software.amazon.awssdk.transfer.s3.S3TransferManager; /** - * An {@link S3ClientFactory} that returns Mockito mocks of the {@link AmazonS3} + * An {@link S3ClientFactory} that returns Mockito mocks of the {@link S3Client} * interface suitable for unit testing. 
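The factory below shows the main consequence of the upgrade for mock-based tests: v2 model objects are immutable, so stubbed responses are created with builders instead of setters. A condensed, standalone sketch of the same stubbing pattern (bucket, key and sizes are placeholders):

import static org.mockito.ArgumentMatchers.any;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;

import java.time.Instant;

import software.amazon.awssdk.services.s3.S3Client;
import software.amazon.awssdk.services.s3.model.HeadObjectRequest;
import software.amazon.awssdk.services.s3.model.HeadObjectResponse;

public class HeadObjectStubSketch {
  public static void main(String[] args) {
    S3Client s3 = mock(S3Client.class);
    // Build the immutable response once; there is no setContentLength() in v2.
    HeadObjectResponse meta = HeadObjectResponse.builder()
        .contentLength(1L)
        .lastModified(Instant.ofEpochMilli(2L))
        .build();
    when(s3.headObject(any(HeadObjectRequest.class))).thenReturn(meta);
    System.out.println(s3.headObject(HeadObjectRequest.builder()
        .bucket("example-bucket").key("file").build()).contentLength());
  }
}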
*/ -@SuppressWarnings("deprecation") public class MockS3ClientFactory implements S3ClientFactory { + @Override - public AmazonS3 createS3Client(URI uri, - final S3ClientCreationParameters parameters) { - AmazonS3 s3 = mock(AmazonS3.class); - String bucket = uri.getHost(); - when(s3.doesBucketExist(bucket)).thenReturn(true); - when(s3.doesBucketExistV2(bucket)).thenReturn(true); + public S3Client createS3Client(URI uri, final S3ClientCreationParameters parameters) { + S3Client s3 = mock(S3Client.class); // this listing is used in startup if purging is enabled, so // return a stub value - MultipartUploadListing noUploads = new MultipartUploadListing(); - noUploads.setMultipartUploads(new ArrayList<>(0)); - when(s3.listMultipartUploads(any())) - .thenReturn(noUploads); - when(s3.getBucketLocation(anyString())) - .thenReturn(Region.US_West.toString()); + ListMultipartUploadsResponse noUploads = ListMultipartUploadsResponse.builder() + .uploads(new ArrayList<>(0)) + .isTruncated(false) + .build(); + when(s3.listMultipartUploads((ListMultipartUploadsRequest) any())).thenReturn(noUploads); + when(s3.getBucketLocation((GetBucketLocationRequest) any())).thenReturn( + GetBucketLocationResponse.builder().locationConstraint(Region.US_WEST_2.toString()) + .build()); return s3; } + + @Override + public S3AsyncClient createS3AsyncClient(URI uri, final S3ClientCreationParameters parameters) { + S3AsyncClient s3 = mock(S3AsyncClient.class); + return s3; + } + + @Override + public S3TransferManager createS3TransferManager(S3AsyncClient s3AsyncClient) { + S3TransferManager tm = mock(S3TransferManager.class); + return tm; + } } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/MultipartTestUtils.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/MultipartTestUtils.java index 1ddff3c4cd5..2b7620ddbdd 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/MultipartTestUtils.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/MultipartTestUtils.java @@ -18,9 +18,10 @@ package org.apache.hadoop.fs.s3a; -import com.amazonaws.services.s3.model.MultipartUpload; -import com.amazonaws.services.s3.model.PartETag; -import com.amazonaws.services.s3.model.UploadPartRequest; +import software.amazon.awssdk.core.sync.RequestBody; +import software.amazon.awssdk.services.s3.model.MultipartUpload; +import software.amazon.awssdk.services.s3.model.UploadPartRequest; +import software.amazon.awssdk.services.s3.model.UploadPartResponse; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.s3a.impl.PutObjectOptions; import org.apache.hadoop.fs.store.audit.AuditSpan; @@ -33,6 +34,7 @@ import org.slf4j.LoggerFactory; import java.io.ByteArrayInputStream; import java.io.IOException; import java.io.InputStream; +import java.util.Date; import java.util.List; import java.util.Objects; import java.util.Set; @@ -80,10 +82,11 @@ public final class MultipartTestUtils { byte[] data = dataset(len, 'a', 'z'); InputStream in = new ByteArrayInputStream(data); String uploadId = writeHelper.initiateMultiPartUpload(key, PutObjectOptions.keepingDirs()); - UploadPartRequest req = writeHelper.newUploadPartRequest(key, uploadId, - partNo, len, in, null, 0L); - PartETag partEtag = writeHelper.uploadPart(req, null).getPartETag(); - LOG.debug("uploaded part etag {}, upid {}", partEtag.getETag(), uploadId); + UploadPartRequest req = writeHelper.newUploadPartRequestBuilder(key, uploadId, + partNo, len).build(); + RequestBody body = RequestBody.fromInputStream(in, 
len); + UploadPartResponse response = writeHelper.uploadPart(req, body, null); + LOG.debug("uploaded part etag {}, upid {}", response.eTag(), uploadId); return new IdKey(key, uploadId); } } @@ -99,10 +102,10 @@ public final class MultipartTestUtils { = fs.getWriteOperationHelper(); while (uploads.hasNext()) { MultipartUpload upload = uploads.next(); - LOG.debug("Cleaning up upload: {} {}", upload.getKey(), - truncatedUploadId(upload.getUploadId())); - helper.abortMultipartUpload(upload.getKey(), - upload.getUploadId(), true, LOG_EVENT); + LOG.debug("Cleaning up upload: {} {}", upload.key(), + truncatedUploadId(upload.uploadId())); + helper.abortMultipartUpload(upload.key(), + upload.uploadId(), true, LOG_EVENT); } } catch (IOException ioe) { LOG.info("Ignoring exception: ", ioe); @@ -118,8 +121,8 @@ public final class MultipartTestUtils { MultipartUtils.UploadIterator uploads = fs.listUploads(key); while (uploads.hasNext()) { MultipartUpload upload = uploads.next(); - Assert.fail("Found unexpected upload " + upload.getKey() + " " + - truncatedUploadId(upload.getUploadId())); + Assert.fail("Found unexpected upload " + upload.key() + " " + + truncatedUploadId(upload.uploadId())); } } @@ -149,9 +152,9 @@ public final class MultipartTestUtils { return fs .listMultipartUploads(prefix).stream() .map(upload -> String.format("Upload to %s with ID %s; initiated %s", - upload.getKey(), - upload.getUploadId(), - S3ATestUtils.LISTING_FORMAT.format(upload.getInitiated()))) + upload.key(), + upload.uploadId(), + S3ATestUtils.LISTING_FORMAT.format(Date.from(upload.initiated())))) .collect(Collectors.toList()); } } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3ATestUtils.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3ATestUtils.java index 469562f9b33..6191687c57e 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3ATestUtils.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3ATestUtils.java @@ -61,7 +61,7 @@ import org.apache.hadoop.util.ReflectionUtils; import org.apache.hadoop.util.functional.CallableRaisingIOE; import org.apache.hadoop.util.functional.FutureIO; -import com.amazonaws.auth.AWSCredentialsProvider; +import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider; import org.assertj.core.api.Assertions; import org.junit.Assert; import org.junit.Assume; @@ -612,8 +612,7 @@ public final class S3ATestUtils { * @return a set of credentials * @throws IOException on a failure */ - @SuppressWarnings("deprecation") - public static AWSCredentialsProvider buildAwsCredentialsProvider( + public static AwsCredentialsProvider buildAwsCredentialsProvider( final Configuration conf) throws IOException { assumeSessionTestsEnabled(conf); @@ -668,13 +667,14 @@ public final class S3ATestUtils { MarshalledCredentials sc = MarshalledCredentialBinding .requestSessionCredentials( buildAwsCredentialsProvider(conf), - S3AUtils.createAwsConf(conf, bucket, AWS_SERVICE_IDENTIFIER_STS), + conf, conf.getTrimmed(ASSUMED_ROLE_STS_ENDPOINT, DEFAULT_ASSUMED_ROLE_STS_ENDPOINT), conf.getTrimmed(ASSUMED_ROLE_STS_ENDPOINT_REGION, ASSUMED_ROLE_STS_ENDPOINT_REGION_DEFAULT), duration, - new Invoker(new S3ARetryPolicy(conf), Invoker.LOG_EVENT)); + new Invoker(new S3ARetryPolicy(conf), Invoker.LOG_EVENT), + bucket); sc.validate("requested session credentials: ", MarshalledCredentials.CredentialTypeRequired.SessionOnly); return sc; diff --git 
a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestArnResource.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestArnResource.java index 36381bf14b1..8cf8d2db67d 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestArnResource.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestArnResource.java @@ -18,12 +18,13 @@ package org.apache.hadoop.fs.s3a; -import com.amazonaws.regions.Regions; +import software.amazon.awssdk.regions.Region; import org.assertj.core.api.Assertions; import org.junit.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; + import org.apache.hadoop.test.HadoopTestBase; import static org.apache.hadoop.test.LambdaTestUtils.intercept; @@ -47,9 +48,9 @@ public class TestArnResource extends HadoopTestBase { String accessPoint = "testAp"; String[][] regionPartitionEndpoints = new String[][] { - {Regions.EU_WEST_1.getName(), "aws"}, - {Regions.US_GOV_EAST_1.getName(), "aws-us-gov"}, - {Regions.CN_NORTH_1.getName(), "aws-cn"}, + {Region.EU_WEST_1.id(), "aws"}, + {Region.US_GOV_EAST_1.id(), "aws-us-gov"}, + {Region.CN_NORTH_1.id(), "aws-cn"}, }; for (String[] testPair : regionPartitionEndpoints) { diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestInvoker.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestInvoker.java index 35199f40927..0ac49812e4c 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestInvoker.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestInvoker.java @@ -22,14 +22,14 @@ import java.io.EOFException; import java.io.IOException; import java.io.InterruptedIOException; import java.net.SocketTimeoutException; +import java.util.concurrent.CompletionException; import java.util.concurrent.ExecutionException; import java.util.concurrent.atomic.AtomicInteger; -import com.amazonaws.AmazonClientException; -import com.amazonaws.AmazonServiceException; -import com.amazonaws.SdkBaseException; -import com.amazonaws.SdkClientException; -import com.amazonaws.services.s3.model.AmazonS3Exception; +import software.amazon.awssdk.awscore.exception.AwsServiceException; +import software.amazon.awssdk.core.exception.SdkClientException; +import software.amazon.awssdk.core.exception.SdkException; +import software.amazon.awssdk.services.s3.model.S3Exception; import org.junit.Assert; import org.junit.Before; import org.junit.Test; @@ -38,6 +38,7 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.io.retry.RetryPolicy; import org.apache.hadoop.net.ConnectTimeoutException; + import static org.apache.hadoop.fs.s3a.Constants.*; import static org.apache.hadoop.fs.s3a.Invoker.*; import static org.apache.hadoop.fs.s3a.S3ATestUtils.verifyExceptionClass; @@ -98,9 +99,11 @@ public class TestInvoker extends Assert { private int retryCount; private Invoker invoker = new Invoker(RETRY_POLICY, (text, e, retries, idempotent) -> retryCount++); - private static final AmazonClientException CLIENT_TIMEOUT_EXCEPTION = - new AmazonClientException(new Local.ConnectTimeoutException("timeout")); - private static final AmazonServiceException BAD_REQUEST = serviceException( + private static final SdkException CLIENT_TIMEOUT_EXCEPTION = + SdkException.builder() + .cause(new Local.ConnectTimeoutException("timeout")) + .build(); + private static final AwsServiceException BAD_REQUEST = serviceException( AWSBadRequestException.STATUS_CODE, "bad request"); @@ -109,24 +112,26 @@ public class 
TestInvoker extends Assert { resetCounters(); } - private static AmazonServiceException serviceException(int code, + private static AwsServiceException serviceException(int code, String text) { - AmazonServiceException ex = new AmazonServiceException(text); - ex.setStatusCode(code); - return ex; + return AwsServiceException.builder() + .message(text) + .statusCode(code) + .build(); } - private static AmazonS3Exception createS3Exception(int code) { + private static S3Exception createS3Exception(int code) { return createS3Exception(code, "", null); } - private static AmazonS3Exception createS3Exception(int code, + private static S3Exception createS3Exception(int code, String message, Throwable inner) { - AmazonS3Exception ex = new AmazonS3Exception(message); - ex.setStatusCode(code); - ex.initCause(inner); - return ex; + return (S3Exception) S3Exception.builder() + .message(message) + .statusCode(code) + .cause(inner) + .build(); } protected void verifyTranslated( @@ -136,7 +141,7 @@ public class TestInvoker extends Assert { } private static E verifyTranslated(Class clazz, - SdkBaseException exception) throws Exception { + SdkException exception) throws Exception { return verifyExceptionClass(clazz, translateException("test", "/", exception)); } @@ -157,16 +162,22 @@ public class TestInvoker extends Assert { @Test public void test500isStatus500Exception() throws Exception { - AmazonServiceException ex = new AmazonServiceException(""); - ex.setStatusCode(500); + AwsServiceException ex = AwsServiceException.builder() + .message("") + .statusCode(500) + .build(); verifyTranslated(AWSStatus500Exception.class, ex); } @Test public void testExceptionsWithTranslatableMessage() throws Exception { - SdkBaseException xmlParsing = new SdkBaseException(EOF_MESSAGE_IN_XML_PARSER); - SdkBaseException differentLength = new SdkBaseException(EOF_READ_DIFFERENT_LENGTH); + SdkException xmlParsing = SdkException.builder() + .message(EOF_MESSAGE_IN_XML_PARSER) + .build(); + SdkException differentLength = SdkException.builder() + .message(EOF_READ_DIFFERENT_LENGTH) + .build(); verifyTranslated(EOFException.class, xmlParsing); verifyTranslated(EOFException.class, differentLength); @@ -178,7 +189,9 @@ public class TestInvoker extends Assert { final AtomicInteger counter = new AtomicInteger(0); invoker.retry("test", null, false, () -> { if (counter.incrementAndGet() < ACTIVE_RETRY_LIMIT) { - throw new SdkClientException(EOF_READ_DIFFERENT_LENGTH); + throw SdkClientException.builder() + .message(EOF_READ_DIFFERENT_LENGTH) + .build(); } }); @@ -190,7 +203,9 @@ public class TestInvoker extends Assert { final AtomicInteger counter = new AtomicInteger(0); invoker.retry("test", null, false, () -> { if (counter.incrementAndGet() < ACTIVE_RETRY_LIMIT) { - throw new SdkClientException(EOF_MESSAGE_IN_XML_PARSER); + throw SdkClientException.builder() + .message(EOF_MESSAGE_IN_XML_PARSER) + .build(); } }); @@ -201,14 +216,36 @@ public class TestInvoker extends Assert { public void testExtractConnectTimeoutException() throws Throwable { throw extractException("", "", new ExecutionException( - new AmazonClientException(LOCAL_CONNECTION_TIMEOUT_EX))); + SdkException.builder() + .cause(LOCAL_CONNECTION_TIMEOUT_EX) + .build())); } @Test(expected = SocketTimeoutException.class) public void testExtractSocketTimeoutException() throws Throwable { throw extractException("", "", new ExecutionException( - new AmazonClientException(SOCKET_TIMEOUT_EX))); + SdkException.builder() + .cause(SOCKET_TIMEOUT_EX) + .build())); + } + + @Test(expected 
= org.apache.hadoop.net.ConnectTimeoutException.class) + public void testExtractConnectTimeoutExceptionFromCompletionException() throws Throwable { + throw extractException("", "", + new CompletionException( + SdkException.builder() + .cause(LOCAL_CONNECTION_TIMEOUT_EX) + .build())); + } + + @Test(expected = SocketTimeoutException.class) + public void testExtractSocketTimeoutExceptionFromCompletionException() throws Throwable { + throw extractException("", "", + new CompletionException( + SdkException.builder() + .cause(SOCKET_TIMEOUT_EX) + .build())); } /** @@ -259,7 +296,7 @@ public class TestInvoker extends Assert { ex, retries, false); } - protected AmazonServiceException newThrottledException() { + protected AwsServiceException newThrottledException() { return serviceException( AWSServiceThrottledException.STATUS_CODE, "throttled"); } @@ -354,7 +391,9 @@ public class TestInvoker extends Assert { // connection timeout exceptions are special, but as AWS shades // theirs, we need to string match them verifyTranslated(ConnectTimeoutException.class, - new AmazonClientException(HTTP_CONNECTION_TIMEOUT_EX)); + SdkException.builder() + .cause(HTTP_CONNECTION_TIMEOUT_EX) + .build()); } @Test @@ -362,14 +401,18 @@ public class TestInvoker extends Assert { // connection timeout exceptions are special, but as AWS shades // theirs, we need to string match them verifyTranslated(ConnectTimeoutException.class, - new AmazonClientException(LOCAL_CONNECTION_TIMEOUT_EX)); + SdkException.builder() + .cause(LOCAL_CONNECTION_TIMEOUT_EX) + .build()); } @Test public void testShadedConnectionTimeoutExceptionNotMatching() throws Throwable { InterruptedIOException ex = verifyTranslated(InterruptedIOException.class, - new AmazonClientException(new Local.NotAConnectTimeoutException())); + SdkException.builder() + .cause(new Local.NotAConnectTimeoutException()) + .build()); if (ex instanceof ConnectTimeoutException) { throw ex; } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AAWSCredentialsProvider.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AAWSCredentialsProvider.java index 730bae0aeb1..44488c259a0 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AAWSCredentialsProvider.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AAWSCredentialsProvider.java @@ -34,8 +34,14 @@ import javax.annotation.Nullable; import com.amazonaws.auth.AWSCredentials; import com.amazonaws.auth.AWSCredentialsProvider; -import com.amazonaws.auth.EnvironmentVariableCredentialsProvider; -import com.amazonaws.auth.InstanceProfileCredentialsProvider; +import com.amazonaws.auth.ContainerCredentialsProvider; +import software.amazon.awssdk.auth.credentials.AwsBasicCredentials; +import software.amazon.awssdk.auth.credentials.AwsCredentials; +import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider; +import software.amazon.awssdk.auth.credentials.EnvironmentVariableCredentialsProvider; +import software.amazon.awssdk.auth.credentials.InstanceProfileCredentialsProvider; + +import org.apache.hadoop.fs.s3a.adapter.V1V2AwsCredentialProviderAdapter; import org.apache.hadoop.thirdparty.com.google.common.collect.Sets; import org.junit.Rule; import org.junit.Test; @@ -52,6 +58,11 @@ import static org.apache.hadoop.fs.s3a.Constants.*; import static org.apache.hadoop.fs.s3a.S3ATestConstants.*; import static org.apache.hadoop.fs.s3a.S3ATestUtils.*; import static org.apache.hadoop.fs.s3a.S3AUtils.*; +import static 
org.apache.hadoop.fs.s3a.auth.AwsCredentialListProvider.ABSTRACT_PROVIDER; +import static org.apache.hadoop.fs.s3a.auth.AwsCredentialListProvider.NOT_AWS_V2_PROVIDER; +import static org.apache.hadoop.fs.s3a.auth.AwsCredentialListProvider.STANDARD_AWS_PROVIDERS; +import static org.apache.hadoop.fs.s3a.auth.AwsCredentialListProvider.buildAWSProviderList; +import static org.apache.hadoop.fs.s3a.auth.AwsCredentialListProvider.createAWSCredentialProviderSet; import static org.apache.hadoop.test.LambdaTestUtils.intercept; import static org.apache.hadoop.test.LambdaTestUtils.interceptFuture; import static org.junit.Assert.*; @@ -73,7 +84,7 @@ public class TestS3AAWSCredentialsProvider { @Test public void testProviderWrongClass() throws Exception { expectProviderInstantiationFailure(this.getClass(), - NOT_AWS_PROVIDER); + NOT_AWS_V2_PROVIDER); } @Test @@ -103,7 +114,6 @@ public class TestS3AAWSCredentialsProvider { } @Test - @SuppressWarnings("deprecation") public void testInstantiationChain() throws Throwable { Configuration conf = new Configuration(false); conf.set(AWS_CREDENTIALS_PROVIDER, @@ -123,7 +133,6 @@ public class TestS3AAWSCredentialsProvider { } @Test - @SuppressWarnings("deprecation") public void testDefaultChain() throws Exception { URI uri1 = new URI("s3a://bucket1"), uri2 = new URI("s3a://bucket2"); Configuration conf = new Configuration(false); @@ -148,7 +157,27 @@ public class TestS3AAWSCredentialsProvider { } @Test - @SuppressWarnings("deprecation") + public void testConfiguredChainV1V2() throws Exception { + URI uri1 = new URI("s3a://bucket1"), uri2 = new URI("s3a://bucket2"); + List<Class<?>> credentialProviders = + Arrays.asList( + ContainerCredentialsProvider.class, + AnonymousAWSCredentialsProvider.class); + List<Class<?>> expectedClasses = + Arrays.asList( + V1V2AwsCredentialProviderAdapter.class, + AnonymousAWSCredentialsProvider.class); + Configuration conf = + createProviderConfiguration(buildClassListString(credentialProviders)); + AWSCredentialProviderList list1 = createAWSCredentialProviderSet( + uri1, conf); + AWSCredentialProviderList list2 = createAWSCredentialProviderSet( + uri2, conf); + assertCredentialProviders(expectedClasses, list1); + assertCredentialProviders(expectedClasses, list2); + } + + @Test public void testConfiguredChain() throws Exception { URI uri1 = new URI("s3a://bucket1"), uri2 = new URI("s3a://bucket2"); List<Class<?>> expectedClasses = @@ -167,7 +196,6 @@ public class TestS3AAWSCredentialsProvider { } @Test - @SuppressWarnings("deprecation") public void testConfiguredChainUsesSharedInstanceProfile() throws Exception { URI uri1 = new URI("s3a://bucket1"), uri2 = new URI("s3a://bucket2"); Configuration conf = new Configuration(false); @@ -227,7 +255,7 @@ public class TestS3AAWSCredentialsProvider { * A credential provider whose constructor raises an NPE.
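testConfiguredChainV1V2 above expects a v1 provider class to surface through an adapter. As a rough illustration of what such an adapter has to do (this is not the V1ToV2AwsCredentialProviderAdapter added by this patch, only a sketch of the idea):

import com.amazonaws.auth.AWSCredentials;
import com.amazonaws.auth.AWSCredentialsProvider;

import software.amazon.awssdk.auth.credentials.AwsBasicCredentials;
import software.amazon.awssdk.auth.credentials.AwsCredentials;
import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider;

/** Illustrative only: presents a v1 provider behind the v2 interface. */
public class V1ProviderWrapperSketch implements AwsCredentialsProvider {

  private final AWSCredentialsProvider v1;

  public V1ProviderWrapperSketch(AWSCredentialsProvider v1) {
    this.v1 = v1;
  }

  @Override
  public AwsCredentials resolveCredentials() {
    AWSCredentials c = v1.getCredentials();
    // A real adapter also handles session tokens and anonymous credentials.
    return AwsBasicCredentials.create(c.getAWSAccessKeyId(), c.getAWSSecretKey());
  }
}

The adapter direction matters: the v2 interface has only resolveCredentials(), so the v1 refresh() contract has no direct equivalent and is simply dropped, as the test changes in this file reflect.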
*/ protected static class ConstructorFailureProvider - implements AWSCredentialsProvider { + implements AwsCredentialsProvider { @SuppressWarnings("unused") public ConstructorFailureProvider() { @@ -235,13 +263,10 @@ public class TestS3AAWSCredentialsProvider { } @Override - public AWSCredentials getCredentials() { + public AwsCredentials resolveCredentials() { return null; } - @Override - public void refresh() { - } } @Test @@ -359,12 +384,12 @@ public class TestS3AAWSCredentialsProvider { List> expectedClasses, AWSCredentialProviderList list) { assertNotNull(list); - List providers = list.getProviders(); + List providers = list.getProviders(); assertEquals(expectedClasses.size(), providers.size()); for (int i = 0; i < expectedClasses.size(); ++i) { Class expectedClass = expectedClasses.get(i); - AWSCredentialsProvider provider = providers.get(i); + AwsCredentialsProvider provider = providers.get(i); assertNotNull( String.format("At position %d, expected class is %s, but found null.", i, expectedClass), provider); @@ -380,7 +405,6 @@ public class TestS3AAWSCredentialsProvider { * @see S3ATestUtils#authenticationContains(Configuration, String). */ @Test - @SuppressWarnings("deprecation") public void testAuthenticationContainsProbes() { Configuration conf = new Configuration(false); assertFalse("found AssumedRoleCredentialProvider", @@ -398,7 +422,7 @@ public class TestS3AAWSCredentialsProvider { // verify you can't get credentials from it NoAuthWithAWSException noAuth = intercept(NoAuthWithAWSException.class, AWSCredentialProviderList.NO_AWS_CREDENTIAL_PROVIDERS, - () -> providers.getCredentials()); + () -> providers.resolveCredentials()); // but that it closes safely providers.close(); @@ -447,11 +471,10 @@ public class TestS3AAWSCredentialsProvider { providers.close(); assertEquals("Ref count after close() for " + providers, 0, providers.getRefCount()); - providers.refresh(); intercept(NoAuthWithAWSException.class, AWSCredentialProviderList.CREDENTIALS_REQUESTED_WHEN_CLOSED, - () -> providers.getCredentials()); + () -> providers.resolveCredentials()); } /** @@ -488,17 +511,8 @@ public class TestS3AAWSCredentialsProvider { } } - private static final AWSCredentials EXPECTED_CREDENTIALS = new AWSCredentials() { - @Override - public String getAWSAccessKeyId() { - return "expectedAccessKey"; - } - - @Override - public String getAWSSecretKey() { - return "expectedSecret"; - } - }; + private static final AwsCredentials EXPECTED_CREDENTIALS = + AwsBasicCredentials.create("expectedAccessKey", "expectedSecret"); /** * Credential provider that takes a long time. 
@@ -510,7 +524,7 @@ public class TestS3AAWSCredentialsProvider { } @Override - protected AWSCredentials createCredentials(Configuration config) throws IOException { + protected AwsCredentials createCredentials(Configuration config) throws IOException { // yield to other callers to induce race condition Thread.yield(); return EXPECTED_CREDENTIALS; @@ -584,7 +598,7 @@ public class TestS3AAWSCredentialsProvider { } @Override - protected AWSCredentials createCredentials(Configuration config) throws IOException { + protected AwsCredentials createCredentials(Configuration config) throws IOException { throw new IOException("expected error"); } } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3ABlockOutputStream.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3ABlockOutputStream.java index ffa2c81e58a..4f329afe7ad 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3ABlockOutputStream.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3ABlockOutputStream.java @@ -30,7 +30,6 @@ import org.apache.hadoop.util.Progressable; import org.junit.Before; import org.junit.Test; -import java.io.ByteArrayInputStream; import java.io.IOException; import java.util.concurrent.ExecutorService; @@ -105,16 +104,14 @@ public class TestS3ABlockOutputStream extends AbstractS3AMockTest { noopAuditor(conf), AuditTestSupport.NOOP_SPAN, new MinimalWriteOperationHelperCallbacks()); - ByteArrayInputStream inputStream = new ByteArrayInputStream( - "a".getBytes()); // first one works String key = "destKey"; - woh.newUploadPartRequest(key, - "uploadId", 1, 1024, inputStream, null, 0L); + woh.newUploadPartRequestBuilder(key, + "uploadId", 1, 1024); // but ask past the limit and a PathIOE is raised intercept(PathIOException.class, key, - () -> woh.newUploadPartRequest(key, - "uploadId", 50000, 1024, inputStream, null, 0L)); + () -> woh.newUploadPartRequestBuilder(key, + "uploadId", 50000, 1024)); } static class StreamClosedException extends IOException {} diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3ADeleteOnExit.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3ADeleteOnExit.java index 62a99d72092..1f85c8fdef3 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3ADeleteOnExit.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3ADeleteOnExit.java @@ -27,9 +27,9 @@ import java.io.IOException; import java.net.URI; import java.util.Date; -import com.amazonaws.services.s3.AmazonS3; -import com.amazonaws.services.s3.model.GetObjectMetadataRequest; -import com.amazonaws.services.s3.model.ObjectMetadata; +import software.amazon.awssdk.services.s3.S3Client; +import software.amazon.awssdk.services.s3.model.HeadObjectRequest; +import software.amazon.awssdk.services.s3.model.HeadObjectResponse; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; @@ -37,6 +37,7 @@ import org.apache.hadoop.fs.Path; import org.junit.Test; import org.mockito.ArgumentMatcher; + /** * deleteOnExit test for S3A. */ @@ -74,25 +75,25 @@ public class TestS3ADeleteOnExit extends AbstractS3AMockTest { // unset S3CSE property from config to avoid pathIOE. 
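The TestS3ABlockOutputStream change above switches to newUploadPartRequestBuilder, whose output ultimately feeds S3Client.uploadPart(request, body). For orientation, a hedged sketch of a complete single-part v2 multipart upload (bucket, key and payload are placeholders; real uploads need 5 MB minimum for all but the last part):

import java.nio.charset.StandardCharsets;

import software.amazon.awssdk.core.sync.RequestBody;
import software.amazon.awssdk.services.s3.S3Client;
import software.amazon.awssdk.services.s3.model.CompleteMultipartUploadRequest;
import software.amazon.awssdk.services.s3.model.CompletedMultipartUpload;
import software.amazon.awssdk.services.s3.model.CompletedPart;
import software.amazon.awssdk.services.s3.model.CreateMultipartUploadRequest;
import software.amazon.awssdk.services.s3.model.UploadPartRequest;

public class MultipartUploadSketch {
  public static void upload(S3Client s3, String bucket, String key) {
    // Initiate the upload and keep the upload id.
    String uploadId = s3.createMultipartUpload(
        CreateMultipartUploadRequest.builder().bucket(bucket).key(key).build()).uploadId();
    // In v2 the part payload travels as a RequestBody, not inside the request object.
    String etag = s3.uploadPart(
        UploadPartRequest.builder()
            .bucket(bucket).key(key).uploadId(uploadId).partNumber(1).build(),
        RequestBody.fromString("PART", StandardCharsets.UTF_8)).eTag();
    // Complete with the collected part etags.
    s3.completeMultipartUpload(CompleteMultipartUploadRequest.builder()
        .bucket(bucket).key(key).uploadId(uploadId)
        .multipartUpload(CompletedMultipartUpload.builder()
            .parts(CompletedPart.builder().partNumber(1).eTag(etag).build())
            .build())
        .build());
  }
}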
conf.unset(Constants.S3_ENCRYPTION_ALGORITHM); testFs.initialize(uri, conf); - AmazonS3 testS3 = testFs.getAmazonS3ClientForTesting("mocking"); + S3Client testS3 = testFs.getAmazonS3ClientForTesting("mocking"); Path path = new Path("/file"); String key = path.toUri().getPath().substring(1); - ObjectMetadata meta = new ObjectMetadata(); - meta.setContentLength(1L); - meta.setLastModified(new Date(2L)); - when(testS3.getObjectMetadata(argThat(correctGetMetadataRequest(BUCKET, key)))) - .thenReturn(meta); + HeadObjectResponse objectMetadata = + HeadObjectResponse.builder().contentLength(1L).lastModified(new Date(2L).toInstant()) + .build(); + when(testS3.headObject(argThat(correctGetMetadataRequest(BUCKET, key)))) + .thenReturn(objectMetadata); testFs.deleteOnExit(path); testFs.close(); assertEquals(0, testFs.getDeleteOnDnExitCount()); } - private ArgumentMatcher correctGetMetadataRequest( + private ArgumentMatcher correctGetMetadataRequest( String bucket, String key) { return request -> request != null - && request.getBucketName().equals(bucket) - && request.getKey().equals(key); + && request.bucket().equals(bucket) + && request.key().equals(key); } } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AExceptionTranslation.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AExceptionTranslation.java index fd649c436bf..33e6b4a0814 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AExceptionTranslation.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AExceptionTranslation.java @@ -21,7 +21,7 @@ package org.apache.hadoop.fs.s3a; import static org.apache.hadoop.fs.s3a.Constants.*; import static org.apache.hadoop.fs.s3a.S3ATestUtils.*; import static org.apache.hadoop.fs.s3a.S3AUtils.*; -import static org.apache.hadoop.fs.s3a.impl.InternalConstants.SC_404; +import static org.apache.hadoop.fs.s3a.impl.InternalConstants.*; import static org.junit.Assert.*; import java.io.EOFException; @@ -30,18 +30,20 @@ import java.io.IOException; import java.io.InterruptedIOException; import java.net.SocketTimeoutException; import java.nio.file.AccessDeniedException; -import java.util.Collections; -import java.util.Map; import java.util.concurrent.ExecutionException; +import java.util.function.Consumer; -import com.amazonaws.AmazonClientException; -import com.amazonaws.AmazonServiceException; -import com.amazonaws.services.s3.model.AmazonS3Exception; +import software.amazon.awssdk.awscore.exception.AwsErrorDetails; +import software.amazon.awssdk.awscore.exception.AwsServiceException; +import software.amazon.awssdk.core.exception.SdkException; +import software.amazon.awssdk.http.SdkHttpResponse; +import software.amazon.awssdk.services.s3.model.S3Exception; import org.junit.Test; import org.apache.hadoop.fs.s3a.impl.ErrorTranslation; + import static org.apache.hadoop.test.GenericTestUtils.assertExceptionContains; /** @@ -58,21 +60,26 @@ public class TestS3AExceptionTranslation { = new SocketTimeoutException("socket"); @Test - public void test301ContainsEndpoint() throws Exception { - String bucket = "bucket.s3-us-west-2.amazonaws.com"; - int sc301 = 301; - AmazonS3Exception s3Exception = createS3Exception("wrong endpoint", sc301, - Collections.singletonMap(S3AUtils.ENDPOINT_KEY, - bucket)); + public void test301ContainsRegion() throws Exception { + String region = "us-west-1"; + + AwsErrorDetails redirectError = AwsErrorDetails.builder() + .sdkHttpResponse( + 
SdkHttpResponse.builder().putHeader(BUCKET_REGION_HEADER, region).build()) + .build(); + + S3Exception s3Exception = createS3Exception("wrong region", + SC_301_MOVED_PERMANENTLY, + redirectError); AWSRedirectException ex = verifyTranslated( AWSRedirectException.class, s3Exception); - assertStatusCode(sc301, ex); + assertStatusCode(SC_301_MOVED_PERMANENTLY, ex); assertNotNull(ex.getMessage()); - assertContained(ex.getMessage(), bucket); - assertContained(ex.getMessage(), ENDPOINT); - assertExceptionContains(ENDPOINT, ex, "endpoint"); - assertExceptionContains(bucket, ex, "bucket name"); + assertContained(ex.getMessage(), region); + assertContained(ex.getMessage(), AWS_REGION); + assertExceptionContains(AWS_REGION, ex, "region"); + assertExceptionContains(region, ex, "region name"); } protected void assertContained(String text, String contained) { @@ -88,17 +95,17 @@ public class TestS3AExceptionTranslation { @Test public void test400isBad() throws Exception { - verifyTranslated(400, AWSBadRequestException.class); + verifyTranslated(SC_400_BAD_REQUEST, AWSBadRequestException.class); } @Test public void test401isNotPermittedFound() throws Exception { - verifyTranslated(401, AccessDeniedException.class); + verifyTranslated(SC_401_UNAUTHORIZED, AccessDeniedException.class); } @Test public void test403isNotPermittedFound() throws Exception { - verifyTranslated(403, AccessDeniedException.class); + verifyTranslated(SC_403_FORBIDDEN, AccessDeniedException.class); } /** @@ -106,7 +113,7 @@ public class TestS3AExceptionTranslation { */ @Test public void test404isNotFound() throws Exception { - verifyTranslated(SC_404, FileNotFoundException.class); + verifyTranslated(SC_404_NOT_FOUND, FileNotFoundException.class); } /** @@ -114,8 +121,11 @@ public class TestS3AExceptionTranslation { */ @Test public void testUnknownBucketException() throws Exception { - AmazonS3Exception ex404 = createS3Exception(SC_404); - ex404.setErrorCode(ErrorTranslation.AwsErrorCodes.E_NO_SUCH_BUCKET); + S3Exception ex404 = createS3Exception(b -> b + .statusCode(SC_404_NOT_FOUND) + .awsErrorDetails(AwsErrorDetails.builder() + .errorCode(ErrorTranslation.AwsErrorCodes.E_NO_SUCH_BUCKET) + .build())); verifyTranslated( UnknownStoreException.class, ex404); @@ -123,12 +133,12 @@ public class TestS3AExceptionTranslation { @Test public void test410isNotFound() throws Exception { - verifyTranslated(410, FileNotFoundException.class); + verifyTranslated(SC_410_GONE, FileNotFoundException.class); } @Test public void test416isEOF() throws Exception { - verifyTranslated(416, EOFException.class); + verifyTranslated(SC_416_RANGE_NOT_SATISFIABLE, EOFException.class); } @Test @@ -143,19 +153,21 @@ public class TestS3AExceptionTranslation { @Test public void testGenericServiceS3Exception() throws Exception { // service exception of no known type - AmazonServiceException ase = new AmazonServiceException("unwind"); - ase.setStatusCode(500); + AwsServiceException ase = AwsServiceException.builder() + .message("unwind") + .statusCode(SC_500_INTERNAL_SERVER_ERROR) + .build(); AWSServiceIOException ex = verifyTranslated( AWSStatus500Exception.class, ase); - assertStatusCode(500, ex); + assertStatusCode(SC_500_INTERNAL_SERVER_ERROR, ex); } protected void assertStatusCode(int expected, AWSServiceIOException ex) { assertNotNull("Null exception", ex); - if (expected != ex.getStatusCode()) { + if (expected != ex.statusCode()) { throw new AssertionError("Expected status code " + expected - + "but got " + ex.getStatusCode(), + + "but got " + ex.statusCode(), 
ex); } } @@ -164,23 +176,35 @@ public class TestS3AExceptionTranslation { public void testGenericClientException() throws Exception { // Generic Amazon exception verifyTranslated(AWSClientIOException.class, - new AmazonClientException("")); + SdkException.builder().message("").build()); } - private static AmazonS3Exception createS3Exception(int code) { - return createS3Exception("", code, null); + private static S3Exception createS3Exception( + Consumer consumer) { + S3Exception.Builder builder = S3Exception.builder() + .awsErrorDetails(AwsErrorDetails.builder() + .build()); + consumer.accept(builder); + return (S3Exception) builder.build(); } - private static AmazonS3Exception createS3Exception(String message, int code, - Map additionalDetails) { - AmazonS3Exception source = new AmazonS3Exception(message); - source.setStatusCode(code); - source.setAdditionalDetails(additionalDetails); + private static S3Exception createS3Exception(int code) { + return createS3Exception(b -> b.message("").statusCode(code)); + } + + private static S3Exception createS3Exception(String message, int code, + AwsErrorDetails additionalDetails) { + + S3Exception source = (S3Exception) S3Exception.builder() + .message(message) + .statusCode(code) + .awsErrorDetails(additionalDetails) + .build(); return source; } private static E verifyTranslated(Class clazz, - AmazonClientException exception) throws Exception { + SdkException exception) throws Exception { // Verifying that the translated exception have the correct error message. IOException ioe = translateException("test", "/", exception); assertExceptionContains(exception.getMessage(), ioe, @@ -212,16 +236,18 @@ public class TestS3AExceptionTranslation { public void testExtractInterrupted() throws Throwable { throw extractException("", "", new ExecutionException( - new AmazonClientException( - new InterruptedException("")))); + SdkException.builder() + .cause(new InterruptedException("")) + .build())); } @Test(expected = InterruptedIOException.class) public void testExtractInterruptedIO() throws Throwable { throw extractException("", "", new ExecutionException( - new AmazonClientException( - new InterruptedIOException("")))); + SdkException.builder() + .cause(new InterruptedIOException("")) + .build())); } } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AGetFileStatus.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AGetFileStatus.java index 34a275b580f..1a2a21a6e51 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AGetFileStatus.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AGetFileStatus.java @@ -21,21 +21,22 @@ package org.apache.hadoop.fs.s3a; import static org.junit.Assert.*; import static org.mockito.ArgumentMatchers.argThat; import static org.mockito.ArgumentMatchers.any; -import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; import java.io.FileNotFoundException; +import java.util.ArrayList; import java.util.Collections; import java.util.Date; import java.util.List; -import com.amazonaws.services.s3.model.GetObjectMetadataRequest; -import com.amazonaws.services.s3.model.ListObjectsRequest; -import com.amazonaws.services.s3.model.ListObjectsV2Request; -import com.amazonaws.services.s3.model.ListObjectsV2Result; -import com.amazonaws.services.s3.model.ObjectListing; -import com.amazonaws.services.s3.model.ObjectMetadata; -import com.amazonaws.services.s3.model.S3ObjectSummary; +import 
software.amazon.awssdk.services.s3.model.CommonPrefix; +import software.amazon.awssdk.services.s3.model.HeadObjectRequest; +import software.amazon.awssdk.services.s3.model.HeadObjectResponse; +import software.amazon.awssdk.services.s3.model.ListObjectsRequest; +import software.amazon.awssdk.services.s3.model.ListObjectsResponse; +import software.amazon.awssdk.services.s3.model.ListObjectsV2Request; +import software.amazon.awssdk.services.s3.model.ListObjectsV2Response; +import software.amazon.awssdk.services.s3.model.S3Object; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.Path; @@ -44,6 +45,7 @@ import org.apache.hadoop.fs.contract.ContractTestUtils; import org.junit.Test; import org.mockito.ArgumentMatcher; + /** * S3A tests for getFileStatus using mock S3 client. */ @@ -53,17 +55,16 @@ public class TestS3AGetFileStatus extends AbstractS3AMockTest { public void testFile() throws Exception { Path path = new Path("/file"); String key = path.toUri().getPath().substring(1); - ObjectMetadata meta = new ObjectMetadata(); - meta.setContentLength(1L); - meta.setLastModified(new Date(2L)); - when(s3.getObjectMetadata(argThat(correctGetMetadataRequest(BUCKET, key)))) - .thenReturn(meta); + HeadObjectResponse objectMetadata = + HeadObjectResponse.builder().contentLength(1L).lastModified(new Date(2L).toInstant()) + .build(); + when(s3.headObject(argThat(correctGetMetadataRequest(BUCKET, key)))).thenReturn(objectMetadata); FileStatus stat = fs.getFileStatus(path); assertNotNull(stat); assertEquals(fs.makeQualified(path), stat.getPath()); assertTrue(stat.isFile()); - assertEquals(meta.getContentLength(), stat.getLen()); - assertEquals(meta.getLastModified().getTime(), stat.getModificationTime()); + assertEquals(objectMetadata.contentLength().longValue(), stat.getLen()); + assertEquals(Date.from(objectMetadata.lastModified()).getTime(), stat.getModificationTime()); ContractTestUtils.assertNotErasureCoded(fs, path); assertTrue(path + " should have erasure coding unset in " + "FileStatus#toString(): " + stat, @@ -74,17 +75,16 @@ public class TestS3AGetFileStatus extends AbstractS3AMockTest { public void testFakeDirectory() throws Exception { Path path = new Path("/dir"); String key = path.toUri().getPath().substring(1); - when(s3.getObjectMetadata(argThat(correctGetMetadataRequest(BUCKET, key)))) + when(s3.headObject(argThat(correctGetMetadataRequest(BUCKET, key)))) .thenThrow(NOT_FOUND); String keyDir = key + "/"; - ListObjectsV2Result listResult = new ListObjectsV2Result(); - S3ObjectSummary objectSummary = new S3ObjectSummary(); - objectSummary.setKey(keyDir); - objectSummary.setSize(0L); - listResult.getObjectSummaries().add(objectSummary); + List s3Objects = new ArrayList<>(1); + s3Objects.add(S3Object.builder().key(keyDir).size(0L).build()); + ListObjectsV2Response listObjectsV2Response = + ListObjectsV2Response.builder().contents(s3Objects).build(); when(s3.listObjectsV2(argThat( matchListV2Request(BUCKET, keyDir)) - )).thenReturn(listResult); + )).thenReturn(listObjectsV2Response); FileStatus stat = fs.getFileStatus(path); assertNotNull(stat); assertEquals(fs.makeQualified(path), stat.getPath()); @@ -95,12 +95,13 @@ public class TestS3AGetFileStatus extends AbstractS3AMockTest { public void testImplicitDirectory() throws Exception { Path path = new Path("/dir"); String key = path.toUri().getPath().substring(1); - when(s3.getObjectMetadata(argThat(correctGetMetadataRequest(BUCKET, key)))) + when(s3.headObject(argThat(correctGetMetadataRequest(BUCKET, key)))) 
.thenThrow(NOT_FOUND); - when(s3.getObjectMetadata(argThat( + when(s3.headObject(argThat( correctGetMetadataRequest(BUCKET, key + "/")) )).thenThrow(NOT_FOUND); - setupListMocks(Collections.singletonList("dir/"), Collections.emptyList()); + setupListMocks(Collections.singletonList(CommonPrefix.builder().prefix("dir/").build()), + Collections.emptyList()); FileStatus stat = fs.getFileStatus(path); assertNotNull(stat); assertEquals(fs.makeQualified(path), stat.getPath()); @@ -115,9 +116,9 @@ public class TestS3AGetFileStatus extends AbstractS3AMockTest { public void testRoot() throws Exception { Path path = new Path("/"); String key = path.toUri().getPath().substring(1); - when(s3.getObjectMetadata(argThat(correctGetMetadataRequest(BUCKET, key)))) + when(s3.headObject(argThat(correctGetMetadataRequest(BUCKET, key)))) .thenThrow(NOT_FOUND); - when(s3.getObjectMetadata(argThat( + when(s3.headObject(argThat( correctGetMetadataRequest(BUCKET, key + "/") ))).thenThrow(NOT_FOUND); setupListMocks(Collections.emptyList(), Collections.emptyList()); @@ -132,9 +133,9 @@ public class TestS3AGetFileStatus extends AbstractS3AMockTest { public void testNotFound() throws Exception { Path path = new Path("/dir"); String key = path.toUri().getPath().substring(1); - when(s3.getObjectMetadata(argThat(correctGetMetadataRequest(BUCKET, key)))) + when(s3.headObject(argThat(correctGetMetadataRequest(BUCKET, key)))) .thenThrow(NOT_FOUND); - when(s3.getObjectMetadata(argThat( + when(s3.headObject(argThat( correctGetMetadataRequest(BUCKET, key + "/") ))).thenThrow(NOT_FOUND); setupListMocks(Collections.emptyList(), Collections.emptyList()); @@ -142,36 +143,38 @@ public class TestS3AGetFileStatus extends AbstractS3AMockTest { fs.getFileStatus(path); } - private void setupListMocks(List prefixes, - List summaries) { - + private void setupListMocks(List prefixes, + List s3Objects) { // V1 list API mock - ObjectListing objects = mock(ObjectListing.class); - when(objects.getCommonPrefixes()).thenReturn(prefixes); - when(objects.getObjectSummaries()).thenReturn(summaries); - when(s3.listObjects(any(ListObjectsRequest.class))).thenReturn(objects); + ListObjectsResponse v1Response = ListObjectsResponse.builder() + .commonPrefixes(prefixes) + .contents(s3Objects) + .build(); + when(s3.listObjects(any(ListObjectsRequest.class))).thenReturn(v1Response); // V2 list API mock - ListObjectsV2Result v2Result = mock(ListObjectsV2Result.class); - when(v2Result.getCommonPrefixes()).thenReturn(prefixes); - when(v2Result.getObjectSummaries()).thenReturn(summaries); - when(s3.listObjectsV2(any(ListObjectsV2Request.class))) - .thenReturn(v2Result); + ListObjectsV2Response v2Result = ListObjectsV2Response.builder() + .commonPrefixes(prefixes) + .contents(s3Objects) + .build(); + when(s3.listObjectsV2( + any(software.amazon.awssdk.services.s3.model.ListObjectsV2Request.class))).thenReturn( + v2Result); } - private ArgumentMatcher correctGetMetadataRequest( + private ArgumentMatcher correctGetMetadataRequest( String bucket, String key) { return request -> request != null - && request.getBucketName().equals(bucket) - && request.getKey().equals(key); + && request.bucket().equals(bucket) + && request.key().equals(key); } private ArgumentMatcher matchListV2Request( String bucket, String key) { return (ListObjectsV2Request request) -> { return request != null - && request.getBucketName().equals(bucket) - && request.getPrefix().equals(key); + && request.bucket().equals(bucket) + && request.prefix().equals(key); }; } diff --git 
a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AInputStreamRetry.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AInputStreamRetry.java index c62bf5daca3..da1284343da 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AInputStreamRetry.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AInputStreamRetry.java @@ -19,16 +19,18 @@ package org.apache.hadoop.fs.s3a; import javax.net.ssl.SSLException; +import java.io.FilterInputStream; import java.io.IOException; import java.net.SocketException; import java.nio.charset.StandardCharsets; import java.util.concurrent.CompletableFuture; -import com.amazonaws.SdkClientException; -import com.amazonaws.services.s3.model.GetObjectRequest; -import com.amazonaws.services.s3.model.ObjectMetadata; -import com.amazonaws.services.s3.model.S3Object; -import com.amazonaws.services.s3.model.S3ObjectInputStream; +import software.amazon.awssdk.awscore.exception.AwsErrorDetails; +import software.amazon.awssdk.awscore.exception.AwsServiceException; +import software.amazon.awssdk.core.ResponseInputStream; +import software.amazon.awssdk.http.AbortableInputStream; +import software.amazon.awssdk.services.s3.model.GetObjectRequest; +import software.amazon.awssdk.services.s3.model.GetObjectResponse; import org.junit.Test; import org.apache.commons.io.IOUtils; @@ -37,6 +39,7 @@ import org.apache.hadoop.fs.s3a.audit.impl.NoopSpan; import org.apache.hadoop.fs.s3a.auth.delegation.EncryptionSecrets; import org.apache.hadoop.util.functional.CallableRaisingIOE; + import static java.lang.Math.min; import static org.apache.hadoop.util.functional.FutureIO.eval; import static org.junit.Assert.assertArrayEquals; @@ -121,13 +124,22 @@ public class TestS3AInputStreamRetry extends AbstractS3AMockTest { * @return mocked object. */ private S3AInputStream.InputStreamCallbacks getMockedInputStreamCallback() { - return new S3AInputStream.InputStreamCallbacks() { + GetObjectResponse objectResponse = GetObjectResponse.builder() + .eTag("test-etag") + .build(); - private final S3Object mockedS3Object = getMockedS3Object(); + ResponseInputStream[] responseInputStreams = + new ResponseInputStream[] { + getMockedInputStream(objectResponse, true), + getMockedInputStream(objectResponse, true), + getMockedInputStream(objectResponse, false) + }; + + return new S3AInputStream.InputStreamCallbacks() { private Integer mockedS3ObjectIndex = 0; @Override - public S3Object getObject(GetObjectRequest request) { + public ResponseInputStream getObject(GetObjectRequest request) { // Set s3 client to return mocked s3object with defined read behavior. 
mockedS3ObjectIndex++; // open() -> lazySeek() -> reopen() @@ -144,14 +156,17 @@ public class TestS3AInputStreamRetry extends AbstractS3AMockTest { // -> getObjectContent(objectInputStreamGood)-> objectInputStreamGood // -> wrappedStream.read if (mockedS3ObjectIndex == 3) { - throw new SdkClientException("Failed to get S3Object"); + throw AwsServiceException.builder() + .message("Failed to get S3Object") + .awsErrorDetails(AwsErrorDetails.builder().errorCode("test-code").build()) + .build(); } - return mockedS3Object; + return responseInputStreams[min(mockedS3ObjectIndex, responseInputStreams.length) - 1]; } @Override - public GetObjectRequest newGetRequest(String key) { - return new GetObjectRequest(fs.getBucket(), key); + public GetObjectRequest.Builder newGetRequestBuilder(String key) { + return GetObjectRequest.builder().bucket(fs.getBucket()).key(key); } @Override @@ -166,70 +181,41 @@ public class TestS3AInputStreamRetry extends AbstractS3AMockTest { } /** - * Get mocked S3Object that returns bad input stream on the initial of - * getObjectContent calls. - * - * @return mocked object. - */ - private S3Object getMockedS3Object() { - S3ObjectInputStream objectInputStreamBad1 = getMockedInputStream(true); - S3ObjectInputStream objectInputStreamBad2 = getMockedInputStream(true); - S3ObjectInputStream objectInputStreamGood = getMockedInputStream(false); - - return new S3Object() { - private final S3ObjectInputStream[] inputStreams = - {objectInputStreamBad1, objectInputStreamBad2, objectInputStreamGood}; - - private Integer inputStreamIndex = 0; - - @Override - public S3ObjectInputStream getObjectContent() { - // Set getObjectContent behavior: - // Returns bad stream twice, and good stream afterwards. - inputStreamIndex++; - return inputStreams[min(inputStreamIndex, inputStreams.length) - 1]; - } - - @Override - public ObjectMetadata getObjectMetadata() { - // Set getObjectMetadata behavior: returns dummy metadata - ObjectMetadata metadata = new ObjectMetadata(); - metadata.setHeader("ETag", "test-etag"); - return metadata; - } - }; - } - - /** - * Get mocked S3ObjectInputStream where we can trigger IOException to + * Get mocked ResponseInputStream where we can trigger IOException to * simulate the read failure. * * @param triggerFailure true when a failure injection is enabled. * @return mocked object. 
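The helper below builds such a stream by hand. As a general pattern, any InputStream can be presented to test code as a v2 object stream by pairing a GetObjectResponse with an AbortableInputStream; a minimal standalone sketch (etag and payload are placeholders):

import java.io.ByteArrayInputStream;
import java.nio.charset.StandardCharsets;

import software.amazon.awssdk.core.ResponseInputStream;
import software.amazon.awssdk.http.AbortableInputStream;
import software.amazon.awssdk.services.s3.model.GetObjectResponse;

public class FakeObjectStreamSketch {
  public static void main(String[] args) throws Exception {
    GetObjectResponse response = GetObjectResponse.builder().eTag("test-etag").build();
    // Wrap an ordinary stream as an abortable S3 object stream; the abort action is a no-op.
    ResponseInputStream<GetObjectResponse> stream = new ResponseInputStream<>(
        response,
        AbortableInputStream.create(
            new ByteArrayInputStream("hello".getBytes(StandardCharsets.UTF_8)), () -> {}));
    System.out.println(stream.response().eTag());
    stream.close();
  }
}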
*/ - private S3ObjectInputStream getMockedInputStream(boolean triggerFailure) { - return new S3ObjectInputStream(IOUtils.toInputStream(INPUT, StandardCharsets.UTF_8), null) { + private ResponseInputStream getMockedInputStream( + GetObjectResponse objectResponse, boolean triggerFailure) { - private final IOException exception = - new SSLException(new SocketException("Connection reset")); + FilterInputStream inputStream = + new FilterInputStream(IOUtils.toInputStream(INPUT, StandardCharsets.UTF_8)) { - @Override - public int read() throws IOException { - int result = super.read(); - if (triggerFailure) { - throw exception; - } - return result; - } + private final IOException exception = + new SSLException(new SocketException("Connection reset")); - @Override - public int read(byte[] b, int off, int len) throws IOException { - int result = super.read(b, off, len); - if (triggerFailure) { - throw exception; - } - return result; - } - }; + @Override + public int read() throws IOException { + int result = super.read(); + if (triggerFailure) { + throw exception; + } + return result; + } + + @Override + public int read(byte[] b, int off, int len) throws IOException { + int result = super.read(b, off, len); + if (triggerFailure) { + throw exception; + } + return result; + } + }; + + return new ResponseInputStream(objectResponse, + AbortableInputStream.create(inputStream, () -> {})); } } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AProxy.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AProxy.java index e05ee25adfa..0982c8cbd47 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AProxy.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AProxy.java @@ -20,18 +20,17 @@ package org.apache.hadoop.fs.s3a; import java.io.IOException; -import com.amazonaws.ClientConfiguration; -import com.amazonaws.Protocol; import org.assertj.core.api.Assertions; import org.junit.Test; +import software.amazon.awssdk.http.apache.ProxyConfiguration; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.s3a.impl.AWSClientConfig; import org.apache.hadoop.test.AbstractHadoopTestBase; import static org.apache.hadoop.fs.s3a.Constants.PROXY_HOST; import static org.apache.hadoop.fs.s3a.Constants.PROXY_PORT; import static org.apache.hadoop.fs.s3a.Constants.PROXY_SECURED; -import static org.apache.hadoop.fs.s3a.S3AUtils.initProxySupport; /** * Tests to verify {@link S3AUtils} translates the proxy configurations @@ -79,11 +78,16 @@ public class TestS3AProxy extends AbstractHadoopTestBase { private void verifyProxy(Configuration proxyConfig, boolean isExpectedSecured) throws IOException { - ClientConfiguration awsConf = new ClientConfiguration(); - initProxySupport(proxyConfig, "test-bucket", awsConf); - Assertions.assertThat(awsConf.getProxyProtocol()) + ProxyConfiguration config = + AWSClientConfig.createProxyConfiguration(proxyConfig, "testBucket"); + ProxyConfiguration asyncConfig = + AWSClientConfig.createProxyConfiguration(proxyConfig, "testBucket"); + Assertions.assertThat(config.scheme()) .describedAs("Proxy protocol not as expected") - .isEqualTo(isExpectedSecured ? Protocol.HTTPS : Protocol.HTTP); + .isEqualTo(isExpectedSecured ? "https" : "http"); + Assertions.assertThat(asyncConfig.scheme()) + .describedAs("Proxy protocol not as expected") + .isEqualTo(isExpectedSecured ? 
"https" : "http"); } /** diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AUnbuffer.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AUnbuffer.java index 0e105c25c3a..643db02087b 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AUnbuffer.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AUnbuffer.java @@ -18,17 +18,22 @@ package org.apache.hadoop.fs.s3a; -import com.amazonaws.services.s3.model.ObjectMetadata; -import com.amazonaws.services.s3.model.S3Object; -import com.amazonaws.services.s3.model.S3ObjectInputStream; +import software.amazon.awssdk.core.ResponseInputStream; +import software.amazon.awssdk.http.AbortableInputStream; +import software.amazon.awssdk.services.s3.model.GetObjectRequest; +import software.amazon.awssdk.services.s3.model.GetObjectResponse; +import software.amazon.awssdk.services.s3.model.HeadObjectRequest; +import software.amazon.awssdk.services.s3.model.HeadObjectResponse; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.Path; import org.junit.Test; + import java.io.IOException; -import java.util.Date; +import java.io.InputStream; +import java.time.Instant; import static org.junit.Assert.assertEquals; @@ -40,10 +45,10 @@ import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; /** - * Uses mocks to check that the {@link S3ObjectInputStream} is closed when - * {@link org.apache.hadoop.fs.CanUnbuffer#unbuffer} is called. Unlike the - * other unbuffer tests, this specifically tests that the underlying S3 object - * stream is closed. + * Uses mocks to check that the {@link ResponseInputStream} is + * closed when {@link org.apache.hadoop.fs.CanUnbuffer#unbuffer} is called. + * Unlike the other unbuffer tests, this specifically tests that the underlying + * S3 object stream is closed. 
*/ public class TestS3AUnbuffer extends AbstractS3AMockTest { @@ -51,22 +56,27 @@ public class TestS3AUnbuffer extends AbstractS3AMockTest { public void testUnbuffer() throws IOException { // Create mock ObjectMetadata for getFileStatus() Path path = new Path("/file"); - ObjectMetadata meta = mock(ObjectMetadata.class); - when(meta.getContentLength()).thenReturn(1L); - when(meta.getLastModified()).thenReturn(new Date(2L)); - when(meta.getETag()).thenReturn("mock-etag"); - when(s3.getObjectMetadata(any())).thenReturn(meta); + HeadObjectResponse objectMetadata = HeadObjectResponse.builder() + .contentLength(1L) + .lastModified(Instant.ofEpochMilli(2L)) + .eTag("mock-etag") + .build(); + when(s3.headObject((HeadObjectRequest) any())).thenReturn(objectMetadata); - // Create mock S3ObjectInputStream and S3Object for open() - S3ObjectInputStream objectStream = mock(S3ObjectInputStream.class); + // Create mock ResponseInputStream and GetObjectResponse for open() + GetObjectResponse objectResponse = GetObjectResponse.builder() + .contentLength(1L) + .lastModified(Instant.ofEpochMilli(2L)) + .eTag("mock-etag") + .build(); + InputStream objectStream = mock(InputStream.class); when(objectStream.read()).thenReturn(-1); when(objectStream.read(any(byte[].class))).thenReturn(-1); when(objectStream.read(any(byte[].class), anyInt(), anyInt())).thenReturn(-1); - - S3Object s3Object = mock(S3Object.class); - when(s3Object.getObjectContent()).thenReturn(objectStream); - when(s3Object.getObjectMetadata()).thenReturn(meta); - when(s3.getObject(any())).thenReturn(s3Object); + ResponseInputStream getObjectResponseInputStream = + new ResponseInputStream(objectResponse, + AbortableInputStream.create(objectStream, () -> {})); + when(s3.getObject((GetObjectRequest) any())).thenReturn(getObjectResponseInputStream); // Call read and then unbuffer FSDataInputStream stream = fs.open(path); diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestStreamChangeTracker.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestStreamChangeTracker.java index 42de7cdffc8..66d9032e858 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestStreamChangeTracker.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestStreamChangeTracker.java @@ -18,18 +18,18 @@ package org.apache.hadoop.fs.s3a; -import com.amazonaws.AmazonServiceException; -import com.amazonaws.SdkBaseException; -import com.amazonaws.services.s3.Headers; -import com.amazonaws.services.s3.model.CopyObjectRequest; -import com.amazonaws.services.s3.model.GetObjectRequest; -import com.amazonaws.services.s3.model.ObjectMetadata; -import com.amazonaws.services.s3.model.S3Object; -import com.amazonaws.services.s3.transfer.model.CopyResult; +import software.amazon.awssdk.awscore.exception.AwsServiceException; +import software.amazon.awssdk.core.exception.SdkException; +import software.amazon.awssdk.services.s3.model.CopyObjectRequest; +import software.amazon.awssdk.services.s3.model.CopyObjectResponse; +import software.amazon.awssdk.services.s3.model.CopyObjectResult; +import software.amazon.awssdk.services.s3.model.GetObjectRequest; +import software.amazon.awssdk.services.s3.model.GetObjectResponse; import org.junit.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; + import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.PathIOException; import org.apache.hadoop.fs.s3a.impl.ChangeDetectionPolicy; @@ -40,6 +40,7 @@ import org.apache.hadoop.test.HadoopTestBase; import 
static org.apache.hadoop.fs.s3a.impl.ChangeDetectionPolicy.CHANGE_DETECTED; import static org.apache.hadoop.fs.s3a.impl.ChangeDetectionPolicy.createPolicy; import static org.apache.hadoop.fs.s3a.impl.ChangeTracker.CHANGE_REPORTED_BY_S3; +import static org.apache.hadoop.fs.s3a.impl.InternalConstants.SC_412_PRECONDITION_FAILED; import static org.apache.hadoop.test.LambdaTestUtils.intercept; /** @@ -68,7 +69,7 @@ public class TestStreamChangeTracker extends HadoopTestBase { ChangeDetectionPolicy.Source.VersionId, false); assertFalse("Tracker should not have applied contraints " + tracker, - tracker.maybeApplyConstraint(newGetObjectRequest())); + tracker.maybeApplyConstraint(newGetObjectRequestBuilder())); tracker.processResponse( newResponse(null, null), "", 0); @@ -96,7 +97,7 @@ public class TestStreamChangeTracker extends HadoopTestBase { ChangeDetectionPolicy.Source.ETag, false); assertFalse("Tracker should not have applied constraints " + tracker, - tracker.maybeApplyConstraint(newGetObjectRequest())); + tracker.maybeApplyConstraint(newGetObjectRequestBuilder())); tracker.processResponse( newResponse("e1", null), "", 0); @@ -122,13 +123,13 @@ public class TestStreamChangeTracker extends HadoopTestBase { ChangeDetectionPolicy.Source.VersionId, false); assertFalse("Tracker should not have applied constraints " + tracker, - tracker.maybeApplyConstraint(newGetObjectRequest())); + tracker.maybeApplyConstraint(newGetObjectRequestBuilder())); tracker.processResponse( newResponse(null, "rev1"), "", 0); assertTrackerMismatchCount(tracker, 0); assertRevisionId(tracker, "rev1"); - GetObjectRequest request = newGetObjectRequest(); + GetObjectRequest request = newGetObjectRequestBuilder().build(); expectChangeException(tracker, newResponse(null, "rev2"), "change detected"); // mismatch was noted (so gets to FS stats) @@ -149,14 +150,14 @@ public class TestStreamChangeTracker extends HadoopTestBase { ChangeDetectionPolicy.Source.VersionId, false); assertFalse("Tracker should not have applied contraints " + tracker, - tracker.maybeApplyConstraint(newGetObjectRequest())); + tracker.maybeApplyConstraint(newGetObjectRequestBuilder())); tracker.processResponse( newResponse(null, "rev1"), "", 0); assertTrackerMismatchCount(tracker, 0); assertRevisionId(tracker, "rev1"); - GetObjectRequest request = newGetObjectRequest(); - assertConstraintApplied(tracker, request); + GetObjectRequest.Builder builder = newGetObjectRequestBuilder(); + assertConstraintApplied(tracker, builder); // now, the tracker expects a null response expectChangeException(tracker, null, CHANGE_REPORTED_BY_S3); assertTrackerMismatchCount(tracker, 1); @@ -249,31 +250,33 @@ public class TestStreamChangeTracker extends HadoopTestBase { // 412 is translated to RemoteFileChangedException // note: this scenario is never currently hit due to // https://github.com/aws/aws-sdk-java/issues/1644 - AmazonServiceException awsException = - new AmazonServiceException("aws exception"); - awsException.setStatusCode(ChangeTracker.SC_PRECONDITION_FAILED); + AwsServiceException awsException = + AwsServiceException.builder() + .message("aws exception") + .statusCode(SC_412_PRECONDITION_FAILED) + .build(); expectChangeException(tracker, awsException, "copy", RemoteFileChangedException.PRECONDITIONS_FAILED); // processing another type of exception does nothing - tracker.processException(new SdkBaseException("foo"), "copy"); + tracker.processException(SdkException.builder().message("foo").build(), "copy"); } protected void assertConstraintApplied(final 
ChangeTracker tracker, - final GetObjectRequest request) { + final GetObjectRequest.Builder builder) { assertTrue("Tracker should have applied contraints " + tracker, - tracker.maybeApplyConstraint(request)); + tracker.maybeApplyConstraint(builder)); } protected void assertConstraintApplied(final ChangeTracker tracker, - final CopyObjectRequest request) throws PathIOException { + final CopyObjectRequest.Builder requestBuilder) throws PathIOException { assertTrue("Tracker should have applied contraints " + tracker, - tracker.maybeApplyConstraint(request)); + tracker.maybeApplyConstraint(requestBuilder)); } protected RemoteFileChangedException expectChangeException( final ChangeTracker tracker, - final S3Object response, + final GetObjectResponse response, final String message) throws Exception { return expectException(tracker, response, message, RemoteFileChangedException.class); @@ -281,7 +284,7 @@ public class TestStreamChangeTracker extends HadoopTestBase { protected RemoteFileChangedException expectChangeException( final ChangeTracker tracker, - final SdkBaseException exception, + final SdkException exception, final String operation, final String message) throws Exception { return expectException(tracker, exception, operation, message, @@ -290,7 +293,7 @@ public class TestStreamChangeTracker extends HadoopTestBase { protected PathIOException expectNoVersionAttributeException( final ChangeTracker tracker, - final S3Object response, + final GetObjectResponse response, final String message) throws Exception { return expectException(tracker, response, message, NoVersionAttributeException.class); @@ -298,7 +301,7 @@ public class TestStreamChangeTracker extends HadoopTestBase { protected PathIOException expectNoVersionAttributeException( final ChangeTracker tracker, - final CopyResult response, + final CopyObjectResponse response, final String message) throws Exception { return expectException(tracker, response, message, NoVersionAttributeException.class); @@ -306,7 +309,7 @@ public class TestStreamChangeTracker extends HadoopTestBase { protected T expectException( final ChangeTracker tracker, - final S3Object response, + final GetObjectResponse response, final String message, final Class clazz) throws Exception { return intercept( @@ -320,7 +323,7 @@ public class TestStreamChangeTracker extends HadoopTestBase { protected T expectException( final ChangeTracker tracker, - final CopyResult response, + final CopyObjectResponse response, final String message, final Class clazz) throws Exception { return intercept( @@ -334,7 +337,7 @@ public class TestStreamChangeTracker extends HadoopTestBase { protected T expectException( final ChangeTracker tracker, - final SdkBaseException exception, + final SdkException exception, final String operation, final String message, final Class clazz) throws Exception { @@ -389,48 +392,36 @@ public class TestStreamChangeTracker extends HadoopTestBase { if (objectAttributes.getVersionId() == null && objectAttributes.getETag() == null) { assertFalse("Tracker should not have applied constraints " + tracker, - tracker.maybeApplyConstraint(newGetObjectRequest())); + tracker.maybeApplyConstraint(newGetObjectRequestBuilder())); } return tracker; } - private GetObjectRequest newGetObjectRequest() { - return new GetObjectRequest(BUCKET, OBJECT); + private GetObjectRequest.Builder newGetObjectRequestBuilder() { + return GetObjectRequest.builder().bucket(BUCKET).key(OBJECT); } - private CopyObjectRequest newCopyObjectRequest() { - return new CopyObjectRequest(BUCKET, OBJECT, 
BUCKET, DEST_OBJECT); + private CopyObjectRequest.Builder newCopyObjectRequest() { + return CopyObjectRequest.builder().sourceBucket(BUCKET).sourceKey(OBJECT) + .destinationBucket(BUCKET).destinationKey(DEST_OBJECT); } - private CopyResult newCopyResult(String eTag, String versionId) { - CopyResult copyResult = new CopyResult(); - copyResult.setSourceBucketName(BUCKET); - copyResult.setSourceKey(OBJECT); - copyResult.setDestinationBucketName(BUCKET); - copyResult.setDestinationKey(DEST_OBJECT); - copyResult.setETag(eTag); - copyResult.setVersionId(versionId); - return copyResult; + private CopyObjectResponse newCopyResult(String eTag, String versionId) { + CopyObjectResponse.Builder copyObjectResponseBuilder = CopyObjectResponse.builder(); + + return copyObjectResponseBuilder.versionId(versionId) + .copyObjectResult(CopyObjectResult.builder().eTag(eTag).build()).build(); } - private S3Object newResponse(String etag, String versionId) { - ObjectMetadata md = new ObjectMetadata(); + private GetObjectResponse newResponse(String etag, String versionId) { + GetObjectResponse.Builder builder = GetObjectResponse.builder(); if (etag != null) { - md.setHeader(Headers.ETAG, etag); + builder.eTag(etag); } if (versionId != null) { - md.setHeader(Headers.S3_VERSION_ID, versionId); + builder.versionId(versionId); } - S3Object response = emptyResponse(); - response.setObjectMetadata(md); - return response; - } - - private S3Object emptyResponse() { - S3Object response = new S3Object(); - response.setBucketName(BUCKET); - response.setKey(OBJECT); - return response; + return builder.build(); } private S3ObjectAttributes objectAttributes( diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestWildflyAndOpenSSLBinding.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestWildflyAndOpenSSLBinding.java index a2b013f468a..9e903fd85ff 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestWildflyAndOpenSSLBinding.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestWildflyAndOpenSSLBinding.java @@ -20,10 +20,9 @@ package org.apache.hadoop.fs.s3a; import java.io.IOException; -import com.amazonaws.ClientConfiguration; -import com.amazonaws.Protocol; import org.junit.Before; import org.junit.Test; +import software.amazon.awssdk.http.apache.ApacheHttpClient; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.security.ssl.DelegatingSSLSocketFactory; @@ -74,7 +73,7 @@ public class TestWildflyAndOpenSSLBinding extends AbstractHadoopTestBase { Configuration conf = new Configuration(false); conf.set(SSL_CHANNEL_MODE, "no-such-mode "); intercept(IllegalArgumentException.class, () -> - bindSSLChannelMode(conf, new ClientConfiguration())); + bindSSLChannelMode(conf, ApacheHttpClient.builder())); } @Test @@ -143,9 +142,7 @@ public class TestWildflyAndOpenSSLBinding extends AbstractHadoopTestBase { DelegatingSSLSocketFactory.resetDefaultFactory(); Configuration conf = new Configuration(false); conf.set(SSL_CHANNEL_MODE, channelMode.name()); - ClientConfiguration awsConf = new ClientConfiguration(); - awsConf.setProtocol(Protocol.HTTPS); - bindSSLChannelMode(conf, awsConf); + bindSSLChannelMode(conf, ApacheHttpClient.builder()); return DelegatingSSLSocketFactory.getDefaultFactory().getChannelMode(); } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/audit/AbstractAuditingTest.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/audit/AbstractAuditingTest.java index 
298c1444bb9..5c33f19270e 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/audit/AbstractAuditingTest.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/audit/AbstractAuditingTest.java @@ -21,13 +21,24 @@ package org.apache.hadoop.fs.s3a.audit; import java.io.IOException; import java.util.Arrays; import java.util.List; +import java.net.URI; +import java.util.ArrayList; +import java.util.HashMap; import java.util.Map; import java.util.function.Consumer; import java.util.stream.Collectors; -import com.amazonaws.services.s3.model.DeleteObjectsRequest; -import com.amazonaws.services.s3.model.GetObjectMetadataRequest; -import com.amazonaws.services.s3.model.GetObjectRequest; + +import software.amazon.awssdk.awscore.AwsExecutionAttribute; +import software.amazon.awssdk.core.interceptor.ExecutionAttributes; +import software.amazon.awssdk.core.interceptor.InterceptorContext; +import software.amazon.awssdk.http.SdkHttpMethod; +import software.amazon.awssdk.http.SdkHttpRequest; +import software.amazon.awssdk.services.s3.model.DeleteObjectsRequest; +import software.amazon.awssdk.services.s3.model.GetObjectRequest; +import software.amazon.awssdk.services.s3.model.HeadObjectRequest; +import software.amazon.awssdk.services.s3.model.ObjectIdentifier; + import org.junit.After; import org.junit.Before; import org.slf4j.Logger; @@ -137,22 +148,58 @@ public abstract class AbstractAuditingTest extends AbstractHadoopTestBase { /** * Create a head request and pass it through the manager's beforeExecution() * callback. + * * @return a processed request. */ - protected GetObjectMetadataRequest head() { - return manager.beforeExecution( - requestFactory.newGetObjectMetadataRequest("/")); + protected SdkHttpRequest head() { + HeadObjectRequest.Builder headObjectRequestBuilder = + requestFactory.newHeadObjectRequestBuilder("/"); + manager.requestCreated(headObjectRequestBuilder); + HeadObjectRequest headObjectRequest = headObjectRequestBuilder.build(); + ExecutionAttributes executionAttributes = ExecutionAttributes.builder().build(); + InterceptorContext context = InterceptorContext.builder() + .request(headObjectRequest) + .httpRequest(SdkHttpRequest.builder() + .uri(URI.create("https://test")) + .method(SdkHttpMethod.HEAD) + .build()) + .build(); + manager.beforeExecution(context, executionAttributes); + return manager.modifyHttpRequest(context, executionAttributes); } /** - * Create a GetObject request and modify it before passing it through auditor. - * @param modifyRequest Consumer Interface for changing the request before passing to the auditor - * @return the request + * Create a get request and pass it through the manager's beforeExecution() + * callback. + * + * @return a processed request. 
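Illustrative aside, not part of the patch: these helpers capture the SdkHttpRequest after the audit manager's interceptor hooks have run; in SDK v2 a header such as the referrer is attached by returning a copied request from modifyHttpRequest. A rough sketch of that hook, with a placeholder header value:

    import software.amazon.awssdk.core.interceptor.Context;
    import software.amazon.awssdk.core.interceptor.ExecutionAttributes;
    import software.amazon.awssdk.core.interceptor.ExecutionInterceptor;
    import software.amazon.awssdk.http.SdkHttpRequest;

    class ReferrerHeaderSketch implements ExecutionInterceptor {
      @Override
      public SdkHttpRequest modifyHttpRequest(Context.ModifyHttpRequest context,
          ExecutionAttributes executionAttributes) {
        // SdkHttpRequest is immutable; copy it and add the audit header.
        // "https://audit.example/span" is a placeholder referrer value.
        return context.httpRequest().toBuilder()
            .putHeader("Referer", "https://audit.example/span")
            .build();
      }
    }
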
*/ - protected GetObjectRequest get(Consumer modifyRequest) { - GetObjectRequest req = requestFactory.newGetObjectRequest("/"); - modifyRequest.accept(req); - return manager.beforeExecution(req); + protected SdkHttpRequest get(String range) { + GetObjectRequest.Builder getObjectRequestBuilder = + requestFactory.newGetObjectRequestBuilder("/"); + + SdkHttpRequest.Builder httpRequestBuilder = + SdkHttpRequest.builder().uri(URI.create("https://test")).method(SdkHttpMethod.GET); + + if (!range.isEmpty()) { + getObjectRequestBuilder.range(range); + List rangeHeader = new ArrayList<>(); + rangeHeader.add(range); + Map> headers = new HashMap<>(); + headers.put("Range", rangeHeader); + httpRequestBuilder.headers(headers); + } + + manager.requestCreated(getObjectRequestBuilder); + GetObjectRequest getObjectRequest = getObjectRequestBuilder.build(); + ExecutionAttributes executionAttributes = ExecutionAttributes.builder().build().putAttribute( + AwsExecutionAttribute.OPERATION_NAME, "GetObject"); + InterceptorContext context = InterceptorContext.builder() + .request(getObjectRequest) + .httpRequest(httpRequestBuilder.build()) + .build(); + manager.beforeExecution(context, executionAttributes); + return manager.modifyHttpRequest(context, executionAttributes); } /** @@ -244,15 +291,31 @@ public abstract class AbstractAuditingTest extends AbstractHadoopTestBase { * @param keys keys to be provided in the bulk delete request. * @return a processed request. */ - protected DeleteObjectsRequest headForBulkDelete(String... keys) { + protected SdkHttpRequest headForBulkDelete(String... keys) { if (keys == null || keys.length == 0) { return null; } - List keysToDelete = Arrays + + List keysToDelete = Arrays .stream(keys) - .map(DeleteObjectsRequest.KeyVersion::new) + .map(key -> ObjectIdentifier.builder().key(key).build()) .collect(Collectors.toList()); - return manager.beforeExecution(requestFactory.newBulkDeleteRequest(keysToDelete)); + + ExecutionAttributes executionAttributes = ExecutionAttributes.builder().build(); + + SdkHttpRequest.Builder httpRequestBuilder = + SdkHttpRequest.builder().uri(URI.create("https://test")).method(SdkHttpMethod.POST); + + DeleteObjectsRequest deleteObjectsRequest = + requestFactory.newBulkDeleteRequestBuilder(keysToDelete).build(); + + InterceptorContext context = InterceptorContext.builder() + .request(deleteObjectsRequest) + .httpRequest(httpRequestBuilder.build()) + .build(); + + manager.beforeExecution(context, executionAttributes); + return manager.modifyHttpRequest(context, executionAttributes); } } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/audit/AuditTestSupport.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/audit/AuditTestSupport.java index ad72d75081b..1520e588e54 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/audit/AuditTestSupport.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/audit/AuditTestSupport.java @@ -30,7 +30,7 @@ import static org.apache.hadoop.fs.s3a.Statistic.AUDIT_FAILURE; import static org.apache.hadoop.fs.s3a.Statistic.AUDIT_REQUEST_EXECUTION; import static org.apache.hadoop.fs.s3a.Statistic.AUDIT_SPAN_CREATION; import static org.apache.hadoop.fs.s3a.audit.S3AAuditConstants.AUDIT_ENABLED; -import static org.apache.hadoop.fs.s3a.audit.S3AAuditConstants.AUDIT_REQUEST_HANDLERS; +import static org.apache.hadoop.fs.s3a.audit.S3AAuditConstants.AUDIT_EXECUTION_INTERCEPTORS; import static 
org.apache.hadoop.fs.s3a.audit.S3AAuditConstants.AUDIT_SERVICE_CLASSNAME; import static org.apache.hadoop.fs.s3a.audit.S3AAuditConstants.LOGGING_AUDIT_SERVICE; import static org.apache.hadoop.fs.s3a.audit.S3AAuditConstants.NOOP_AUDIT_SERVICE; @@ -119,7 +119,7 @@ public final class AuditTestSupport { S3ATestUtils.removeBaseAndBucketOverrides(conf, REFERRER_HEADER_ENABLED, REJECT_OUT_OF_SPAN_OPERATIONS, - AUDIT_REQUEST_HANDLERS, + AUDIT_EXECUTION_INTERCEPTORS, AUDIT_SERVICE_CLASSNAME, AUDIT_ENABLED); return conf; diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/audit/ITestAuditManager.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/audit/ITestAuditManager.java index 9e6d82ce6ac..bd60165ebe4 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/audit/ITestAuditManager.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/audit/ITestAuditManager.java @@ -33,7 +33,7 @@ import static org.apache.hadoop.fs.s3a.Statistic.AUDIT_FAILURE; import static org.apache.hadoop.fs.s3a.Statistic.AUDIT_REQUEST_EXECUTION; import static org.apache.hadoop.fs.s3a.audit.AuditTestSupport.enableLoggingAuditor; import static org.apache.hadoop.fs.s3a.audit.AuditTestSupport.resetAuditOptions; -import static org.apache.hadoop.fs.s3a.audit.S3AAuditConstants.AUDIT_REQUEST_HANDLERS; +import static org.apache.hadoop.fs.s3a.audit.S3AAuditConstants.AUDIT_EXECUTION_INTERCEPTORS; import static org.apache.hadoop.fs.s3a.audit.S3AAuditConstants.UNAUDITED_OPERATION; import static org.apache.hadoop.fs.statistics.IOStatisticAssertions.assertThatStatisticCounter; import static org.apache.hadoop.fs.statistics.IOStatisticAssertions.lookupCounterStatistic; @@ -57,8 +57,8 @@ public class ITestAuditManager extends AbstractS3ACostTest { Configuration conf = super.createConfiguration(); resetAuditOptions(conf); enableLoggingAuditor(conf); - conf.set(AUDIT_REQUEST_HANDLERS, - SimpleAWSRequestHandler.CLASS); + conf.set(AUDIT_EXECUTION_INTERCEPTORS, + SimpleAWSExecutionInterceptor.CLASS); return conf; } @@ -117,14 +117,14 @@ public class ITestAuditManager extends AbstractS3ACostTest { public void testRequestHandlerBinding() throws Throwable { describe("Verify that extra request handlers can be added and that they" + " will be invoked during request execution"); - final long baseCount = SimpleAWSRequestHandler.getInvocationCount(); + final long baseCount = SimpleAWSExecutionInterceptor.getInvocationCount(); final S3AFileSystem fs = getFileSystem(); final long exec0 = lookupCounterStatistic(iostats(), AUDIT_REQUEST_EXECUTION.getSymbol()); // API call to a known path, `getBucketLocation()` does not always result in an API call. 
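Illustrative aside, not part of the patch: interceptors such as the one configured above are attached to a v2 client through its override configuration, the counterpart of v1's addRequestHandler. A minimal sketch with a hypothetical no-op counting interceptor:

    import software.amazon.awssdk.core.client.config.ClientOverrideConfiguration;
    import software.amazon.awssdk.core.interceptor.Context;
    import software.amazon.awssdk.core.interceptor.ExecutionAttributes;
    import software.amazon.awssdk.core.interceptor.ExecutionInterceptor;
    import software.amazon.awssdk.services.s3.S3Client;

    class InterceptorWiringSketch {
      static S3Client buildClient() {
        ExecutionInterceptor countingInterceptor = new ExecutionInterceptor() {
          @Override
          public void beforeExecution(Context.BeforeExecution context,
              ExecutionAttributes executionAttributes) {
            // invoked once per request; a real auditor would record the span here
          }
        };
        return S3Client.builder()
            .overrideConfiguration(ClientOverrideConfiguration.builder()
                .addExecutionInterceptor(countingInterceptor)
                .build())
            .build();
      }
    }

The audit manager relies on the same mechanism: interceptor class names listed under AUDIT_EXECUTION_INTERCEPTORS are loaded and registered alongside the logging auditor's own interceptor.
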
fs.listStatus(path("/")); // which MUST have ended up calling the extension request handler - Assertions.assertThat(SimpleAWSRequestHandler.getInvocationCount()) + Assertions.assertThat(SimpleAWSExecutionInterceptor.getInvocationCount()) .describedAs("Invocation count of plugged in request handler") .isGreaterThan(baseCount); assertThatStatisticCounter(iostats(), AUDIT_REQUEST_EXECUTION.getSymbol()) diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/audit/SimpleAWSRequestHandler.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/audit/SimpleAWSExecutionInterceptor.java similarity index 68% rename from hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/audit/SimpleAWSRequestHandler.java rename to hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/audit/SimpleAWSExecutionInterceptor.java index 6f5a0445a92..8014b051873 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/audit/SimpleAWSRequestHandler.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/audit/SimpleAWSExecutionInterceptor.java @@ -20,28 +20,28 @@ package org.apache.hadoop.fs.s3a.audit; import java.util.concurrent.atomic.AtomicLong; -import com.amazonaws.AmazonWebServiceRequest; -import com.amazonaws.handlers.RequestHandler2; +import software.amazon.awssdk.core.interceptor.Context; +import software.amazon.awssdk.core.interceptor.ExecutionAttributes; +import software.amazon.awssdk.core.interceptor.ExecutionInterceptor; /** - * Simple AWS handler to verify dynamic loading of extra request - * handlers during auditing setup. + * Simple AWS interceptor to verify dynamic loading of extra + * execution interceptors during auditing setup. * The invocation counter tracks the count of calls to - * {@link #beforeExecution(AmazonWebServiceRequest)}. + * {@link #beforeExecution}. */ -public final class SimpleAWSRequestHandler extends RequestHandler2 { +public final class SimpleAWSExecutionInterceptor implements ExecutionInterceptor { public static final String CLASS - = "org.apache.hadoop.fs.s3a.audit.SimpleAWSRequestHandler"; + = "org.apache.hadoop.fs.s3a.audit.SimpleAWSExecutionInterceptor"; /** Count of invocations. 
*/ private static final AtomicLong INVOCATIONS = new AtomicLong(0); @Override - public AmazonWebServiceRequest beforeExecution( - final AmazonWebServiceRequest request) { + public void beforeExecution(Context.BeforeExecution context, + ExecutionAttributes executionAttributes) { INVOCATIONS.incrementAndGet(); - return request; } /** diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/audit/TestAuditIntegration.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/audit/TestAuditIntegration.java index 7cdab4c4b75..4f476604332 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/audit/TestAuditIntegration.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/audit/TestAuditIntegration.java @@ -22,9 +22,12 @@ import java.io.IOException; import java.nio.file.AccessDeniedException; import java.util.List; -import com.amazonaws.DefaultRequest; -import com.amazonaws.handlers.RequestHandler2; -import com.amazonaws.services.s3.model.GetObjectMetadataRequest; +import software.amazon.awssdk.core.interceptor.ExecutionAttributes; +import software.amazon.awssdk.core.interceptor.ExecutionInterceptor; +import software.amazon.awssdk.core.interceptor.InterceptorContext; +import software.amazon.awssdk.http.SdkHttpMethod; +import software.amazon.awssdk.http.SdkHttpRequest; +import software.amazon.awssdk.services.s3.model.HeadObjectRequest; import org.assertj.core.api.Assertions; import org.junit.Test; @@ -39,13 +42,15 @@ import org.apache.hadoop.io.retry.RetryPolicy; import org.apache.hadoop.service.Service; import org.apache.hadoop.test.AbstractHadoopTestBase; + import static org.apache.hadoop.fs.s3a.S3AUtils.translateException; import static org.apache.hadoop.fs.s3a.audit.AuditIntegration.attachSpanToRequest; import static org.apache.hadoop.fs.s3a.audit.AuditIntegration.retrieveAttachedSpan; import static org.apache.hadoop.fs.s3a.audit.AuditTestSupport.createIOStatisticsStoreForAuditing; import static org.apache.hadoop.fs.s3a.audit.AuditTestSupport.noopAuditConfig; -import static org.apache.hadoop.fs.s3a.audit.S3AAuditConstants.AUDIT_REQUEST_HANDLERS; +import static org.apache.hadoop.fs.s3a.audit.S3AAuditConstants.AUDIT_EXECUTION_INTERCEPTORS; import static org.apache.hadoop.fs.s3a.audit.S3AAuditConstants.AUDIT_SERVICE_CLASSNAME; +import static org.apache.hadoop.fs.s3a.audit.impl.S3AInternalAuditConstants.AUDIT_SPAN_EXECUTION_ATTRIBUTE; import static org.apache.hadoop.service.ServiceAssert.assertServiceStateStarted; import static org.apache.hadoop.service.ServiceAssert.assertServiceStateStopped; import static org.apache.hadoop.test.LambdaTestUtils.intercept; @@ -159,30 +164,50 @@ public class TestAuditIntegration extends AbstractHadoopTestBase { } @Test - public void testSingleRequestHandler() throws Throwable { + public void testSingleExecutionInterceptor() throws Throwable { AuditManagerS3A manager = AuditIntegration.createAndStartAuditManager( noopAuditConfig(), ioStatistics); - List handlers - = manager.createRequestHandlers(); - assertThat(handlers) + List interceptors + = manager.createExecutionInterceptors(); + assertThat(interceptors) .hasSize(1); - RequestHandler2 handler = handlers.get(0); + ExecutionInterceptor interceptor = interceptors.get(0); + RequestFactory requestFactory = RequestFactoryImpl.builder() .withBucket("bucket") .build(); + HeadObjectRequest.Builder requestBuilder = + requestFactory.newHeadObjectRequestBuilder("/"); + + assertThat(interceptor instanceof AWSAuditEventCallbacks).isTrue(); + 
((AWSAuditEventCallbacks)interceptor).requestCreated(requestBuilder); + + HeadObjectRequest request = requestBuilder.build(); + SdkHttpRequest httpRequest = SdkHttpRequest.builder() + .protocol("https") + .host("test") + .method(SdkHttpMethod.HEAD) + .build(); + + ExecutionAttributes attributes = ExecutionAttributes.builder().build(); + InterceptorContext context = InterceptorContext.builder() + .request(request) + .httpRequest(httpRequest) + .build(); + // test the basic pre-request sequence while avoiding // the complexity of recreating the full sequence // (and probably getting it wrong) - GetObjectMetadataRequest r - = requestFactory.newGetObjectMetadataRequest("/"); - DefaultRequest dr = new DefaultRequest(r, "S3"); - assertThat(handler.beforeMarshalling(r)) - .isNotNull(); - assertThat(handler.beforeExecution(r)) - .isNotNull(); - handler.beforeRequest(dr); - + interceptor.beforeExecution(context, attributes); + interceptor.modifyRequest(context, attributes); + interceptor.beforeMarshalling(context, attributes); + interceptor.afterMarshalling(context, attributes); + interceptor.modifyHttpRequest(context, attributes); + interceptor.beforeTransmission(context, attributes); + AuditSpanS3A span = attributes.getAttribute(AUDIT_SPAN_EXECUTION_ATTRIBUTE); + assertThat(span).isNotNull(); + assertThat(span.isValidSpan()).isFalse(); } /** @@ -192,14 +217,14 @@ public class TestAuditIntegration extends AbstractHadoopTestBase { public void testRequestHandlerLoading() throws Throwable { Configuration conf = noopAuditConfig(); conf.setClassLoader(this.getClass().getClassLoader()); - conf.set(AUDIT_REQUEST_HANDLERS, - SimpleAWSRequestHandler.CLASS); + conf.set(AUDIT_EXECUTION_INTERCEPTORS, + SimpleAWSExecutionInterceptor.CLASS); AuditManagerS3A manager = AuditIntegration.createAndStartAuditManager( conf, ioStatistics); - assertThat(manager.createRequestHandlers()) + assertThat(manager.createExecutionInterceptors()) .hasSize(2) - .hasAtLeastOneElementOfType(SimpleAWSRequestHandler.class); + .hasAtLeastOneElementOfType(SimpleAWSExecutionInterceptor.class); } @Test @@ -216,8 +241,8 @@ public class TestAuditIntegration extends AbstractHadoopTestBase { @Test public void testNoopAuditManager() throws Throwable { AuditManagerS3A manager = AuditIntegration.stubAuditManager(); - assertThat(manager.createStateChangeListener()) - .describedAs("transfer state change listener") + assertThat(manager.createTransferListener()) + .describedAs("transfer listener") .isNotNull(); } @@ -226,11 +251,10 @@ public class TestAuditIntegration extends AbstractHadoopTestBase { AuditManagerS3A manager = AuditIntegration.stubAuditManager(); AuditSpanS3A span = manager.createSpan("op", null, null); - GetObjectMetadataRequest request = - new GetObjectMetadataRequest("bucket", "key"); - attachSpanToRequest(request, span); - AWSAuditEventCallbacks callbacks = retrieveAttachedSpan(request); - assertThat(callbacks).isSameAs(span); + ExecutionAttributes attributes = ExecutionAttributes.builder().build(); + attachSpanToRequest(attributes, span); + AuditSpanS3A retrievedSpan = retrieveAttachedSpan(attributes); + assertThat(retrievedSpan).isSameAs(span); } } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/audit/TestAuditSpanLifecycle.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/audit/TestAuditSpanLifecycle.java index 608667d9dfe..e5e4afc434c 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/audit/TestAuditSpanLifecycle.java +++ 
b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/audit/TestAuditSpanLifecycle.java @@ -20,13 +20,14 @@ package org.apache.hadoop.fs.s3a.audit; import java.util.List; -import com.amazonaws.handlers.RequestHandler2; +import software.amazon.awssdk.core.interceptor.ExecutionInterceptor; import org.junit.Before; import org.junit.Test; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.store.audit.AuditSpan; + import static org.apache.hadoop.fs.s3a.audit.AuditTestSupport.noopAuditConfig; import static org.assertj.core.api.Assertions.assertThat; @@ -56,10 +57,10 @@ public class TestAuditSpanLifecycle extends AbstractAuditingTest { } @Test - public void testCreateRequestHandlers() throws Throwable { - List handlers - = getManager().createRequestHandlers(); - assertThat(handlers).isNotEmpty(); + public void testCreateExecutionInterceptors() throws Throwable { + List interceptors + = getManager().createExecutionInterceptors(); + assertThat(interceptors).isNotEmpty(); } @Test diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/audit/TestHttpReferrerAuditHeader.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/audit/TestHttpReferrerAuditHeader.java index b772e6dfc06..7f8dd043261 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/audit/TestHttpReferrerAuditHeader.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/audit/TestHttpReferrerAuditHeader.java @@ -20,12 +20,11 @@ package org.apache.hadoop.fs.s3a.audit; import java.io.IOException; import java.net.URISyntaxException; +import java.util.List; import java.util.Map; import java.util.regex.Matcher; -import com.amazonaws.services.s3.model.DeleteObjectsRequest; -import com.amazonaws.services.s3.model.GetObjectMetadataRequest; -import com.amazonaws.services.s3.model.GetObjectRequest; +import software.amazon.awssdk.http.SdkHttpRequest; import org.junit.Before; import org.junit.Test; import org.slf4j.Logger; @@ -38,6 +37,7 @@ import org.apache.hadoop.fs.audit.CommonAuditContext; import org.apache.hadoop.fs.store.audit.HttpReferrerAuditHeader; import org.apache.hadoop.security.UserGroupInformation; + import static org.apache.hadoop.fs.audit.AuditConstants.DELETE_KEYS_SIZE; import static org.apache.hadoop.fs.s3a.audit.AuditTestSupport.loggingAuditConfig; import static org.apache.hadoop.fs.s3a.audit.S3AAuditConstants.REFERRER_HEADER_FILTER; @@ -97,13 +97,16 @@ public class TestHttpReferrerAuditHeader extends AbstractAuditingTest { public void testHttpReferrerPatchesTheRequest() throws Throwable { AuditSpan span = span(); long ts = span.getTimestamp(); - GetObjectMetadataRequest request = head(); - Map headers - = request.getCustomRequestHeaders(); + SdkHttpRequest request = head(); + Map> headers = request.headers(); assertThat(headers) .describedAs("Custom headers") .containsKey(HEADER_REFERRER); - String header = headers.get(HEADER_REFERRER); + List headerValues = headers.get(HEADER_REFERRER); + assertThat(headerValues) + .describedAs("Multiple referrer headers") + .hasSize(1); + String header = headerValues.get(0); LOG.info("Header is {}", header); Map params = HttpReferrerAuditHeader.extractQueryParameters(header); @@ -305,13 +308,16 @@ public class TestHttpReferrerAuditHeader extends AbstractAuditingTest { @Test public void testGetObjectRange() throws Throwable { AuditSpan span = span(); - GetObjectRequest request = get(getObjectRequest -> getObjectRequest.setRange(100, 200)); - Map headers - = 
request.getCustomRequestHeaders(); + SdkHttpRequest request = get("bytes=100-200"); + Map> headers = request.headers(); assertThat(headers) - .describedAs("Custom headers") - .containsKey(HEADER_REFERRER); - String header = headers.get(HEADER_REFERRER); + .describedAs("Custom headers") + .containsKey(HEADER_REFERRER); + List headerValues = headers.get(HEADER_REFERRER); + assertThat(headerValues) + .describedAs("Multiple referrer headers") + .hasSize(1); + String header = headerValues.get(0); LOG.info("Header is {}", header); Map params = HttpReferrerAuditHeader.extractQueryParameters(header); @@ -324,13 +330,16 @@ public class TestHttpReferrerAuditHeader extends AbstractAuditingTest { @Test public void testGetObjectWithoutRange() throws Throwable { AuditSpan span = span(); - GetObjectRequest request = get(getObjectRequest -> {}); - Map headers - = request.getCustomRequestHeaders(); + SdkHttpRequest request = get(""); + Map> headers = request.headers(); assertThat(headers) .describedAs("Custom headers") .containsKey(HEADER_REFERRER); - String header = headers.get(HEADER_REFERRER); + List headerValues = headers.get(HEADER_REFERRER); + assertThat(headerValues) + .describedAs("Multiple referrer headers") + .hasSize(1); + String header = headerValues.get(0); LOG.info("Header is {}", header); Map params = HttpReferrerAuditHeader.extractQueryParameters(header); @@ -341,16 +350,20 @@ public class TestHttpReferrerAuditHeader extends AbstractAuditingTest { public void testHttpReferrerForBulkDelete() throws Throwable { AuditSpan span = span(); long ts = span.getTimestamp(); - DeleteObjectsRequest request = headForBulkDelete( + SdkHttpRequest request = headForBulkDelete( "key_01", "key_02", "key_03"); - Map headers - = request.getCustomRequestHeaders(); + Map> headers + = request.headers(); assertThat(headers) .describedAs("Custom headers") .containsKey(HEADER_REFERRER); - String header = headers.get(HEADER_REFERRER); + List headerValues = headers.get(HEADER_REFERRER); + assertThat(headerValues) + .describedAs("Multiple referrer headers") + .hasSize(1); + String header = headerValues.get(0); LOG.info("Header is {}", header); Map params = HttpReferrerAuditHeader.extractQueryParameters(header); diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/audit/TestLoggingAuditor.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/audit/TestLoggingAuditor.java index 8d37b432acb..0059e5b6c53 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/audit/TestLoggingAuditor.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/audit/TestLoggingAuditor.java @@ -18,9 +18,12 @@ package org.apache.hadoop.fs.s3a.audit; -import com.amazonaws.services.s3.model.CompleteMultipartUploadRequest; -import com.amazonaws.services.s3.model.CopyPartRequest; -import com.amazonaws.services.s3.transfer.internal.TransferStateChangeListener; +import software.amazon.awssdk.core.interceptor.ExecutionAttributes; +import software.amazon.awssdk.core.interceptor.InterceptorContext; +import software.amazon.awssdk.services.s3.model.CompleteMultipartUploadRequest; +import software.amazon.awssdk.services.s3.model.GetBucketLocationRequest; +import software.amazon.awssdk.services.s3.model.UploadPartCopyRequest; +import software.amazon.awssdk.transfer.s3.progress.TransferListener; import org.junit.Before; import org.junit.Test; import org.slf4j.Logger; @@ -30,6 +33,7 @@ import org.apache.hadoop.conf.Configuration; import 
org.apache.hadoop.fs.s3a.audit.impl.LoggingAuditor; import org.apache.hadoop.fs.store.audit.AuditSpan; + import static org.apache.hadoop.fs.s3a.audit.AuditTestSupport.loggingAuditConfig; import static org.assertj.core.api.Assertions.assertThat; @@ -131,8 +135,23 @@ public class TestLoggingAuditor extends AbstractAuditingTest { */ @Test public void testCopyOutsideSpanAllowed() throws Throwable { - getManager().beforeExecution(new CopyPartRequest()); - getManager().beforeExecution(new CompleteMultipartUploadRequest()); + getManager().beforeExecution( + InterceptorContext.builder() + .request(UploadPartCopyRequest.builder().build()) + .build(), + ExecutionAttributes.builder().build()); + getManager().beforeExecution( + InterceptorContext.builder() + .request(GetBucketLocationRequest.builder().build()) + .build(), + ExecutionAttributes.builder().build()); + getManager().beforeExecution( + InterceptorContext.builder() + .request(CompleteMultipartUploadRequest.builder() + .multipartUpload(u -> {}) + .build()) + .build(), + ExecutionAttributes.builder().build()); } /** @@ -141,9 +160,9 @@ public class TestLoggingAuditor extends AbstractAuditingTest { */ @Test public void testTransferStateListenerOutsideSpan() throws Throwable { - TransferStateChangeListener listener - = getManager().createStateChangeListener(); - listener.transferStateChanged(null, null); + TransferListener listener + = getManager().createTransferListener(); + listener.transferInitiated(null); assertHeadUnaudited(); } @@ -158,15 +177,15 @@ public class TestLoggingAuditor extends AbstractAuditingTest { AuditSpan span = span(); // create the listener in the span - TransferStateChangeListener listener - = getManager().createStateChangeListener(); + TransferListener listener + = getManager().createTransferListener(); span.deactivate(); // head calls fail assertHeadUnaudited(); // until the state change switches this thread back to the span - listener.transferStateChanged(null, null); + listener.transferInitiated(null); // which can be probed assertActiveSpan(span); diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/ITestAssumeRole.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/ITestAssumeRole.java index 9fb09b4cede..1c6e00655ac 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/ITestAssumeRole.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/ITestAssumeRole.java @@ -26,14 +26,15 @@ import java.nio.file.AccessDeniedException; import java.util.List; import java.util.stream.IntStream; -import com.amazonaws.auth.AWSCredentials; -import com.amazonaws.services.securitytoken.model.AWSSecurityTokenServiceException; +import software.amazon.awssdk.auth.credentials.AwsCredentials; +import software.amazon.awssdk.services.sts.model.StsException; import com.fasterxml.jackson.core.JsonProcessingException; import org.assertj.core.api.Assertions; import org.junit.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; + import org.apache.commons.lang3.tuple.Pair; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; @@ -57,7 +58,7 @@ import org.apache.hadoop.fs.s3a.statistics.CommitterStatistics; import static org.apache.hadoop.fs.contract.ContractTestUtils.touch; import static org.apache.hadoop.fs.s3a.Constants.*; import static org.apache.hadoop.fs.s3a.S3ATestUtils.*; -import static org.apache.hadoop.fs.s3a.S3AUtils.*; +import static 
org.apache.hadoop.fs.s3a.auth.AwsCredentialListProvider.E_FORBIDDEN_AWS_PROVIDER; import static org.apache.hadoop.fs.s3a.auth.RoleTestUtils.*; import static org.apache.hadoop.fs.s3a.auth.RoleModel.*; import static org.apache.hadoop.fs.s3a.auth.RolePolicies.*; @@ -140,7 +141,6 @@ public class ITestAssumeRole extends AbstractS3ATestBase { } @Test - @SuppressWarnings("deprecation") public void testCreateCredentialProvider() throws IOException { describe("Create the credential provider"); @@ -148,13 +148,12 @@ public class ITestAssumeRole extends AbstractS3ATestBase { try (AssumedRoleCredentialProvider provider = new AssumedRoleCredentialProvider(uri, conf)) { LOG.info("Provider is {}", provider); - AWSCredentials credentials = provider.getCredentials(); + AwsCredentials credentials = provider.resolveCredentials(); assertNotNull("Null credentials from " + provider, credentials); } } @Test - @SuppressWarnings("deprecation") public void testCreateCredentialProviderNoURI() throws IOException { describe("Create the credential provider"); @@ -162,7 +161,7 @@ public class ITestAssumeRole extends AbstractS3ATestBase { try (AssumedRoleCredentialProvider provider = new AssumedRoleCredentialProvider(null, conf)) { LOG.info("Provider is {}", provider); - AWSCredentials credentials = provider.getCredentials(); + AwsCredentials credentials = provider.resolveCredentials(); assertNotNull("Null credentials from " + provider, credentials); } } @@ -172,7 +171,6 @@ public class ITestAssumeRole extends AbstractS3ATestBase { * @return a configuration set to use to the role ARN. * @throws JsonProcessingException problems working with JSON policies. */ - @SuppressWarnings("deprecation") protected Configuration createValidRoleConf() throws JsonProcessingException { String roleARN = getAssumedRoleARN(); @@ -186,11 +184,10 @@ public class ITestAssumeRole extends AbstractS3ATestBase { } @Test - @SuppressWarnings("deprecation") public void testAssumedInvalidRole() throws Throwable { Configuration conf = new Configuration(); conf.set(ASSUMED_ROLE_ARN, ROLE_ARN_EXAMPLE); - interceptClosing(AWSSecurityTokenServiceException.class, + interceptClosing(StsException.class, "", () -> new AssumedRoleCredentialProvider(uri, conf)); } @@ -204,7 +201,6 @@ public class ITestAssumeRole extends AbstractS3ATestBase { } @Test - @SuppressWarnings("deprecation") public void testAssumeRoleNoARN() throws Exception { describe("Attemnpt to create the FS with no ARN"); Configuration conf = createAssumedRoleConfig(); @@ -237,7 +233,6 @@ public class ITestAssumeRole extends AbstractS3ATestBase { } @Test - @SuppressWarnings("deprecation") public void testAssumeRoleCannotAuthAssumedRole() throws Exception { describe("Assert that you can't use assumed roles to auth assumed roles"); @@ -251,7 +246,6 @@ public class ITestAssumeRole extends AbstractS3ATestBase { } @Test - @SuppressWarnings("deprecation") public void testAssumeRoleBadInnerAuth() throws Exception { describe("Try to authenticate with a keypair with spaces"); @@ -267,7 +261,6 @@ public class ITestAssumeRole extends AbstractS3ATestBase { } @Test - @SuppressWarnings("deprecation") public void testAssumeRoleBadInnerAuth2() throws Exception { describe("Try to authenticate with an invalid keypair"); @@ -351,7 +344,6 @@ public class ITestAssumeRole extends AbstractS3ATestBase { } @Test - @SuppressWarnings("deprecation") public void testAssumeRoleUndefined() throws Throwable { describe("Verify that you cannot instantiate the" + " AssumedRoleCredentialProvider without a role ARN"); @@ -363,12 
+355,11 @@ public class ITestAssumeRole extends AbstractS3ATestBase { } @Test - @SuppressWarnings("deprecation") public void testAssumedIllegalDuration() throws Throwable { describe("Expect the constructor to fail if the session is to short"); Configuration conf = new Configuration(); conf.set(ASSUMED_ROLE_SESSION_DURATION, "30s"); - interceptClosing(AWSSecurityTokenServiceException.class, "", + interceptClosing(StsException.class, "", () -> new AssumedRoleCredentialProvider(uri, conf)); } @@ -537,7 +528,6 @@ public class ITestAssumeRole extends AbstractS3ATestBase { * don't break. */ @Test - @SuppressWarnings("deprecation") public void testAssumedRoleRetryHandler() throws Throwable { try(AssumedRoleCredentialProvider provider = new AssumedRoleCredentialProvider(getFileSystem().getUri(), diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/ITestCustomSigner.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/ITestCustomSigner.java index a829d470e7a..cdf89211fd7 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/ITestCustomSigner.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/ITestCustomSigner.java @@ -25,12 +25,12 @@ import java.util.Map; import java.util.Objects; import java.util.concurrent.atomic.AtomicInteger; -import com.amazonaws.SignableRequest; -import com.amazonaws.auth.AWS4Signer; -import com.amazonaws.arn.Arn; -import com.amazonaws.auth.AWSCredentials; -import com.amazonaws.auth.Signer; -import com.amazonaws.services.s3.internal.AWSS3V4Signer; +import software.amazon.awssdk.arns.Arn; +import software.amazon.awssdk.auth.signer.Aws4Signer; +import software.amazon.awssdk.auth.signer.AwsS3V4Signer; +import software.amazon.awssdk.core.interceptor.ExecutionAttributes; +import software.amazon.awssdk.core.signer.Signer; +import software.amazon.awssdk.http.SdkHttpFullRequest; import org.assertj.core.api.Assertions; import org.junit.Test; import org.slf4j.Logger; @@ -183,14 +183,15 @@ public class ITestCustomSigner extends AbstractS3ATestBase { * request because the signature calculated by the service doesn't match * what we sent. * @param request the request to sign. - * @param credentials credentials used to sign the request. + * @param executionAttributes request executionAttributes which contain the credentials. 
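Illustrative aside, not part of the patch: v2 signers are obtained via create() and take their signing region, service name and credentials from ExecutionAttributes rather than from setters, which is why the setServiceName()/setRegionName() calls disappear below. A rough sketch of signing a request directly, with placeholder credentials:

    import software.amazon.awssdk.auth.credentials.AwsBasicCredentials;
    import software.amazon.awssdk.auth.signer.AwsS3V4Signer;
    import software.amazon.awssdk.auth.signer.AwsSignerExecutionAttribute;
    import software.amazon.awssdk.core.interceptor.ExecutionAttributes;
    import software.amazon.awssdk.http.SdkHttpFullRequest;
    import software.amazon.awssdk.regions.Region;

    class SignerAttributesSketch {
      static SdkHttpFullRequest signWithS3V4(SdkHttpFullRequest request) {
        // placeholder keys; a real caller would supply resolved credentials
        ExecutionAttributes attributes = ExecutionAttributes.builder().build()
            .putAttribute(AwsSignerExecutionAttribute.AWS_CREDENTIALS,
                AwsBasicCredentials.create("access-key", "secret-key"))
            .putAttribute(AwsSignerExecutionAttribute.SERVICE_SIGNING_NAME, "s3")
            .putAttribute(AwsSignerExecutionAttribute.SIGNING_REGION, Region.US_EAST_1);
        return AwsS3V4Signer.create().sign(request, attributes);
      }
    }
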
*/ @Override - public void sign(SignableRequest request, AWSCredentials credentials) { + public SdkHttpFullRequest sign(SdkHttpFullRequest request, + ExecutionAttributes executionAttributes) { int c = INVOCATION_COUNT.incrementAndGet(); LOG.info("Signing request #{}", c); - String host = request.getEndpoint().getHost(); + String host = request.host(); String bucketName = parseBucketFromHost(host); try { lastStoreValue = CustomSignerInitializer @@ -199,19 +200,11 @@ public class ITestCustomSigner extends AbstractS3ATestBase { throw new RuntimeException("Failed to get current Ugi", e); } if (bucketName.equals("kms")) { - AWS4Signer realKMSSigner = new AWS4Signer(); - realKMSSigner.setServiceName("kms"); - if (lastStoreValue != null) { - realKMSSigner.setRegionName(lastStoreValue.conf.get(TEST_REGION_KEY)); - } - realKMSSigner.sign(request, credentials); + Aws4Signer realKMSSigner = Aws4Signer.create(); + return realKMSSigner.sign(request, executionAttributes); } else { - AWSS3V4Signer realSigner = new AWSS3V4Signer(); - realSigner.setServiceName("s3"); - if (lastStoreValue != null) { - realSigner.setRegionName(lastStoreValue.conf.get(TEST_REGION_KEY)); - } - realSigner.sign(request, credentials); + AwsS3V4Signer realSigner = AwsS3V4Signer.create(); + return realSigner.sign(request, executionAttributes); } } @@ -235,11 +228,11 @@ public class ITestCustomSigner extends AbstractS3ATestBase { String accessPointName = bucketName.substring(0, bucketName.length() - (accountId.length() + 1)); Arn arn = Arn.builder() - .withAccountId(accountId) - .withPartition("aws") - .withRegion(hostBits[2]) - .withResource("accesspoint" + "/" + accessPointName) - .withService("s3").build(); + .accountId(accountId) + .partition("aws") + .region(hostBits[2]) + .resource("accesspoint" + "/" + accessPointName) + .service("s3").build(); bucketName = arn.toString(); } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/RoleTestUtils.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/RoleTestUtils.java index 37c2dce4e1d..186887d745b 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/RoleTestUtils.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/RoleTestUtils.java @@ -146,7 +146,6 @@ public final class RoleTestUtils { * @param roleARN ARN of role * @return the new configuration */ - @SuppressWarnings("deprecation") public static Configuration newAssumedRoleConfig( final Configuration srcConf, final String roleARN) { diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/TestMarshalledCredentials.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/TestMarshalledCredentials.java index c5ed9dbaac4..b9d547635f7 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/TestMarshalledCredentials.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/TestMarshalledCredentials.java @@ -21,7 +21,7 @@ package org.apache.hadoop.fs.s3a.auth; import java.net.URI; import java.net.URISyntaxException; -import com.amazonaws.auth.AWSCredentials; +import software.amazon.awssdk.auth.credentials.AwsCredentials; import org.junit.Before; import org.junit.Test; @@ -94,13 +94,13 @@ public class TestMarshalledCredentials extends HadoopTestBase { new Configuration(false), credentials, MarshalledCredentials.CredentialTypeRequired.SessionOnly); - AWSCredentials aws = provider.getCredentials(); + AwsCredentials aws = 
provider.resolveCredentials(); assertEquals(credentials.toString(), credentials.getAccessKey(), - aws.getAWSAccessKeyId()); + aws.accessKeyId()); assertEquals(credentials.toString(), credentials.getSecretKey(), - aws.getAWSSecretKey()); + aws.secretAccessKey()); // because the credentials are set to full only, creation will fail } @@ -119,7 +119,7 @@ public class TestMarshalledCredentials extends HadoopTestBase { MarshalledCredentials.CredentialTypeRequired.FullOnly); // because the credentials are set to full only, creation will fail intercept(NoAuthWithAWSException.class, "test", - () -> provider.getCredentials()); + () -> provider.resolveCredentials()); } /** diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/TestSignerManager.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/TestSignerManager.java index ca87b5c1b34..595e2687276 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/TestSignerManager.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/TestSignerManager.java @@ -19,8 +19,6 @@ package org.apache.hadoop.fs.s3a.auth; import java.io.Closeable; import java.io.IOException; -import java.net.URI; -import java.net.URISyntaxException; import java.nio.charset.StandardCharsets; import java.security.PrivilegedExceptionAction; import java.util.HashMap; @@ -28,12 +26,10 @@ import java.util.Map; import java.util.Objects; import java.util.concurrent.TimeUnit; -import com.amazonaws.AmazonWebServiceRequest; -import com.amazonaws.DefaultRequest; -import com.amazonaws.SignableRequest; -import com.amazonaws.auth.AWSCredentials; -import com.amazonaws.auth.Signer; -import com.amazonaws.auth.SignerFactory; +import software.amazon.awssdk.core.interceptor.ExecutionAttributes; +import software.amazon.awssdk.core.signer.Signer; +import software.amazon.awssdk.http.SdkHttpFullRequest; +import software.amazon.awssdk.http.SdkHttpMethod; import org.assertj.core.api.Assertions; import org.junit.Before; import org.junit.Rule; @@ -284,7 +280,7 @@ public class TestSignerManager { throws IOException, InterruptedException { ugi.doAs((PrivilegedExceptionAction) () -> { Signer signer = new SignerForInitializerTest(); - SignableRequest signableRequest = constructSignableRequest(bucket); + SdkHttpFullRequest signableRequest = constructSignableRequest(bucket); signer.sign(signableRequest, null); verifyStoreValueInSigner(expectNullStoreInfo, bucket, identifier); return null; @@ -336,8 +332,10 @@ public class TestSignerManager { private static boolean initialized = false; @Override - public void sign(SignableRequest request, AWSCredentials credentials) { + public SdkHttpFullRequest sign(SdkHttpFullRequest sdkHttpFullRequest, + ExecutionAttributes executionAttributes) { initialized = true; + return sdkHttpFullRequest; } public static void reset() { @@ -354,8 +352,10 @@ public class TestSignerManager { private static boolean initialized = false; @Override - public void sign(SignableRequest request, AWSCredentials credentials) { + public SdkHttpFullRequest sign(SdkHttpFullRequest sdkHttpFullRequest, + ExecutionAttributes executionAttributes) { initialized = true; + return sdkHttpFullRequest; } public static void reset() { @@ -472,11 +472,15 @@ public class TestSignerManager { private static StoreValue retrievedStoreValue; @Override - public void sign(SignableRequest request, AWSCredentials credentials) { - String bucketName = request.getEndpoint().getHost(); + public SdkHttpFullRequest sign(SdkHttpFullRequest 
sdkHttpFullRequest, + ExecutionAttributes executionAttributes) { + String bucket = sdkHttpFullRequest.host().split("//")[1]; + // remove trailing slash + String bucketName = bucket.substring(0, bucket.length() - 1); try { retrievedStoreValue = SignerInitializerForTest .getStoreInfo(bucketName, UserGroupInformation.getCurrentUser()); + return sdkHttpFullRequest; } catch (IOException e) { throw new RuntimeException("Failed to get current ugi", e); } @@ -579,12 +583,9 @@ public class TestSignerManager { return identifier + "_" + bucketName + "_" + user; } - private SignableRequest constructSignableRequest(String bucketName) - throws URISyntaxException { - DefaultRequest signableRequest = new DefaultRequest( - AmazonWebServiceRequest.NOOP, "fakeservice"); - URI uri = new URI("s3://" + bucketName + "/"); - signableRequest.setEndpoint(uri); - return signableRequest; + private SdkHttpFullRequest constructSignableRequest(String bucketName) { + String host = "s3://" + bucketName + "/"; + return SdkHttpFullRequest.builder().host(host).protocol("https").method(SdkHttpMethod.GET) + .build(); } } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/delegation/CountInvocationsProvider.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/delegation/CountInvocationsProvider.java index 3a7d78d68f7..4ee79e7220a 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/delegation/CountInvocationsProvider.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/delegation/CountInvocationsProvider.java @@ -20,8 +20,8 @@ package org.apache.hadoop.fs.s3a.auth.delegation; import java.util.concurrent.atomic.AtomicLong; -import com.amazonaws.auth.AWSCredentials; -import com.amazonaws.auth.AWSCredentialsProvider; +import software.amazon.awssdk.auth.credentials.AwsCredentials; +import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider; import org.apache.hadoop.fs.s3a.CredentialInitializationException; @@ -29,23 +29,18 @@ import org.apache.hadoop.fs.s3a.CredentialInitializationException; * Simple AWS credential provider which counts how often it is invoked. 
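The provider below illustrates the core credential-provider migration: v1's getCredentials() plus its no-op refresh() collapse into a single resolveCredentials() call on AwsCredentialsProvider. As a standalone sketch with illustrative names, not part of this patch:

    import software.amazon.awssdk.auth.credentials.AwsBasicCredentials;
    import software.amazon.awssdk.auth.credentials.AwsCredentials;
    import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider;

    /** Sketch of a minimal v2 provider; not part of the patch. */
    public class StaticSketchCredentialsProvider implements AwsCredentialsProvider {
      @Override
      public AwsCredentials resolveCredentials() {
        // v1 needed getCredentials() and a refresh() stub; v2 needs only this call.
        return AwsBasicCredentials.create("sketch-access-key", "sketch-secret-key");
      }
    }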
*/ public class CountInvocationsProvider - implements AWSCredentialsProvider { + implements AwsCredentialsProvider { public static final String NAME = CountInvocationsProvider.class.getName(); public static final AtomicLong COUNTER = new AtomicLong(0); @Override - public AWSCredentials getCredentials() { + public AwsCredentials resolveCredentials() { COUNTER.incrementAndGet(); throw new CredentialInitializationException("no credentials"); } - @Override - public void refresh() { - - } - public static long getInvocationCount() { return COUNTER.get(); } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/delegation/ITestSessionDelegationInFilesystem.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/delegation/ITestSessionDelegationInFilesystem.java index 295125169a0..28784b17c9c 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/delegation/ITestSessionDelegationInFilesystem.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/delegation/ITestSessionDelegationInFilesystem.java @@ -26,14 +26,16 @@ import java.io.UncheckedIOException; import java.net.URI; import java.nio.file.AccessDeniedException; -import com.amazonaws.services.s3.AmazonS3; -import com.amazonaws.services.s3.model.ObjectMetadata; +import software.amazon.awssdk.regions.Region; +import software.amazon.awssdk.services.s3.S3Client; +import software.amazon.awssdk.services.s3.model.HeadBucketResponse; import org.junit.AfterClass; import org.junit.BeforeClass; import org.junit.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; + import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; @@ -254,7 +256,6 @@ public class ITestSessionDelegationInFilesystem extends AbstractDelegationIT { } @Test - @SuppressWarnings("deprecation") public void testAddTokensFromFileSystem() throws Throwable { describe("verify FileSystem.addDelegationTokens() collects tokens"); S3AFileSystem fs = getFileSystem(); @@ -276,7 +277,7 @@ public class ITestSessionDelegationInFilesystem extends AbstractDelegationIT { AWSCredentialProviderList providerList = requireNonNull( delegationTokens.getCredentialProviders(), "providers"); - providerList.getCredentials(); + providerList.resolveCredentials(); } @Test @@ -323,14 +324,15 @@ public class ITestSessionDelegationInFilesystem extends AbstractDelegationIT { * Create a FS with a delegated token, verify it works as a filesystem, * and that you can pick up the same DT from that FS too. */ - @SuppressWarnings("deprecation") @Test public void testDelegatedFileSystem() throws Throwable { describe("Delegation tokens can be passed to a new filesystem;" + " if role restricted, permissions are tightened."); S3AFileSystem fs = getFileSystem(); // force a probe of the remote FS to make sure its endpoint is valid - fs.getObjectMetadata(new Path("/")); + // TODO: Check what should happen here. Calling headObject() on the root path fails in V2, + // with the error that key cannot be empty. 
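The TODO above reflects a genuine v2 behaviour change: HeadObject rejects an empty key, so a probe of the store root has to go through HeadBucket instead, which is what readLandsatMetadata() switches to later in this file. A hedged sketch of that probe, with hypothetical names:

    import software.amazon.awssdk.services.s3.S3Client;
    import software.amazon.awssdk.services.s3.model.HeadBucketResponse;

    /** Sketch: probe a bucket without naming an object key; not part of the patch. */
    public final class BucketProbeSketch {
      static HeadBucketResponse probe(S3Client s3, String bucket) {
        // HeadObject with an empty key is rejected by the v2 SDK;
        // HeadBucket is the equivalent store-level probe.
        return s3.headBucket(b -> b.bucket(bucket));
      }
    }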
+ // fs.getObjectMetadata(new Path("/")); readLandsatMetadata(fs); URI uri = fs.getUri(); @@ -577,8 +579,7 @@ public class ITestSessionDelegationInFilesystem extends AbstractDelegationIT { * @return result of the HEAD * @throws Exception failure */ - @SuppressWarnings("deprecation") - protected ObjectMetadata readLandsatMetadata(final S3AFileSystem delegatedFS) + protected HeadBucketResponse readLandsatMetadata(final S3AFileSystem delegatedFS) throws Exception { AWSCredentialProviderList testingCreds = delegatedFS.shareCredentials("testing"); @@ -586,20 +587,21 @@ public class ITestSessionDelegationInFilesystem extends AbstractDelegationIT { URI landsat = new URI(DEFAULT_CSVTEST_FILE); DefaultS3ClientFactory factory = new DefaultS3ClientFactory(); - factory.setConf(new Configuration(delegatedFS.getConf())); + Configuration conf = delegatedFS.getConf(); + factory.setConf(conf); String host = landsat.getHost(); S3ClientFactory.S3ClientCreationParameters parameters = null; parameters = new S3ClientFactory.S3ClientCreationParameters() .withCredentialSet(testingCreds) .withPathUri(new URI("s3a://localhost/")) - .withEndpoint(DEFAULT_ENDPOINT) .withMetrics(new EmptyS3AStatisticsContext() .newStatisticsFromAwsSdk()) - .withUserAgentSuffix("ITestSessionDelegationInFilesystem"); - AmazonS3 s3 = factory.createS3Client(landsat, parameters); + .withUserAgentSuffix("ITestSessionDelegationInFilesystem") + .withRegion(Region.US_WEST_2); + S3Client s3 = factory.createS3Client(landsat, parameters); return Invoker.once("HEAD", host, - () -> s3.getObjectMetadata(host, landsat.getPath().substring(1))); + () -> s3.headBucket(b -> b.bucket(host))); } /** diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/delegation/ITestSessionDelegationTokens.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/delegation/ITestSessionDelegationTokens.java index fab7ffdbb76..7f13cb3a4d1 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/delegation/ITestSessionDelegationTokens.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/delegation/ITestSessionDelegationTokens.java @@ -22,8 +22,8 @@ import java.io.File; import java.io.IOException; import java.net.URI; -import com.amazonaws.auth.AWSCredentials; -import com.amazonaws.auth.AWSSessionCredentials; +import software.amazon.awssdk.auth.credentials.AwsCredentials; +import software.amazon.awssdk.auth.credentials.AwsSessionCredentials; import org.hamcrest.Matchers; import org.junit.Test; import org.slf4j.Logger; @@ -188,9 +188,9 @@ public class ITestSessionDelegationTokens extends AbstractDelegationIT { dt2.start(); dt2.resetTokenBindingToDT(originalDT); - final AWSSessionCredentials awsSessionCreds + final AwsSessionCredentials awsSessionCreds = verifySessionCredentials( - dt2.getCredentialProviders().getCredentials()); + dt2.getCredentialProviders().resolveCredentials()); final MarshalledCredentials origCreds = fromAWSCredentials( awsSessionCreds); @@ -249,7 +249,7 @@ public class ITestSessionDelegationTokens extends AbstractDelegationIT { * @return the retrieved DT. This is only for error reporting. * @throws IOException failure. 
*/ - @SuppressWarnings({"OptionalGetWithoutIsPresent", "deprecation"}) + @SuppressWarnings({"OptionalGetWithoutIsPresent"}) protected AbstractS3ATokenIdentifier verifyCredentialPropagation( final S3AFileSystem fs, final MarshalledCredentials session, @@ -278,7 +278,7 @@ public class ITestSessionDelegationTokens extends AbstractDelegationIT { LOG.info("Regenerated DT is {}", newDT); final MarshalledCredentials creds2 = fromAWSCredentials( verifySessionCredentials( - delegationTokens2.getCredentialProviders().getCredentials())); + delegationTokens2.getCredentialProviders().resolveCredentials())); assertEquals("Credentials", session, creds2); assertTrue("Origin in " + boundId, boundId.getOrigin() @@ -287,12 +287,12 @@ public class ITestSessionDelegationTokens extends AbstractDelegationIT { } } - private AWSSessionCredentials verifySessionCredentials( - final AWSCredentials creds) { - AWSSessionCredentials session = (AWSSessionCredentials) creds; - assertNotNull("access key", session.getAWSAccessKeyId()); - assertNotNull("secret key", session.getAWSSecretKey()); - assertNotNull("session token", session.getSessionToken()); + private AwsSessionCredentials verifySessionCredentials( + final AwsCredentials creds) { + AwsSessionCredentials session = (AwsSessionCredentials) creds; + assertNotNull("access key", session.accessKeyId()); + assertNotNull("secret key", session.secretAccessKey()); + assertNotNull("session token", session.sessionToken()); return session; } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/delegation/TestS3ADelegationTokenSupport.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/delegation/TestS3ADelegationTokenSupport.java index 88d9ebfcdfd..992643ff8ce 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/delegation/TestS3ADelegationTokenSupport.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/delegation/TestS3ADelegationTokenSupport.java @@ -37,7 +37,6 @@ import static org.apache.hadoop.fs.s3a.auth.delegation.DelegationConstants.FULL_ import static org.apache.hadoop.fs.s3a.auth.delegation.DelegationConstants.SESSION_TOKEN_KIND; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNotNull; -import static org.junit.Assert.assertNull; import static org.junit.Assert.assertTrue; /** diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/AbstractITCommitProtocol.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/AbstractITCommitProtocol.java index b193cca03db..94251431ad0 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/AbstractITCommitProtocol.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/AbstractITCommitProtocol.java @@ -718,7 +718,7 @@ public abstract class AbstractITCommitProtocol extends AbstractCommitITest { private void validateStorageClass(Path dir, String expectedStorageClass) throws Exception { Path expectedFile = getPart0000(dir); S3AFileSystem fs = getFileSystem(); - String actualStorageClass = fs.getObjectMetadata(expectedFile).getStorageClass(); + String actualStorageClass = fs.getObjectMetadata(expectedFile).storageClassAsString(); Assertions.assertThat(actualStorageClass) .describedAs("Storage class of object %s", expectedFile) diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/staging/StagingTestBase.java 
b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/staging/StagingTestBase.java index 62b76b7c395..a6d9b7fcaf9 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/staging/StagingTestBase.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/staging/StagingTestBase.java @@ -29,21 +29,20 @@ import java.util.Map; import java.util.UUID; import java.util.stream.Collectors; -import com.amazonaws.AmazonClientException; -import com.amazonaws.services.s3.AmazonS3; -import com.amazonaws.services.s3.AmazonS3Client; -import com.amazonaws.services.s3.model.AbortMultipartUploadRequest; -import com.amazonaws.services.s3.model.AmazonS3Exception; -import com.amazonaws.services.s3.model.CompleteMultipartUploadRequest; -import com.amazonaws.services.s3.model.CompleteMultipartUploadResult; -import com.amazonaws.services.s3.model.DeleteObjectRequest; -import com.amazonaws.services.s3.model.InitiateMultipartUploadRequest; -import com.amazonaws.services.s3.model.InitiateMultipartUploadResult; -import com.amazonaws.services.s3.model.ListMultipartUploadsRequest; -import com.amazonaws.services.s3.model.MultipartUpload; -import com.amazonaws.services.s3.model.MultipartUploadListing; -import com.amazonaws.services.s3.model.UploadPartRequest; -import com.amazonaws.services.s3.model.UploadPartResult; +import software.amazon.awssdk.awscore.exception.AwsServiceException; +import software.amazon.awssdk.core.sync.RequestBody; +import software.amazon.awssdk.services.s3.S3Client; +import software.amazon.awssdk.services.s3.model.AbortMultipartUploadRequest; +import software.amazon.awssdk.services.s3.model.CompleteMultipartUploadRequest; +import software.amazon.awssdk.services.s3.model.CompleteMultipartUploadResponse; +import software.amazon.awssdk.services.s3.model.CreateMultipartUploadRequest; +import software.amazon.awssdk.services.s3.model.CreateMultipartUploadResponse; +import software.amazon.awssdk.services.s3.model.DeleteObjectRequest; +import software.amazon.awssdk.services.s3.model.ListMultipartUploadsRequest; +import software.amazon.awssdk.services.s3.model.ListMultipartUploadsResponse; +import software.amazon.awssdk.services.s3.model.MultipartUpload; +import software.amazon.awssdk.services.s3.model.UploadPartRequest; +import software.amazon.awssdk.services.s3.model.UploadPartResponse; import org.apache.hadoop.thirdparty.com.google.common.collect.Lists; import org.apache.hadoop.thirdparty.com.google.common.collect.Maps; import org.junit.AfterClass; @@ -81,6 +80,7 @@ import org.apache.hadoop.mapreduce.v2.util.MRBuilderUtils; import org.apache.hadoop.service.ServiceOperations; import org.apache.hadoop.test.HadoopTestBase; + import static org.mockito.ArgumentMatchers.*; import static org.mockito.Mockito.*; @@ -339,7 +339,7 @@ public class StagingTestBase { // created in Before private StagingTestBase.ClientResults results = null; private StagingTestBase.ClientErrors errors = null; - private AmazonS3 mockClient = null; + private S3Client mockClient = null; @Before public void setupJob() throws Exception { @@ -448,7 +448,7 @@ public class StagingTestBase { public static class ClientResults implements Serializable { private static final long serialVersionUID = -3137637327090709905L; // For inspection of what the committer did - private final Map requests = + private final Map requests = Maps.newHashMap(); private final List uploads = Lists.newArrayList(); private final List parts = Lists.newArrayList(); @@ -461,7 +461,7 @@ public class 
StagingTestBase { Maps.newHashMap(); private final List deletes = Lists.newArrayList(); - public Map getRequests() { + public Map getRequests() { return requests; } @@ -490,7 +490,7 @@ public class StagingTestBase { } public List getDeletePaths() { - return deletes.stream().map(DeleteObjectRequest::getKey).collect( + return deletes.stream().map(DeleteObjectRequest::key).collect( Collectors.toList()); } @@ -619,197 +619,163 @@ public class StagingTestBase { * @param errors when (if any) to fail * @return the mock client to patch in to a committer/FS instance */ - public static AmazonS3 newMockS3Client(final ClientResults results, + public static S3Client newMockS3Client(final ClientResults results, final ClientErrors errors) { - AmazonS3Client mockClient = mock(AmazonS3Client.class); + S3Client mockClientV2 = mock(S3Client.class); final Object lock = new Object(); // initiateMultipartUpload - when(mockClient - .initiateMultipartUpload(any(InitiateMultipartUploadRequest.class))) + when(mockClientV2 + .createMultipartUpload(any(CreateMultipartUploadRequest.class))) .thenAnswer(invocation -> { - LOG.debug("initiateMultipartUpload for {}", mockClient); + LOG.debug("initiateMultipartUpload for {}", mockClientV2); synchronized (lock) { if (results.requests.size() == errors.failOnInit) { if (errors.recover) { errors.failOnInit(-1); } - throw new AmazonClientException( - "Mock Fail on init " + results.requests.size()); + throw AwsServiceException.builder() + .message("Mock Fail on init " + results.requests.size()) + .build(); } String uploadId = UUID.randomUUID().toString(); - InitiateMultipartUploadRequest req = getArgumentAt(invocation, - 0, InitiateMultipartUploadRequest.class); + CreateMultipartUploadRequest req = getArgumentAt(invocation, + 0, CreateMultipartUploadRequest.class); results.requests.put(uploadId, req); - results.activeUploads.put(uploadId, req.getKey()); + results.activeUploads.put(uploadId, req.key()); results.uploads.add(uploadId); - return newResult(results.requests.get(uploadId), uploadId); + return CreateMultipartUploadResponse.builder() + .uploadId(uploadId) + .build(); } }); // uploadPart - when(mockClient.uploadPart(any(UploadPartRequest.class))) + when(mockClientV2.uploadPart(any(UploadPartRequest.class), any(RequestBody.class))) .thenAnswer(invocation -> { - LOG.debug("uploadPart for {}", mockClient); + LOG.debug("uploadPart for {}", mockClientV2); synchronized (lock) { if (results.parts.size() == errors.failOnUpload) { if (errors.recover) { errors.failOnUpload(-1); } LOG.info("Triggering upload failure"); - throw new AmazonClientException( - "Mock Fail on upload " + results.parts.size()); + throw AwsServiceException.builder() + .message("Mock Fail on upload " + results.parts.size()) + .build(); } UploadPartRequest req = getArgumentAt(invocation, 0, UploadPartRequest.class); results.parts.add(req); String etag = UUID.randomUUID().toString(); - List etags = results.tagsByUpload.get(req.getUploadId()); + List etags = results.tagsByUpload.get(req.uploadId()); if (etags == null) { etags = Lists.newArrayList(); - results.tagsByUpload.put(req.getUploadId(), etags); + results.tagsByUpload.put(req.uploadId(), etags); } etags.add(etag); - return newResult(req, etag); + return UploadPartResponse.builder().eTag(etag).build(); } }); // completeMultipartUpload - when(mockClient + when(mockClientV2 .completeMultipartUpload(any(CompleteMultipartUploadRequest.class))) .thenAnswer(invocation -> { - LOG.debug("completeMultipartUpload for {}", mockClient); + 
LOG.debug("completeMultipartUpload for {}", mockClientV2); synchronized (lock) { if (results.commits.size() == errors.failOnCommit) { if (errors.recover) { errors.failOnCommit(-1); } - throw new AmazonClientException( - "Mock Fail on commit " + results.commits.size()); + throw AwsServiceException.builder() + .message("Mock Fail on commit " + results.commits.size()) + .build(); } CompleteMultipartUploadRequest req = getArgumentAt(invocation, 0, CompleteMultipartUploadRequest.class); - String uploadId = req.getUploadId(); + String uploadId = req.uploadId(); removeUpload(results, uploadId); results.commits.add(req); - - return newResult(req); + return CompleteMultipartUploadResponse.builder().build(); } }); // abortMultipartUpload mocking doAnswer(invocation -> { - LOG.debug("abortMultipartUpload for {}", mockClient); + LOG.debug("abortMultipartUpload for {}", mockClientV2); synchronized (lock) { if (results.aborts.size() == errors.failOnAbort) { if (errors.recover) { errors.failOnAbort(-1); } - throw new AmazonClientException( - "Mock Fail on abort " + results.aborts.size()); + throw AwsServiceException.builder() + .message("Mock Fail on abort " + results.aborts.size()) + .build(); } AbortMultipartUploadRequest req = getArgumentAt(invocation, 0, AbortMultipartUploadRequest.class); - String id = req.getUploadId(); + String id = req.uploadId(); removeUpload(results, id); results.aborts.add(req); return null; } }) - .when(mockClient) + .when(mockClientV2) .abortMultipartUpload(any(AbortMultipartUploadRequest.class)); // deleteObject mocking doAnswer(invocation -> { - LOG.debug("deleteObject for {}", mockClient); + LOG.debug("deleteObject for {}", mockClientV2); synchronized (lock) { results.deletes.add(getArgumentAt(invocation, 0, DeleteObjectRequest.class)); return null; } }) - .when(mockClient) + .when(mockClientV2) .deleteObject(any(DeleteObjectRequest.class)); - // deleteObject mocking - doAnswer(invocation -> { - LOG.debug("deleteObject for {}", mockClient); - synchronized (lock) { - results.deletes.add(new DeleteObjectRequest( - getArgumentAt(invocation, 0, String.class), - getArgumentAt(invocation, 1, String.class) - )); - return null; - } - }).when(mockClient) - .deleteObject(any(String.class), any(String.class)); - // to String returns the debug information - when(mockClient.toString()).thenAnswer( + when(mockClientV2.toString()).thenAnswer( invocation -> "Mock3AClient " + results + " " + errors); - when(mockClient + when(mockClientV2 .listMultipartUploads(any(ListMultipartUploadsRequest.class))) .thenAnswer(invocation -> { synchronized (lock) { - MultipartUploadListing l = new MultipartUploadListing(); - l.setMultipartUploads( - results.activeUploads.entrySet().stream() - .map(e -> newMPU(e.getKey(), e.getValue())) - .collect(Collectors.toList())); - return l; + return ListMultipartUploadsResponse.builder() + .uploads(results.activeUploads.entrySet().stream() + .map(e -> MultipartUpload.builder() + .uploadId(e.getKey()) + .key(e.getValue()) + .build()) + .collect(Collectors.toList())) + .build(); } }); - return mockClient; + return mockClientV2; } /** * Remove an upload from the upload map. * @param results result set * @param uploadId The upload ID to remove - * @throws AmazonS3Exception with error code 404 if the id is unknown. + * @throws AwsServiceException with error code 404 if the id is unknown. 
*/ protected static void removeUpload(final ClientResults results, final String uploadId) { String removed = results.activeUploads.remove(uploadId); if (removed == null) { // upload doesn't exist - AmazonS3Exception ex = new AmazonS3Exception( - "not found " + uploadId); - ex.setStatusCode(404); - throw ex; + throw AwsServiceException.builder() + .message("not found " + uploadId) + .statusCode(404) + .build(); } } - private static CompleteMultipartUploadResult newResult( - CompleteMultipartUploadRequest req) { - return new CompleteMultipartUploadResult(); - } - - - private static MultipartUpload newMPU(String id, String path) { - MultipartUpload up = new MultipartUpload(); - up.setUploadId(id); - up.setKey(path); - return up; - } - - private static UploadPartResult newResult(UploadPartRequest request, - String etag) { - UploadPartResult result = new UploadPartResult(); - result.setPartNumber(request.getPartNumber()); - result.setETag(etag); - return result; - } - - private static InitiateMultipartUploadResult newResult( - InitiateMultipartUploadRequest request, String uploadId) { - InitiateMultipartUploadResult result = new InitiateMultipartUploadResult(); - result.setUploadId(uploadId); - return result; - } - /** * create files in the attempt path that should be found by * {@code getTaskOutput}. diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/staging/TestDirectoryCommitterScale.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/staging/TestDirectoryCommitterScale.java index 4d24c07dacf..f96cf97ebd7 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/staging/TestDirectoryCommitterScale.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/staging/TestDirectoryCommitterScale.java @@ -27,7 +27,7 @@ import java.util.Map; import java.util.stream.Collectors; import java.util.stream.IntStream; -import com.amazonaws.services.s3.model.PartETag; +import software.amazon.awssdk.services.s3.model.CompletedPart; import org.apache.hadoop.thirdparty.com.google.common.collect.Maps; import org.assertj.core.api.Assertions; @@ -39,6 +39,7 @@ import org.junit.runners.MethodSorters; import org.slf4j.Logger; import org.slf4j.LoggerFactory; + import org.apache.commons.io.FileUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; @@ -159,8 +160,8 @@ public class TestDirectoryCommitterScale // step1: a list of tags. 
// this is the md5sum of hadoop 3.2.1.tar String tag = "9062dcf18ffaee254821303bbd11c72b"; - List etags = IntStream.rangeClosed(1, BLOCKS_PER_TASK + 1) - .mapToObj(i -> new PartETag(i, tag)) + List etags = IntStream.rangeClosed(1, BLOCKS_PER_TASK + 1) + .mapToObj(i -> CompletedPart.builder().partNumber(i).eTag(tag).build()) .collect(Collectors.toList()); SinglePendingCommit base = new SinglePendingCommit(); base.setBucket(BUCKET); diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/staging/TestStagingCommitter.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/staging/TestStagingCommitter.java index b4cc21b61e2..5ac7e9ce756 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/staging/TestStagingCommitter.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/staging/TestStagingCommitter.java @@ -31,9 +31,10 @@ import java.util.Set; import java.util.UUID; import java.util.stream.Collectors; -import com.amazonaws.services.s3.AmazonS3; -import com.amazonaws.services.s3.model.AbortMultipartUploadRequest; -import com.amazonaws.services.s3.model.CompleteMultipartUploadRequest; +import software.amazon.awssdk.services.s3.S3Client; +import software.amazon.awssdk.services.s3.model.AbortMultipartUploadRequest; +import software.amazon.awssdk.services.s3.model.CompleteMultipartUploadRequest; + import org.apache.hadoop.thirdparty.com.google.common.collect.Sets; import org.assertj.core.api.Assertions; import org.junit.After; @@ -50,7 +51,6 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.s3a.AWSClientIOException; import org.apache.hadoop.fs.s3a.MockS3AFileSystem; import org.apache.hadoop.fs.s3a.S3AFileSystem; import org.apache.hadoop.fs.s3a.commit.AbstractS3ACommitter; @@ -69,6 +69,7 @@ import org.apache.hadoop.mapreduce.TaskType; import org.apache.hadoop.mapreduce.task.JobContextImpl; import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl; + import static org.apache.hadoop.fs.s3a.Constants.*; import static org.apache.hadoop.fs.s3a.commit.CommitConstants.*; import static org.apache.hadoop.fs.s3a.commit.InternalCommitterConstants.*; @@ -111,7 +112,7 @@ public class TestStagingCommitter extends StagingTestBase.MiniDFSTest { // created in Before private StagingTestBase.ClientResults results = null; private StagingTestBase.ClientErrors errors = null; - private AmazonS3 mockClient = null; + private S3Client mockClient = null; private File tmpDir; /** @@ -472,7 +473,7 @@ public class TestStagingCommitter extends StagingTestBase.MiniDFSTest { writeOutputFile(tac.getTaskAttemptID(), attemptPath, UUID.randomUUID().toString(), 10); - intercept(AWSClientIOException.class, + intercept(IOException.class, "Fail on init 1", "Should fail during init", () -> committer.commitTask(tac)); @@ -500,7 +501,7 @@ public class TestStagingCommitter extends StagingTestBase.MiniDFSTest { writeOutputFile(tac.getTaskAttemptID(), attemptPath, UUID.randomUUID().toString(), 10); - intercept((Class) AWSClientIOException.class, + intercept(IOException.class, "Fail on upload 2", "Should fail during upload", () -> { @@ -512,7 +513,7 @@ public class TestStagingCommitter extends StagingTestBase.MiniDFSTest { 1, results.getUploads().size()); assertEquals("Should abort the upload", results.getUploads().get(0), - results.getAborts().get(0).getUploadId()); + 
results.getAborts().get(0).uploadId()); assertPathDoesNotExist(fs, "Should remove the attempt path", attemptPath); } @@ -531,7 +532,7 @@ public class TestStagingCommitter extends StagingTestBase.MiniDFSTest { writeOutputFile(tac.getTaskAttemptID(), attemptPath, UUID.randomUUID().toString(), 10); - intercept((Class) AWSClientIOException.class, + intercept(IOException.class, "Fail on upload 5", "Should fail during upload", () -> { @@ -563,7 +564,7 @@ public class TestStagingCommitter extends StagingTestBase.MiniDFSTest { writeOutputFile(tac.getTaskAttemptID(), attemptPath, UUID.randomUUID().toString(), 10); - intercept((Class) AWSClientIOException.class, + intercept(IOException.class, "Fail on upload 5", "Should suppress abort failure, propagate upload failure", ()-> { @@ -636,7 +637,7 @@ public class TestStagingCommitter extends StagingTestBase.MiniDFSTest { errors.failOnCommit(5); setMockLogLevel(MockS3AFileSystem.LOG_NAME); - intercept(AWSClientIOException.class, + intercept(IOException.class, "Fail on commit 5", "Should propagate the commit failure", () -> { @@ -644,17 +645,16 @@ public class TestStagingCommitter extends StagingTestBase.MiniDFSTest { return jobCommitter.toString(); }); - Set commits = results.getCommits() .stream() .map(commit -> - "s3a://" + commit.getBucketName() + "/" + commit.getKey()) + "s3a://" + commit.bucket() + "/" + commit.key()) .collect(Collectors.toSet()); Set deletes = results.getDeletes() .stream() .map(delete -> - "s3a://" + delete.getBucketName() + "/" + delete.getKey()) + "s3a://" + delete.bucket() + "/" + delete.key()) .collect(Collectors.toSet()); Assertions.assertThat(commits) @@ -728,14 +728,14 @@ public class TestStagingCommitter extends StagingTestBase.MiniDFSTest { private static Set getAbortedIds( List aborts) { return aborts.stream() - .map(AbortMultipartUploadRequest::getUploadId) + .map(AbortMultipartUploadRequest::uploadId) .collect(Collectors.toSet()); } private static Set getCommittedIds( List commits) { return commits.stream() - .map(CompleteMultipartUploadRequest::getUploadId) + .map(CompleteMultipartUploadRequest::uploadId) .collect(Collectors.toSet()); } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/staging/TestStagingPartitionedTaskCommit.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/staging/TestStagingPartitionedTaskCommit.java index 01be1e06306..87ffbceb768 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/staging/TestStagingPartitionedTaskCommit.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/staging/TestStagingPartitionedTaskCommit.java @@ -24,7 +24,7 @@ import java.util.List; import java.util.Set; import java.util.UUID; -import com.amazonaws.services.s3.model.InitiateMultipartUploadRequest; +import software.amazon.awssdk.services.s3.model.CreateMultipartUploadRequest; import org.apache.hadoop.thirdparty.com.google.common.collect.Lists; import org.apache.hadoop.thirdparty.com.google.common.collect.Sets; import org.assertj.core.api.Assertions; @@ -146,10 +146,10 @@ public class TestStagingPartitionedTaskCommit protected void verifyFilesCreated( final PartitionedStagingCommitter committer) { Set files = Sets.newHashSet(); - for (InitiateMultipartUploadRequest request : + for (CreateMultipartUploadRequest request : getMockResults().getRequests().values()) { - assertEquals(BUCKET, request.getBucketName()); - files.add(request.getKey()); + assertEquals(BUCKET, request.bucket()); + 
files.add(request.key()); } Assertions.assertThat(files) .describedAs("Should have the right number of uploads") diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/fileContext/ITestS3AFileContextStatistics.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/fileContext/ITestS3AFileContextStatistics.java index d28f4279f1b..fbad671e1fa 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/fileContext/ITestS3AFileContextStatistics.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/fileContext/ITestS3AFileContextStatistics.java @@ -16,7 +16,6 @@ package org.apache.hadoop.fs.s3a.fileContext; import java.io.IOException; import java.net.URI; -import com.amazonaws.services.s3.model.CryptoStorageMode; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -77,7 +76,7 @@ public class ITestS3AFileContextStatistics extends FCStatisticsBaseTest { *
    * NOTE: if Client side encryption is enabled, expected bytes written * should increase by 16(padding of data) + bytes for the key ID set + 94(KMS - * key generation) in case of storage type{@link CryptoStorageMode} as + * key generation) in case of storage type CryptoStorageMode as * ObjectMetadata(Default). If Crypto Storage mode is instruction file then * add additional bytes as that file is stored separately and would account * for bytes written. diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/ITestPartialRenamesDeletes.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/ITestPartialRenamesDeletes.java index 378f4a70433..641bf7a2d07 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/ITestPartialRenamesDeletes.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/ITestPartialRenamesDeletes.java @@ -28,7 +28,6 @@ import java.util.Set; import java.util.TreeSet; import java.util.stream.Collectors; -import com.amazonaws.services.s3.model.MultiObjectDeleteException; import org.assertj.core.api.Assertions; import org.junit.Test; import org.junit.runner.RunWith; diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/ITestRenameDeleteRace.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/ITestRenameDeleteRace.java index 2610f54b44e..73db9429732 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/ITestRenameDeleteRace.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/ITestRenameDeleteRace.java @@ -23,13 +23,14 @@ import java.util.concurrent.CompletableFuture; import java.util.concurrent.Semaphore; import java.util.concurrent.TimeUnit; -import com.amazonaws.AmazonClientException; +import software.amazon.awssdk.core.exception.SdkException; import org.assertj.core.api.Assertions; import org.junit.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; + import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.contract.ContractTestUtils; @@ -207,7 +208,7 @@ public class ITestRenameDeleteRace extends AbstractS3ATestBase { @Override protected void maybeCreateFakeParentDirectory(final Path path) - throws IOException, AmazonClientException { + throws IOException, SdkException { LOG.info("waking anything blocked on the signal semaphore"); // notify anything waiting signalCreatingFakeParentDirectory.release(); diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/TestHeaderProcessing.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/TestHeaderProcessing.java index 82592b1d019..81bd8a5efe2 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/TestHeaderProcessing.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/TestHeaderProcessing.java @@ -26,7 +26,9 @@ import java.util.HashMap; import java.util.List; import java.util.Map; -import com.amazonaws.services.s3.model.ObjectMetadata; +import software.amazon.awssdk.services.s3.model.CopyObjectRequest; +import software.amazon.awssdk.services.s3.model.HeadBucketResponse; +import software.amazon.awssdk.services.s3.model.HeadObjectResponse; import org.assertj.core.api.Assertions; import org.assertj.core.util.Lists; import org.junit.Before; @@ -206,20 +208,20 @@ public class TestHeaderProcessing extends HadoopTestBase { final String owner = "x-header-owner"; final String root = "root"; 
CONTEXT_ACCESSORS.userHeaders.put(owner, root); - final ObjectMetadata source = CONTEXT_ACCESSORS + final HeadObjectResponse source = CONTEXT_ACCESSORS .getObjectMetadata(MAGIC_KEY); - final Map sourceUserMD = source.getUserMetadata(); + final Map sourceUserMD = source.metadata(); Assertions.assertThat(sourceUserMD.get(owner)) .describedAs("owner header in copied MD") .isEqualTo(root); - ObjectMetadata dest = new ObjectMetadata(); - headerProcessing.cloneObjectMetadata(source, dest); + Map destUserMetadata = new HashMap<>(); + headerProcessing.cloneObjectMetadata(source, destUserMetadata, CopyObjectRequest.builder()); - Assertions.assertThat(dest.getUserMetadata().get(X_HEADER_MAGIC_MARKER)) + Assertions.assertThat(destUserMetadata.get(X_HEADER_MAGIC_MARKER)) .describedAs("Magic marker header in copied MD") .isNull(); - Assertions.assertThat(dest.getUserMetadata().get(owner)) + Assertions.assertThat(destUserMetadata.get(owner)) .describedAs("owner header in copied MD") .isEqualTo(root); } @@ -307,20 +309,24 @@ public class TestHeaderProcessing extends HadoopTestBase { } @Override - public ObjectMetadata getObjectMetadata(final String key) + public HeadObjectResponse getObjectMetadata(final String key) throws IOException { if (MAGIC_KEY.equals(key)) { - ObjectMetadata omd = new ObjectMetadata(); - omd.setUserMetadata(userHeaders); - omd.setContentLength(len); - omd.setLastModified(date); - return omd; + return HeadObjectResponse.builder() + .metadata(userHeaders) + .contentLength(len) + .lastModified(date.toInstant()).build(); } else { throw new FileNotFoundException(key); } } + @Override + public HeadBucketResponse getBucketMetadata() throws IOException { + return HeadBucketResponse.builder().build(); + } + public void setHeader(String key, String val) { userHeaders.put(key, val); } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/TestNetworkBinding.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/TestNetworkBinding.java index 7f51d2b4536..919a89b8c1d 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/TestNetworkBinding.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/TestNetworkBinding.java @@ -18,14 +18,10 @@ package org.apache.hadoop.fs.s3a.impl; -import com.amazonaws.ClientConfiguration; -import com.amazonaws.client.builder.AwsClientBuilder; -import org.junit.Ignore; import org.junit.Test; import org.apache.hadoop.test.AbstractHadoopTestBase; -import static org.apache.hadoop.fs.s3a.DefaultS3ClientFactory.createEndpointConfiguration; import static org.apache.hadoop.fs.s3a.impl.NetworkBinding.fixBucketRegion; import static org.assertj.core.api.Assertions.assertThat; @@ -63,43 +59,4 @@ public class TestNetworkBinding extends AbstractHadoopTestBase { .describedAs("Fixup of %s", region) .isEqualTo(expected); } - - @Test - public void testNull() throws Throwable { - expectEndpoint("", true, "unused"); - } - - @Test - @Ignore("disabled until endpoint logic works for S3 client builder API") - public void testUSEastEndpoint() throws Throwable { - expectEndpoint(US_EAST_1, false, US_EAST_1); - } - - @Test - @Ignore("disabled until endpoint logic works for S3 client builder API") - public void testUSWestEndpoint() throws Throwable { - expectEndpoint(US_WEST_2, false, US_WEST_2); - } - - public void expectEndpoint(final String src, - final boolean expectNull, - final String expectRegion) { - AwsClientBuilder.EndpointConfiguration epr = - createEndpointConfiguration(src, new 
ClientConfiguration(), src); - String eprStr = epr == null - ? "(empty)" - : ("(" + epr.getServiceEndpoint() + " " + epr.getSigningRegion()); - if (expectNull) { - assertThat(epr) - .describedAs("Endpoint configuration of %s =", - src, eprStr) - .isNull(); - } else { - assertThat(epr) - .describedAs("Endpoint configuration of %s =", - src, eprStr) - .hasFieldOrPropertyWithValue("serviceEndpoint", src) - .hasFieldOrPropertyWithValue("signingRegion", expectRegion); - } - } } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/TestRequestFactory.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/TestRequestFactory.java index 7c85142d437..1fb576a5551 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/TestRequestFactory.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/TestRequestFactory.java @@ -18,21 +18,20 @@ package org.apache.hadoop.fs.s3a.impl; -import java.io.ByteArrayInputStream; -import java.io.File; import java.io.IOException; import java.util.ArrayList; import java.util.concurrent.atomic.AtomicLong; -import com.amazonaws.AmazonWebServiceRequest; -import com.amazonaws.services.s3.model.CannedAccessControlList; -import com.amazonaws.services.s3.model.ObjectListing; -import com.amazonaws.services.s3.model.ObjectMetadata; +import software.amazon.awssdk.awscore.AwsRequest; +import software.amazon.awssdk.core.SdkRequest; +import software.amazon.awssdk.services.s3.model.HeadObjectResponse; import org.assertj.core.api.Assertions; import org.junit.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; + +import org.apache.hadoop.fs.PathIOException; import org.apache.hadoop.fs.s3a.S3AEncryptionMethods; import org.apache.hadoop.fs.s3a.api.RequestFactory; import org.apache.hadoop.fs.s3a.audit.AWSRequestAnalyzer; @@ -57,7 +56,7 @@ public class TestRequestFactory extends AbstractHadoopTestBase { private final AWSRequestAnalyzer analyzer = new AWSRequestAnalyzer(); /** - * Count of requests analyzed via the {@link #a(AmazonWebServiceRequest)} + * Count of requests analyzed via the {@link #a(AwsRequest.Builder)} * call. 
*/ private int requestsAnalyzed; @@ -81,27 +80,31 @@ public class TestRequestFactory extends AbstractHadoopTestBase { */ @Test public void testRequestFactoryWithCannedACL() throws Throwable { - CannedAccessControlList acl = CannedAccessControlList.BucketOwnerFullControl; + String acl = "bucket-owner-full-control"; RequestFactory factory = RequestFactoryImpl.builder() .withBucket("bucket") .withCannedACL(acl) .build(); String path = "path"; String path2 = "path2"; - ObjectMetadata md = factory.newObjectMetadata(128); - Assertions.assertThat( - factory.newPutObjectRequest(path, md, - null, new ByteArrayInputStream(new byte[0])) - .getCannedAcl()) + HeadObjectResponse md = HeadObjectResponse.builder().contentLength(128L).build(); + + Assertions.assertThat(factory.newPutObjectRequestBuilder(path, null, 128, false) + .build() + .acl() + .toString()) .describedAs("ACL of PUT") .isEqualTo(acl); - Assertions.assertThat(factory.newCopyObjectRequest(path, path2, md) - .getCannedAccessControlList()) + Assertions.assertThat(factory.newCopyObjectRequestBuilder(path, path2, md) + .build() + .acl() + .toString()) .describedAs("ACL of COPY") .isEqualTo(acl); - Assertions.assertThat(factory.newMultipartUploadRequest(path, - null) - .getCannedACL()) + Assertions.assertThat(factory.newMultipartUploadRequestBuilder(path, null) + .build() + .acl() + .toString()) .describedAs("ACL of MPU") .isEqualTo(acl); } @@ -132,21 +135,18 @@ public class TestRequestFactory extends AbstractHadoopTestBase { private final AtomicLong counter = new AtomicLong(); @Override - public T prepareRequest(final T t) { + public void prepareRequest(final SdkRequest.Builder t) { counter.addAndGet(1); - return t; } } /** * Analyze the request, log the output, return the info. - * @param request request. - * @param type of request. + * @param builder request builder. 
* @return value */ - private AWSRequestAnalyzer.RequestInfo - a(T request) { - AWSRequestAnalyzer.RequestInfo info = analyzer.analyze(request); + private AWSRequestAnalyzer.RequestInfo a(AwsRequest.Builder builder) { + AWSRequestAnalyzer.RequestInfo info = analyzer.analyze(builder.build()); LOG.info("{}", info); requestsAnalyzed++; return info; @@ -160,27 +160,25 @@ public class TestRequestFactory extends AbstractHadoopTestBase { String path = "path"; String path2 = "path2"; String id = "1"; - ObjectMetadata md = factory.newObjectMetadata(128); - a(factory.newAbortMultipartUploadRequest(path, id)); - a(factory.newCompleteMultipartUploadRequest(path, id, + a(factory.newAbortMultipartUploadRequestBuilder(path, id)); + a(factory.newCompleteMultipartUploadRequestBuilder(path, id, new ArrayList<>())); - a(factory.newCopyObjectRequest(path, path2, md)); - a(factory.newDeleteObjectRequest(path)); - a(factory.newBulkDeleteRequest(new ArrayList<>())); + a(factory.newCopyObjectRequestBuilder(path, path2, + HeadObjectResponse.builder().build())); + a(factory.newDeleteObjectRequestBuilder(path)); + a(factory.newBulkDeleteRequestBuilder(new ArrayList<>())); a(factory.newDirectoryMarkerRequest(path)); - a(factory.newGetObjectRequest(path)); - a(factory.newGetObjectMetadataRequest(path)); - a(factory.newListMultipartUploadsRequest(path)); - a(factory.newListObjectsV1Request(path, "/", 1)); - a(factory.newListNextBatchOfObjectsRequest(new ObjectListing())); - a(factory.newListObjectsV2Request(path, "/", 1)); - a(factory.newMultipartUploadRequest(path, null)); - File srcfile = new File("/tmp/a"); - a(factory.newPutObjectRequest(path, - factory.newObjectMetadata(-1), null, srcfile)); - ByteArrayInputStream stream = new ByteArrayInputStream(new byte[0]); - a(factory.newPutObjectRequest(path, md, null, stream)); - a(factory.newSelectRequest(path)); + a(factory.newGetObjectRequestBuilder(path)); + a(factory.newHeadObjectRequestBuilder(path)); + a(factory.newListMultipartUploadsRequestBuilder(path)); + a(factory.newListObjectsV1RequestBuilder(path, "/", 1)); + a(factory.newListObjectsV2RequestBuilder(path, "/", 1)); + a(factory.newMultipartUploadRequestBuilder(path, null)); + a(factory.newPutObjectRequestBuilder(path, + PutObjectOptions.keepingDirs(), -1, true)); + a(factory.newPutObjectRequestBuilder(path, + PutObjectOptions.deletingDirs(), 1024, false)); + a(factory.newSelectRequestBuilder(path)); } /** @@ -193,25 +191,18 @@ public class TestRequestFactory extends AbstractHadoopTestBase { RequestFactory factory = RequestFactoryImpl.builder() .withBucket("bucket") .withRequestPreparer(countRequests) + .withMultipartPartCountLimit(2) .build(); String path = "path"; - String path2 = "path2"; String id = "1"; - File srcfile = File.createTempFile("file", ""); - try { - ByteArrayInputStream stream = new ByteArrayInputStream(new byte[0]); - a(factory.newUploadPartRequest(path, id, 1, 0, stream, null, 0)); - a(factory.newUploadPartRequest(path, id, 2, 128_000_000, - null, srcfile, 0)); - // offset is past the EOF - intercept(IllegalArgumentException.class, () -> - factory.newUploadPartRequest(path, id, 3, 128_000_000, - null, srcfile, 128)); - } finally { - srcfile.delete(); - } + a(factory.newUploadPartRequestBuilder(path, id, 1, 0)); + a(factory.newUploadPartRequestBuilder(path, id, 2, 128_000_000)); + // partNumber is past the limit + intercept(PathIOException.class, () -> + factory.newUploadPartRequestBuilder(path, id, 3, 128_000_000)); + assertThat(countRequests.counter.get()) .describedAs("request preparation 
count") .isEqualTo(requestsAnalyzed); diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/TestSDKStreamDrainer.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/TestSDKStreamDrainer.java index 33a44a9ad78..7042737b310 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/TestSDKStreamDrainer.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/TestSDKStreamDrainer.java @@ -19,13 +19,15 @@ package org.apache.hadoop.fs.s3a.impl; import java.io.IOException; +import java.io.InputStream; -import com.amazonaws.internal.SdkFilterInputStream; +import software.amazon.awssdk.http.Abortable; import org.assertj.core.api.Assertions; import org.junit.Test; import org.apache.hadoop.test.HadoopTestBase; + import static org.apache.hadoop.fs.s3a.impl.InternalConstants.DRAIN_BUFFER_SIZE; import static org.apache.hadoop.fs.s3a.statistics.impl.EmptyS3AStatisticsContext.EMPTY_INPUT_STREAM_STATISTICS; import static org.apache.hadoop.test.LambdaTestUtils.intercept; @@ -127,7 +129,6 @@ public class TestSDKStreamDrainer extends HadoopTestBase { public void testReadFailure() throws Throwable { int threshold = 50; SDKStreamDrainer drainer = new SDKStreamDrainer("s3://example/", - null, new FakeSDKInputStream(BYTES, threshold), false, BYTES, @@ -145,7 +146,6 @@ public class TestSDKStreamDrainer extends HadoopTestBase { public void testReadFailureDoesNotSurfaceInAbort() throws Throwable { int threshold = 50; SDKStreamDrainer drainer = new SDKStreamDrainer("s3://example/", - null, new FakeSDKInputStream(BYTES, threshold), true, BYTES, @@ -183,7 +183,6 @@ public class TestSDKStreamDrainer extends HadoopTestBase { boolean shouldAbort, FakeSDKInputStream in) throws Throwable { SDKStreamDrainer drainer = new SDKStreamDrainer("s3://example/", - null, in, shouldAbort, remaining, @@ -246,7 +245,8 @@ public class TestSDKStreamDrainer extends HadoopTestBase { * Fake stream; generates data dynamically. * Only overrides the methods used in stream draining. 
*/ - private static final class FakeSDKInputStream extends SdkFilterInputStream { + private static final class FakeSDKInputStream extends InputStream + implements Abortable { private final int capacity; @@ -264,7 +264,6 @@ public class TestSDKStreamDrainer extends HadoopTestBase { * @param readToRaiseIOE position to raise an IOE, or -1 */ private FakeSDKInputStream(final int capacity, final int readToRaiseIOE) { - super(null); this.capacity = capacity; this.readToRaiseIOE = readToRaiseIOE; } @@ -282,11 +281,6 @@ public class TestSDKStreamDrainer extends HadoopTestBase { aborted = true; } - @Override - protected boolean isAborted() { - return aborted; - } - @Override public int read() throws IOException { if (bytesRead >= capacity) { diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/performance/ITestDirectoryMarkerListing.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/performance/ITestDirectoryMarkerListing.java index ccb0c0e79ec..de0048c2558 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/performance/ITestDirectoryMarkerListing.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/performance/ITestDirectoryMarkerListing.java @@ -27,9 +27,10 @@ import java.util.List; import java.util.concurrent.Callable; import java.util.stream.Collectors; -import com.amazonaws.AmazonClientException; -import com.amazonaws.services.s3.AmazonS3; -import com.amazonaws.services.s3.model.ObjectMetadata; +import software.amazon.awssdk.core.exception.SdkException; +import software.amazon.awssdk.core.sync.RequestBody; +import software.amazon.awssdk.services.s3.S3Client; +import software.amazon.awssdk.services.s3.model.HeadObjectResponse; import org.junit.Test; import org.junit.runner.RunWith; import org.junit.runners.Parameterized; @@ -48,6 +49,7 @@ import org.apache.hadoop.fs.s3a.S3AFileSystem; import org.apache.hadoop.fs.s3a.S3AUtils; import org.apache.hadoop.fs.store.audit.AuditSpan; + import static org.apache.hadoop.fs.contract.ContractTestUtils.touch; import static org.apache.hadoop.fs.s3a.Constants.DIRECTORY_MARKER_POLICY; import static org.apache.hadoop.fs.s3a.Constants.DIRECTORY_MARKER_POLICY_DELETE; @@ -156,7 +158,7 @@ public class ITestDirectoryMarkerListing extends AbstractS3ATestBase { /** * S3 Client of the FS. */ - private AmazonS3 s3client; + private S3Client s3client; /** * Path to a file under the marker. @@ -604,7 +606,8 @@ public class ITestDirectoryMarkerListing extends AbstractS3ATestBase { */ private void put(final String key, final String content) throws Exception { exec("PUT " + key, () -> - s3client.putObject(bucket, key, content)); + s3client.putObject(b -> b.bucket(bucket).key(key), + RequestBody.fromString(content))); } /** * Delete an object. @@ -613,7 +616,7 @@ public class ITestDirectoryMarkerListing extends AbstractS3ATestBase { */ private void deleteObject(final String key) throws Exception { exec("DELETE " + key, () -> { - s3client.deleteObject(bucket, key); + s3client.deleteObject(b -> b.bucket(bucket).key(key)); return "deleted " + key; }); } @@ -624,10 +627,10 @@ public class ITestDirectoryMarkerListing extends AbstractS3ATestBase { * @return a description of the object. 
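The put() and deleteObject() helpers above use the v2 "consumer builder" shorthand, where a lambda populates the request builder in place; it is interchangeable with building the request explicitly. A sketch of the equivalence, with hypothetical names and not part of this patch:

    import software.amazon.awssdk.core.sync.RequestBody;
    import software.amazon.awssdk.services.s3.S3Client;
    import software.amazon.awssdk.services.s3.model.PutObjectRequest;

    /** Sketch of the v2 consumer-builder shorthand; not part of the patch. */
    public final class ConsumerBuilderSketch {
      static void put(S3Client s3, String bucket, String key, String content) {
        // Shorthand form, as used by the test helpers above.
        s3.putObject(b -> b.bucket(bucket).key(key),
            RequestBody.fromString(content));
        // Equivalent explicit form: build the request object, then call the client.
        PutObjectRequest request = PutObjectRequest.builder()
            .bucket(bucket)
            .key(key)
            .build();
        s3.putObject(request, RequestBody.fromString(content));
      }
    }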
*/ private String head(final String key) throws Exception { - ObjectMetadata md = exec("HEAD " + key, () -> - s3client.getObjectMetadata(bucket, key)); + HeadObjectResponse response = exec("HEAD " + key, () -> + s3client.headObject(b -> b.bucket(bucket).key(key))); return String.format("Object %s of length %d", - key, md.getInstanceLength()); + key, response.contentLength()); } /** @@ -655,7 +658,7 @@ public class ITestDirectoryMarkerListing extends AbstractS3ATestBase { ContractTestUtils.NanoTimer timer = new ContractTestUtils.NanoTimer(); try (AuditSpan span = getSpanSource().createSpan(op, null, null)) { return call.call(); - } catch (AmazonClientException ex) { + } catch (SdkException ex) { throw S3AUtils.translateException(op, "", ex); } finally { timer.end(op); diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/prefetch/MockS3ARemoteObject.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/prefetch/MockS3ARemoteObject.java index 6e2f547a22e..5fbbc3a1279 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/prefetch/MockS3ARemoteObject.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/prefetch/MockS3ARemoteObject.java @@ -21,11 +21,12 @@ package org.apache.hadoop.fs.s3a.prefetch; import java.io.ByteArrayInputStream; import java.io.IOException; -import java.io.InputStream; import java.util.concurrent.CompletableFuture; -import com.amazonaws.services.s3.model.GetObjectRequest; -import com.amazonaws.services.s3.model.S3Object; +import software.amazon.awssdk.core.ResponseInputStream; +import software.amazon.awssdk.http.AbortableInputStream; +import software.amazon.awssdk.services.s3.model.GetObjectRequest; +import software.amazon.awssdk.services.s3.model.GetObjectResponse; import org.apache.hadoop.fs.impl.prefetch.Validate; import org.apache.hadoop.fs.s3a.S3AInputStream; @@ -55,7 +56,7 @@ class MockS3ARemoteObject extends S3ARemoteObject { super( S3APrefetchFakes.createReadContext(null, KEY, size, 1, 1), S3APrefetchFakes.createObjectAttributes(BUCKET, KEY, size), - S3APrefetchFakes.createInputStreamCallbacks(BUCKET, KEY), + S3APrefetchFakes.createInputStreamCallbacks(BUCKET), EmptyS3AStatisticsContext.EMPTY_INPUT_STREAM_STATISTICS, S3APrefetchFakes.createChangeTracker(BUCKET, KEY, size) ); @@ -68,7 +69,8 @@ class MockS3ARemoteObject extends S3ARemoteObject { } @Override - public InputStream openForRead(long offset, int size) throws IOException { + public ResponseInputStream openForRead(long offset, int size) + throws IOException { Validate.checkLessOrEqual(offset, "offset", size(), "size()"); Validate.checkLessOrEqual(size, "size", size() - offset, "size() - offset"); @@ -77,11 +79,15 @@ class MockS3ARemoteObject extends S3ARemoteObject { throw new IOException("Throwing because throwExceptionOnOpen is true "); } int bufSize = (int) Math.min(size, size() - offset); - return new ByteArrayInputStream(contents, (int) offset, bufSize); + GetObjectResponse objectResponse = GetObjectResponse.builder().build(); + return new ResponseInputStream(objectResponse, + AbortableInputStream.create(new ByteArrayInputStream(contents, + (int) offset, bufSize), () -> {})); } @Override - public void close(InputStream inputStream, int numRemainingBytes) { + public void close(ResponseInputStream inputStream, + int numRemainingBytes) { // do nothing since we do not use a real S3 stream. 
} @@ -92,7 +98,8 @@ class MockS3ARemoteObject extends S3ARemoteObject { public static S3AInputStream.InputStreamCallbacks createClient(String bucketName) { return new S3AInputStream.InputStreamCallbacks() { @Override - public S3Object getObject(GetObjectRequest request) { + public ResponseInputStream<GetObjectResponse> getObject( + GetObjectRequest request) { return null; } @@ -102,8 +109,8 @@ class MockS3ARemoteObject extends S3ARemoteObject { } @Override - public GetObjectRequest newGetRequest(String key) { - return new GetObjectRequest(bucketName, key); + public GetObjectRequest.Builder newGetRequestBuilder(String key) { + return GetObjectRequest.builder().bucket(bucketName).key(key); } @Override diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/prefetch/S3APrefetchFakes.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/prefetch/S3APrefetchFakes.java index cf6aa7ba1aa..2ceaec24773 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/prefetch/S3APrefetchFakes.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/prefetch/S3APrefetchFakes.java @@ -31,10 +31,10 @@ import java.util.concurrent.CompletableFuture; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.TimeUnit; -import com.amazonaws.services.s3.model.GetObjectRequest; -import com.amazonaws.services.s3.model.ObjectMetadata; -import com.amazonaws.services.s3.model.S3Object; -import com.amazonaws.services.s3.model.S3ObjectInputStream; +import software.amazon.awssdk.core.ResponseInputStream; +import software.amazon.awssdk.http.AbortableInputStream; +import software.amazon.awssdk.services.s3.model.GetObjectRequest; +import software.amazon.awssdk.services.s3.model.GetObjectResponse; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; @@ -62,6 +62,7 @@ import org.apache.hadoop.io.retry.RetryPolicies; import org.apache.hadoop.io.retry.RetryPolicy; import org.apache.hadoop.util.functional.CallableRaisingIOE; + import static org.apache.hadoop.fs.s3a.Constants.BUFFER_DIR; import static org.apache.hadoop.fs.s3a.Constants.HADOOP_TMP_DIR; import static org.apache.hadoop.fs.statistics.impl.IOStatisticsBinding.emptyStatisticsStore; @@ -175,32 +176,26 @@ public final class S3APrefetchFakes { createObjectAttributes(bucket, key, fileSize)); } - public static S3ObjectInputStream createS3ObjectInputStream(byte[] buffer) { - return new S3ObjectInputStream(new ByteArrayInputStream(buffer), null); + public static ResponseInputStream<GetObjectResponse> createS3ObjectInputStream( + GetObjectResponse objectResponse, byte[] buffer) { + return new ResponseInputStream(objectResponse, + AbortableInputStream.create(new ByteArrayInputStream(buffer), () -> {})); } public static S3AInputStream.InputStreamCallbacks createInputStreamCallbacks( - String bucket, - String key) { + String bucket) { - S3Object object = new S3Object() { - @Override - public S3ObjectInputStream getObjectContent() { - return createS3ObjectInputStream(new byte[8]); - } + GetObjectResponse objectResponse = GetObjectResponse.builder() + .eTag(E_TAG) + .build(); - @Override - public ObjectMetadata getObjectMetadata() { - ObjectMetadata metadata = new ObjectMetadata(); - metadata.setHeader("ETag", E_TAG); - return metadata; - } - }; + ResponseInputStream<GetObjectResponse> responseInputStream = + createS3ObjectInputStream(objectResponse, new byte[8]); return new S3AInputStream.InputStreamCallbacks() { @Override - public S3Object getObject(GetObjectRequest request) { - return object; + public
ResponseInputStream getObject(GetObjectRequest request) { + return responseInputStream; } @Override @@ -209,8 +204,8 @@ public final class S3APrefetchFakes { } @Override - public GetObjectRequest newGetRequest(String key) { - return new GetObjectRequest(bucket, key); + public GetObjectRequest.Builder newGetRequestBuilder(String key) { + return GetObjectRequest.builder().bucket(bucket).key(key); } @Override @@ -229,9 +224,6 @@ public final class S3APrefetchFakes { int prefetchBlockSize, int prefetchBlockCount) { - org.apache.hadoop.fs.Path path = new org.apache.hadoop.fs.Path(key); - - S3AFileStatus fileStatus = createFileStatus(key, fileSize); S3ObjectAttributes s3ObjectAttributes = createObjectAttributes(bucket, key, fileSize); S3AReadOpContext s3AReadOpContext = createReadContext( @@ -242,7 +234,7 @@ public final class S3APrefetchFakes { prefetchBlockCount); S3AInputStream.InputStreamCallbacks callbacks = - createInputStreamCallbacks(bucket, key); + createInputStreamCallbacks(bucket); S3AInputStreamStatistics stats = s3AReadOpContext.getS3AStatisticsContext().newInputStreamStatistics(); diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/AbstractSTestS3AHugeFiles.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/AbstractSTestS3AHugeFiles.java index c4949375b76..f30eb0f11ae 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/AbstractSTestS3AHugeFiles.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/AbstractSTestS3AHugeFiles.java @@ -26,9 +26,6 @@ import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicLong; import java.util.function.IntFunction; -import com.amazonaws.event.ProgressEvent; -import com.amazonaws.event.ProgressEventType; -import com.amazonaws.event.ProgressListener; import org.assertj.core.api.Assertions; import org.junit.FixMethodOrder; import org.junit.Test; @@ -48,6 +45,8 @@ import org.apache.hadoop.fs.s3a.Constants; import org.apache.hadoop.fs.s3a.S3AFileSystem; import org.apache.hadoop.fs.s3a.S3ATestUtils; import org.apache.hadoop.fs.s3a.Statistic; +import org.apache.hadoop.fs.s3a.impl.ProgressListener; +import org.apache.hadoop.fs.s3a.impl.ProgressListenerEvent; import org.apache.hadoop.fs.s3a.statistics.BlockOutputStreamStatistics; import org.apache.hadoop.fs.statistics.IOStatistics; import org.apache.hadoop.util.DurationInfo; @@ -377,10 +376,9 @@ public abstract class AbstractSTestS3AHugeFiles extends S3AScaleTestBase { } /** - * Progress callback from AWS. Likely to come in on a different thread. + * Progress callback. 
*/ - private final class ProgressCallback implements Progressable, - ProgressListener { + private final class ProgressCallback implements Progressable, ProgressListener { private AtomicLong bytesTransferred = new AtomicLong(0); private AtomicLong uploadEvents = new AtomicLong(0); private AtomicInteger failures = new AtomicInteger(0); @@ -395,11 +393,8 @@ public abstract class AbstractSTestS3AHugeFiles extends S3AScaleTestBase { } @Override - public void progressChanged(ProgressEvent progressEvent) { - ProgressEventType eventType = progressEvent.getEventType(); - if (eventType.isByteCountEvent()) { - bytesTransferred.addAndGet(progressEvent.getBytesTransferred()); - } + public void progressChanged(ProgressListenerEvent eventType, long transferredBytes) { + switch (eventType) { case TRANSFER_PART_FAILED_EVENT: // failure @@ -408,6 +403,7 @@ public abstract class AbstractSTestS3AHugeFiles extends S3AScaleTestBase { break; case TRANSFER_PART_COMPLETED_EVENT: // completion + bytesTransferred.addAndGet(transferredBytes); long elapsedTime = timer.elapsedTime(); double elapsedTimeS = elapsedTime / 1.0e9; long written = bytesTransferred.get(); @@ -415,21 +411,18 @@ public abstract class AbstractSTestS3AHugeFiles extends S3AScaleTestBase { LOG.info(String.format( "Event %s; total uploaded=%d MB in %.1fs;" + " effective upload bandwidth = %.2f MB/s", - progressEvent, + eventType, writtenMB, elapsedTimeS, writtenMB / elapsedTimeS)); break; case REQUEST_BYTE_TRANSFER_EVENT: uploadEvents.incrementAndGet(); break; default: - if (!eventType.isByteCountEvent()) { - LOG.info("Event {}", progressEvent); - } + // nothing break; } } - @Override public String toString() { String sb = "ProgressCallback{" + "bytesTransferred=" + bytesTransferred.get() + diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/ILoadTestS3ABulkDeleteThrottling.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/ILoadTestS3ABulkDeleteThrottling.java index 2738dd9284a..8d118e5a4fa 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/ILoadTestS3ABulkDeleteThrottling.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/ILoadTestS3ABulkDeleteThrottling.java @@ -29,7 +29,7 @@ import java.util.concurrent.CompletionService; import java.util.concurrent.ExecutorCompletionService; import java.util.concurrent.ExecutorService; -import com.amazonaws.services.s3.model.DeleteObjectsRequest; +import software.amazon.awssdk.services.s3.model.ObjectIdentifier; import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.ThreadFactoryBuilder; import org.assertj.core.api.Assertions; @@ -52,6 +52,7 @@ import org.apache.hadoop.fs.s3a.auth.delegation.Csvout; import org.apache.hadoop.test.GenericTestUtils; import org.apache.hadoop.util.concurrent.HadoopExecutors; + import static org.apache.hadoop.fs.s3a.Constants.EXPERIMENTAL_AWS_INTERNAL_THROTTLING; import static org.apache.hadoop.fs.s3a.Constants.BULK_DELETE_PAGE_SIZE; import static org.apache.hadoop.fs.s3a.Constants.BULK_DELETE_PAGE_SIZE_DEFAULT; @@ -228,7 +229,7 @@ public class ILoadTestS3ABulkDeleteThrottling extends S3AScaleTestBase { Path basePath = path("testDeleteObjectThrottling"); final S3AFileSystem fs = getFileSystem(); final String base = fs.pathToKey(basePath); - final List fileList + final List fileList = buildDeleteRequest(base, entries); final FileWriter out = new FileWriter(csvFile); Csvout csvout = new 
Csvout(out, "\t", "\n"); @@ -304,12 +305,12 @@ public class ILoadTestS3ABulkDeleteThrottling extends S3AScaleTestBase { } - private List buildDeleteRequest( + private List buildDeleteRequest( String base, int count) { - List request = new ArrayList<>(count); + List request = new ArrayList<>(count); for (int i = 0; i < count; i++) { - request.add(new DeleteObjectsRequest.KeyVersion( - String.format("%s/file-%04d", base, i))); + request.add(ObjectIdentifier.builder().key( + String.format("%s/file-%04d", base, i)).build()); } return request; } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/ITestS3ADirectoryPerformance.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/ITestS3ADirectoryPerformance.java index de903b3d75a..173099bb2ca 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/ITestS3ADirectoryPerformance.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/ITestS3ADirectoryPerformance.java @@ -26,6 +26,7 @@ import org.apache.hadoop.fs.LocatedFileStatus; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.RemoteIterator; import org.apache.hadoop.fs.s3a.Constants; +import org.apache.hadoop.fs.s3a.S3ADataBlocks; import org.apache.hadoop.fs.s3a.S3AFileSystem; import org.apache.hadoop.fs.s3a.S3ATestUtils; import org.apache.hadoop.fs.s3a.Statistic; @@ -41,6 +42,7 @@ import org.assertj.core.api.Assertions; import org.slf4j.Logger; import org.slf4j.LoggerFactory; + import java.io.IOException; import java.io.InputStream; import java.util.ArrayList; @@ -49,9 +51,8 @@ import java.util.concurrent.CompletableFuture; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; -import com.amazonaws.services.s3.model.ObjectMetadata; -import com.amazonaws.services.s3.model.PutObjectRequest; -import com.amazonaws.services.s3.model.PutObjectResult; +import software.amazon.awssdk.services.s3.model.PutObjectRequest; +import software.amazon.awssdk.services.s3.model.PutObjectResponse; import static org.apache.hadoop.fs.s3a.Constants.DIRECTORY_MARKER_POLICY; import static org.apache.hadoop.fs.s3a.Constants.DIRECTORY_MARKER_POLICY_KEEP; @@ -249,18 +250,19 @@ public class ITestS3ADirectoryPerformance extends S3AScaleTestBase { = fs.getWriteOperationHelper(); final RequestFactory requestFactory = writeOperationHelper.getRequestFactory(); - List> futures = + List> futures = new ArrayList<>(numOfPutRequests); for (int i=0; i - writeOperationHelper.putObject(put, PutObjectOptions.keepingDirs(), null))); + PutObjectRequest.Builder putObjectRequestBuilder = requestFactory + .newPutObjectRequestBuilder(fs.pathToKey(file), + null, 0, false); + futures.add(submit(executorService, + () -> writeOperationHelper.putObject(putObjectRequestBuilder.build(), + PutObjectOptions.keepingDirs(), + new S3ADataBlocks.BlockUploadData(new FailingInputStream()), false, null))); } LOG.info("Waiting for PUTs to complete"); waitForCompletion(futures); diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/ITestS3AHugeFilesStorageClass.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/ITestS3AHugeFilesStorageClass.java index 99407467df5..006c989604f 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/ITestS3AHugeFilesStorageClass.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/ITestS3AHugeFilesStorageClass.java @@ -126,7 +126,7 @@ public class ITestS3AHugeFilesStorageClass 
extends AbstractSTestS3AHugeFiles { protected void assertStorageClass(Path hugeFile) throws IOException { S3AFileSystem fs = getFileSystem(); - String actual = fs.getObjectMetadata(hugeFile).getStorageClass(); + String actual = fs.getObjectMetadata(hugeFile).storageClassAsString(); assertTrue( "Storage class of object is " + actual + ", expected " + STORAGE_CLASS_REDUCED_REDUNDANCY, diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/select/AbstractS3SelectTest.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/select/AbstractS3SelectTest.java index 2c1a10a21d0..a3d41116182 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/select/AbstractS3SelectTest.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/select/AbstractS3SelectTest.java @@ -263,9 +263,9 @@ public abstract class AbstractS3SelectTest extends AbstractS3ATestBase { protected static AWSServiceIOException verifyErrorCode(final String code, final AWSServiceIOException ex) { logIntercepted(ex); - if (!code.equals(ex.getErrorCode())) { + if (!code.equals(ex.awsErrorDetails().errorCode())) { throw new AssertionError("Expected Error code" + code - + " actual " + ex.getErrorCode(), + + " actual " + ex.awsErrorDetails().errorCode(), ex); } return ex; diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/select/ITestS3SelectLandsat.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/select/ITestS3SelectLandsat.java index 51da971fb70..4d4af822ee5 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/select/ITestS3SelectLandsat.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/select/ITestS3SelectLandsat.java @@ -415,7 +415,7 @@ public class ITestS3SelectLandsat extends AbstractS3SelectTest { long increment = 64 * _1KB; // seek forward, comparing bytes - for(offset = 32 * _1KB; offset < _1MB; offset += increment) { + for(offset = 32 * _1KB; offset < 256 * _1KB; offset += increment) { seek(seekStream, offset); assertEquals("Seek position in " + seekStream, offset, seekStream.getPos()); diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/select/StreamPublisher.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/select/StreamPublisher.java new file mode 100644 index 00000000000..c770b889733 --- /dev/null +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/select/StreamPublisher.java @@ -0,0 +1,89 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.fs.s3a.select; + +import java.util.Iterator; +import java.util.concurrent.Executor; +import java.util.stream.Stream; + +import org.reactivestreams.Subscriber; +import org.reactivestreams.Subscription; +import software.amazon.awssdk.core.async.SdkPublisher; + +/** + * Publisher used to test the handling of asynchronous responses. + * @param <T> The type of published elements. + */ +final class StreamPublisher<T> implements SdkPublisher<T> { + private final Executor executor; + private final Iterator<T> iterator; + private Boolean done = false; + + public StreamPublisher(Stream<T> data, Executor executor) { + this.iterator = data.iterator(); + this.executor = executor; + } + + public StreamPublisher(Stream<T> data) { + this(data, Runnable::run); + } + + @Override + public void subscribe(Subscriber<? super T> subscriber) { + subscriber.onSubscribe(new Subscription() { + @Override + public void request(long n) { + if (done) { + return; + } + + if (n < 1) { + done = true; + executor.execute(() -> subscriber.onError(new IllegalArgumentException())); + return; + } + + for (long i = 0; i < n; i++) { + final T value; + try { + synchronized (iterator) { + value = iterator.hasNext() ? iterator.next() : null; + } + } catch (Throwable e) { + executor.execute(() -> subscriber.onError(e)); + break; + } + + if (value == null) { + done = true; + executor.execute(subscriber::onComplete); + break; + } else { + executor.execute(() -> subscriber.onNext(value)); + } + } + } + + @Override + public void cancel() { + done = true; + } + }); + } +} diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/select/TestBlockingEnumeration.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/select/TestBlockingEnumeration.java new file mode 100644 index 00000000000..43bdcb062f0 --- /dev/null +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/select/TestBlockingEnumeration.java @@ -0,0 +1,200 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.s3a.select; + +import java.util.concurrent.Executor; +import java.util.concurrent.Executors; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.stream.IntStream; +import java.util.stream.Stream; + +import org.junit.Assert; +import org.junit.Test; +import software.amazon.awssdk.core.async.SdkPublisher; +import software.amazon.awssdk.core.exception.SdkException; + +/** + * Unit tests for {@link BlockingEnumeration}.
+ */ +public final class TestBlockingEnumeration extends Assert { + + @Test + public void containsElement() { + SdkPublisher<String> publisher = new StreamPublisher<>(Stream.of("foo")); + + BlockingEnumeration<String> enumeration = + new BlockingEnumeration<>(publisher, 1); + + assertTrue(enumeration.hasMoreElements()); + assertEquals("foo", enumeration.nextElement()); + assertFalse(enumeration.hasMoreElements()); + } + + @Test + public void containsInjectedElement() { + SdkPublisher<String> publisher = new StreamPublisher<>(Stream.of("foo")); + + BlockingEnumeration<String> enumeration = + new BlockingEnumeration<>(publisher, 1, "bar"); + + assertTrue(enumeration.hasMoreElements()); + assertEquals("bar", enumeration.nextElement()); + assertTrue(enumeration.hasMoreElements()); + assertEquals("foo", enumeration.nextElement()); + assertFalse(enumeration.hasMoreElements()); + } + + @Test + public void throwsExceptionOnFirstElement() { + SdkPublisher<Integer> publisher = new StreamPublisher<>( + Stream.of(0, 1) + .map(i -> { + throw SdkException.create("error!", null); + }), + Executors.newSingleThreadExecutor()); + + BlockingEnumeration<Integer> enumeration = + new BlockingEnumeration<>(publisher, 1); + assertThrows(SdkException.class, enumeration::hasMoreElements); + } + + @Test + public void throwsExceptionAfterInjectedElement() { + SdkPublisher<Integer> publisher = new StreamPublisher<>( + Stream.of(0, 1) + .peek(i -> { + throw SdkException.create("error!", null); + }), + Executors.newSingleThreadExecutor()); + + BlockingEnumeration<Integer> enumeration = + new BlockingEnumeration<>(publisher, 1, 99); + assertTrue(enumeration.hasMoreElements()); + assertEquals(99, enumeration.nextElement().intValue()); + assertThrows(SdkException.class, enumeration::hasMoreElements); + } + + @Test + public void throwsNonSdkException() { + SdkPublisher<Integer> publisher = new StreamPublisher<>( + Stream.of(0, 1) + .peek(i -> { + throw new RuntimeException("error!", null); + }), + Executors.newSingleThreadExecutor()); + + BlockingEnumeration<Integer> enumeration = + new BlockingEnumeration<>(publisher, 1); + SdkException exception = Assert.assertThrows(SdkException.class, enumeration::hasMoreElements); + assertEquals(RuntimeException.class, exception.getCause().getClass()); + } + + @Test + public void throwsError() { + SdkPublisher<Integer> publisher = new StreamPublisher<>( + Stream.of(0, 1) + .peek(i -> { + throw new Error("error!", null); + }), + Executors.newSingleThreadExecutor()); + + BlockingEnumeration<Integer> enumeration = + new BlockingEnumeration<>(publisher, 1); + assertThrows(Error.class, enumeration::hasMoreElements); + } + + @Test + public void throwsExceptionOnSecondElement() { + SdkPublisher<Integer> publisher = new StreamPublisher<>( + Stream.of(0, 1) + .peek(i -> { + if (i == 1) { + throw SdkException.create("error!", null); + } + }), + Executors.newSingleThreadExecutor()); + + BlockingEnumeration<Integer> enumeration = + new BlockingEnumeration<>(publisher, 1); + assertTrue(enumeration.hasMoreElements()); + assertEquals(0, enumeration.nextElement().intValue()); + assertThrows(SdkException.class, enumeration::hasMoreElements); + } + + @Test + public void noMoreElementsAfterThrow() { + SdkPublisher<Integer> publisher = new StreamPublisher<>( + Stream.of(0, 1) + .map(i -> { + throw SdkException.create("error!", null); + }), + Executors.newSingleThreadExecutor()); + + BlockingEnumeration<Integer> enumeration = + new BlockingEnumeration<>(publisher, 1); + assertThrows(SdkException.class, enumeration::hasMoreElements); + assertFalse(enumeration.hasMoreElements()); + } + + @Test + public void buffersOnSameThread() {
verifyBuffering(10, 3, Runnable::run); + } + + @Test + public void publisherOnDifferentThread() { + verifyBuffering(5, 1, Executors.newSingleThreadExecutor()); + } + + @Test + public void publisherOnDifferentThreadWithBuffer() { + verifyBuffering(30, 10, Executors.newSingleThreadExecutor()); + } + + private static void verifyBuffering(int length, int bufferSize, Executor executor) { + AtomicInteger emitted = new AtomicInteger(); + SdkPublisher publisher = new StreamPublisher<>( + IntStream.range(0, length).boxed().peek(i -> emitted.incrementAndGet()), + executor); + + BlockingEnumeration enumeration = + new BlockingEnumeration<>(publisher, bufferSize); + + int pulled = 0; + while (true) { + try { + int expected = Math.min(length, pulled + bufferSize); + if (expected != emitted.get()) { + Thread.sleep(10); + } + assertEquals(expected, emitted.get()); + } catch (InterruptedException e) { + fail("Interrupted: " + e); + } + + if (!enumeration.hasMoreElements()) { + break; + } + + int i = enumeration.nextElement(); + assertEquals(pulled, i); + pulled++; + } + } +} diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/select/TestSelectEventStreamPublisher.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/select/TestSelectEventStreamPublisher.java new file mode 100644 index 00000000000..faf32fe4fd9 --- /dev/null +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/select/TestSelectEventStreamPublisher.java @@ -0,0 +1,188 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.s3a.select; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.util.Arrays; +import java.util.Collection; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.Executor; +import java.util.concurrent.Executors; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.stream.Stream; + +import org.junit.Assert; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; +import software.amazon.awssdk.core.SdkBytes; +import software.amazon.awssdk.core.async.SdkPublisher; +import software.amazon.awssdk.core.exception.SdkException; +import software.amazon.awssdk.http.AbortableInputStream; +import software.amazon.awssdk.services.s3.model.SelectObjectContentEventStream; +import software.amazon.awssdk.services.s3.model.SelectObjectContentResponse; + +/** + * Unit tests for {@link SelectEventStreamPublisher}. 
+ */ +@RunWith(Parameterized.class) +public final class TestSelectEventStreamPublisher extends Assert { + + @Parameterized.Parameters(name = "threading-{0}") + public static Collection params() { + return Arrays.asList(new Object[][]{ + {"main"}, + {"background"} + }); + } + + private final String threading; + + public TestSelectEventStreamPublisher(String threading) { + this.threading = threading; + } + + private Executor createExecutor() { + if (threading.equals("main")) { + return Runnable::run; + } else if (threading.equals("background")) { + return Executors.newSingleThreadExecutor(); + } else { + throw new IllegalArgumentException("Unknown: " + threading); + } + } + + @Test + public void emptyRecordsInputStream() throws IOException { + SelectEventStreamPublisher selectEventStreamPublisher = + createSelectPublisher(Stream.of( + SelectObjectContentEventStream.recordsBuilder() + .payload(SdkBytes.fromByteArray(new byte[0])) + .build())); + + try (AbortableInputStream inputStream = + selectEventStreamPublisher.toRecordsInputStream(e -> {})) { + assertEquals(-1, inputStream.read()); + } + } + + @Test + public void multipleRecords() throws IOException { + SelectEventStreamPublisher selectEventStreamPublisher = + createSelectPublisher(Stream.of( + SelectObjectContentEventStream.recordsBuilder() + .payload(SdkBytes.fromUtf8String("foo")) + .build(), + SelectObjectContentEventStream.recordsBuilder() + .payload(SdkBytes.fromUtf8String("bar")) + .build())); + + try (AbortableInputStream inputStream = + selectEventStreamPublisher.toRecordsInputStream(e -> {})) { + String result = readAll(inputStream); + assertEquals("foobar", result); + } + } + + @Test + public void skipsOtherEvents() throws IOException { + SelectEventStreamPublisher selectEventStreamPublisher = + createSelectPublisher(Stream.of( + SelectObjectContentEventStream.recordsBuilder() + .payload(SdkBytes.fromUtf8String("foo")) + .build(), + SelectObjectContentEventStream.progressBuilder() + .build(), + SelectObjectContentEventStream.statsBuilder() + .build(), + SelectObjectContentEventStream.recordsBuilder() + .payload(SdkBytes.fromUtf8String("bar")) + .build(), + SelectObjectContentEventStream.endBuilder() + .build())); + + try (AbortableInputStream inputStream = + selectEventStreamPublisher.toRecordsInputStream(e -> {})) { + String result = readAll(inputStream); + assertEquals("foobar", result); + } + } + + @Test + public void callsOnEndEvent() throws IOException { + SelectEventStreamPublisher selectEventStreamPublisher = + createSelectPublisher(Stream.of( + SelectObjectContentEventStream.recordsBuilder() + .payload(SdkBytes.fromUtf8String("foo")) + .build(), + SelectObjectContentEventStream.endBuilder() + .build())); + + AtomicBoolean endEvent = new AtomicBoolean(false); + try (AbortableInputStream inputStream = + selectEventStreamPublisher.toRecordsInputStream(e -> endEvent.set(true))) { + String result = readAll(inputStream); + assertEquals("foo", result); + } + + assertTrue(endEvent.get()); + } + + @Test + public void handlesErrors() throws IOException { + SelectEventStreamPublisher selectEventStreamPublisher = + createSelectPublisher(Stream.of( + SelectObjectContentEventStream.recordsBuilder() + .payload(SdkBytes.fromUtf8String("foo")) + .build(), + SelectObjectContentEventStream.recordsBuilder() + .payload(SdkBytes.fromUtf8String("bar")) + .build()) + .map(e -> { throw SdkException.create("error!", null); })); + + try (AbortableInputStream inputStream = + selectEventStreamPublisher.toRecordsInputStream(e -> {})) { + 
assertThrows(SdkException.class, () -> readAll(inputStream)); } } private SelectEventStreamPublisher createSelectPublisher( + Stream<SelectObjectContentEventStream> stream) { + SdkPublisher<SelectObjectContentEventStream> sdkPublisher = + new StreamPublisher<>(stream, createExecutor()); + CompletableFuture<Void> future = + CompletableFuture.completedFuture(null); + SelectObjectContentResponse response = + SelectObjectContentResponse.builder().build(); + return new SelectEventStreamPublisher(future, response, sdkPublisher); + } + + private static String readAll(InputStream inputStream) throws IOException { + try (ByteArrayOutputStream outputStream = new ByteArrayOutputStream()) { + byte[] buffer = new byte[8096]; + int read; + while ((read = inputStream.read(buffer, 0, buffer.length)) != -1) { + outputStream.write(buffer, 0, read); + } + return outputStream.toString(); + } + } +} diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/test/ExtraAssertions.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/test/ExtraAssertions.java index 77c7736575c..fdf5eb53e18 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/test/ExtraAssertions.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/test/ExtraAssertions.java @@ -147,7 +147,7 @@ public final class ExtraAssertions { */ protected void assertStatusCode(AWSServiceIOException e, int code) throws AWSServiceIOException { - if (e.getStatusCode() != code) { + if (e.statusCode() != code) { throw e; } } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/test/MinimalOperationCallbacks.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/test/MinimalOperationCallbacks.java index fa1ad2db62a..004e15676a0 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/test/MinimalOperationCallbacks.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/test/MinimalOperationCallbacks.java @@ -21,10 +21,9 @@ package org.apache.hadoop.fs.s3a.test; import java.io.IOException; import java.util.List; -import com.amazonaws.AmazonClientException; -import com.amazonaws.services.s3.model.DeleteObjectsRequest; -import com.amazonaws.services.s3.model.MultiObjectDeleteException; -import com.amazonaws.services.s3.transfer.model.CopyResult; +import software.amazon.awssdk.awscore.exception.AwsServiceException; +import software.amazon.awssdk.services.s3.model.CopyObjectResponse; +import software.amazon.awssdk.services.s3.model.ObjectIdentifier; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.Path; @@ -33,6 +32,7 @@ import org.apache.hadoop.fs.s3a.S3AFileStatus; import org.apache.hadoop.fs.s3a.S3ALocatedFileStatus; import org.apache.hadoop.fs.s3a.S3AReadOpContext; import org.apache.hadoop.fs.s3a.S3ObjectAttributes; +import org.apache.hadoop.fs.s3a.impl.MultiObjectDeleteException; import org.apache.hadoop.fs.s3a.impl.OperationCallbacks; /** @@ -88,7 +88,7 @@ public class MinimalOperationCallbacks } @Override - public CopyResult copyFile( + public CopyObjectResponse copyFile( String srcKey, String destKey, S3ObjectAttributes srcAttributes, @@ -99,9 +99,9 @@ public class MinimalOperationCallbacks @Override public void removeKeys( - List<DeleteObjectsRequest.KeyVersion> keysToDelete, + List<ObjectIdentifier> keysToDelete, boolean deleteFakeDir) - throws MultiObjectDeleteException, AmazonClientException, + throws MultiObjectDeleteException, AwsServiceException, IOException { } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/test/MinimalWriteOperationHelperCallbacks.java
b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/test/MinimalWriteOperationHelperCallbacks.java index ffba558d11f..eee0c719505 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/test/MinimalWriteOperationHelperCallbacks.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/test/MinimalWriteOperationHelperCallbacks.java @@ -18,10 +18,12 @@ package org.apache.hadoop.fs.s3a.test; -import com.amazonaws.services.s3.model.CompleteMultipartUploadRequest; -import com.amazonaws.services.s3.model.CompleteMultipartUploadResult; -import com.amazonaws.services.s3.model.SelectObjectContentRequest; -import com.amazonaws.services.s3.model.SelectObjectContentResult; +import java.util.concurrent.CompletableFuture; + +import software.amazon.awssdk.services.s3.model.CompleteMultipartUploadRequest; +import software.amazon.awssdk.services.s3.model.CompleteMultipartUploadResponse; +import software.amazon.awssdk.services.s3.model.SelectObjectContentRequest; +import software.amazon.awssdk.services.s3.model.SelectObjectContentResponseHandler; import org.apache.hadoop.fs.s3a.WriteOperationHelper; @@ -32,12 +34,14 @@ public class MinimalWriteOperationHelperCallbacks implements WriteOperationHelper.WriteOperationHelperCallbacks { @Override - public SelectObjectContentResult selectObjectContent(SelectObjectContentRequest request) { + public CompletableFuture selectObjectContent( + SelectObjectContentRequest request, + SelectObjectContentResponseHandler th) { return null; } @Override - public CompleteMultipartUploadResult completeMultipartUpload( + public CompleteMultipartUploadResponse completeMultipartUpload( CompleteMultipartUploadRequest request) { return null; } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/tools/ITestMarkerTool.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/tools/ITestMarkerTool.java index 127fcbab750..ab22c51f28b 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/tools/ITestMarkerTool.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/tools/ITestMarkerTool.java @@ -224,13 +224,6 @@ public class ITestMarkerTool extends AbstractMarkerToolTest { runToFailure(EXIT_USAGE, MARKERS); } - @Test - public void testRunWrongBucket() throws Throwable { - runToFailure(EXIT_NOT_FOUND, MARKERS, - AUDIT, - "s3a://this-bucket-does-not-exist-hopefully"); - } - /** * Run with a path that doesn't exist. */