HADOOP-18073. Upgrade S3A in 3.3 branch to AWS SDK V2. (#5707)

Contributed by Ahmar Suhail
ahmarsuhail committed 2023-06-08 18:19:50 +01:00 (committed by GitHub)
parent c9f2c45209
commit 1fe918cdf7
187 changed files with 7044 additions and 5228 deletions

View File

@ -407,6 +407,10 @@ public final class StoreStatisticNames {
public static final String MULTIPART_UPLOAD_LIST
= "multipart_upload_list";
/** Probe for store region: {@value}. */
public static final String STORE_REGION_PROBE
= "store_region_probe";
private StoreStatisticNames() {
}

View File

@ -189,6 +189,8 @@
<surefire.fork.timeout>900</surefire.fork.timeout>
<aws-java-sdk.version>1.12.316</aws-java-sdk.version>
<hsqldb.version>2.7.1</hsqldb.version>
<aws-java-sdk-v2.version>2.19.12</aws-java-sdk-v2.version>
<awscrt.version>0.21.0</awscrt.version>
<frontend-maven-plugin.version>1.11.2</frontend-maven-plugin.version>
<jasmine-maven-plugin.version>2.1</jasmine-maven-plugin.version>
<phantomjs-maven-plugin.version>0.7</phantomjs-maven-plugin.version>
@ -1108,8 +1110,13 @@
</dependency>
<dependency>
<groupId>com.amazonaws</groupId>
<artifactId>aws-java-sdk-bundle</artifactId>
<artifactId>aws-java-sdk-core</artifactId>
<version>${aws-java-sdk.version}</version>
</dependency>
<dependency>
<groupId>software.amazon.awssdk</groupId>
<artifactId>bundle</artifactId>
<version>${aws-java-sdk-v2.version}</version>
<exclusions>
<exclusion>
<groupId>io.netty</groupId>
@ -1117,6 +1124,11 @@
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>software.amazon.awssdk.crt</groupId>
<artifactId>aws-crt</artifactId>
<version>${awscrt.version}</version>
</dependency>
<dependency>
<groupId>org.apache.mina</groupId>
<artifactId>mina-core</artifactId>

View File

@ -485,7 +485,17 @@
</dependency>
<dependency>
<groupId>com.amazonaws</groupId>
<artifactId>aws-java-sdk-bundle</artifactId>
<artifactId>aws-java-sdk-core</artifactId>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>software.amazon.awssdk</groupId>
<artifactId>bundle</artifactId>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>software.amazon.awssdk.crt</groupId>
<artifactId>aws-crt</artifactId>
<scope>compile</scope>
</dependency>
<dependency>

View File

@ -18,7 +18,7 @@
package org.apache.hadoop.fs.s3a;
import com.amazonaws.AmazonServiceException;
import software.amazon.awssdk.awscore.exception.AwsServiceException;
/**
* A 400 "Bad Request" exception was received.
@ -36,7 +36,7 @@ public class AWSBadRequestException extends AWSServiceIOException {
* @param cause the underlying cause
*/
public AWSBadRequestException(String operation,
AmazonServiceException cause) {
AwsServiceException cause) {
super(operation, cause);
}
}

View File

@ -18,34 +18,33 @@
package org.apache.hadoop.fs.s3a;
import com.amazonaws.AmazonClientException;
import com.amazonaws.SdkBaseException;
import software.amazon.awssdk.core.exception.SdkException;
import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions;
import java.io.IOException;
/**
* IOException equivalent of an {@link AmazonClientException}.
* IOException equivalent of an {@link SdkException}.
*/
public class AWSClientIOException extends IOException {
private final String operation;
public AWSClientIOException(String operation,
SdkBaseException cause) {
SdkException cause) {
super(cause);
Preconditions.checkArgument(operation != null, "Null 'operation' argument");
Preconditions.checkArgument(cause != null, "Null 'cause' argument");
this.operation = operation;
}
public AmazonClientException getCause() {
return (AmazonClientException) super.getCause();
public SdkException getCause() {
return (SdkException) super.getCause();
}
@Override
public String getMessage() {
return operation + ": " + getCause().getMessage();
}
}

View File

@ -27,10 +27,11 @@ import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.stream.Collectors;
import com.amazonaws.AmazonClientException;
import com.amazonaws.auth.AWSCredentials;
import com.amazonaws.auth.AWSCredentialsProvider;
import com.amazonaws.auth.AnonymousAWSCredentials;
import com.amazonaws.auth.BasicAWSCredentials;
import com.amazonaws.auth.BasicSessionCredentials;
import org.apache.hadoop.fs.s3a.adapter.V1V2AwsCredentialProviderAdapter;
import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting;
import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions;
import org.slf4j.Logger;
@ -43,6 +44,12 @@ import org.apache.hadoop.fs.s3a.auth.NoAuthWithAWSException;
import org.apache.hadoop.fs.s3a.auth.NoAwsCredentialsException;
import org.apache.hadoop.io.IOUtils;
import software.amazon.awssdk.auth.credentials.AnonymousCredentialsProvider;
import software.amazon.awssdk.auth.credentials.AwsCredentials;
import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider;
import software.amazon.awssdk.auth.credentials.AwsSessionCredentials;
import software.amazon.awssdk.core.exception.SdkException;
/**
* A list of providers.
*
@ -51,17 +58,17 @@ import org.apache.hadoop.io.IOUtils;
* <ol>
* <li>Allows extra providers to be added dynamically.</li>
* <li>If any provider in the chain throws an exception other than
* an {@link AmazonClientException}, that is rethrown, rather than
* an {@link SdkException}, that is rethrown, rather than
* swallowed.</li>
* <li>Has some more diagnostics.</li>
* <li>On failure, the last "relevant" AmazonClientException raised is
* <li>On failure, the last "relevant" {@link SdkException} raised is
* rethrown; exceptions other than 'no credentials' have priority.</li>
* <li>Special handling of {@link AnonymousAWSCredentials}.</li>
* <li>Special handling of {@link AnonymousCredentialsProvider}.</li>
* </ol>
*/
@InterfaceAudience.Private
@InterfaceStability.Evolving
public final class AWSCredentialProviderList implements AWSCredentialsProvider,
public final class AWSCredentialProviderList implements AwsCredentialsProvider,
AutoCloseable {
private static final Logger LOG = LoggerFactory.getLogger(
@ -73,9 +80,9 @@ public final class AWSCredentialProviderList implements AWSCredentialsProvider,
CREDENTIALS_REQUESTED_WHEN_CLOSED
= "Credentials requested after provider list was closed";
private final List<AWSCredentialsProvider> providers = new ArrayList<>(1);
private final List<AwsCredentialsProvider> providers = new ArrayList<>(1);
private boolean reuseLastProvider = true;
private AWSCredentialsProvider lastProvider;
private AwsCredentialsProvider lastProvider;
private final AtomicInteger refCount = new AtomicInteger(1);
@ -99,7 +106,9 @@ public final class AWSCredentialProviderList implements AWSCredentialsProvider,
*/
public AWSCredentialProviderList(
Collection<AWSCredentialsProvider> providers) {
this.providers.addAll(providers);
for (AWSCredentialsProvider provider: providers) {
this.providers.add(V1V2AwsCredentialProviderAdapter.adapt(provider));
}
}
/**
@ -110,6 +119,19 @@ public final class AWSCredentialProviderList implements AWSCredentialsProvider,
public AWSCredentialProviderList(final String name,
final AWSCredentialsProvider... providerArgs) {
setName(name);
for (AWSCredentialsProvider provider: providerArgs) {
this.providers.add(V1V2AwsCredentialProviderAdapter.adapt(provider));
}
}
/**
* Create with an initial list of SDK V2 credential providers.
* @param name name for error messages, may be ""
* @param providerArgs provider list.
*/
public AWSCredentialProviderList(final String name,
final AwsCredentialsProvider... providerArgs) {
setName(name);
Collections.addAll(providers, providerArgs);
}
@ -127,12 +149,21 @@ public final class AWSCredentialProviderList implements AWSCredentialsProvider,
/**
* Add a new provider.
* @param p provider
* @param provider provider
*/
public void add(AWSCredentialsProvider p) {
providers.add(p);
public void add(AWSCredentialsProvider provider) {
providers.add(V1V2AwsCredentialProviderAdapter.adapt(provider));
}
/**
* Add a new SDK V2 provider.
* @param provider provider
*/
public void add(AwsCredentialsProvider provider) {
providers.add(provider);
}
/**
* Add all providers from another list to this one.
* @param other the other list.
@ -142,15 +173,18 @@ public final class AWSCredentialProviderList implements AWSCredentialsProvider,
}
/**
* Refresh all child entries.
* This method will get credentials using SDK V2's resolveCredentials and then convert them into
* V1 credentials. This is required by the delegation token binding classes.
* @return SDK V1 credentials
*/
@Override
public void refresh() {
if (isClosed()) {
return;
}
for (AWSCredentialsProvider provider : providers) {
provider.refresh();
public AWSCredentials getCredentials() {
AwsCredentials credentials = resolveCredentials();
if (credentials instanceof AwsSessionCredentials) {
return new BasicSessionCredentials(credentials.accessKeyId(),
credentials.secretAccessKey(),
((AwsSessionCredentials) credentials).sessionToken());
} else {
return new BasicAWSCredentials(credentials.accessKeyId(), credentials.secretAccessKey());
}
}
@ -160,7 +194,7 @@ public final class AWSCredentialProviderList implements AWSCredentialsProvider,
* @return a set of credentials (possibly anonymous), for authenticating.
*/
@Override
public AWSCredentials getCredentials() {
public AwsCredentials resolveCredentials() {
if (isClosed()) {
LOG.warn(CREDENTIALS_REQUESTED_WHEN_CLOSED);
throw new NoAuthWithAWSException(name +
@ -168,18 +202,18 @@ public final class AWSCredentialProviderList implements AWSCredentialsProvider,
}
checkNotEmpty();
if (reuseLastProvider && lastProvider != null) {
return lastProvider.getCredentials();
return lastProvider.resolveCredentials();
}
AmazonClientException lastException = null;
for (AWSCredentialsProvider provider : providers) {
SdkException lastException = null;
for (AwsCredentialsProvider provider : providers) {
try {
AWSCredentials credentials = provider.getCredentials();
AwsCredentials credentials = provider.resolveCredentials();
Preconditions.checkNotNull(credentials,
"Null credentials returned by %s", provider);
if ((credentials.getAWSAccessKeyId() != null &&
credentials.getAWSSecretKey() != null)
|| (credentials instanceof AnonymousAWSCredentials)) {
if ((credentials.accessKeyId() != null && credentials.secretAccessKey() != null) || (
provider instanceof AnonymousCredentialsProvider
|| provider instanceof AnonymousAWSCredentialsProvider)) {
lastProvider = provider;
LOG.debug("Using credentials from {}", provider);
return credentials;
@ -196,7 +230,7 @@ public final class AWSCredentialProviderList implements AWSCredentialsProvider,
}
LOG.debug("No credentials from {}: {}",
provider, e.toString());
} catch (AmazonClientException e) {
} catch (SdkException e) {
lastException = e;
LOG.debug("No credentials provided by {}: {}",
provider, e.toString(), e);
@ -223,13 +257,13 @@ public final class AWSCredentialProviderList implements AWSCredentialsProvider,
* @return providers
*/
@VisibleForTesting
List<AWSCredentialsProvider> getProviders() {
List<AwsCredentialsProvider> getProviders() {
return providers;
}
/**
* Verify that the provider list is not empty.
* @throws AmazonClientException if there are no providers.
* @throws SdkException if there are no providers.
*/
public void checkNotEmpty() {
if (providers.isEmpty()) {
@ -317,7 +351,7 @@ public final class AWSCredentialProviderList implements AWSCredentialsProvider,
}
// do this outside the synchronized block.
for (AWSCredentialsProvider p : providers) {
for (AwsCredentialsProvider p : providers) {
if (p instanceof Closeable) {
IOUtils.closeStream((Closeable) p);
} else if (p instanceof AutoCloseable) {
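
The chain semantics described in the class javadoc above can be exercised directly through the SDK V2 interface. The following is a minimal sketch, not part of this commit; the provider choices are illustrative, and only the (name, AwsCredentialsProvider...) constructor and the resolveCredentials()/close() methods visible in the diff are assumed.

    import org.apache.hadoop.fs.s3a.AWSCredentialProviderList;
    import software.amazon.awssdk.auth.credentials.AnonymousCredentialsProvider;
    import software.amazon.awssdk.auth.credentials.AwsCredentials;
    import software.amazon.awssdk.auth.credentials.EnvironmentVariableCredentialsProvider;

    public class CredentialListSketch {
      public static void main(String[] args) {
        // Providers are tried in order; the first set of usable credentials wins,
        // with the anonymous provider acting as a fallback of last resort.
        AWSCredentialProviderList providers = new AWSCredentialProviderList("sketch",
            EnvironmentVariableCredentialsProvider.create(),
            AnonymousCredentialsProvider.create());
        // SDK V2 entry point; getCredentials() is retained for V1-style callers
        // such as the delegation token bindings.
        AwsCredentials credentials = providers.resolveCredentials();
        System.out.println("access key id: " + credentials.accessKeyId());
        providers.close();
      }
    }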

View File

@ -18,14 +18,14 @@
package org.apache.hadoop.fs.s3a;
import com.amazonaws.AmazonServiceException;
import software.amazon.awssdk.awscore.exception.AwsServiceException;
/**
* Status code 443, no response from server. This is considered idempotent.
*/
public class AWSNoResponseException extends AWSServiceIOException {
public AWSNoResponseException(String operation,
AmazonServiceException cause) {
AwsServiceException cause) {
super(operation, cause);
}
}

View File

@ -18,7 +18,7 @@
package org.apache.hadoop.fs.s3a;
import com.amazonaws.AmazonServiceException;
import software.amazon.awssdk.awscore.exception.AwsServiceException;
/**
* Request is redirected.
@ -32,7 +32,7 @@ public class AWSRedirectException extends AWSServiceIOException {
* @param cause the underlying cause
*/
public AWSRedirectException(String operation,
AmazonServiceException cause) {
AwsServiceException cause) {
super(operation, cause);
}
}

View File

@ -18,14 +18,13 @@
package org.apache.hadoop.fs.s3a;
import com.amazonaws.services.s3.model.AmazonS3Exception;
import software.amazon.awssdk.services.s3.model.S3Exception;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import java.util.Map;
/**
* Wrap a {@link AmazonS3Exception} as an IOE, relaying all
* Wrap a {@link S3Exception} as an IOE, relaying all
* getters.
*/
@InterfaceAudience.Public
@ -38,24 +37,12 @@ public class AWSS3IOException extends AWSServiceIOException {
* @param cause the underlying cause
*/
public AWSS3IOException(String operation,
AmazonS3Exception cause) {
S3Exception cause) {
super(operation, cause);
}
public AmazonS3Exception getCause() {
return (AmazonS3Exception) super.getCause();
}
public String getErrorResponseXml() {
return getCause().getErrorResponseXml();
}
public Map<String, String> getAdditionalDetails() {
return getCause().getAdditionalDetails();
}
public String getExtendedRequestId() {
return getCause().getExtendedRequestId();
public S3Exception getCause() {
return (S3Exception) super.getCause();
}
}

View File

@ -18,13 +18,15 @@
package org.apache.hadoop.fs.s3a;
import com.amazonaws.AmazonServiceException;
import software.amazon.awssdk.awscore.exception.AwsServiceException;
import software.amazon.awssdk.awscore.exception.AwsErrorDetails;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
/**
* A specific exception from AWS operations.
* The exception must always be created with an {@link AmazonServiceException}.
* The exception must always be created with an {@link AwsServiceException}.
* The attributes of this exception can all be directly accessed.
*/
@InterfaceAudience.Public
@ -37,36 +39,31 @@ public class AWSServiceIOException extends AWSClientIOException {
* @param cause the underlying cause
*/
public AWSServiceIOException(String operation,
AmazonServiceException cause) {
AwsServiceException cause) {
super(operation, cause);
}
public AmazonServiceException getCause() {
return (AmazonServiceException) super.getCause();
public AwsServiceException getCause() {
return (AwsServiceException) super.getCause();
}
public String getRequestId() {
return getCause().getRequestId();
public boolean retryable() {
return getCause().retryable();
}
public String getServiceName() {
return getCause().getServiceName();
public String requestId() {
return getCause().requestId();
}
public String getErrorCode() {
return getCause().getErrorCode();
public AwsErrorDetails awsErrorDetails() {
return getCause().awsErrorDetails();
}
public int getStatusCode() {
return getCause().getStatusCode();
public int statusCode() {
return getCause().statusCode();
}
public String getRawResponseContent() {
return getCause().getRawResponseContent();
public String extendedRequestId() {
return getCause().extendedRequestId();
}
public boolean isRetryable() {
return getCause().isRetryable();
}
}
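
Callers that previously used getStatusCode()/getErrorCode() now read the relayed SDK V2 accessors. A short sketch of the inspection pattern, not taken from the commit; only the getters shown in the diff above are used.

    import java.io.IOException;
    import org.apache.hadoop.fs.s3a.AWSServiceIOException;

    public final class ErrorDetailsSketch {
      static void report(IOException e) {
        if (e instanceof AWSServiceIOException) {
          AWSServiceIOException awsEx = (AWSServiceIOException) e;
          // V2-style accessors replace the old getStatusCode()/getErrorCode() getters.
          System.err.println("status:    " + awsEx.statusCode());
          System.err.println("requestId: " + awsEx.requestId());
          System.err.println("errorCode: " + awsEx.awsErrorDetails().errorCode());
          System.err.println("retryable: " + awsEx.retryable());
        }
      }
    }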

View File

@ -18,7 +18,7 @@
package org.apache.hadoop.fs.s3a;
import com.amazonaws.AmazonServiceException;
import software.amazon.awssdk.awscore.exception.AwsServiceException;
/**
* Exception raised when a service was throttled.
@ -36,7 +36,7 @@ public class AWSServiceThrottledException extends AWSServiceIOException {
* @param cause the underlying cause
*/
public AWSServiceThrottledException(String operation,
AmazonServiceException cause) {
AwsServiceException cause) {
super(operation, cause);
}
}

View File

@ -18,7 +18,7 @@
package org.apache.hadoop.fs.s3a;
import com.amazonaws.AmazonServiceException;
import software.amazon.awssdk.awscore.exception.AwsServiceException;
/**
* A 500 response came back from a service.
@ -31,7 +31,7 @@ import com.amazonaws.AmazonServiceException;
*/
public class AWSStatus500Exception extends AWSServiceIOException {
public AWSStatus500Exception(String operation,
AmazonServiceException cause) {
AwsServiceException cause) {
super(operation, cause);
}
}

View File

@ -18,9 +18,10 @@
package org.apache.hadoop.fs.s3a;
import com.amazonaws.auth.AWSCredentialsProvider;
import com.amazonaws.auth.AnonymousAWSCredentials;
import com.amazonaws.auth.AWSCredentials;
import software.amazon.awssdk.auth.credentials.AnonymousCredentialsProvider;
import software.amazon.awssdk.auth.credentials.AwsCredentials;
import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
@ -35,23 +36,18 @@ import org.apache.hadoop.classification.InterfaceStability;
* property fs.s3a.aws.credentials.provider. Therefore, changing the class name
* would be a backward-incompatible change.
*
* @deprecated This class will be replaced by one that implements AWS SDK V2's AwsCredentialsProvider
* as part of upgrading S3A to SDK V2. See HADOOP-18073.
*/
@InterfaceAudience.Private
@InterfaceStability.Stable
@Deprecated
public class AnonymousAWSCredentialsProvider implements AWSCredentialsProvider {
public class AnonymousAWSCredentialsProvider implements AwsCredentialsProvider {
public static final String NAME
= "org.apache.hadoop.fs.s3a.AnonymousAWSCredentialsProvider";
public AWSCredentials getCredentials() {
return new AnonymousAWSCredentials();
public AwsCredentials resolveCredentials() {
return AnonymousCredentialsProvider.create().resolveCredentials();
}
public void refresh() {}
@Override
public String toString() {
return getClass().getSimpleName();

View File

@ -20,7 +20,7 @@ package org.apache.hadoop.fs.s3a;
import javax.annotation.Nonnull;
import com.amazonaws.arn.Arn;
import software.amazon.awssdk.arns.Arn;
/**
* Represents an Arn Resource, this can be an accesspoint or bucket.
@ -120,14 +120,14 @@ public final class ArnResource {
public static ArnResource accessPointFromArn(String arn) throws IllegalArgumentException {
Arn parsed = Arn.fromString(arn);
if (parsed.getRegion().isEmpty() || parsed.getAccountId().isEmpty() ||
parsed.getResourceAsString().isEmpty()) {
if (!parsed.region().isPresent() || !parsed.accountId().isPresent() ||
parsed.resourceAsString().isEmpty()) {
throw new IllegalArgumentException(
String.format("Access Point Arn %s has an invalid format or missing properties", arn));
}
String resourceName = parsed.getResource().getResource();
return new ArnResource(resourceName, parsed.getAccountId(), parsed.getRegion(),
parsed.getPartition(), arn);
String resourceName = parsed.resource().resource();
return new ArnResource(resourceName, parsed.accountId().get(), parsed.region().get(),
parsed.partition(), arn);
}
}
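
The main behavioural difference here is that the V2 Arn accessors return Optional values rather than empty strings. A small sketch, using a made-up access point ARN:

    import software.amazon.awssdk.arns.Arn;

    public class ArnParseSketch {
      public static void main(String[] args) {
        Arn parsed = Arn.fromString(
            "arn:aws:s3:eu-west-1:123456789012:accesspoint/sample-ap");
        // region() and accountId() are Optional<String> in SDK V2.
        System.out.println(parsed.region().orElse("(none)"));      // eu-west-1
        System.out.println(parsed.accountId().orElse("(none)"));   // 123456789012
        System.out.println(parsed.resource().resource());          // sample-ap
      }
    }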

View File

@ -140,7 +140,6 @@ public final class Constants {
public static final String ASSUMED_ROLE_POLICY =
"fs.s3a.assumed.role.policy";
@SuppressWarnings("deprecation")
public static final String ASSUMED_ROLE_CREDENTIALS_DEFAULT =
SimpleAWSCredentialsProvider.NAME;
@ -586,7 +585,7 @@ public final class Constants {
public static final String SIGNING_ALGORITHM_STS =
"fs.s3a." + Constants.AWS_SERVICE_IDENTIFIER_STS.toLowerCase()
+ "signing-algorithm";
+ ".signing-algorithm";
public static final String S3N_FOLDER_SUFFIX = "_$folder$";
public static final String FS_S3A_BLOCK_SIZE = "fs.s3a.block.size";
@ -735,7 +734,6 @@ public final class Constants {
@InterfaceAudience.Private
@InterfaceStability.Unstable
@SuppressWarnings("deprecation")
public static final Class<? extends S3ClientFactory>
DEFAULT_S3_CLIENT_FACTORY_IMPL =
DefaultS3ClientFactory.class;
@ -1275,4 +1273,9 @@ public final class Constants {
*/
public static final String STORE_CAPABILITY_DIRECTORY_MARKER_MULTIPART_UPLOAD_ENABLED =
"fs.s3a.capability.multipart.uploads.enabled";
/**
* The bucket region header.
*/
public static final String BUCKET_REGION_HEADER = "x-amz-bucket-region";
}

View File

@ -18,7 +18,7 @@
package org.apache.hadoop.fs.s3a;
import com.amazonaws.AmazonClientException;
import software.amazon.awssdk.core.exception.SdkException;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
@ -26,8 +26,8 @@ import org.apache.hadoop.classification.InterfaceStability;
/**
* Exception which Hadoop's AWSCredentialsProvider implementations should
* throw when there is a problem with the credential setup. This
* is a subclass of {@link AmazonClientException} which sets
* {@link #isRetryable()} to false, so as to fail fast.
* is a subclass of {@link SdkException} which sets
* {@link #retryable()} to false, so as to fail fast.
* This is used in credential providers and elsewhere.
* When passed through {@code S3AUtils.translateException()} it
* is mapped to an AccessDeniedException. As a result, the Invoker
@ -35,13 +35,14 @@ import org.apache.hadoop.classification.InterfaceStability;
*/
@InterfaceAudience.Public
@InterfaceStability.Stable
public class CredentialInitializationException extends AmazonClientException {
public class CredentialInitializationException extends SdkException {
public CredentialInitializationException(String message, Throwable t) {
super(message, t);
super(builder().message(message).cause(t));
}
public CredentialInitializationException(String message) {
super(message);
super(builder().message(message));
}
/**
@ -49,7 +50,7 @@ public class CredentialInitializationException extends AmazonClientException {
* @return false, always.
*/
@Override
public boolean isRetryable() {
public boolean retryable() {
return false;
}
}
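
The fail-fast contract described in the javadoc above is visible directly: the exception reports itself as non-retryable, so retry logic gives up immediately. A minimal sketch, not part of the commit:

    import org.apache.hadoop.fs.s3a.CredentialInitializationException;

    public class CredentialFailureSketch {
      public static void main(String[] args) {
        CredentialInitializationException ex =
            new CredentialInitializationException("no credentials configured");
        System.out.println(ex.retryable());   // false: never retried
        System.out.println(ex.getMessage());  // no credentials configured
      }
    }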

View File

@ -20,32 +20,25 @@ package org.apache.hadoop.fs.s3a;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import com.amazonaws.ClientConfiguration;
import com.amazonaws.SdkClientException;
import com.amazonaws.client.builder.AwsClientBuilder;
import com.amazonaws.handlers.RequestHandler2;
import com.amazonaws.regions.RegionUtils;
import com.amazonaws.services.s3.AmazonS3;
import com.amazonaws.services.s3.AmazonS3Builder;
import com.amazonaws.services.s3.AmazonS3Client;
import com.amazonaws.services.s3.AmazonS3ClientBuilder;
import com.amazonaws.services.s3.AmazonS3EncryptionClientV2Builder;
import com.amazonaws.services.s3.AmazonS3EncryptionV2;
import com.amazonaws.services.s3.S3ClientOptions;
import com.amazonaws.services.s3.internal.ServiceUtils;
import com.amazonaws.services.s3.model.CryptoConfigurationV2;
import com.amazonaws.services.s3.model.CryptoMode;
import com.amazonaws.services.s3.model.CryptoRangeGetMode;
import com.amazonaws.services.s3.model.EncryptionMaterialsProvider;
import com.amazonaws.services.s3.model.KMSEncryptionMaterialsProvider;
import com.amazonaws.util.AwsHostNameUtils;
import com.amazonaws.util.RuntimeHttpUtils;
import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions;
import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting;
import org.apache.hadoop.fs.s3a.impl.AWSClientConfig;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import software.amazon.awssdk.core.client.config.ClientOverrideConfiguration;
import software.amazon.awssdk.core.client.config.SdkAdvancedClientOption;
import software.amazon.awssdk.core.interceptor.ExecutionInterceptor;
import software.amazon.awssdk.core.retry.RetryPolicy;
import software.amazon.awssdk.http.apache.ApacheHttpClient;
import software.amazon.awssdk.http.nio.netty.NettyNioAsyncHttpClient;
import software.amazon.awssdk.regions.Region;
import software.amazon.awssdk.services.s3.S3AsyncClient;
import software.amazon.awssdk.services.s3.S3BaseClientBuilder;
import software.amazon.awssdk.services.s3.S3Client;
import software.amazon.awssdk.services.s3.S3Configuration;
import software.amazon.awssdk.transfer.s3.S3TransferManager;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
@ -54,15 +47,11 @@ import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.s3a.statistics.impl.AwsStatisticsCollector;
import org.apache.hadoop.fs.store.LogExactlyOnce;
import static com.amazonaws.services.s3.Headers.REQUESTER_PAYS_HEADER;
import static org.apache.hadoop.fs.s3a.Constants.AWS_REGION;
import static org.apache.hadoop.fs.s3a.Constants.AWS_S3_CENTRAL_REGION;
import static org.apache.hadoop.fs.s3a.Constants.EXPERIMENTAL_AWS_INTERNAL_THROTTLING;
import static org.apache.hadoop.fs.s3a.Constants.EXPERIMENTAL_AWS_INTERNAL_THROTTLING_DEFAULT;
import static org.apache.hadoop.fs.s3a.Constants.S3_ENCRYPTION_KEY;
import static org.apache.hadoop.fs.s3a.S3AUtils.getEncryptionAlgorithm;
import static org.apache.hadoop.fs.s3a.S3AUtils.getS3EncryptionKey;
import static org.apache.hadoop.fs.s3a.S3AUtils.translateException;
import static org.apache.hadoop.fs.s3a.impl.AWSHeaders.REQUESTER_PAYS_HEADER;
import static org.apache.hadoop.fs.s3a.Constants.DEFAULT_SECURE_CONNECTIONS;
import static org.apache.hadoop.fs.s3a.Constants.SECURE_CONNECTIONS;
import static org.apache.hadoop.fs.s3a.Constants.AWS_SERVICE_IDENTIFIER_S3;
/**
* The default {@link S3ClientFactory} implementation.
@ -71,12 +60,9 @@ import static org.apache.hadoop.fs.s3a.S3AUtils.translateException;
*/
@InterfaceAudience.Private
@InterfaceStability.Unstable
@SuppressWarnings("deprecation")
public class DefaultS3ClientFactory extends Configured
implements S3ClientFactory {
private static final String S3_SERVICE_NAME = "s3";
private static final String REQUESTER_PAYS_HEADER_VALUE = "requester";
/**
@ -85,310 +71,153 @@ public class DefaultS3ClientFactory extends Configured
protected static final Logger LOG =
LoggerFactory.getLogger(DefaultS3ClientFactory.class);
/**
* A one-off warning of default region chains in use.
*/
private static final LogExactlyOnce WARN_OF_DEFAULT_REGION_CHAIN =
new LogExactlyOnce(LOG);
/**
* Warning message printed when the SDK Region chain is in use.
*/
private static final String SDK_REGION_CHAIN_IN_USE =
"S3A filesystem client is using"
+ " the SDK region resolution chain.";
/** Exactly once log to inform about ignoring the AWS-SDK Warnings for CSE. */
private static final LogExactlyOnce IGNORE_CSE_WARN = new LogExactlyOnce(LOG);
/** Bucket name. */
private String bucket;
/**
* Create the client by preparing the AwsConf configuration
* and then invoking {@code buildAmazonS3Client()}.
*/
@Override
public AmazonS3 createS3Client(
public S3Client createS3Client(
final URI uri,
final S3ClientCreationParameters parameters) throws IOException {
Configuration conf = getConf();
bucket = uri.getHost();
final ClientConfiguration awsConf = S3AUtils
.createAwsConf(conf,
bucket,
Constants.AWS_SERVICE_IDENTIFIER_S3);
String bucket = uri.getHost();
ApacheHttpClient.Builder httpClientBuilder = AWSClientConfig
.createHttpClientBuilder(conf)
.proxyConfiguration(AWSClientConfig.createProxyConfiguration(conf, bucket));
return configureClientBuilder(S3Client.builder(), parameters, conf, bucket)
.httpClientBuilder(httpClientBuilder)
.build();
}
@Override
public S3AsyncClient createS3AsyncClient(
final URI uri,
final S3ClientCreationParameters parameters) throws IOException {
Configuration conf = getConf();
String bucket = uri.getHost();
NettyNioAsyncHttpClient.Builder httpClientBuilder = AWSClientConfig
.createAsyncHttpClientBuilder(conf)
.proxyConfiguration(AWSClientConfig.createAsyncProxyConfiguration(conf, bucket));
return configureClientBuilder(S3AsyncClient.builder(), parameters, conf, bucket)
.httpClientBuilder(httpClientBuilder)
.build();
}
@Override
public S3TransferManager createS3TransferManager(final S3AsyncClient s3AsyncClient) {
return S3TransferManager.builder()
.s3Client(s3AsyncClient)
.build();
}
/**
* Configure a sync or async S3 client builder.
* This method handles all shared configuration.
* @param builder S3 client builder
* @param parameters parameter object
* @param conf configuration object
* @param bucket bucket name
* @return the builder object
* @param <BuilderT> S3 client builder type
* @param <ClientT> S3 client type
*/
private <BuilderT extends S3BaseClientBuilder<BuilderT, ClientT>, ClientT> BuilderT configureClientBuilder(
BuilderT builder, S3ClientCreationParameters parameters, Configuration conf, String bucket)
throws IOException {
Region region = parameters.getRegion();
LOG.debug("Using region {}", region);
URI endpoint = getS3Endpoint(parameters.getEndpoint(), conf);
if (endpoint != null) {
builder.endpointOverride(endpoint);
LOG.debug("Using endpoint {}", endpoint);
}
S3Configuration serviceConfiguration = S3Configuration.builder()
.pathStyleAccessEnabled(parameters.isPathStyleAccess())
.build();
return builder
.overrideConfiguration(createClientOverrideConfiguration(parameters, conf))
.credentialsProvider(parameters.getCredentialSet())
.region(region)
.serviceConfiguration(serviceConfiguration);
}
/**
* Create an override configuration for an S3 client.
* @param parameters parameter object
* @param conf configuration object
* @throws IOException any IOE raised, or translated exception
* @return the override configuration
*/
protected ClientOverrideConfiguration createClientOverrideConfiguration(
S3ClientCreationParameters parameters, Configuration conf) throws IOException {
final ClientOverrideConfiguration.Builder clientOverrideConfigBuilder =
AWSClientConfig.createClientConfigBuilder(conf, AWS_SERVICE_IDENTIFIER_S3);
// add any headers
parameters.getHeaders().forEach((h, v) ->
awsConf.addHeader(h, v));
parameters.getHeaders().forEach((h, v) -> clientOverrideConfigBuilder.putHeader(h, v));
if (parameters.isRequesterPays()) {
// All calls must acknowledge requester will pay via header.
awsConf.addHeader(REQUESTER_PAYS_HEADER, REQUESTER_PAYS_HEADER_VALUE);
clientOverrideConfigBuilder.putHeader(REQUESTER_PAYS_HEADER, REQUESTER_PAYS_HEADER_VALUE);
}
// When EXPERIMENTAL_AWS_INTERNAL_THROTTLING is false
// throttling is explicitly disabled on the S3 client so that
// all failures are collected in S3A instrumentation, and its
// retry policy is the only one used.
// This may cause problems in copy/rename.
awsConf.setUseThrottleRetries(
conf.getBoolean(EXPERIMENTAL_AWS_INTERNAL_THROTTLING,
EXPERIMENTAL_AWS_INTERNAL_THROTTLING_DEFAULT));
if (!StringUtils.isEmpty(parameters.getUserAgentSuffix())) {
awsConf.setUserAgentSuffix(parameters.getUserAgentSuffix());
clientOverrideConfigBuilder.putAdvancedOption(SdkAdvancedClientOption.USER_AGENT_SUFFIX,
parameters.getUserAgentSuffix());
}
// Get the encryption method for this bucket.
S3AEncryptionMethods encryptionMethods =
getEncryptionAlgorithm(bucket, conf);
try {
// If CSE is enabled then build a S3EncryptionClient.
if (S3AEncryptionMethods.CSE_KMS.getMethod()
.equals(encryptionMethods.getMethod())) {
return buildAmazonS3EncryptionClient(
awsConf,
parameters);
} else {
return buildAmazonS3Client(
awsConf,
parameters);
if (parameters.getExecutionInterceptors() != null) {
for (ExecutionInterceptor interceptor : parameters.getExecutionInterceptors()) {
clientOverrideConfigBuilder.addExecutionInterceptor(interceptor);
}
} catch (SdkClientException e) {
// SDK refused to build.
throw translateException("creating AWS S3 client", uri.toString(), e);
}
}
/**
* Create an {@link AmazonS3} client of type
* {@link AmazonS3EncryptionV2} if CSE is enabled.
*
* @param awsConf AWS configuration.
* @param parameters parameters.
*
* @return new AmazonS3 client.
* @throws IOException if lookupPassword() has any problem.
*/
protected AmazonS3 buildAmazonS3EncryptionClient(
final ClientConfiguration awsConf,
final S3ClientCreationParameters parameters) throws IOException {
AmazonS3 client;
AmazonS3EncryptionClientV2Builder builder =
new AmazonS3EncryptionClientV2Builder();
Configuration conf = getConf();
// CSE-KMS Method
String kmsKeyId = getS3EncryptionKey(bucket, conf, true);
// Check if kmsKeyID is not null
Preconditions.checkArgument(!StringUtils.isBlank(kmsKeyId), "CSE-KMS "
+ "method requires KMS key ID. Use " + S3_ENCRYPTION_KEY
+ " property to set it. ");
EncryptionMaterialsProvider materialsProvider =
new KMSEncryptionMaterialsProvider(kmsKeyId);
builder.withEncryptionMaterialsProvider(materialsProvider);
//Configure basic params of a S3 builder.
configureBasicParams(builder, awsConf, parameters);
// Configuring endpoint.
AmazonS3EncryptionClientV2Builder.EndpointConfiguration epr
= createEndpointConfiguration(parameters.getEndpoint(),
awsConf, getConf().getTrimmed(AWS_REGION));
configureEndpoint(builder, epr);
// Create cryptoConfig.
CryptoConfigurationV2 cryptoConfigurationV2 =
new CryptoConfigurationV2(CryptoMode.AuthenticatedEncryption)
.withRangeGetMode(CryptoRangeGetMode.ALL);
if (epr != null) {
cryptoConfigurationV2
.withAwsKmsRegion(RegionUtils.getRegion(epr.getSigningRegion()));
LOG.debug("KMS region used: {}", cryptoConfigurationV2.getAwsKmsRegion());
}
builder.withCryptoConfiguration(cryptoConfigurationV2);
client = builder.build();
IGNORE_CSE_WARN.info("S3 client-side encryption enabled: Ignore S3-CSE "
+ "Warnings.");
return client;
}
/**
* Use the Builder API to create an AWS S3 client.
* <p>
* This has a more complex endpoint configuration mechanism
* which initially caused problems; the
* {@code withForceGlobalBucketAccessEnabled(true)}
* command is critical here.
* @param awsConf AWS configuration
* @param parameters parameters
* @return new AmazonS3 client
* @throws SdkClientException if the configuration is invalid.
*/
protected AmazonS3 buildAmazonS3Client(
final ClientConfiguration awsConf,
final S3ClientCreationParameters parameters) {
AmazonS3ClientBuilder b = AmazonS3Client.builder();
configureBasicParams(b, awsConf, parameters);
// endpoint set up is a PITA
AwsClientBuilder.EndpointConfiguration epr
= createEndpointConfiguration(parameters.getEndpoint(),
awsConf, getConf().getTrimmed(AWS_REGION));
configureEndpoint(b, epr);
final AmazonS3 client = b.build();
return client;
}
/**
* A method to configure basic AmazonS3Builder parameters.
*
* @param builder Instance of AmazonS3Builder used.
* @param awsConf ClientConfiguration used.
* @param parameters Parameters used to set in the builder.
*/
private void configureBasicParams(AmazonS3Builder builder,
ClientConfiguration awsConf, S3ClientCreationParameters parameters) {
builder.withCredentials(parameters.getCredentialSet());
builder.withClientConfiguration(awsConf);
builder.withPathStyleAccessEnabled(parameters.isPathStyleAccess());
if (parameters.getMetrics() != null) {
builder.withMetricsCollector(
clientOverrideConfigBuilder.addMetricPublisher(
new AwsStatisticsCollector(parameters.getMetrics()));
}
if (parameters.getRequestHandlers() != null) {
builder.withRequestHandlers(
parameters.getRequestHandlers().toArray(new RequestHandler2[0]));
}
if (parameters.getMonitoringListener() != null) {
builder.withMonitoringListener(parameters.getMonitoringListener());
}
final RetryPolicy.Builder retryPolicyBuilder = AWSClientConfig.createRetryPolicyBuilder(conf);
clientOverrideConfigBuilder.retryPolicy(retryPolicyBuilder.build());
return clientOverrideConfigBuilder.build();
}
/**
* A method to configure endpoint and Region for an AmazonS3Builder.
*
* @param builder Instance of AmazonS3Builder used.
* @param epr EndpointConfiguration used to set in builder.
*/
private void configureEndpoint(
AmazonS3Builder builder,
AmazonS3Builder.EndpointConfiguration epr) {
if (epr != null) {
// an endpoint binding was constructed: use it.
builder.withEndpointConfiguration(epr);
} else {
// no idea what the endpoint is, so tell the SDK
// to work it out at the cost of an extra HEAD request
builder.withForceGlobalBucketAccessEnabled(true);
// HADOOP-17771 force set the region so the build process doesn't halt.
String region = getConf().getTrimmed(AWS_REGION, AWS_S3_CENTRAL_REGION);
LOG.debug("fs.s3a.endpoint.region=\"{}\"", region);
if (!region.isEmpty()) {
// there's either an explicit region or we have fallen back
// to the central one.
LOG.debug("Using default endpoint; setting region to {}", region);
builder.setRegion(region);
} else {
// no region.
// allow this if people really want it; it is OK to rely on this
// when deployed in EC2.
WARN_OF_DEFAULT_REGION_CHAIN.warn(SDK_REGION_CHAIN_IN_USE);
LOG.debug(SDK_REGION_CHAIN_IN_USE);
}
}
}
/**
* Configure classic S3 client.
* <p>
* This includes: endpoint, Path Access and possibly other
* options.
*
* @param s3 S3 Client.
* @param endPoint s3 endpoint, may be empty
* @param pathStyleAccess enable path style access?
* @return S3 client
* @throws IllegalArgumentException if misconfigured
*/
protected static AmazonS3 configureAmazonS3Client(AmazonS3 s3,
final String endPoint,
final boolean pathStyleAccess)
throws IllegalArgumentException {
if (!endPoint.isEmpty()) {
try {
s3.setEndpoint(endPoint);
} catch (IllegalArgumentException e) {
String msg = "Incorrect endpoint: " + e.getMessage();
LOG.error(msg);
throw new IllegalArgumentException(msg, e);
}
}
if (pathStyleAccess) {
LOG.debug("Enabling path style access!");
s3.setS3ClientOptions(S3ClientOptions.builder()
.setPathStyleAccess(true)
.build());
}
return s3;
}
/**
* Given an endpoint string, return an endpoint config, or null, if none
* is needed.
* <p>
* This is a pretty painful piece of code. It is trying to replicate
* what AwsClient.setEndpoint() does, because you can't
* call that setter on an AwsClient constructed via
* the builder, and you can't pass a metrics collector
* down except through the builder.
* <p>
* Note also that AWS signing is a mystery which nobody fully
* understands, especially given all problems surface in a
* "400 bad request" response, which, like all security systems,
* provides minimal diagnostics out of fear of leaking
* secrets.
* Given an endpoint string, create the endpoint URI.
*
* @param endpoint possibly null endpoint.
* @param awsConf config to build the URI from.
* @param awsRegion AWS S3 Region if the corresponding config is set.
* @return a configuration for the S3 client builder.
* @param conf config to build the URI from.
* @return an endpoint uri
*/
@VisibleForTesting
public static AwsClientBuilder.EndpointConfiguration
createEndpointConfiguration(
final String endpoint, final ClientConfiguration awsConf,
String awsRegion) {
LOG.debug("Creating endpoint configuration for \"{}\"", endpoint);
private static URI getS3Endpoint(String endpoint, final Configuration conf) {
boolean secureConnections = conf.getBoolean(SECURE_CONNECTIONS, DEFAULT_SECURE_CONNECTIONS);
String protocol = secureConnections ? "https" : "http";
if (endpoint == null || endpoint.isEmpty()) {
// the default endpoint...we should be using null at this point.
LOG.debug("Using default endpoint -no need to generate a configuration");
// don't set an endpoint if none is configured, instead let the SDK figure it out.
return null;
}
final URI epr = RuntimeHttpUtils.toUri(endpoint, awsConf);
LOG.debug("Endpoint URI = {}", epr);
String region = awsRegion;
if (StringUtils.isBlank(region)) {
if (!ServiceUtils.isS3USStandardEndpoint(endpoint)) {
LOG.debug("Endpoint {} is not the default; parsing", epr);
region = AwsHostNameUtils.parseRegion(
epr.getHost(),
S3_SERVICE_NAME);
} else {
// US-east, set region == null.
LOG.debug("Endpoint {} is the standard one; declare region as null",
epr);
region = null;
}
if (!endpoint.contains("://")) {
endpoint = String.format("%s://%s", protocol, endpoint);
}
try {
return new URI(endpoint);
} catch (URISyntaxException e) {
throw new IllegalArgumentException(e);
}
LOG.debug("Region for endpoint {}, URI {} is determined as {}",
endpoint, epr, region);
return new AwsClientBuilder.EndpointConfiguration(endpoint, region);
}
}
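
Outside the factory, the same V2 wiring can be reproduced in a few lines. The sketch below is not part of the commit; the endpoint, region and header values are illustrative, and only builder methods that appear in the diff above (plus the standard region()/endpointOverride() setters) are assumed.

    import java.net.URI;

    import software.amazon.awssdk.core.client.config.ClientOverrideConfiguration;
    import software.amazon.awssdk.regions.Region;
    import software.amazon.awssdk.services.s3.S3Client;
    import software.amazon.awssdk.services.s3.S3Configuration;

    public class V2ClientSketch {
      public static void main(String[] args) {
        // Headers, interceptors and the retry policy now live on the override
        // configuration rather than on a V1 ClientConfiguration object.
        ClientOverrideConfiguration overrides = ClientOverrideConfiguration.builder()
            .putHeader("x-amz-request-payer", "requester")
            .build();

        S3Client s3 = S3Client.builder()
            .region(Region.EU_WEST_1)
            .endpointOverride(URI.create("https://s3.eu-west-1.amazonaws.com"))
            .serviceConfiguration(S3Configuration.builder()
                .pathStyleAccessEnabled(true)
                .build())
            .overrideConfiguration(overrides)
            .build();

        System.out.println(s3.serviceName());
        s3.close();
      }
    }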

View File

@ -36,7 +36,7 @@ public class FailureInjectionPolicy {
public static final String DEFAULT_DELAY_KEY_SUBSTRING = "DELAY_LISTING_ME";
private static final Logger LOG =
LoggerFactory.getLogger(InconsistentAmazonS3Client.class);
LoggerFactory.getLogger(FailureInjectionPolicy.class);
/**
* Probability of throttling a request.

View File

@ -1,345 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs.s3a;
import java.util.List;
import java.util.concurrent.atomic.AtomicLong;
import com.amazonaws.AmazonClientException;
import com.amazonaws.AmazonServiceException;
import com.amazonaws.ClientConfiguration;
import com.amazonaws.SdkClientException;
import com.amazonaws.auth.AWSCredentialsProvider;
import com.amazonaws.services.s3.AmazonS3;
import com.amazonaws.services.s3.AmazonS3Client;
import com.amazonaws.services.s3.model.CompleteMultipartUploadRequest;
import com.amazonaws.services.s3.model.CompleteMultipartUploadResult;
import com.amazonaws.services.s3.model.DeleteObjectRequest;
import com.amazonaws.services.s3.model.DeleteObjectsRequest;
import com.amazonaws.services.s3.model.DeleteObjectsResult;
import com.amazonaws.services.s3.model.GetObjectRequest;
import com.amazonaws.services.s3.model.InitiateMultipartUploadRequest;
import com.amazonaws.services.s3.model.InitiateMultipartUploadResult;
import com.amazonaws.services.s3.model.ListMultipartUploadsRequest;
import com.amazonaws.services.s3.model.ListObjectsRequest;
import com.amazonaws.services.s3.model.ListObjectsV2Request;
import com.amazonaws.services.s3.model.ListObjectsV2Result;
import com.amazonaws.services.s3.model.MultipartUploadListing;
import com.amazonaws.services.s3.model.ObjectListing;
import com.amazonaws.services.s3.model.PutObjectRequest;
import com.amazonaws.services.s3.model.PutObjectResult;
import com.amazonaws.services.s3.model.S3Object;
import com.amazonaws.services.s3.model.S3ObjectSummary;
import com.amazonaws.services.s3.model.UploadPartRequest;
import com.amazonaws.services.s3.model.UploadPartResult;
import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;
/**
* A wrapper around {@link com.amazonaws.services.s3.AmazonS3} that injects
* failures.
* It used to also inject inconsistency, but this was removed with S3Guard;
* what is retained is the ability to throttle AWS operations and for the
* input stream to be inconsistent.
*/
@InterfaceAudience.Private
@InterfaceStability.Unstable
public class InconsistentAmazonS3Client extends AmazonS3Client {
private static final Logger LOG =
LoggerFactory.getLogger(InconsistentAmazonS3Client.class);
private FailureInjectionPolicy policy;
/**
* Counter of failures since last reset.
*/
private final AtomicLong failureCounter = new AtomicLong(0);
/**
* Instantiate.
* This subclasses a deprecated constructor of the parent
* {@code AmazonS3Client} class; we can't use the builder API because,
* that only creates the consistent client.
* @param credentials credentials to auth.
* @param clientConfiguration connection settings
* @param conf hadoop configuration.
*/
@SuppressWarnings("deprecation")
public InconsistentAmazonS3Client(AWSCredentialsProvider credentials,
ClientConfiguration clientConfiguration, Configuration conf) {
super(credentials, clientConfiguration);
policy = new FailureInjectionPolicy(conf);
}
/**
* A way for tests to patch in a different fault injection policy at runtime.
* @param fs filesystem under test
* @param policy failure injection settings to set
* @throws Exception on failure
*/
public static void setFailureInjectionPolicy(S3AFileSystem fs,
FailureInjectionPolicy policy) throws Exception {
AmazonS3 s3 = fs.getAmazonS3ClientForTesting("s3guard");
InconsistentAmazonS3Client ic = InconsistentAmazonS3Client.castFrom(s3);
ic.replacePolicy(policy);
}
private void replacePolicy(FailureInjectionPolicy pol) {
this.policy = pol;
}
@Override
public String toString() {
return String.format("Inconsistent S3 Client: %s; failure count %d",
policy, failureCounter.get());
}
/**
* Convenience function for test code to cast from supertype.
* @param c supertype to cast from
* @return subtype, not null
* @throws Exception on error
*/
public static InconsistentAmazonS3Client castFrom(AmazonS3 c) throws
Exception {
InconsistentAmazonS3Client ic = null;
if (c instanceof InconsistentAmazonS3Client) {
ic = (InconsistentAmazonS3Client) c;
}
Preconditions.checkNotNull(ic, "Not an instance of " +
"InconsistentAmazonS3Client");
return ic;
}
@Override
public DeleteObjectsResult deleteObjects(DeleteObjectsRequest
deleteObjectsRequest)
throws AmazonClientException, AmazonServiceException {
maybeFail();
return super.deleteObjects(deleteObjectsRequest);
}
@Override
public void deleteObject(DeleteObjectRequest deleteObjectRequest)
throws AmazonClientException, AmazonServiceException {
String key = deleteObjectRequest.getKey();
LOG.debug("key {}", key);
maybeFail();
super.deleteObject(deleteObjectRequest);
}
/* We should only need to override this version of putObject() */
@Override
public PutObjectResult putObject(PutObjectRequest putObjectRequest)
throws AmazonClientException, AmazonServiceException {
LOG.debug("key {}", putObjectRequest.getKey());
maybeFail();
return super.putObject(putObjectRequest);
}
/* We should only need to override these versions of listObjects() */
@Override
public ObjectListing listObjects(ListObjectsRequest listObjectsRequest)
throws AmazonClientException, AmazonServiceException {
maybeFail();
return super.listObjects(listObjectsRequest);
}
/* consistent listing with possibility of failing. */
@Override
public ListObjectsV2Result listObjectsV2(ListObjectsV2Request request)
throws AmazonClientException, AmazonServiceException {
maybeFail();
return super.listObjectsV2(request);
}
@Override
public CompleteMultipartUploadResult completeMultipartUpload(
CompleteMultipartUploadRequest completeMultipartUploadRequest)
throws SdkClientException, AmazonServiceException {
maybeFail();
return super.completeMultipartUpload(completeMultipartUploadRequest);
}
@Override
public UploadPartResult uploadPart(UploadPartRequest uploadPartRequest)
throws SdkClientException, AmazonServiceException {
maybeFail();
return super.uploadPart(uploadPartRequest);
}
@Override
public InitiateMultipartUploadResult initiateMultipartUpload(
InitiateMultipartUploadRequest initiateMultipartUploadRequest)
throws SdkClientException, AmazonServiceException {
maybeFail();
return super.initiateMultipartUpload(initiateMultipartUploadRequest);
}
@Override
public MultipartUploadListing listMultipartUploads(
ListMultipartUploadsRequest listMultipartUploadsRequest)
throws SdkClientException, AmazonServiceException {
maybeFail();
return super.listMultipartUploads(listMultipartUploadsRequest);
}
/**
* Set the probability of throttling a request.
* @param throttleProbability the probability of a request being throttled.
*/
public void setThrottleProbability(float throttleProbability) {
policy.setThrottleProbability(throttleProbability);
}
/**
* Conditionally fail the operation.
* @param errorMsg description of failure
* @param statusCode http status code for error
* @throws AmazonClientException if the client chooses to fail
* the request.
*/
private void maybeFail(String errorMsg, int statusCode)
throws AmazonClientException {
// code structure here is to line up for more failures later
AmazonServiceException ex = null;
if (FailureInjectionPolicy.trueWithProbability(policy.getThrottleProbability())) {
// throttle the request
ex = new AmazonServiceException(errorMsg
+ " count = " + (failureCounter.get() + 1), null);
ex.setStatusCode(statusCode);
}
int failureLimit = policy.getFailureLimit();
if (ex != null) {
long count = failureCounter.incrementAndGet();
if (failureLimit == 0
|| (failureLimit > 0 && count < failureLimit)) {
throw ex;
}
}
}
private void maybeFail() {
maybeFail("throttled", 503);
}
/**
* Set the limit on failures before all operations pass through.
* This resets the failure count.
* @param limit limit; "0" means "no limit"
*/
public void setFailureLimit(int limit) {
policy.setFailureLimit(limit);
failureCounter.set(0);
}
@Override
public S3Object getObject(GetObjectRequest var1) throws SdkClientException,
AmazonServiceException {
maybeFail();
return super.getObject(var1);
}
@Override
public S3Object getObject(String bucketName, String key)
throws SdkClientException, AmazonServiceException {
maybeFail();
return super.getObject(bucketName, key);
}
/** Since ObjectListing is immutable, we just override it with wrapper. */
@SuppressWarnings("serial")
private static class CustomObjectListing extends ObjectListing {
private final List<S3ObjectSummary> customListing;
private final List<String> customPrefixes;
CustomObjectListing(ObjectListing rawListing,
List<S3ObjectSummary> customListing,
List<String> customPrefixes) {
super();
this.customListing = customListing;
this.customPrefixes = customPrefixes;
this.setBucketName(rawListing.getBucketName());
this.setCommonPrefixes(rawListing.getCommonPrefixes());
this.setDelimiter(rawListing.getDelimiter());
this.setEncodingType(rawListing.getEncodingType());
this.setMarker(rawListing.getMarker());
this.setMaxKeys(rawListing.getMaxKeys());
this.setNextMarker(rawListing.getNextMarker());
this.setPrefix(rawListing.getPrefix());
this.setTruncated(rawListing.isTruncated());
}
@Override
public List<S3ObjectSummary> getObjectSummaries() {
return customListing;
}
@Override
public List<String> getCommonPrefixes() {
return customPrefixes;
}
}
@SuppressWarnings("serial")
private static class CustomListObjectsV2Result extends ListObjectsV2Result {
private final List<S3ObjectSummary> customListing;
private final List<String> customPrefixes;
CustomListObjectsV2Result(ListObjectsV2Result raw,
List<S3ObjectSummary> customListing, List<String> customPrefixes) {
super();
this.customListing = customListing;
this.customPrefixes = customPrefixes;
this.setBucketName(raw.getBucketName());
this.setCommonPrefixes(raw.getCommonPrefixes());
this.setDelimiter(raw.getDelimiter());
this.setEncodingType(raw.getEncodingType());
this.setStartAfter(raw.getStartAfter());
this.setMaxKeys(raw.getMaxKeys());
this.setContinuationToken(raw.getContinuationToken());
this.setPrefix(raw.getPrefix());
this.setTruncated(raw.isTruncated());
}
@Override
public List<S3ObjectSummary> getObjectSummaries() {
return customListing;
}
@Override
public List<String> getCommonPrefixes() {
return customPrefixes;
}
}
}

View File

@ -18,11 +18,19 @@
package org.apache.hadoop.fs.s3a;
import com.amazonaws.ClientConfiguration;
import com.amazonaws.services.s3.AmazonS3;
import java.io.IOException;
import java.util.concurrent.atomic.AtomicLong;
import software.amazon.awssdk.awscore.exception.AwsServiceException;
import software.amazon.awssdk.core.client.config.ClientOverrideConfiguration;
import software.amazon.awssdk.core.exception.SdkException;
import software.amazon.awssdk.core.interceptor.Context;
import software.amazon.awssdk.core.interceptor.ExecutionAttributes;
import software.amazon.awssdk.core.interceptor.ExecutionInterceptor;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;
/**
* S3 Client factory used for testing with eventual consistency fault injection.
@ -30,25 +38,74 @@ import org.apache.hadoop.classification.InterfaceStability;
* {@code hadoop-aws} module to enable integration tests to use this
* just by editing the Hadoop configuration used to bring up the client.
*
* The factory uses the older constructor-based instantiation/configuration
* of the client, so does not wire up metrics, handlers etc.
* The factory injects an {@link ExecutionInterceptor} to inject failures.
*/
@InterfaceAudience.Private
@InterfaceStability.Unstable
public class InconsistentS3ClientFactory extends DefaultS3ClientFactory {
@Override
protected AmazonS3 buildAmazonS3Client(
final ClientConfiguration awsConf,
final S3ClientCreationParameters parameters) {
protected ClientOverrideConfiguration createClientOverrideConfiguration(
S3ClientCreationParameters parameters, Configuration conf) throws IOException {
LOG.warn("** FAILURE INJECTION ENABLED. Do not run in production! **");
LOG.warn("List inconsistency is no longer emulated; only throttling and read errors");
InconsistentAmazonS3Client s3
= new InconsistentAmazonS3Client(
parameters.getCredentialSet(), awsConf, getConf());
configureAmazonS3Client(s3,
parameters.getEndpoint(),
parameters.isPathStyleAccess());
return s3;
return super.createClientOverrideConfiguration(parameters, conf)
.toBuilder()
.addExecutionInterceptor(new FailureInjectionInterceptor(
new FailureInjectionPolicy(conf)))
.build();
}
private static class FailureInjectionInterceptor implements ExecutionInterceptor {
private final FailureInjectionPolicy policy;
/**
* Counter of failures since last reset.
*/
private final AtomicLong failureCounter = new AtomicLong(0);
FailureInjectionInterceptor(FailureInjectionPolicy policy) {
this.policy = policy;
}
@Override
public void beforeExecution(Context.BeforeExecution context,
ExecutionAttributes executionAttributes) {
maybeFail();
}
private void maybeFail() {
maybeFail("throttled", 503);
}
/**
* Conditionally fail the operation.
* @param errorMsg description of failure
* @param statusCode http status code for error
* @throws SdkException if the client chooses to fail
* the request.
*/
private void maybeFail(String errorMsg, int statusCode)
throws SdkException {
// code structure here is to line up for more failures later
AwsServiceException ex = null;
if (FailureInjectionPolicy.trueWithProbability(policy.getThrottleProbability())) {
// throttle the request
ex = AwsServiceException.builder()
.message(errorMsg + " count = " + (failureCounter.get() + 1))
.statusCode(statusCode)
.build();
}
int failureLimit = policy.getFailureLimit();
if (ex != null) {
long count = failureCounter.incrementAndGet();
if (failureLimit == 0
|| (failureLimit > 0 && count < failureLimit)) {
throw ex;
}
}
}
}
}
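
The same interceptor mechanism is available to any client: an ExecutionInterceptor added to the override configuration runs before every request. A sketch with an illustrative logging interceptor rather than the failure injector above:

    import software.amazon.awssdk.core.client.config.ClientOverrideConfiguration;
    import software.amazon.awssdk.core.interceptor.Context;
    import software.amazon.awssdk.core.interceptor.ExecutionAttributes;
    import software.amazon.awssdk.core.interceptor.ExecutionInterceptor;
    import software.amazon.awssdk.regions.Region;
    import software.amazon.awssdk.services.s3.S3Client;

    public class InterceptorSketch {
      public static void main(String[] args) {
        ExecutionInterceptor logging = new ExecutionInterceptor() {
          @Override
          public void beforeExecution(Context.BeforeExecution context,
              ExecutionAttributes executionAttributes) {
            // Runs before every API call issued by the client built below.
            System.out.println("about to execute " + context.request());
          }
        };

        S3Client s3 = S3Client.builder()
            .region(Region.US_EAST_1)
            .overrideConfiguration(ClientOverrideConfiguration.builder()
                .addExecutionInterceptor(logging)
                .build())
            .build();
        s3.close();
      }
    }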

View File

@ -24,8 +24,7 @@ import java.util.Optional;
import java.util.concurrent.Future;
import javax.annotation.Nullable;
import com.amazonaws.AmazonClientException;
import com.amazonaws.SdkBaseException;
import software.amazon.awssdk.core.exception.SdkException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@ -39,6 +38,7 @@ import org.apache.hadoop.util.functional.FutureIO;
import org.apache.hadoop.util.functional.InvocationRaisingIOE;
import org.apache.hadoop.util.Preconditions;
import static org.apache.hadoop.fs.statistics.impl.IOStatisticsBinding.invokeTrackingDuration;
/**
@ -120,7 +120,7 @@ public class Invoker {
throws IOException {
try (DurationInfo ignored = new DurationInfo(LOG, false, "%s", action)) {
return operation.apply();
} catch (AmazonClientException e) {
} catch (SdkException e) {
throw S3AUtils.translateException(action, path, e);
}
}
@ -145,7 +145,7 @@ public class Invoker {
throws IOException {
try {
return invokeTrackingDuration(tracker, operation);
} catch (AmazonClientException e) {
} catch (SdkException e) {
throw S3AUtils.translateException(action, path, e);
}
}
@ -170,7 +170,7 @@ public class Invoker {
/**
*
* Wait for a future, translating AmazonClientException into an IOException.
* Wait for a future, translating SdkException into an IOException.
* @param action action to execute (used in error messages)
* @param path path of work (used in error messages)
* @param future future to await for
@ -186,7 +186,7 @@ public class Invoker {
throws IOException {
try (DurationInfo ignored = new DurationInfo(LOG, false, "%s", action)) {
return FutureIO.awaitFuture(future);
} catch (AmazonClientException e) {
} catch (SdkException e) {
throw S3AUtils.translateException(action, path, e);
}
}
@ -444,7 +444,7 @@ public class Invoker {
* @param operation operation to execute
* @return the result of the call
* @throws IOException any IOE raised
* @throws SdkBaseException any AWS exception raised
* @throws SdkException any AWS exception raised
* @throws RuntimeException : these are never caught and retried.
*/
@Retries.RetryRaw
@ -466,7 +466,7 @@ public class Invoker {
}
// execute the operation, returning if successful
return operation.apply();
} catch (IOException | SdkBaseException e) {
} catch (IOException | SdkException e) {
caught = e;
}
// you only get here if the operation didn't complete
@ -478,7 +478,7 @@ public class Invoker {
translated = (IOException) caught;
} else {
translated = S3AUtils.translateException(text, "",
(SdkBaseException)caught);
(SdkException) caught);
}
try {
@ -517,11 +517,10 @@ public class Invoker {
if (caught instanceof IOException) {
throw (IOException) caught;
} else {
throw (SdkBaseException) caught;
throw (SdkException) caught;
}
}
/**
* Execute an operation; any exception raised is simply caught and
* logged at debug.

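The Invoker changes above are mechanical: every v1 AmazonClientException/SdkBaseException catch becomes a catch of the v2 base class SdkException, which is then translated into an IOException. A minimal standalone sketch of that pattern follows; the once() helper and the generic IOException mapping are simplifications of what Invoker and S3AUtils.translateException() actually do.

import java.io.IOException;
import software.amazon.awssdk.awscore.exception.AwsServiceException;
import software.amazon.awssdk.core.exception.SdkException;

public final class TranslateSketch {

  /** A bit of work that may throw the unchecked SDK v2 exceptions. */
  @FunctionalInterface
  interface S3Call<T> {
    T apply() throws SdkException;
  }

  /** Run a call, converting SDK failures to IOExceptions for the caller. */
  static <T> T once(String action, S3Call<T> call) throws IOException {
    try {
      return call.apply();
    } catch (AwsServiceException e) {
      // service-side failure: the HTTP status is available for finer mapping
      throw new IOException(action + " failed, status " + e.statusCode(), e);
    } catch (SdkException e) {
      // client-side failure: network errors, interrupts, aborted requests...
      throw new IOException(action + " failed", e);
    }
  }

  public static void main(String[] args) throws IOException {
    System.out.println(once("demo", () -> "ok"));
  }
}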
View File

@ -18,7 +18,8 @@
package org.apache.hadoop.fs.s3a;
import com.amazonaws.services.s3.model.S3ObjectSummary;
import software.amazon.awssdk.services.s3.model.CommonPrefix;
import software.amazon.awssdk.services.s3.model.S3Object;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.VisibleForTesting;
@ -277,19 +278,19 @@ public class Listing extends AbstractStoreOperation {
}
/**
* Interface to implement by the logic deciding whether to accept a summary
* Interface to implement the logic deciding whether to accept an s3Object
* entry or path as a valid file or directory.
*/
interface FileStatusAcceptor {
/**
* Predicate to decide whether or not to accept a summary entry.
* Predicate to decide whether or not to accept an s3Object entry.
* @param keyPath qualified path to the entry
* @param summary summary entry
* @param s3Object s3Object entry
* @return true if the entry is accepted (i.e. that a status entry
* should be generated.
*/
boolean accept(Path keyPath, S3ObjectSummary summary);
boolean accept(Path keyPath, S3Object s3Object);
/**
* Predicate to decide whether or not to accept a prefix.
@ -451,21 +452,21 @@ public class Listing extends AbstractStoreOperation {
int added = 0, ignored = 0;
// list to fill in with results. Initial size will be list maximum.
List<S3AFileStatus> stats = new ArrayList<>(
objects.getObjectSummaries().size() +
objects.getS3Objects().size() +
objects.getCommonPrefixes().size());
// objects
for (S3ObjectSummary summary : objects.getObjectSummaries()) {
String key = summary.getKey();
for (S3Object s3Object : objects.getS3Objects()) {
String key = s3Object.key();
Path keyPath = getStoreContext().getContextAccessors().keyToPath(key);
if (LOG.isDebugEnabled()) {
LOG.debug("{}: {}", keyPath, stringify(summary));
LOG.debug("{}: {}", keyPath, stringify(s3Object));
}
// Skip over keys that are ourselves and old S3N _$folder$ files
if (acceptor.accept(keyPath, summary) && filter.accept(keyPath)) {
S3AFileStatus status = createFileStatus(keyPath, summary,
if (acceptor.accept(keyPath, s3Object) && filter.accept(keyPath)) {
S3AFileStatus status = createFileStatus(keyPath, s3Object,
listingOperationCallbacks.getDefaultBlockSize(keyPath),
getStoreContext().getUsername(),
summary.getETag(), null, isCSEEnabled);
s3Object.eTag(), null, isCSEEnabled);
LOG.debug("Adding: {}", status);
stats.add(status);
added++;
@ -476,11 +477,11 @@ public class Listing extends AbstractStoreOperation {
}
// prefixes: always directories
for (String prefix : objects.getCommonPrefixes()) {
for (CommonPrefix prefix : objects.getCommonPrefixes()) {
Path keyPath = getStoreContext()
.getContextAccessors()
.keyToPath(prefix);
if (acceptor.accept(keyPath, prefix) && filter.accept(keyPath)) {
.keyToPath(prefix.prefix());
if (acceptor.accept(keyPath, prefix.prefix()) && filter.accept(keyPath)) {
S3AFileStatus status = new S3AFileStatus(Tristate.FALSE, keyPath,
getStoreContext().getUsername());
LOG.debug("Adding directory: {}", status);
@ -731,18 +732,18 @@ public class Listing extends AbstractStoreOperation {
}
/**
* Reject a summary entry if the key path is the qualified Path, or
* Reject an s3Object entry if the key path is the qualified Path, or
* it ends with {@code "_$folder$"}.
* @param keyPath key path of the entry
* @param summary summary entry
* @param s3Object s3Object entry
* @return true if the entry is accepted (i.e. that a status entry
* should be generated.
*/
@Override
public boolean accept(Path keyPath, S3ObjectSummary summary) {
public boolean accept(Path keyPath, S3Object s3Object) {
return !keyPath.equals(qualifiedPath)
&& !summary.getKey().endsWith(S3N_FOLDER_SUFFIX)
&& !objectRepresentsDirectory(summary.getKey());
&& !s3Object.key().endsWith(S3N_FOLDER_SUFFIX)
&& !objectRepresentsDirectory(s3Object.key());
}
/**
@ -767,8 +768,8 @@ public class Listing extends AbstractStoreOperation {
*/
static class AcceptAllButS3nDirs implements FileStatusAcceptor {
public boolean accept(Path keyPath, S3ObjectSummary summary) {
return !summary.getKey().endsWith(S3N_FOLDER_SUFFIX);
public boolean accept(Path keyPath, S3Object s3Object) {
return !s3Object.key().endsWith(S3N_FOLDER_SUFFIX);
}
public boolean accept(Path keyPath, String prefix) {
@ -799,17 +800,17 @@ public class Listing extends AbstractStoreOperation {
}
/**
* Reject a summary entry if the key path is the qualified Path, or
* Reject an s3Object entry if the key path is the qualified Path, or
* it ends with {@code "_$folder$"}.
* @param keyPath key path of the entry
* @param summary summary entry
* @param s3Object s3Object entry
* @return true if the entry is accepted (i.e. that a status entry
* should be generated.)
*/
@Override
public boolean accept(Path keyPath, S3ObjectSummary summary) {
public boolean accept(Path keyPath, S3Object s3Object) {
return !keyPath.equals(qualifiedPath) &&
!summary.getKey().endsWith(S3N_FOLDER_SUFFIX);
!s3Object.key().endsWith(S3N_FOLDER_SUFFIX);
}
/**

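The listing changes above swap the v1 S3ObjectSummary getters for the v2 S3Object accessors (key(), size(), eTag(), lastModified()) and make common prefixes typed CommonPrefix values. Below is a minimal sketch of those accessors against a raw ListObjectsV2 call, outside the S3ListResult wrapper used by the Listing class; the bucket, prefix and region are placeholders.

import software.amazon.awssdk.regions.Region;
import software.amazon.awssdk.services.s3.S3Client;
import software.amazon.awssdk.services.s3.model.CommonPrefix;
import software.amazon.awssdk.services.s3.model.ListObjectsV2Request;
import software.amazon.awssdk.services.s3.model.ListObjectsV2Response;
import software.amazon.awssdk.services.s3.model.S3Object;

public final class ListingSketch {
  public static void main(String[] args) {
    try (S3Client s3 = S3Client.builder().region(Region.US_EAST_1).build()) {
      ListObjectsV2Response response = s3.listObjectsV2(
          ListObjectsV2Request.builder()
              .bucket("example-bucket")   // placeholder
              .prefix("data/")            // placeholder
              .delimiter("/")
              .build());
      // files: contents() replaces the v1 getObjectSummaries()
      for (S3Object object : response.contents()) {
        System.out.printf("%s size=%d etag=%s modified=%s%n",
            object.key(), object.size(), object.eTag(), object.lastModified());
      }
      // directories: prefixes are now CommonPrefix objects, not plain strings
      for (CommonPrefix prefix : response.commonPrefixes()) {
        System.out.println("dir: " + prefix.prefix());
      }
    }
  }
}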
View File

@ -23,13 +23,14 @@ import java.util.ListIterator;
import java.util.NoSuchElementException;
import javax.annotation.Nullable;
import com.amazonaws.services.s3.AmazonS3;
import com.amazonaws.services.s3.model.ListMultipartUploadsRequest;
import com.amazonaws.services.s3.model.MultipartUpload;
import com.amazonaws.services.s3.model.MultipartUploadListing;
import software.amazon.awssdk.services.s3.S3Client;
import software.amazon.awssdk.services.s3.model.ListMultipartUploadsRequest;
import software.amazon.awssdk.services.s3.model.ListMultipartUploadsResponse;
import software.amazon.awssdk.services.s3.model.MultipartUpload;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.fs.RemoteIterator;
import org.apache.hadoop.fs.s3a.api.RequestFactory;
import org.apache.hadoop.fs.s3a.impl.StoreContext;
@ -43,7 +44,7 @@ import static org.apache.hadoop.fs.statistics.impl.IOStatisticsBinding.trackDura
* MultipartUtils upload-specific functions for use by S3AFileSystem and Hadoop
* CLI.
* The Audit span active when
* {@link #listMultipartUploads(StoreContext, AmazonS3, String, int)}
* {@link #listMultipartUploads(StoreContext, S3Client, String, int)}
* was invoked is retained for all subsequent operations.
*/
public final class MultipartUtils {
@ -67,7 +68,7 @@ public final class MultipartUtils {
*/
static MultipartUtils.UploadIterator listMultipartUploads(
final StoreContext storeContext,
AmazonS3 s3,
S3Client s3,
@Nullable String prefix,
int maxKeys)
throws IOException {
@ -84,14 +85,14 @@ public final class MultipartUtils {
* at the time the iterator was constructed.
*/
static class ListingIterator implements
RemoteIterator<MultipartUploadListing> {
RemoteIterator<ListMultipartUploadsResponse> {
private final String prefix;
private final RequestFactory requestFactory;
private final int maxKeys;
private final AmazonS3 s3;
private final S3Client s3;
private final Invoker invoker;
private final AuditSpan auditSpan;
@ -101,7 +102,7 @@ public final class MultipartUtils {
/**
* Most recent listing results.
*/
private MultipartUploadListing listing;
private ListMultipartUploadsResponse listing;
/**
* Indicator that this is the first listing.
@ -114,7 +115,7 @@ public final class MultipartUtils {
private int listCount = 0;
ListingIterator(final StoreContext storeContext,
AmazonS3 s3,
S3Client s3,
@Nullable String prefix,
int maxKeys) throws IOException {
this.storeContext = storeContext;
@ -153,7 +154,7 @@ public final class MultipartUtils {
*/
@Override
@Retries.RetryTranslated
public MultipartUploadListing next() throws IOException {
public ListMultipartUploadsResponse next() throws IOException {
if (firstListing) {
firstListing = false;
} else {
@ -171,32 +172,34 @@ public final class MultipartUtils {
public String toString() {
return "Upload iterator: prefix " + prefix
+ "; list count " + listCount
+ "; upload count " + listing.getMultipartUploads().size()
+ "; upload count " + listing.uploads().size()
+ "; isTruncated=" + listing.isTruncated();
}
@Retries.RetryTranslated
private void requestNextBatch() throws IOException {
try (AuditSpan span = auditSpan.activate()) {
ListMultipartUploadsRequest req = requestFactory
.newListMultipartUploadsRequest(prefix);
ListMultipartUploadsRequest.Builder requestBuilder = requestFactory
.newListMultipartUploadsRequestBuilder(prefix);
if (!firstListing) {
req.setKeyMarker(listing.getNextKeyMarker());
req.setUploadIdMarker(listing.getNextUploadIdMarker());
requestBuilder.keyMarker(listing.nextKeyMarker());
requestBuilder.uploadIdMarker(listing.nextUploadIdMarker());
}
req.setMaxUploads(maxKeys);
requestBuilder.maxUploads(maxKeys);
ListMultipartUploadsRequest request = requestBuilder.build();
LOG.debug("[{}], Requesting next {} uploads prefix {}, " +
"next key {}, next upload id {}", listCount, maxKeys, prefix,
req.getKeyMarker(), req.getUploadIdMarker());
request.keyMarker(), request.uploadIdMarker());
listCount++;
listing = invoker.retry("listMultipartUploads", prefix, true,
trackDurationOfOperation(storeContext.getInstrumentation(),
MULTIPART_UPLOAD_LIST.getSymbol(),
() -> s3.listMultipartUploads(req)));
() -> s3.listMultipartUploads(requestBuilder.build())));
LOG.debug("Listing found {} upload(s)",
listing.getMultipartUploads().size());
listing.uploads().size());
LOG.debug("New listing state: {}", this);
}
}
@ -216,14 +219,14 @@ public final class MultipartUtils {
*/
private ListingIterator lister;
/** Current listing: the last upload listing we fetched. */
private MultipartUploadListing listing;
private ListMultipartUploadsResponse listing;
/** Iterator over the current listing. */
private ListIterator<MultipartUpload> batchIterator;
@Retries.RetryTranslated
public UploadIterator(
final StoreContext storeContext,
AmazonS3 s3,
S3Client s3,
int maxKeys,
@Nullable String prefix)
throws IOException {
@ -249,7 +252,7 @@ public final class MultipartUtils {
private boolean requestNextBatch() throws IOException {
if (lister.hasNext()) {
listing = lister.next();
batchIterator = listing.getMultipartUploads().listIterator();
batchIterator = listing.uploads().listIterator();
return batchIterator.hasNext();
}
return false;
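For orientation, the iterator above pages through multipart uploads by carrying the key and upload-id markers from one ListMultipartUploadsResponse into the next request builder. The same loop, reduced to a raw-client sketch with a placeholder bucket name and page size:

import software.amazon.awssdk.regions.Region;
import software.amazon.awssdk.services.s3.S3Client;
import software.amazon.awssdk.services.s3.model.ListMultipartUploadsRequest;
import software.amazon.awssdk.services.s3.model.ListMultipartUploadsResponse;
import software.amazon.awssdk.services.s3.model.MultipartUpload;

public final class ListUploadsSketch {
  public static void main(String[] args) {
    try (S3Client s3 = S3Client.builder().region(Region.US_EAST_1).build()) {
      String keyMarker = null;
      String uploadIdMarker = null;
      ListMultipartUploadsResponse page;
      do {
        ListMultipartUploadsRequest.Builder builder = ListMultipartUploadsRequest.builder()
            .bucket("example-bucket")   // placeholder
            .maxUploads(100);           // placeholder page size
        if (keyMarker != null) {
          // resume exactly where the previous page stopped
          builder.keyMarker(keyMarker).uploadIdMarker(uploadIdMarker);
        }
        page = s3.listMultipartUploads(builder.build());
        for (MultipartUpload upload : page.uploads()) {
          System.out.println(upload.key() + " -> " + upload.uploadId());
        }
        keyMarker = page.nextKeyMarker();
        uploadIdMarker = page.nextUploadIdMarker();
      } while (Boolean.TRUE.equals(page.isTruncated()));
    }
  }
}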

View File

@ -18,59 +18,55 @@
package org.apache.hadoop.fs.s3a;
import com.amazonaws.event.ProgressEvent;
import com.amazonaws.event.ProgressEventType;
import com.amazonaws.event.ProgressListener;
import com.amazonaws.services.s3.transfer.Upload;
import software.amazon.awssdk.transfer.s3.model.ObjectTransfer;
import software.amazon.awssdk.transfer.s3.progress.TransferListener;
import org.apache.hadoop.util.Progressable;
import org.slf4j.Logger;
import static com.amazonaws.event.ProgressEventType.TRANSFER_COMPLETED_EVENT;
import static com.amazonaws.event.ProgressEventType.TRANSFER_PART_STARTED_EVENT;
/**
* Listener to progress from AWS regarding transfers.
*/
public class ProgressableProgressListener implements ProgressListener {
public class ProgressableProgressListener implements TransferListener {
private static final Logger LOG = S3AFileSystem.LOG;
private final S3AFileSystem fs;
private final String key;
private final Progressable progress;
private long lastBytesTransferred;
private final Upload upload;
/**
* Instantiate.
* @param fs filesystem: will be invoked with statistics updates
* @param key key for the upload
* @param upload source of events
* @param progress optional callback for progress.
*/
public ProgressableProgressListener(S3AFileSystem fs,
String key,
Upload upload,
Progressable progress) {
this.fs = fs;
this.key = key;
this.upload = upload;
this.progress = progress;
this.lastBytesTransferred = 0;
}
@Override
public void progressChanged(ProgressEvent progressEvent) {
if (progress != null) {
public void transferInitiated(TransferListener.Context.TransferInitiated context) {
fs.incrementWriteOperations();
}
@Override
public void transferComplete(TransferListener.Context.TransferComplete context) {
fs.incrementWriteOperations();
}
@Override
public void bytesTransferred(TransferListener.Context.BytesTransferred context) {
if(progress != null) {
progress.progress();
}
// There are 3 http ops here, but this should be close enough for now
ProgressEventType pet = progressEvent.getEventType();
if (pet == TRANSFER_PART_STARTED_EVENT ||
pet == TRANSFER_COMPLETED_EVENT) {
fs.incrementWriteOperations();
}
long transferred = upload.getProgress().getBytesTransferred();
long transferred = context.progressSnapshot().transferredBytes();
long delta = transferred - lastBytesTransferred;
fs.incrementPutProgressStatistics(key, delta);
lastBytesTransferred = transferred;
@ -81,9 +77,10 @@ public class ProgressableProgressListener implements ProgressListener {
* This can handle race conditions in setup/teardown.
* @return the number of bytes which were transferred after the notification
*/
public long uploadCompleted() {
long delta = upload.getProgress().getBytesTransferred() -
lastBytesTransferred;
public long uploadCompleted(ObjectTransfer upload) {
long delta =
upload.progress().snapshot().transferredBytes() - lastBytesTransferred;
if (delta > 0) {
LOG.debug("S3A write delta changed after finished: {} bytes", delta);
fs.incrementPutProgressStatistics(key, delta);
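The class above moves from the v1 ProgressListener event model to the SDK v2 TransferListener callbacks, where byte counts come from a cumulative progress snapshot rather than per-event deltas. Here is a standalone listener showing only the raw v2 callback surface and the same delta bookkeeping; none of the S3A statistics plumbing is included.

import software.amazon.awssdk.transfer.s3.progress.TransferListener;

public final class DeltaLoggingListener implements TransferListener {

  private long lastBytesTransferred;

  @Override
  public void transferInitiated(TransferListener.Context.TransferInitiated context) {
    System.out.println("transfer started");
  }

  @Override
  public void bytesTransferred(TransferListener.Context.BytesTransferred context) {
    // the snapshot is cumulative, so the delta has to be computed locally
    long transferred = context.progressSnapshot().transferredBytes();
    long delta = transferred - lastBytesTransferred;
    lastBytesTransferred = transferred;
    System.out.println("+" + delta + " bytes (total " + transferred + ")");
  }

  @Override
  public void transferComplete(TransferListener.Context.TransferComplete context) {
    System.out.println("transfer complete after " + lastBytesTransferred + " bytes");
  }

  @Override
  public void transferFailed(TransferListener.Context.TransferFailed context) {
    System.out.println("transfer failed after " + lastBytesTransferred + " bytes");
  }
}

A listener of this kind is attached when the transfer request is built (typically through an addTransferListener(...) call on the transfer manager's request builder), which is broadly how the filesystem registers ProgressableProgressListener on uploads.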

View File

@ -31,15 +31,16 @@ import java.util.concurrent.ExecutorService;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;
import com.amazonaws.SdkBaseException;
import com.amazonaws.event.ProgressEvent;
import com.amazonaws.event.ProgressEventType;
import com.amazonaws.event.ProgressListener;
import com.amazonaws.services.s3.model.PartETag;
import com.amazonaws.services.s3.model.PutObjectRequest;
import com.amazonaws.services.s3.model.PutObjectResult;
import com.amazonaws.services.s3.model.UploadPartRequest;
import software.amazon.awssdk.core.exception.SdkException;
import software.amazon.awssdk.core.sync.RequestBody;
import software.amazon.awssdk.services.s3.model.CompletedPart;
import software.amazon.awssdk.services.s3.model.PutObjectRequest;
import software.amazon.awssdk.services.s3.model.PutObjectResponse;
import software.amazon.awssdk.services.s3.model.UploadPartRequest;
import software.amazon.awssdk.services.s3.model.UploadPartResponse;
import org.apache.hadoop.fs.s3a.impl.ProgressListener;
import org.apache.hadoop.fs.s3a.impl.ProgressListenerEvent;
import org.apache.hadoop.fs.s3a.impl.PutObjectOptions;
import org.apache.hadoop.fs.statistics.IOStatisticsAggregator;
import org.apache.hadoop.util.Preconditions;
@ -69,6 +70,7 @@ import org.apache.hadoop.util.Progressable;
import static java.util.Objects.requireNonNull;
import static org.apache.hadoop.fs.s3a.S3AUtils.*;
import static org.apache.hadoop.fs.s3a.Statistic.*;
import static org.apache.hadoop.fs.s3a.impl.ProgressListenerEvent.*;
import static org.apache.hadoop.fs.s3a.statistics.impl.EmptyS3AStatisticsContext.EMPTY_BLOCK_OUTPUT_STREAM_STATISTICS;
import static org.apache.hadoop.fs.statistics.impl.IOStatisticsBinding.trackDuration;
import static org.apache.hadoop.fs.statistics.impl.IOStatisticsBinding.trackDurationOfInvocation;
@ -192,7 +194,7 @@ class S3ABlockOutputStream extends OutputStream implements
this.executorService = MoreExecutors.listeningDecorator(
builder.executorService);
this.multiPartUpload = null;
final Progressable progress = builder.progress;
Progressable progress = builder.progress;
this.progressListener = (progress instanceof ProgressListener) ?
(ProgressListener) progress
: new ProgressableListener(progress);
@ -439,7 +441,7 @@ class S3ABlockOutputStream extends OutputStream implements
uploadCurrentBlock(true);
}
// wait for the partial uploads to finish
final List<PartETag> partETags =
final List<CompletedPart> partETags =
multiPartUpload.waitForAllPartUploads();
bytes = bytesSubmitted;
@ -597,27 +599,28 @@ class S3ABlockOutputStream extends OutputStream implements
final PutObjectRequest putObjectRequest = uploadData.hasFile() ?
writeOperationHelper.createPutObjectRequest(
key,
uploadData.getFile(),
builder.putOptions)
uploadData.getFile().length(),
builder.putOptions,
true)
: writeOperationHelper.createPutObjectRequest(
key,
uploadData.getUploadStream(),
size,
builder.putOptions);
BlockUploadProgress callback =
new BlockUploadProgress(
block, progressListener, now());
putObjectRequest.setGeneralProgressListener(callback);
builder.putOptions,
false);
BlockUploadProgress progressCallback =
new BlockUploadProgress(block, progressListener, now());
statistics.blockUploadQueued(size);
ListenableFuture<PutObjectResult> putObjectResult =
ListenableFuture<PutObjectResponse> putObjectResult =
executorService.submit(() -> {
try {
// the putObject call automatically closes the input
// stream afterwards.
return writeOperationHelper.putObject(
putObjectRequest,
builder.putOptions,
statistics);
PutObjectResponse response =
writeOperationHelper.putObject(putObjectRequest, builder.putOptions, uploadData,
uploadData.hasFile(), statistics);
progressCallback.progressChanged(REQUEST_BYTE_TRANSFER_EVENT);
return response;
} finally {
cleanupWithLogger(LOG, uploadData, block);
}
@ -761,7 +764,7 @@ class S3ABlockOutputStream extends OutputStream implements
*/
private class MultiPartUpload {
private final String uploadId;
private final List<ListenableFuture<PartETag>> partETagsFutures;
private final List<ListenableFuture<CompletedPart>> partETagsFutures;
private int partsSubmitted;
private int partsUploaded;
private long bytesSubmitted;
@ -866,18 +869,19 @@ class S3ABlockOutputStream extends OutputStream implements
final int currentPartNumber = partETagsFutures.size() + 1;
final UploadPartRequest request;
final S3ADataBlocks.BlockUploadData uploadData;
final RequestBody requestBody;
try {
uploadData = block.startUpload();
request = writeOperationHelper.newUploadPartRequest(
requestBody = uploadData.hasFile()
? RequestBody.fromFile(uploadData.getFile())
: RequestBody.fromInputStream(uploadData.getUploadStream(), size);
request = writeOperationHelper.newUploadPartRequestBuilder(
key,
uploadId,
currentPartNumber,
size,
uploadData.getUploadStream(),
uploadData.getFile(),
0L);
request.setLastPart(isLast);
} catch (SdkBaseException aws) {
size).build();
} catch (SdkException aws) {
// catch and translate
IOException e = translateException("upload", key, aws);
// failure to start the upload.
@ -888,28 +892,38 @@ class S3ABlockOutputStream extends OutputStream implements
noteUploadFailure(e);
throw e;
}
BlockUploadProgress callback =
new BlockUploadProgress(
block, progressListener, now());
request.setGeneralProgressListener(callback);
BlockUploadProgress progressCallback =
new BlockUploadProgress(block, progressListener, now());
statistics.blockUploadQueued(block.dataSize());
ListenableFuture<PartETag> partETagFuture =
ListenableFuture<CompletedPart> partETagFuture =
executorService.submit(() -> {
// this is the queued upload operation
// do the upload
try {
LOG.debug("Uploading part {} for id '{}'",
currentPartNumber, uploadId);
PartETag partETag = writeOperationHelper.uploadPart(request, statistics)
.getPartETag();
progressCallback.progressChanged(TRANSFER_PART_STARTED_EVENT);
UploadPartResponse response = writeOperationHelper
.uploadPart(request, requestBody, statistics);
LOG.debug("Completed upload of {} to part {}",
block, partETag.getETag());
block, response.eTag());
LOG.debug("Stream statistics of {}", statistics);
partsUploaded++;
return partETag;
progressCallback.progressChanged(TRANSFER_PART_COMPLETED_EVENT);
return CompletedPart.builder()
.eTag(response.eTag())
.partNumber(currentPartNumber)
.build();
} catch (IOException e) {
// save immediately.
noteUploadFailure(e);
progressCallback.progressChanged(TRANSFER_PART_FAILED_EVENT);
throw e;
} finally {
// close the stream and block
@ -924,7 +938,7 @@ class S3ABlockOutputStream extends OutputStream implements
* @return list of results
* @throws IOException IO Problems
*/
private List<PartETag> waitForAllPartUploads() throws IOException {
private List<CompletedPart> waitForAllPartUploads() throws IOException {
LOG.debug("Waiting for {} uploads to complete", partETagsFutures.size());
try {
return Futures.allAsList(partETagsFutures).get();
@ -948,7 +962,7 @@ class S3ABlockOutputStream extends OutputStream implements
*/
private void cancelAllActiveFutures() {
LOG.debug("Cancelling futures");
for (ListenableFuture<PartETag> future : partETagsFutures) {
for (ListenableFuture<CompletedPart> future : partETagsFutures) {
future.cancel(true);
}
}
@ -960,7 +974,7 @@ class S3ABlockOutputStream extends OutputStream implements
* @param partETags list of partial uploads
* @throws IOException on any problem
*/
private void complete(List<PartETag> partETags)
private void complete(List<CompletedPart> partETags)
throws IOException {
maybeRethrowUploadFailure();
AtomicInteger errorCount = new AtomicInteger(0);
@ -1005,22 +1019,24 @@ class S3ABlockOutputStream extends OutputStream implements
}
}
/**
* The upload progress listener registered for events returned
* during the upload of a single block.
* It updates statistics and handles the end of the upload.
* Transfer failures are logged at WARN.
*/
private final class BlockUploadProgress implements ProgressListener {
private final class BlockUploadProgress {
private final S3ADataBlocks.DataBlock block;
private final ProgressListener nextListener;
private final Instant transferQueueTime;
private Instant transferStartTime;
private long size;
/**
* Track the progress of a single block upload.
* @param block block to monitor
* @param nextListener optional next progress listener
* @param transferQueueTime time the block was transferred
* into the queue
*/
@ -1029,20 +1045,17 @@ class S3ABlockOutputStream extends OutputStream implements
Instant transferQueueTime) {
this.block = block;
this.transferQueueTime = transferQueueTime;
this.size = block.dataSize();
this.nextListener = nextListener;
}
@Override
public void progressChanged(ProgressEvent progressEvent) {
ProgressEventType eventType = progressEvent.getEventType();
long bytesTransferred = progressEvent.getBytesTransferred();
public void progressChanged(ProgressListenerEvent eventType) {
long size = block.dataSize();
switch (eventType) {
case REQUEST_BYTE_TRANSFER_EVENT:
// bytes uploaded
statistics.bytesTransferred(bytesTransferred);
statistics.bytesTransferred(size);
break;
case TRANSFER_PART_STARTED_EVENT:
@ -1057,6 +1070,7 @@ class S3ABlockOutputStream extends OutputStream implements
statistics.blockUploadCompleted(
Duration.between(transferStartTime, now()),
size);
statistics.bytesTransferred(size);
break;
case TRANSFER_PART_FAILED_EVENT:
@ -1071,13 +1085,13 @@ class S3ABlockOutputStream extends OutputStream implements
}
if (nextListener != null) {
nextListener.progressChanged(progressEvent);
nextListener.progressChanged(eventType, size);
}
}
}
/**
* Bridge from AWS {@code ProgressListener} to Hadoop {@link Progressable}.
* Bridge from {@link ProgressListener} to Hadoop {@link Progressable}.
*/
private static class ProgressableListener implements ProgressListener {
private final Progressable progress;
@ -1086,7 +1100,7 @@ class S3ABlockOutputStream extends OutputStream implements
this.progress = progress;
}
public void progressChanged(ProgressEvent progressEvent) {
public void progressChanged(ProgressListenerEvent eventType, int bytesTransferred) {
if (progress != null) {
progress.progress();
}
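The output stream now collects CompletedPart values instead of v1 PartETag objects and feeds block data through RequestBody. For orientation, here is the complete v2 multipart sequence those pieces participate in, reduced to a single small part with placeholder bucket and key names (real parts other than the last must be at least 5 MB).

import software.amazon.awssdk.core.sync.RequestBody;
import software.amazon.awssdk.regions.Region;
import software.amazon.awssdk.services.s3.S3Client;
import software.amazon.awssdk.services.s3.model.CompleteMultipartUploadRequest;
import software.amazon.awssdk.services.s3.model.CompletedMultipartUpload;
import software.amazon.awssdk.services.s3.model.CompletedPart;
import software.amazon.awssdk.services.s3.model.CreateMultipartUploadRequest;
import software.amazon.awssdk.services.s3.model.UploadPartRequest;
import software.amazon.awssdk.services.s3.model.UploadPartResponse;

public final class MultipartSketch {
  public static void main(String[] args) {
    String bucket = "example-bucket";  // placeholder
    String key = "example-key";        // placeholder
    try (S3Client s3 = S3Client.builder().region(Region.US_EAST_1).build()) {
      // 1. start the upload and remember its id
      String uploadId = s3.createMultipartUpload(
          CreateMultipartUploadRequest.builder().bucket(bucket).key(key).build())
          .uploadId();
      // 2. upload one part; the bytes travel in a RequestBody
      UploadPartResponse part = s3.uploadPart(
          UploadPartRequest.builder()
              .bucket(bucket).key(key)
              .uploadId(uploadId)
              .partNumber(1)
              .build(),
          RequestBody.fromString("hello, multipart"));
      // 3. record the etag/part-number pair, as the stream does per block
      CompletedPart completed = CompletedPart.builder()
          .partNumber(1)
          .eTag(part.eTag())
          .build();
      // 4. complete the upload with the collected parts
      s3.completeMultipartUpload(CompleteMultipartUploadRequest.builder()
          .bucket(bucket).key(key)
          .uploadId(uploadId)
          .multipartUpload(CompletedMultipartUpload.builder().parts(completed).build())
          .build());
    }
  }
}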

View File

@ -47,7 +47,7 @@ import static org.apache.hadoop.io.IOUtils.cleanupWithLogger;
* Set of classes to support output streaming into blocks which are then
* uploaded as to S3 as a single PUT, or as part of a multipart request.
*/
final class S3ADataBlocks {
public final class S3ADataBlocks {
private static final Logger LOG =
LoggerFactory.getLogger(S3ADataBlocks.class);
@ -101,7 +101,7 @@ final class S3ADataBlocks {
* It can be one of a file or an input stream.
* When closed, any stream is closed. Any source file is untouched.
*/
static final class BlockUploadData implements Closeable {
public static final class BlockUploadData implements Closeable {
private final File file;
private final InputStream uploadStream;
@ -109,7 +109,7 @@ final class S3ADataBlocks {
* File constructor; input stream will be null.
* @param file file to upload
*/
BlockUploadData(File file) {
public BlockUploadData(File file) {
Preconditions.checkArgument(file.exists(), "No file: " + file);
this.file = file;
this.uploadStream = null;
@ -119,7 +119,7 @@ final class S3ADataBlocks {
* Stream constructor, file field will be null.
* @param uploadStream stream to upload
*/
BlockUploadData(InputStream uploadStream) {
public BlockUploadData(InputStream uploadStream) {
Preconditions.checkNotNull(uploadStream, "rawUploadStream");
this.uploadStream = uploadStream;
this.file = null;

View File

@ -22,6 +22,7 @@ import javax.annotation.Nullable;
import java.io.Closeable;
import java.io.EOFException;
import java.io.IOException;
import java.io.InputStream;
import java.io.InterruptedIOException;
import java.net.SocketTimeoutException;
import java.nio.ByteBuffer;
@ -31,9 +32,9 @@ import java.util.concurrent.ExecutorService;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.function.IntFunction;
import com.amazonaws.services.s3.model.GetObjectRequest;
import com.amazonaws.services.s3.model.S3Object;
import com.amazonaws.services.s3.model.S3ObjectInputStream;
import software.amazon.awssdk.core.ResponseInputStream;
import software.amazon.awssdk.services.s3.model.GetObjectRequest;
import software.amazon.awssdk.services.s3.model.GetObjectResponse;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@ -46,7 +47,6 @@ import org.apache.hadoop.fs.CanUnbuffer;
import org.apache.hadoop.fs.FSExceptionMessages;
import org.apache.hadoop.fs.FSInputStream;
import org.apache.hadoop.fs.FileRange;
import org.apache.hadoop.fs.PathIOException;
import org.apache.hadoop.fs.StreamCapabilities;
import org.apache.hadoop.fs.impl.CombinedFileRange;
import org.apache.hadoop.fs.VectoredReadUtils;
@ -61,6 +61,7 @@ import org.apache.hadoop.fs.statistics.IOStatisticsSource;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.util.functional.CallableRaisingIOE;
import static java.util.Objects.requireNonNull;
import static org.apache.commons.lang3.StringUtils.isNotEmpty;
import static org.apache.hadoop.fs.VectoredReadUtils.isOrderedDisjoint;
@ -125,14 +126,9 @@ public class S3AInputStream extends FSInputStream implements CanSetReadahead,
*/
private volatile boolean closed;
/**
* wrappedStream is associated with an object (instance of S3Object). When
* the object is garbage collected, the associated wrappedStream will be
* closed. Keep a reference to this object to prevent the wrapperStream
* still in use from being closed unexpectedly due to garbage collection.
* See HADOOP-17338 for details.
* Input stream returned by a getObject call.
*/
private S3Object object;
private S3ObjectInputStream wrappedStream;
private ResponseInputStream<GetObjectResponse> wrappedStream;
private final S3AReadOpContext context;
private final InputStreamCallbacks client;
@ -271,28 +267,22 @@ public class S3AInputStream extends FSInputStream implements CanSetReadahead,
uri, reason, targetPos, contentRangeFinish, length, pos, nextReadPos,
inputPolicy);
GetObjectRequest request = client.newGetRequestBuilder(key)
.range(S3AUtils.formatRange(targetPos, contentRangeFinish - 1))
.applyMutation(changeTracker::maybeApplyConstraint)
.build();
long opencount = streamStatistics.streamOpened();
GetObjectRequest request = client.newGetRequest(key)
.withRange(targetPos, contentRangeFinish - 1);
String operation = opencount == 0 ? OPERATION_OPEN : OPERATION_REOPEN;
String text = String.format("%s %s at %d",
operation, uri, targetPos);
changeTracker.maybeApplyConstraint(request);
object = onceTrackingDuration(text, uri,
wrappedStream = onceTrackingDuration(text, uri,
streamStatistics.initiateGetRequest(), () ->
client.getObject(request));
changeTracker.processResponse(object, operation,
changeTracker.processResponse(wrappedStream.response(), operation,
targetPos);
wrappedStream = object.getObjectContent();
contentRangeStart = targetPos;
if (wrappedStream == null) {
throw new PathIOException(uri,
"Null IO stream from " + operation + " of (" + reason + ") ");
}
contentRangeStart = targetPos;
this.pos = targetPos;
}
@ -505,14 +495,15 @@ public class S3AInputStream extends FSInputStream implements CanSetReadahead,
*/
@Retries.OnceTranslated
private void onReadFailure(IOException ioe, boolean forceAbort) {
GetObjectResponse objectResponse = wrappedStream == null ? null : wrappedStream.response();
if (LOG.isDebugEnabled()) {
LOG.debug("Got exception while trying to read from stream {}, " +
"client: {} object: {}, trying to recover: ",
uri, client, object, ioe);
uri, client, objectResponse, ioe);
} else {
LOG.info("Got exception while trying to read from stream {}, " +
"client: {} object: {}, trying to recover: " + ioe,
uri, client, object);
uri, client, objectResponse);
}
streamStatistics.readException();
closeStream("failure recovery", forceAbort, false);
@ -672,7 +663,6 @@ public class S3AInputStream extends FSInputStream implements CanSetReadahead,
CompletableFuture<Boolean> operation;
SDKStreamDrainer drainer = new SDKStreamDrainer(
uri,
object,
wrappedStream,
shouldAbort,
(int) remaining,
@ -694,7 +684,6 @@ public class S3AInputStream extends FSInputStream implements CanSetReadahead,
// either the stream is closed in the blocking call or the async call is
// submitted with its own copy of the references
wrappedStream = null;
object = null;
return operation;
}
@ -910,23 +899,19 @@ public class S3AInputStream extends FSInputStream implements CanSetReadahead,
private void readCombinedRangeAndUpdateChildren(CombinedFileRange combinedFileRange,
IntFunction<ByteBuffer> allocate) {
LOG.debug("Start reading combined range {} from path {} ", combinedFileRange, pathStr);
// This reference must be kept till all buffers are populated as this is a
// finalizable object which closes the internal stream when gc triggers.
S3Object objectRange = null;
S3ObjectInputStream objectContent = null;
ResponseInputStream<GetObjectResponse> rangeContent = null;
try {
objectRange = getS3ObjectAndValidateNotNull("readCombinedFileRange",
rangeContent = getS3ObjectInputStream("readCombinedFileRange",
combinedFileRange.getOffset(),
combinedFileRange.getLength());
objectContent = objectRange.getObjectContent();
populateChildBuffers(combinedFileRange, objectContent, allocate);
populateChildBuffers(combinedFileRange, rangeContent, allocate);
} catch (Exception ex) {
LOG.debug("Exception while reading a range {} from path {} ", combinedFileRange, pathStr, ex);
for(FileRange child : combinedFileRange.getUnderlying()) {
child.getData().completeExceptionally(ex);
}
} finally {
IOUtils.cleanupWithLogger(LOG, objectRange, objectContent);
IOUtils.cleanupWithLogger(LOG, rangeContent);
}
LOG.debug("Finished reading range {} from path {} ", combinedFileRange, pathStr);
}
@ -939,7 +924,7 @@ public class S3AInputStream extends FSInputStream implements CanSetReadahead,
* @throws IOException any IOE.
*/
private void populateChildBuffers(CombinedFileRange combinedFileRange,
S3ObjectInputStream objectContent,
InputStream objectContent,
IntFunction<ByteBuffer> allocate) throws IOException {
// If the combined file range just contains a single child
// range, we only have to fill that one child buffer else
@ -971,7 +956,7 @@ public class S3AInputStream extends FSInputStream implements CanSetReadahead,
* @param drainQuantity how many bytes to drain.
* @throws IOException any IOE.
*/
private void drainUnnecessaryData(S3ObjectInputStream objectContent, long drainQuantity)
private void drainUnnecessaryData(InputStream objectContent, long drainQuantity)
throws IOException {
int drainBytes = 0;
int readCount;
@ -1013,28 +998,24 @@ public class S3AInputStream extends FSInputStream implements CanSetReadahead,
*/
private void readSingleRange(FileRange range, ByteBuffer buffer) {
LOG.debug("Start reading range {} from path {} ", range, pathStr);
// This reference must be kept till all buffers are populated as this is a
// finalizable object which closes the internal stream when gc triggers.
S3Object objectRange = null;
S3ObjectInputStream objectContent = null;
ResponseInputStream<GetObjectResponse> objectRange = null;
try {
long position = range.getOffset();
int length = range.getLength();
objectRange = getS3ObjectAndValidateNotNull("readSingleRange", position, length);
objectContent = objectRange.getObjectContent();
populateBuffer(length, buffer, objectContent);
objectRange = getS3ObjectInputStream("readSingleRange", position, length);
populateBuffer(length, buffer, objectRange);
range.getData().complete(buffer);
} catch (Exception ex) {
LOG.warn("Exception while reading a range {} from path {} ", range, pathStr, ex);
range.getData().completeExceptionally(ex);
} finally {
IOUtils.cleanupWithLogger(LOG, objectRange, objectContent);
IOUtils.cleanupWithLogger(LOG, objectRange);
}
LOG.debug("Finished reading range {} from path {} ", range, pathStr);
}
/**
* Get the s3 object for S3 server for a specified range.
* Get the S3 object input stream from the S3 server for a specified range.
* Also checks if the vectored io operation has been stopped before and after
* the http get request such that we don't waste time populating the buffers.
* @param operationName name of the operation for which get object on S3 is called.
@ -1043,15 +1024,11 @@ public class S3AInputStream extends FSInputStream implements CanSetReadahead,
* @return result s3 object.
* @throws IOException exception if any.
*/
private S3Object getS3ObjectAndValidateNotNull(final String operationName,
final long position,
final int length) throws IOException {
private ResponseInputStream<GetObjectResponse> getS3ObjectInputStream(
final String operationName, final long position, final int length) throws IOException {
checkIfVectoredIOStopped();
S3Object objectRange = getS3Object(operationName, position, length);
if (objectRange.getObjectContent() == null) {
throw new PathIOException(uri,
"Null IO stream received during " + operationName);
}
ResponseInputStream<GetObjectResponse> objectRange =
getS3Object(operationName, position, length);
checkIfVectoredIOStopped();
return objectRange;
}
@ -1066,7 +1043,7 @@ public class S3AInputStream extends FSInputStream implements CanSetReadahead,
*/
private void populateBuffer(int length,
ByteBuffer buffer,
S3ObjectInputStream objectContent) throws IOException {
InputStream objectContent) throws IOException {
if (buffer.isDirect()) {
VectoredReadUtils.readInDirectBuffer(length, buffer,
@ -1091,7 +1068,7 @@ public class S3AInputStream extends FSInputStream implements CanSetReadahead,
* @param length number of bytes to fill in dest.
* @throws IOException any IOE.
*/
private void readByteArray(S3ObjectInputStream objectContent,
private void readByteArray(InputStream objectContent,
byte[] dest,
int offset,
int length) throws IOException {
@ -1118,13 +1095,16 @@ public class S3AInputStream extends FSInputStream implements CanSetReadahead,
* @return S3Object result s3 object.
* @throws IOException exception if any.
*/
private S3Object getS3Object(String operationName, long position,
int length) throws IOException {
final GetObjectRequest request = client.newGetRequest(key)
.withRange(position, position + length - 1);
changeTracker.maybeApplyConstraint(request);
private ResponseInputStream<GetObjectResponse> getS3Object(String operationName,
long position,
int length)
throws IOException {
final GetObjectRequest request = client.newGetRequestBuilder(key)
.range(S3AUtils.formatRange(position, position + length - 1))
.applyMutation(changeTracker::maybeApplyConstraint)
.build();
DurationTracker tracker = streamStatistics.initiateGetRequest();
S3Object objectRange;
ResponseInputStream<GetObjectResponse> objectRange;
Invoker invoker = context.getReadInvoker();
try {
objectRange = invoker.retry(operationName, pathStr, true,
@ -1139,7 +1119,7 @@ public class S3AInputStream extends FSInputStream implements CanSetReadahead,
} finally {
tracker.close();
}
changeTracker.processResponse(objectRange, operationName,
changeTracker.processResponse(objectRange.response(), operationName,
position);
return objectRange;
}
@ -1293,11 +1273,11 @@ public class S3AInputStream extends FSInputStream implements CanSetReadahead,
public interface InputStreamCallbacks extends Closeable {
/**
* Create a GET request.
* Create a GET request builder.
* @param key object key
* @return the request
* @return the request builder
*/
GetObjectRequest newGetRequest(String key);
GetObjectRequest.Builder newGetRequestBuilder(String key);
/**
* Execute the request.
@ -1305,7 +1285,7 @@ public class S3AInputStream extends FSInputStream implements CanSetReadahead,
* @return the response
*/
@Retries.OnceRaw
S3Object getObject(GetObjectRequest request);
ResponseInputStream<GetObjectResponse> getObject(GetObjectRequest request);
/**
* Submit some asynchronous work, for example, draining a stream.

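The stream rework above replaces the v1 S3Object / S3ObjectInputStream pair with a single ResponseInputStream&lt;GetObjectResponse&gt;, whose response() method exposes the object metadata that the change tracker inspects. A minimal sketch of the ranged GET that reopen() issues; the bucket, key and byte range are placeholders, and the real code builds the range string via S3AUtils.formatRange and applies the change-tracker constraint.

import java.io.IOException;
import software.amazon.awssdk.core.ResponseInputStream;
import software.amazon.awssdk.regions.Region;
import software.amazon.awssdk.services.s3.S3Client;
import software.amazon.awssdk.services.s3.model.GetObjectRequest;
import software.amazon.awssdk.services.s3.model.GetObjectResponse;

public final class RangedGetSketch {
  public static void main(String[] args) throws IOException {
    try (S3Client s3 = S3Client.builder().region(Region.US_EAST_1).build()) {
      GetObjectRequest request = GetObjectRequest.builder()
          .bucket("example-bucket")   // placeholder
          .key("example-key")         // placeholder
          .range("bytes=0-1023")      // HTTP Range header syntax: first KiB
          .build();
      // one object now carries both the byte stream and the response metadata
      try (ResponseInputStream<GetObjectResponse> in = s3.getObject(request)) {
        GetObjectResponse response = in.response();
        System.out.println("etag=" + response.eTag()
            + " contentLength=" + response.contentLength());
        byte[] buffer = new byte[1024];
        long total = 0;
        for (int read = in.read(buffer); read != -1; read = in.read(buffer)) {
          total += read;
        }
        System.out.println("read " + total + " bytes");
      }
    }
  }
}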
View File

@ -30,7 +30,8 @@ import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.TimeUnit;
import com.amazonaws.AmazonClientException;
import software.amazon.awssdk.core.exception.SdkException;
import org.apache.hadoop.util.Preconditions;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@ -42,7 +43,7 @@ import org.apache.hadoop.fs.s3a.auth.NoAuthWithAWSException;
import org.apache.hadoop.io.retry.RetryPolicies;
import org.apache.hadoop.io.retry.RetryPolicy;
import org.apache.hadoop.net.ConnectTimeoutException;
import org.apache.hadoop.util.Preconditions;
import static org.apache.hadoop.io.retry.RetryPolicies.*;
@ -68,9 +69,9 @@ import static org.apache.hadoop.fs.s3a.Constants.*;
*
* The retry policy is all built around that of the normal IO exceptions,
* particularly those extracted from
* {@link S3AUtils#translateException(String, Path, AmazonClientException)}.
* {@link S3AUtils#translateException(String, Path, SdkException)}.
* Because the {@link #shouldRetry(Exception, int, int, boolean)} method
* does this translation if an {@code AmazonClientException} is processed,
* does this translation if an {@code SdkException} is processed,
* the policy defined for the IOEs also applies to the original exceptions.
*
* Put differently: this retry policy aims to work for handlers of the
@ -242,11 +243,10 @@ public class S3ARetryPolicy implements RetryPolicy {
boolean idempotent) throws Exception {
Preconditions.checkArgument(exception != null, "Null exception");
Exception ex = exception;
if (exception instanceof AmazonClientException) {
// uprate the amazon client exception for the purpose of exception
if (exception instanceof SdkException) {
// update the sdk exception for the purpose of exception
// processing.
ex = S3AUtils.translateException("", "",
(AmazonClientException) exception);
ex = S3AUtils.translateException("", "", (SdkException) exception);
}
return retryPolicy.shouldRetry(ex, retries, failovers, idempotent);
}

View File

@ -18,19 +18,12 @@
package org.apache.hadoop.fs.s3a;
import com.amazonaws.AbortedException;
import com.amazonaws.AmazonClientException;
import com.amazonaws.AmazonServiceException;
import com.amazonaws.ClientConfiguration;
import com.amazonaws.Protocol;
import com.amazonaws.SdkBaseException;
import com.amazonaws.auth.AWSCredentialsProvider;
import com.amazonaws.auth.EnvironmentVariableCredentialsProvider;
import com.amazonaws.retry.RetryUtils;
import com.amazonaws.services.s3.model.AmazonS3Exception;
import com.amazonaws.services.s3.model.MultiObjectDeleteException;
import com.amazonaws.services.s3.model.S3ObjectSummary;
import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting;
import software.amazon.awssdk.awscore.exception.AwsServiceException;
import software.amazon.awssdk.core.exception.AbortedException;
import software.amazon.awssdk.core.exception.SdkException;
import software.amazon.awssdk.core.retry.RetryUtils;
import software.amazon.awssdk.services.s3.model.S3Exception;
import software.amazon.awssdk.services.s3.model.S3Object;
import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions;
import org.apache.commons.lang3.StringUtils;
@ -47,13 +40,11 @@ import org.apache.hadoop.util.functional.RemoteIterators;
import org.apache.hadoop.fs.s3a.audit.AuditFailureException;
import org.apache.hadoop.fs.s3a.audit.AuditIntegration;
import org.apache.hadoop.fs.s3a.auth.delegation.EncryptionSecrets;
import org.apache.hadoop.fs.s3a.auth.IAMInstanceCredentialsProvider;
import org.apache.hadoop.fs.s3a.impl.NetworkBinding;
import org.apache.hadoop.fs.s3a.impl.V2Migration;
import org.apache.hadoop.fs.s3a.impl.MultiObjectDeleteException;
import org.apache.hadoop.fs.s3native.S3xLoginHelper;
import org.apache.hadoop.net.ConnectTimeoutException;
import org.apache.hadoop.security.ProviderUtils;
import org.apache.hadoop.util.VersionInfo;
import org.apache.hadoop.thirdparty.com.google.common.collect.Lists;
import org.slf4j.Logger;
@ -74,23 +65,18 @@ import java.net.SocketTimeoutException;
import java.net.URI;
import java.nio.file.AccessDeniedException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Date;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.concurrent.CompletionException;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeUnit;
import static org.apache.commons.lang3.StringUtils.isEmpty;
import static org.apache.hadoop.fs.s3a.Constants.*;
import static org.apache.hadoop.fs.s3a.impl.ErrorTranslation.isUnknownBucket;
import static org.apache.hadoop.fs.s3a.impl.InternalConstants.CSE_PADDING_LENGTH;
import static org.apache.hadoop.fs.s3a.impl.MultiObjectDeleteSupport.translateDeleteException;
import static org.apache.hadoop.fs.s3a.impl.InternalConstants.*;
import static org.apache.hadoop.io.IOUtils.cleanupWithLogger;
import static org.apache.hadoop.util.functional.RemoteIterators.filteringRemoteIterator;
@ -105,10 +91,7 @@ public final class S3AUtils {
static final String CONSTRUCTOR_EXCEPTION = "constructor exception";
static final String INSTANTIATION_EXCEPTION
= "instantiation exception";
static final String NOT_AWS_PROVIDER =
"does not implement AWSCredentialsProvider";
static final String ABSTRACT_PROVIDER =
"is abstract and therefore cannot be created";
static final String ENDPOINT_KEY = "Endpoint";
/** Filesystem is closed; kept here to keep the errors close. */
@ -143,21 +126,13 @@ public final class S3AUtils {
private static final String BUCKET_PATTERN = FS_S3A_BUCKET_PREFIX + "%s.%s";
/**
* Error message when the AWS provider list built up contains a forbidden
* entry.
*/
@VisibleForTesting
public static final String E_FORBIDDEN_AWS_PROVIDER
= "AWS provider class cannot be used";
private S3AUtils() {
}
/**
* Translate an exception raised in an operation into an IOException.
* The specific type of IOException depends on the class of
* {@link AmazonClientException} passed in, and any status codes included
* {@link SdkException} passed in, and any status codes included
* in the operation. That is: HTTP error codes are examined and can be
* used to build a more specific response.
*
@ -170,14 +145,14 @@ public final class S3AUtils {
*/
public static IOException translateException(String operation,
Path path,
AmazonClientException exception) {
SdkException exception) {
return translateException(operation, path.toString(), exception);
}
/**
* Translate an exception raised in an operation into an IOException.
* The specific type of IOException depends on the class of
* {@link AmazonClientException} passed in, and any status codes included
* {@link SdkException} passed in, and any status codes included
* in the operation. That is: HTTP error codes are examined and can be
* used to build a more specific response.
* @param operation operation
@ -188,12 +163,12 @@ public final class S3AUtils {
@SuppressWarnings("ThrowableInstanceNeverThrown")
public static IOException translateException(@Nullable String operation,
String path,
SdkBaseException exception) {
SdkException exception) {
String message = String.format("%s%s: %s",
operation,
StringUtils.isNotEmpty(path)? (" on " + path) : "",
exception);
if (!(exception instanceof AmazonServiceException)) {
if (!(exception instanceof AwsServiceException)) {
Exception innerCause = containsInterruptedException(exception);
if (innerCause != null) {
// interrupted IO, or a socket exception underneath that class
@ -217,45 +192,44 @@ public final class S3AUtils {
return new AWSClientIOException(message, exception);
} else {
IOException ioe;
AmazonServiceException ase = (AmazonServiceException) exception;
AwsServiceException ase = (AwsServiceException) exception;
// this exception is non-null if the service exception is an s3 one
AmazonS3Exception s3Exception = ase instanceof AmazonS3Exception
? (AmazonS3Exception) ase
S3Exception s3Exception = ase instanceof S3Exception
? (S3Exception) ase
: null;
int status = ase.getStatusCode();
message = message + ":" + ase.getErrorCode();
int status = ase.statusCode();
if (ase.awsErrorDetails() != null) {
message = message + ":" + ase.awsErrorDetails().errorCode();
}
switch (status) {
case 301:
case 307:
case SC_301_MOVED_PERMANENTLY:
case SC_307_TEMPORARY_REDIRECT:
if (s3Exception != null) {
if (s3Exception.getAdditionalDetails() != null &&
s3Exception.getAdditionalDetails().containsKey(ENDPOINT_KEY)) {
message = String.format("Received permanent redirect response to "
+ "endpoint %s. This likely indicates that the S3 endpoint "
+ "configured in %s does not match the AWS region containing "
+ "the bucket.",
s3Exception.getAdditionalDetails().get(ENDPOINT_KEY), ENDPOINT);
}
message = String.format("Received permanent redirect response to "
+ "region %s. This likely indicates that the S3 region "
+ "configured in %s does not match the AWS region containing " + "the bucket.",
s3Exception.awsErrorDetails().sdkHttpResponse().headers().get(BUCKET_REGION_HEADER),
AWS_REGION);
ioe = new AWSRedirectException(message, s3Exception);
} else {
ioe = new AWSRedirectException(message, ase);
}
break;
case 400:
case SC_400_BAD_REQUEST:
ioe = new AWSBadRequestException(message, ase);
break;
// permissions
case 401:
case 403:
case SC_401_UNAUTHORIZED:
case SC_403_FORBIDDEN:
ioe = new AccessDeniedException(path, null, message);
ioe.initCause(ase);
break;
// the object isn't there
case 404:
case SC_404_NOT_FOUND:
if (isUnknownBucket(ase)) {
// this is a missing bucket
ioe = new UnknownStoreException(path, message, ase);
@ -268,20 +242,20 @@ public final class S3AUtils {
// this also surfaces sometimes and is considered to
// be ~ a not found exception.
case 410:
case SC_410_GONE:
ioe = new FileNotFoundException(message);
ioe.initCause(ase);
break;
// method not allowed; seen on S3 Select.
// treated as a bad request
case 405:
case SC_405_METHOD_NOT_ALLOWED:
ioe = new AWSBadRequestException(message, s3Exception);
break;
// out of range. This may happen if an object is overwritten with
// a shorter one while it is being read.
case 416:
case SC_416_RANGE_NOT_SATISFIABLE:
ioe = new EOFException(message);
ioe.initCause(ase);
break;
@ -289,26 +263,26 @@ public final class S3AUtils {
// this has surfaced as a "no response from server" message.
// so rare we haven't replicated it.
// Treating as an idempotent proxy error.
case 443:
case 444:
case SC_443_NO_RESPONSE:
case SC_444_NO_RESPONSE:
ioe = new AWSNoResponseException(message, ase);
break;
// throttling
case 503:
case SC_503_SERVICE_UNAVAILABLE:
ioe = new AWSServiceThrottledException(message, ase);
break;
// internal error
case 500:
case SC_500_INTERNAL_SERVER_ERROR:
ioe = new AWSStatus500Exception(message, ase);
break;
case 200:
case SC_200_OK:
if (exception instanceof MultiObjectDeleteException) {
// failure during a bulk delete
return translateDeleteException(message,
(MultiObjectDeleteException) exception);
return ((MultiObjectDeleteException) exception)
.translateException(message);
}
// other 200: FALL THROUGH
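The rewritten switch above keys off AwsServiceException.statusCode() and the awsErrorDetails() error code, with the bare v1 integers replaced by SC_* constants from InternalConstants. A stripped-down standalone sketch of the same mapping idea, covering only a few codes and using plain ints rather than the real constants:

import java.io.EOFException;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.nio.file.AccessDeniedException;
import software.amazon.awssdk.awscore.exception.AwsServiceException;

public final class StatusMappingSketch {

  /** Map a service exception onto the IOException family a caller expects. */
  static IOException toIOException(String path, AwsServiceException e) {
    String message = path + ": " + (e.awsErrorDetails() != null
        ? e.awsErrorDetails().errorCode() : e.getMessage());
    switch (e.statusCode()) {
    case 403:   // permissions
      return (IOException) new AccessDeniedException(path, null, message).initCause(e);
    case 404:   // the object is not there
      return (IOException) new FileNotFoundException(message).initCause(e);
    case 416:   // requested range not satisfiable
      return (IOException) new EOFException(message).initCause(e);
    default:    // everything else stays a generic IOException
      return new IOException(message, e);
    }
  }

  public static void main(String[] args) {
    AwsServiceException denied = AwsServiceException.builder()
        .message("demo").statusCode(403).build();
    System.out.println(toIOException("/example", denied).getClass().getSimpleName());
  }
}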
@ -334,10 +308,35 @@ public final class S3AUtils {
public static IOException extractException(String operation,
String path,
ExecutionException ee) {
return convertExceptionCause(operation, path, ee.getCause());
}
/**
* Extract an exception from a failed future, and convert to an IOE.
* @param operation operation which failed
* @param path path operated on (may be null)
* @param ce completion exception
* @return an IOE which can be thrown
*/
public static IOException extractException(String operation,
String path,
CompletionException ce) {
return convertExceptionCause(operation, path, ce.getCause());
}
/**
* Convert the cause of a concurrent exception to an IOE.
* @param operation operation which failed
* @param path path operated on (may be null)
* @param cause cause of a concurrent exception
* @return an IOE which can be thrown
*/
private static IOException convertExceptionCause(String operation,
String path,
Throwable cause) {
IOException ioe;
Throwable cause = ee.getCause();
if (cause instanceof AmazonClientException) {
ioe = translateException(operation, path, (AmazonClientException) cause);
if (cause instanceof SdkException) {
ioe = translateException(operation, path, (SdkException) cause);
} else if (cause instanceof IOException) {
ioe = (IOException) cause;
} else {
@ -375,7 +374,7 @@ public final class S3AUtils {
* @return an IOE which can be rethrown
*/
private static InterruptedIOException translateInterruptedException(
SdkBaseException exception,
SdkException exception,
final Exception innerCause,
String message) {
InterruptedIOException ioe;
@ -386,6 +385,7 @@ public final class S3AUtils {
if (name.endsWith(".ConnectTimeoutException")
|| name.endsWith(".ConnectionPoolTimeoutException")
|| name.endsWith("$ConnectTimeoutException")) {
// TODO: review in v2
// TCP connection http timeout from the shaded or unshaded filenames
// com.amazonaws.thirdparty.apache.http.conn.ConnectTimeoutException
ioe = new ConnectTimeoutException(message);
@ -409,10 +409,10 @@ public final class S3AUtils {
*/
public static boolean isThrottleException(Exception ex) {
return ex instanceof AWSServiceThrottledException
|| (ex instanceof AmazonServiceException
&& 503 == ((AmazonServiceException)ex).getStatusCode())
|| (ex instanceof SdkBaseException
&& RetryUtils.isThrottlingException((SdkBaseException) ex));
|| (ex instanceof AwsServiceException
&& 503 == ((AwsServiceException)ex).statusCode())
|| (ex instanceof SdkException
&& RetryUtils.isThrottlingException((SdkException) ex));
}
/**
@ -422,7 +422,8 @@ public final class S3AUtils {
* @param ex exception
* @return true if this is believed to be a sign the connection was broken.
*/
public static boolean isMessageTranslatableToEOF(SdkBaseException ex) {
public static boolean isMessageTranslatableToEOF(SdkException ex) {
// TODO: review in v2
return ex.toString().contains(EOF_MESSAGE_IN_XML_PARSER) ||
ex.toString().contains(EOF_READ_DIFFERENT_LENGTH);
}
@ -432,47 +433,26 @@ public final class S3AUtils {
* @param e exception
* @return string details
*/
public static String stringify(AmazonServiceException e) {
public static String stringify(AwsServiceException e) {
StringBuilder builder = new StringBuilder(
String.format("%s: %s error %d: %s; %s%s%n",
e.getErrorType(),
e.getServiceName(),
e.getStatusCode(),
e.getErrorCode(),
e.getErrorMessage(),
(e.isRetryable() ? " (retryable)": "")
String.format("%s error %d: %s; %s%s%n",
e.awsErrorDetails().serviceName(),
e.statusCode(),
e.awsErrorDetails().errorCode(),
e.awsErrorDetails().errorMessage(),
(e.retryable() ? " (retryable)": "")
));
String rawResponseContent = e.getRawResponseContent();
String rawResponseContent = e.awsErrorDetails().rawResponse().asUtf8String();
if (rawResponseContent != null) {
builder.append(rawResponseContent);
}
return builder.toString();
}
/**
* Get low level details of an amazon exception for logging; multi-line.
* @param e exception
* @return string details
*/
public static String stringify(AmazonS3Exception e) {
// get the low level details of an exception,
StringBuilder builder = new StringBuilder(
stringify((AmazonServiceException) e));
Map<String, String> details = e.getAdditionalDetails();
if (details != null) {
builder.append('\n');
for (Map.Entry<String, String> d : details.entrySet()) {
builder.append(d.getKey()).append('=')
.append(d.getValue()).append('\n');
}
}
return builder.toString();
}
/**
* Create a files status instance from a listing.
* @param keyPath path to entry
* @param summary summary from AWS
* @param s3Object s3Object entry
* @param blockSize block size to declare.
* @param owner owner of the file
* @param eTag S3 object eTag or null if unavailable
@ -481,20 +461,20 @@ public final class S3AUtils {
* @return a status entry
*/
public static S3AFileStatus createFileStatus(Path keyPath,
S3ObjectSummary summary,
S3Object s3Object,
long blockSize,
String owner,
String eTag,
String versionId,
boolean isCSEEnabled) {
long size = summary.getSize();
long size = s3Object.size();
// check if cse is enabled; strip out constant padding length.
if (isCSEEnabled && size >= CSE_PADDING_LENGTH) {
size -= CSE_PADDING_LENGTH;
}
return createFileStatus(keyPath,
objectRepresentsDirectory(summary.getKey()),
size, summary.getLastModified(), blockSize, owner, eTag, versionId);
objectRepresentsDirectory(s3Object.key()),
size, Date.from(s3Object.lastModified()), blockSize, owner, eTag, versionId);
}
/**
@ -555,115 +535,8 @@ public final class S3AUtils {
return date.getTime();
}
/**
* The standard AWS provider list for AWS connections.
*/
@SuppressWarnings("deprecation")
public static final List<Class<?>>
STANDARD_AWS_PROVIDERS = Collections.unmodifiableList(
Arrays.asList(
TemporaryAWSCredentialsProvider.class,
SimpleAWSCredentialsProvider.class,
EnvironmentVariableCredentialsProvider.class,
IAMInstanceCredentialsProvider.class));
/**
* Create the AWS credentials from the providers, the URI and
* the key {@link Constants#AWS_CREDENTIALS_PROVIDER} in the configuration.
* @param binding Binding URI -may be null
* @param conf filesystem configuration
* @return a credentials provider list
* @throws IOException Problems loading the providers (including reading
* secrets from credential files).
*/
public static AWSCredentialProviderList createAWSCredentialProviderSet(
@Nullable URI binding,
Configuration conf) throws IOException {
// this will reject any user:secret entries in the URI
S3xLoginHelper.rejectSecretsInURIs(binding);
AWSCredentialProviderList credentials =
buildAWSProviderList(binding,
conf,
AWS_CREDENTIALS_PROVIDER,
STANDARD_AWS_PROVIDERS,
new HashSet<>());
// make sure the logging message strips out any auth details
LOG.debug("For URI {}, using credentials {}",
binding, credentials);
return credentials;
}
/**
* Load list of AWS credential provider/credential provider factory classes.
* @param conf configuration
* @param key key
* @param defaultValue list of default values
* @return the list of classes, possibly empty
* @throws IOException on a failure to load the list.
*/
public static List<Class<?>> loadAWSProviderClasses(Configuration conf,
String key,
Class<?>... defaultValue) throws IOException {
try {
return Arrays.asList(conf.getClasses(key, defaultValue));
} catch (RuntimeException e) {
Throwable c = e.getCause() != null ? e.getCause() : e;
throw new IOException("From option " + key + ' ' + c, c);
}
}
/**
* Load list of AWS credential provider/credential provider factory classes;
* support a forbidden list to prevent loops, mandate full secrets, etc.
* @param binding Binding URI -may be null
* @param conf configuration
* @param key key
* @param forbidden a possibly empty set of forbidden classes.
* @param defaultValues list of default providers.
* @return the list of classes, possibly empty
* @throws IOException on a failure to load the list.
*/
public static AWSCredentialProviderList buildAWSProviderList(
@Nullable final URI binding,
final Configuration conf,
final String key,
final List<Class<?>> defaultValues,
final Set<Class<?>> forbidden) throws IOException {
// build up the base provider
List<Class<?>> awsClasses = loadAWSProviderClasses(conf,
key,
defaultValues.toArray(new Class[defaultValues.size()]));
// and if the list is empty, switch back to the defaults.
// this is to address the issue that configuration.getClasses()
// doesn't return the default if the config value is just whitespace.
if (awsClasses.isEmpty()) {
awsClasses = defaultValues;
}
// iterate through, checking for blacklists and then instantiating
// each provider
AWSCredentialProviderList providers = new AWSCredentialProviderList();
for (Class<?> aClass : awsClasses) {
// List of V1 credential providers that will be migrated with V2 upgrade
if (!Arrays.asList("EnvironmentVariableCredentialsProvider",
"EC2ContainerCredentialsProviderWrapper", "InstanceProfileCredentialsProvider")
.contains(aClass.getSimpleName()) && aClass.getName().contains(AWS_AUTH_CLASS_PREFIX)) {
V2Migration.v1ProviderReferenced(aClass.getName());
}
if (forbidden.contains(aClass)) {
throw new IOException(E_FORBIDDEN_AWS_PROVIDER
+ " in option " + key + ": " + aClass);
}
providers.add(createAWSCredentialProvider(conf,
aClass, binding));
}
return providers;
}
/**
* Create an AWS credential provider from its class by using reflection. The
/**
* Creates an instance of a class using reflection. The
* class must implement one of the following means of construction, which are
* attempted in order:
*
@ -672,92 +545,83 @@ public final class S3AUtils {
* org.apache.hadoop.conf.Configuration</li>
* <li>a public constructor accepting
* org.apache.hadoop.conf.Configuration</li>
* <li>a public static method named getInstance that accepts no
* <li>a public static method named as per methodName, that accepts no
* arguments and returns an instance of
* com.amazonaws.auth.AWSCredentialsProvider, or</li>
* specified type, or</li>
* <li>a public default constructor.</li>
* </ol>
*
* @param instanceClass Class for which instance is to be created
* @param conf configuration
* @param credClass credential class
* @param uri URI of the FS
* @return the instantiated class
* @throws IOException on any instantiation failure.
* @param interfaceImplemented interface that this class implements
* @param methodName name of factory method to be invoked
* @param configKey config key under which this class is specified
* @param <InstanceT> Instance of class
* @return instance of the specified class
* @throws IOException on any problem
*/
private static AWSCredentialsProvider createAWSCredentialProvider(
Configuration conf,
Class<?> credClass,
@Nullable URI uri) throws IOException {
AWSCredentialsProvider credentials = null;
String className = credClass.getName();
if (!AWSCredentialsProvider.class.isAssignableFrom(credClass)) {
throw new IOException("Class " + credClass + " " + NOT_AWS_PROVIDER);
}
if (Modifier.isAbstract(credClass.getModifiers())) {
throw new IOException("Class " + credClass + " " + ABSTRACT_PROVIDER);
}
LOG.debug("Credential provider class is {}", className);
@SuppressWarnings("unchecked")
public static <InstanceT> InstanceT getInstanceFromReflection(Class<?> instanceClass,
Configuration conf, @Nullable URI uri, Class<?> interfaceImplemented, String methodName,
String configKey) throws IOException {
String className = instanceClass.getName();
try {
// new X(uri, conf)
Constructor cons = getConstructor(credClass, URI.class,
Configuration.class);
if (cons != null) {
credentials = (AWSCredentialsProvider)cons.newInstance(uri, conf);
return credentials;
}
// new X(conf)
cons = getConstructor(credClass, Configuration.class);
if (cons != null) {
credentials = (AWSCredentialsProvider)cons.newInstance(conf);
return credentials;
Constructor cons = null;
if (conf != null) {
// new X(uri, conf)
cons = getConstructor(instanceClass, URI.class, Configuration.class);
if (cons != null) {
return (InstanceT) cons.newInstance(uri, conf);
}
// new X(conf)
cons = getConstructor(instanceClass, Configuration.class);
if (cons != null) {
return (InstanceT) cons.newInstance(conf);
}
}
// X.getInstance()
Method factory = getFactoryMethod(credClass, AWSCredentialsProvider.class,
"getInstance");
// X.methodName()
Method factory = getFactoryMethod(instanceClass, interfaceImplemented, methodName);
if (factory != null) {
credentials = (AWSCredentialsProvider)factory.invoke(null);
return credentials;
return (InstanceT) factory.invoke(null);
}
// new X()
cons = getConstructor(credClass);
cons = getConstructor(instanceClass);
if (cons != null) {
credentials = (AWSCredentialsProvider)cons.newInstance();
return credentials;
return (InstanceT) cons.newInstance();
}
// no supported constructor or factory method found
throw new IOException(String.format("%s " + CONSTRUCTOR_EXCEPTION
+ ". A class specified in %s must provide a public constructor "
+ "of a supported signature, or a public factory method named "
+ "getInstance that accepts no arguments.",
className, AWS_CREDENTIALS_PROVIDER));
+ "create that accepts no arguments.", className, configKey));
} catch (InvocationTargetException e) {
Throwable targetException = e.getTargetException();
if (targetException == null) {
targetException = e;
targetException = e;
}
if (targetException instanceof IOException) {
throw (IOException) targetException;
} else if (targetException instanceof SdkBaseException) {
throw translateException("Instantiate " + className, "",
(SdkBaseException) targetException);
} else if (targetException instanceof SdkException) {
throw translateException("Instantiate " + className, "", (SdkException) targetException);
} else {
// supported constructor or factory method found, but the call failed
throw new IOException(className + " " + INSTANTIATION_EXCEPTION
+ ": " + targetException,
throw new IOException(className + " " + INSTANTIATION_EXCEPTION + ": " + targetException,
targetException);
}
} catch (ReflectiveOperationException | IllegalArgumentException e) {
// supported constructor or factory method found, but the call failed
throw new IOException(className + " " + INSTANTIATION_EXCEPTION
+ ": " + e,
e);
throw new IOException(className + " " + INSTANTIATION_EXCEPTION + ": " + e, e);
}
}
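A hedged sketch of how a caller might use this reflection helper to instantiate a V2 credentials provider; providerClass, conf and fsUri are assumptions, and "create" is the conventional no-argument factory method name for V2 providers:

    AwsCredentialsProvider provider = S3AUtils.getInstanceFromReflection(
        providerClass,                 // class configured under fs.s3a.aws.credentials.provider
        conf,
        fsUri,
        AwsCredentialsProvider.class,  // interface the class must implement
        "create",                      // static factory method tried if no suitable constructor exists
        AWS_CREDENTIALS_PROVIDER);     // config key, used in error messages only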
/**
* Set a key if the value is non-empty.
* @param config config to patch
@ -941,13 +805,13 @@ public final class S3AUtils {
/**
* String information about a summary entry for debug messages.
* @param summary summary object
* @param s3Object s3Object entry
* @return string value
*/
public static String stringify(S3ObjectSummary summary) {
StringBuilder builder = new StringBuilder(summary.getKey().length() + 100);
builder.append(summary.getKey()).append(' ');
builder.append("size=").append(summary.getSize());
public static String stringify(S3Object s3Object) {
StringBuilder builder = new StringBuilder(s3Object.key().length() + 100);
builder.append(s3Object.key()).append(' ');
builder.append("size=").append(s3Object.size());
return builder.toString();
}
@ -1215,213 +1079,6 @@ public final class S3AUtils {
}
}
/**
* Create a new AWS {@code ClientConfiguration}.
* All clients to AWS services <i>MUST</i> use this for consistent setup
* of connectivity, UA, proxy settings.
* @param conf The Hadoop configuration
* @param bucket Optional bucket to use to look up per-bucket proxy secrets
* @return new AWS client configuration
* @throws IOException problem creating AWS client configuration
*
* @deprecated use {@link #createAwsConf(Configuration, String, String)}
*/
@Deprecated
public static ClientConfiguration createAwsConf(Configuration conf,
String bucket)
throws IOException {
return createAwsConf(conf, bucket, null);
}
/**
* Create a new AWS {@code ClientConfiguration}. All clients to AWS services
* <i>MUST</i> use this or the equivalents for the specific service for
* consistent setup of connectivity, UA, proxy settings.
*
* @param conf The Hadoop configuration
* @param bucket Optional bucket to use to look up per-bucket proxy secrets
* @param awsServiceIdentifier a string representing the AWS service (S3,
* etc) for which the ClientConfiguration is being created.
* @return new AWS client configuration
* @throws IOException problem creating AWS client configuration
*/
public static ClientConfiguration createAwsConf(Configuration conf,
String bucket, String awsServiceIdentifier)
throws IOException {
final ClientConfiguration awsConf = new ClientConfiguration();
initConnectionSettings(conf, awsConf);
initProxySupport(conf, bucket, awsConf);
initUserAgent(conf, awsConf);
if (StringUtils.isNotEmpty(awsServiceIdentifier)) {
String configKey = null;
switch (awsServiceIdentifier) {
case AWS_SERVICE_IDENTIFIER_S3:
configKey = SIGNING_ALGORITHM_S3;
break;
case AWS_SERVICE_IDENTIFIER_STS:
configKey = SIGNING_ALGORITHM_STS;
break;
default:
// Nothing to do. The original signer override is already setup
}
if (configKey != null) {
String signerOverride = conf.getTrimmed(configKey, "");
if (!signerOverride.isEmpty()) {
LOG.debug("Signer override for {}} = {}", awsServiceIdentifier,
signerOverride);
awsConf.setSignerOverride(signerOverride);
}
}
}
return awsConf;
}
/**
* Initializes all AWS SDK settings related to connection management.
*
* @param conf Hadoop configuration
* @param awsConf AWS SDK configuration
*
* @throws IOException if there was an error initializing the protocol
* settings
*/
public static void initConnectionSettings(Configuration conf,
ClientConfiguration awsConf) throws IOException {
awsConf.setMaxConnections(intOption(conf, MAXIMUM_CONNECTIONS,
DEFAULT_MAXIMUM_CONNECTIONS, 1));
initProtocolSettings(conf, awsConf);
awsConf.setMaxErrorRetry(intOption(conf, MAX_ERROR_RETRIES,
DEFAULT_MAX_ERROR_RETRIES, 0));
awsConf.setConnectionTimeout(intOption(conf, ESTABLISH_TIMEOUT,
DEFAULT_ESTABLISH_TIMEOUT, 0));
awsConf.setSocketTimeout(intOption(conf, SOCKET_TIMEOUT,
DEFAULT_SOCKET_TIMEOUT, 0));
int sockSendBuffer = intOption(conf, SOCKET_SEND_BUFFER,
DEFAULT_SOCKET_SEND_BUFFER, 2048);
int sockRecvBuffer = intOption(conf, SOCKET_RECV_BUFFER,
DEFAULT_SOCKET_RECV_BUFFER, 2048);
long requestTimeoutMillis = conf.getTimeDuration(REQUEST_TIMEOUT,
DEFAULT_REQUEST_TIMEOUT, TimeUnit.SECONDS, TimeUnit.MILLISECONDS);
if (requestTimeoutMillis > Integer.MAX_VALUE) {
LOG.debug("Request timeout is too high({} ms). Setting to {} ms instead",
requestTimeoutMillis, Integer.MAX_VALUE);
requestTimeoutMillis = Integer.MAX_VALUE;
}
awsConf.setRequestTimeout((int) requestTimeoutMillis);
awsConf.setSocketBufferSizeHints(sockSendBuffer, sockRecvBuffer);
String signerOverride = conf.getTrimmed(SIGNING_ALGORITHM, "");
if (!signerOverride.isEmpty()) {
LOG.debug("Signer override = {}", signerOverride);
awsConf.setSignerOverride(signerOverride);
}
}
/**
* Initializes the connection protocol settings when connecting to S3 (e.g.
* either HTTP or HTTPS). If secure connections are enabled, this method
* will load the configured SSL providers.
*
* @param conf Hadoop configuration
* @param awsConf AWS SDK configuration
*
* @throws IOException if there is an error initializing the configured
* {@link javax.net.ssl.SSLSocketFactory}
*/
private static void initProtocolSettings(Configuration conf,
ClientConfiguration awsConf) throws IOException {
boolean secureConnections = conf.getBoolean(SECURE_CONNECTIONS,
DEFAULT_SECURE_CONNECTIONS);
awsConf.setProtocol(secureConnections ? Protocol.HTTPS : Protocol.HTTP);
if (secureConnections) {
NetworkBinding.bindSSLChannelMode(conf, awsConf);
}
}
/**
* Initializes AWS SDK proxy support in the AWS client configuration
* if the S3A settings enable it.
*
* @param conf Hadoop configuration
* @param bucket Optional bucket to use to look up per-bucket proxy secrets
* @param awsConf AWS SDK configuration to update
* @throws IllegalArgumentException if misconfigured
* @throws IOException problem getting username/secret from password source.
*/
public static void initProxySupport(Configuration conf,
String bucket,
ClientConfiguration awsConf) throws IllegalArgumentException,
IOException {
String proxyHost = conf.getTrimmed(PROXY_HOST, "");
int proxyPort = conf.getInt(PROXY_PORT, -1);
if (!proxyHost.isEmpty()) {
awsConf.setProxyHost(proxyHost);
if (proxyPort >= 0) {
awsConf.setProxyPort(proxyPort);
} else {
if (conf.getBoolean(SECURE_CONNECTIONS, DEFAULT_SECURE_CONNECTIONS)) {
LOG.warn("Proxy host set without port. Using HTTPS default 443");
awsConf.setProxyPort(443);
} else {
LOG.warn("Proxy host set without port. Using HTTP default 80");
awsConf.setProxyPort(80);
}
}
final String proxyUsername = lookupPassword(bucket, conf, PROXY_USERNAME,
null, null);
final String proxyPassword = lookupPassword(bucket, conf, PROXY_PASSWORD,
null, null);
if ((proxyUsername == null) != (proxyPassword == null)) {
String msg = "Proxy error: " + PROXY_USERNAME + " or " +
PROXY_PASSWORD + " set without the other.";
LOG.error(msg);
throw new IllegalArgumentException(msg);
}
boolean isProxySecured = conf.getBoolean(PROXY_SECURED, false);
awsConf.setProxyUsername(proxyUsername);
awsConf.setProxyPassword(proxyPassword);
awsConf.setProxyDomain(conf.getTrimmed(PROXY_DOMAIN));
awsConf.setProxyWorkstation(conf.getTrimmed(PROXY_WORKSTATION));
awsConf.setProxyProtocol(isProxySecured ? Protocol.HTTPS : Protocol.HTTP);
if (LOG.isDebugEnabled()) {
LOG.debug("Using proxy server {}://{}:{} as user {} with password {} "
+ "on domain {} as workstation {}",
awsConf.getProxyProtocol(),
awsConf.getProxyHost(),
awsConf.getProxyPort(),
String.valueOf(awsConf.getProxyUsername()),
awsConf.getProxyPassword(), awsConf.getProxyDomain(),
awsConf.getProxyWorkstation());
}
} else if (proxyPort >= 0) {
String msg =
"Proxy error: " + PROXY_PORT + " set without " + PROXY_HOST;
LOG.error(msg);
throw new IllegalArgumentException(msg);
}
}
/**
* Initializes the User-Agent header to send in HTTP requests to AWS
* services. We always include the Hadoop version number. The user also
* may set an optional custom prefix to put in front of the Hadoop version
* number. The AWS SDK internally appends its own information, which seems
* to include the AWS SDK version, OS and JVM version.
*
* @param conf Hadoop configuration
* @param awsConf AWS SDK configuration to update
*/
private static void initUserAgent(Configuration conf,
ClientConfiguration awsConf) {
String userAgent = "Hadoop " + VersionInfo.getVersion();
String userAgentPrefix = conf.getTrimmed(USER_AGENT_PREFIX, "");
if (!userAgentPrefix.isEmpty()) {
userAgent = userAgentPrefix + ", " + userAgent;
}
LOG.debug("Using User-Agent: {}", userAgent);
awsConf.setUserAgentPrefix(userAgent);
}
/**
* Convert the data of an iterator of {@link S3AFileStatus} to
* an array.
@ -1915,4 +1572,15 @@ public final class S3AUtils {
}
};
/**
* Format a byte range for a request header.
* See https://www.rfc-editor.org/rfc/rfc9110.html#section-14.1.2
*
* @param rangeStart the start byte offset
* @param rangeEnd the end byte offset (inclusive)
* @return a formatted byte range
*/
public static String formatRange(long rangeStart, long rangeEnd) {
return String.format("bytes=%d-%d", rangeStart, rangeEnd);
}
}
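As a usage example, a ranged GET of the first kilobyte would build its Range header value as:

    String range = S3AUtils.formatRange(0, 1023);   // "bytes=0-1023"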

View File

@ -24,11 +24,14 @@ import java.net.URI;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.Executor;
import com.amazonaws.auth.AWSCredentialsProvider;
import com.amazonaws.handlers.RequestHandler2;
import com.amazonaws.monitoring.MonitoringListener;
import com.amazonaws.services.s3.AmazonS3;
import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider;
import software.amazon.awssdk.core.interceptor.ExecutionInterceptor;
import software.amazon.awssdk.regions.Region;
import software.amazon.awssdk.services.s3.S3AsyncClient;
import software.amazon.awssdk.services.s3.S3Client;
import software.amazon.awssdk.transfer.s3.S3TransferManager;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
@ -37,7 +40,7 @@ import org.apache.hadoop.fs.s3a.statistics.StatisticsFromAwsSdk;
import static org.apache.hadoop.fs.s3a.Constants.DEFAULT_ENDPOINT;
/**
* Factory for creation of {@link AmazonS3} client instances.
* Factory for creation of {@link S3Client} client instances.
* Important: HBase's HBoss module implements this interface in its
* tests.
* Take care when updating this interface to ensure that a client
@ -49,20 +52,44 @@ import static org.apache.hadoop.fs.s3a.Constants.DEFAULT_ENDPOINT;
*/
@InterfaceAudience.LimitedPrivate("HBoss")
@InterfaceStability.Evolving
@Deprecated
public interface S3ClientFactory {
/**
* Creates a new {@link AmazonS3} client.
* Creates a new {@link S3Client}.
* The client returned supports synchronous operations. For
* asynchronous operations, use
* {@link #createS3AsyncClient(URI, S3ClientCreationParameters)}.
*
* @param uri S3A file system URI
* @param parameters parameter object
* @return S3 client
* @throws IOException IO problem
* @throws IOException on any IO problem
*/
AmazonS3 createS3Client(URI uri,
S3Client createS3Client(URI uri,
S3ClientCreationParameters parameters) throws IOException;
/**
* Creates a new {@link S3AsyncClient}.
* The client returned supports asynchronous operations. For
* synchronous operations, use
* {@link #createS3Client(URI, S3ClientCreationParameters)}.
*
* @param uri S3A file system URI
* @param parameters parameter object
* @return Async S3 client
* @throws IOException on any IO problem
*/
S3AsyncClient createS3AsyncClient(URI uri,
S3ClientCreationParameters parameters) throws IOException;
/**
* Creates a new {@link S3TransferManager}.
*
* @param s3AsyncClient the async client to be used by the TM.
* @return S3 transfer manager
*/
S3TransferManager createS3TransferManager(S3AsyncClient s3AsyncClient);
/**
* Settings for the S3 Client.
* Implemented as a class to pass in so that adding
@ -74,7 +101,7 @@ public interface S3ClientFactory {
/**
* Credentials.
*/
private AWSCredentialsProvider credentialSet;
private AwsCredentialsProvider credentialSet;
/**
* Endpoint.
@ -86,11 +113,6 @@ public interface S3ClientFactory {
*/
private final Map<String, String> headers = new HashMap<>();
/**
* Monitoring listener.
*/
private MonitoringListener monitoringListener;
/**
* RequestMetricCollector metrics...if not-null will be wrapped
* with an {@code AwsStatisticsCollector} and passed to
@ -109,9 +131,9 @@ public interface S3ClientFactory {
private boolean requesterPays;
/**
* Request handlers; used for auditing, X-Ray etc.
*/
private List<RequestHandler2> requestHandlers;
* Execution interceptors; used for auditing, X-Ray etc.
* */
private List<ExecutionInterceptor> executionInterceptors;
/**
* Suffix to UA.
@ -125,37 +147,38 @@ public interface S3ClientFactory {
private URI pathUri;
/**
* List of request handlers to include in the chain
* of request execution in the SDK.
* @return the handler list
* Minimum part size for transfer parts.
*/
public List<RequestHandler2> getRequestHandlers() {
return requestHandlers;
private long minimumPartSize;
/**
* Executor that the transfer manager will use to execute background tasks.
*/
private Executor transferManagerExecutor;
/**
* Region of the S3 bucket.
*/
private Region region;
/**
* List of execution interceptors to include in the chain
* of interceptors in the SDK.
* @return the interceptors list
*/
public List<ExecutionInterceptor> getExecutionInterceptors() {
return executionInterceptors;
}
/**
* List of request handlers.
* @param handlers handler list.
* List of execution interceptors.
* @param interceptors interceptors list.
* @return this object
*/
public S3ClientCreationParameters withRequestHandlers(
@Nullable final List<RequestHandler2> handlers) {
requestHandlers = handlers;
return this;
}
public MonitoringListener getMonitoringListener() {
return monitoringListener;
}
/**
* listener for AWS monitoring events.
* @param listener listener
* @return this object
*/
public S3ClientCreationParameters withMonitoringListener(
@Nullable final MonitoringListener listener) {
monitoringListener = listener;
public S3ClientCreationParameters withExecutionInterceptors(
@Nullable final List<ExecutionInterceptor> interceptors) {
executionInterceptors = interceptors;
return this;
}
@ -191,7 +214,7 @@ public interface S3ClientFactory {
return requesterPays;
}
public AWSCredentialsProvider getCredentialSet() {
public AwsCredentialsProvider getCredentialSet() {
return credentialSet;
}
@ -202,7 +225,7 @@ public interface S3ClientFactory {
*/
public S3ClientCreationParameters withCredentialSet(
final AWSCredentialsProvider value) {
final AwsCredentialsProvider value) {
credentialSet = value;
return this;
}
@ -294,5 +317,63 @@ public interface S3ClientFactory {
pathUri = value;
return this;
}
/**
* Get the minimum part size for transfer parts.
* @return part size
*/
public long getMinimumPartSize() {
return minimumPartSize;
}
/**
* Set the minimum part size for transfer parts.
* @param value new value
* @return the builder
*/
public S3ClientCreationParameters withMinimumPartSize(
final long value) {
minimumPartSize = value;
return this;
}
/**
* Get the executor that the transfer manager will use to execute background tasks.
 * @return the transfer manager executor
*/
public Executor getTransferManagerExecutor() {
return transferManagerExecutor;
}
/**
* Set the executor that the transfer manager will use to execute background tasks.
* @param value new value
* @return the builder
*/
public S3ClientCreationParameters withTransferManagerExecutor(
final Executor value) {
transferManagerExecutor = value;
return this;
}
/**
* Set region.
*
* @param value new value
* @return the builder
*/
public S3ClientCreationParameters withRegion(
final Region value) {
region = value;
return this;
}
/**
* Get the region.
 * @return the region
*/
public Region getRegion() {
return region;
}
}
}
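A hedged sketch of wiring the parameter object and factory together with the SDK V2 builders; the factory, credentialsProvider, boundedThreadPool and fsUri names are assumptions for illustration:

    S3ClientFactory.S3ClientCreationParameters parameters =
        new S3ClientFactory.S3ClientCreationParameters()
            .withCredentialSet(credentialsProvider)        // an AwsCredentialsProvider
            .withRegion(Region.EU_WEST_1)
            .withMinimumPartSize(5 * 1024 * 1024)
            .withTransferManagerExecutor(boundedThreadPool);

    S3Client s3 = factory.createS3Client(fsUri, parameters);
    S3AsyncClient asyncS3 = factory.createS3AsyncClient(fsUri, parameters);
    S3TransferManager transfers = factory.createS3TransferManager(asyncS3);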

View File

@ -18,8 +18,8 @@
package org.apache.hadoop.fs.s3a;
import com.amazonaws.services.s3.model.ListObjectsRequest;
import com.amazonaws.services.s3.model.ListObjectsV2Request;
import software.amazon.awssdk.services.s3.model.ListObjectsRequest;
import software.amazon.awssdk.services.s3.model.ListObjectsV2Request;
/**
* API version-independent container for S3 List requests.
@ -78,14 +78,14 @@ public final class S3ListRequest {
public String toString() {
if (isV1()) {
return String.format(DESCRIPTION,
v1Request.getBucketName(), v1Request.getPrefix(),
v1Request.getDelimiter(), v1Request.getMaxKeys(),
v1Request.isRequesterPays());
v1Request.bucket(), v1Request.prefix(),
v1Request.delimiter(), v1Request.maxKeys(),
v1Request.requestPayerAsString());
} else {
return String.format(DESCRIPTION,
v2Request.getBucketName(), v2Request.getPrefix(),
v2Request.getDelimiter(), v2Request.getMaxKeys(),
v2Request.isRequesterPays());
v2Request.bucket(), v2Request.prefix(),
v2Request.delimiter(), v2Request.maxKeys(),
v2Request.requestPayerAsString());
}
}
}

View File

@ -22,19 +22,21 @@ import java.util.Collection;
import java.util.List;
import java.util.stream.Collectors;
import com.amazonaws.services.s3.model.ListObjectsV2Result;
import com.amazonaws.services.s3.model.ObjectListing;
import com.amazonaws.services.s3.model.S3ObjectSummary;
import software.amazon.awssdk.services.s3.model.CommonPrefix;
import software.amazon.awssdk.services.s3.model.ListObjectsResponse;
import software.amazon.awssdk.services.s3.model.ListObjectsV2Response;
import software.amazon.awssdk.services.s3.model.S3Object;
import org.slf4j.Logger;
/**
* API version-independent container for S3 List responses.
*/
public class S3ListResult {
private ObjectListing v1Result;
private ListObjectsV2Result v2Result;
private ListObjectsResponse v1Result;
private ListObjectsV2Response v2Result;
protected S3ListResult(ObjectListing v1, ListObjectsV2Result v2) {
protected S3ListResult(ListObjectsResponse v1, ListObjectsV2Response v2) {
v1Result = v1;
v2Result = v2;
}
@ -44,7 +46,7 @@ public class S3ListResult {
* @param result v1 result
* @return new list result container
*/
public static S3ListResult v1(ObjectListing result) {
public static S3ListResult v1(ListObjectsResponse result) {
return new S3ListResult(result, null);
}
@ -53,7 +55,7 @@ public class S3ListResult {
* @param result v2 result
* @return new list result container
*/
public static S3ListResult v2(ListObjectsV2Result result) {
public static S3ListResult v2(ListObjectsV2Response result) {
return new S3ListResult(null, result);
}
@ -65,19 +67,19 @@ public class S3ListResult {
return v1Result != null;
}
public ObjectListing getV1() {
public ListObjectsResponse getV1() {
return v1Result;
}
public ListObjectsV2Result getV2() {
public ListObjectsV2Response getV2() {
return v2Result;
}
public List<S3ObjectSummary> getObjectSummaries() {
public List<S3Object> getS3Objects() {
if (isV1()) {
return v1Result.getObjectSummaries();
return v1Result.contents();
} else {
return v2Result.getObjectSummaries();
return v2Result.contents();
}
}
@ -89,21 +91,21 @@ public class S3ListResult {
}
}
public List<String> getCommonPrefixes() {
public List<CommonPrefix> getCommonPrefixes() {
if (isV1()) {
return v1Result.getCommonPrefixes();
return v1Result.commonPrefixes();
} else {
return v2Result.getCommonPrefixes();
return v2Result.commonPrefixes();
}
}
/**
* Get the list of keys in the object summary.
* Get the list of keys in the list result.
* @return a possibly empty list
*/
private List<String> objectSummaryKeys() {
return getObjectSummaries().stream()
.map(S3ObjectSummary::getKey)
private List<String> objectKeys() {
return getS3Objects().stream()
.map(S3Object::key)
.collect(Collectors.toList());
}
@ -112,9 +114,8 @@ public class S3ListResult {
* @return true if the result is non-empty
*/
public boolean hasPrefixesOrObjects() {
return !(getCommonPrefixes()).isEmpty()
|| !getObjectSummaries().isEmpty();
|| !getS3Objects().isEmpty();
}
/**
@ -128,7 +129,7 @@ public class S3ListResult {
// no children.
// So the listing must contain the marker entry only as an object,
// and prefixes is null
List<String> keys = objectSummaryKeys();
List<String> keys = objectKeys();
return keys.size() == 1 && keys.contains(dirKey)
&& getCommonPrefixes().isEmpty();
}
@ -138,15 +139,15 @@ public class S3ListResult {
* @param log log to use
*/
public void logAtDebug(Logger log) {
Collection<String> prefixes = getCommonPrefixes();
Collection<S3ObjectSummary> summaries = getObjectSummaries();
Collection<CommonPrefix> prefixes = getCommonPrefixes();
Collection<S3Object> s3Objects = getS3Objects();
log.debug("Prefix count = {}; object count={}",
prefixes.size(), summaries.size());
for (S3ObjectSummary summary : summaries) {
log.debug("Summary: {} {}", summary.getKey(), summary.getSize());
prefixes.size(), s3Objects.size());
for (S3Object s3Object : s3Objects) {
log.debug("Summary: {} {}", s3Object.key(), s3Object.size());
}
for (String prefix : prefixes) {
log.debug("Prefix: {}", prefix);
for (CommonPrefix prefix : prefixes) {
log.debug("Prefix: {}", prefix.prefix());
}
}
}
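A short sketch of consuming the version-independent result with the V2 model types; listObjectsV2Response is assumed to come from an earlier list call:

    S3ListResult result = S3ListResult.v2(listObjectsV2Response);
    for (S3Object object : result.getS3Objects()) {
      LOG.debug("object {} size={}", object.key(), object.size());
    }
    for (CommonPrefix prefix : result.getCommonPrefixes()) {
      LOG.debug("prefix {}", prefix.prefix());
    }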

View File

@ -18,8 +18,6 @@
package org.apache.hadoop.fs.s3a;
import com.amazonaws.services.s3.transfer.model.CopyResult;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.fs.Path;
@ -74,31 +72,6 @@ public class S3ObjectAttributes {
this.len = len;
}
/**
* Construct from the result of a copy and those parameters
* which aren't included in an AWS SDK response.
* @param path path
* @param copyResult copy result.
* @param serverSideEncryptionAlgorithm current encryption algorithm
* @param serverSideEncryptionKey any server side encryption key?
* @param len object length
*/
public S3ObjectAttributes(
final Path path,
final CopyResult copyResult,
final S3AEncryptionMethods serverSideEncryptionAlgorithm,
final String serverSideEncryptionKey,
final long len) {
this.bucket = copyResult.getDestinationBucketName();
this.key = copyResult.getDestinationKey();
this.path = path;
this.serverSideEncryptionAlgorithm = serverSideEncryptionAlgorithm;
this.serverSideEncryptionKey = serverSideEncryptionKey;
this.eTag = copyResult.getETag();
this.versionId = copyResult.getVersionId();
this.len = len;
}
public String getBucket() {
return bucket;
}

View File

@ -39,6 +39,5 @@ import org.apache.hadoop.fs.s3a.auth.NoAwsCredentialsException;
*/
@InterfaceAudience.Public
@InterfaceStability.Evolving
@SuppressWarnings("deprecation")
public final class SharedInstanceCredentialProvider extends IAMInstanceCredentialsProvider {
}

View File

@ -18,9 +18,9 @@
package org.apache.hadoop.fs.s3a;
import com.amazonaws.auth.AWSCredentials;
import com.amazonaws.auth.AWSCredentialsProvider;
import com.amazonaws.auth.BasicAWSCredentials;
import software.amazon.awssdk.auth.credentials.AwsBasicCredentials;
import software.amazon.awssdk.auth.credentials.AwsCredentials;
import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider;
import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting;
import org.apache.commons.lang3.StringUtils;
@ -42,13 +42,10 @@ import static org.apache.hadoop.fs.s3a.S3AUtils.getAWSAccessKeys;
* property fs.s3a.aws.credentials.provider. Therefore, changing the class name
* would be a backward-incompatible change.
*
* @deprecated This class will be replaced by one that implements AWS SDK V2's AwsCredentialProvider
* as part of upgrading S3A to SDK V2. See HADOOP-18073.
*/
@InterfaceAudience.Public
@InterfaceStability.Stable
@Deprecated
public class SimpleAWSCredentialsProvider implements AWSCredentialsProvider {
public class SimpleAWSCredentialsProvider implements AwsCredentialsProvider {
public static final String NAME
= "org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider";
@ -80,17 +77,14 @@ public class SimpleAWSCredentialsProvider implements AWSCredentialsProvider {
}
@Override
public AWSCredentials getCredentials() {
public AwsCredentials resolveCredentials() {
if (!StringUtils.isEmpty(accessKey) && !StringUtils.isEmpty(secretKey)) {
return new BasicAWSCredentials(accessKey, secretKey);
return AwsBasicCredentials.create(accessKey, secretKey);
}
throw new NoAwsCredentialsException("SimpleAWSCredentialsProvider",
"No AWS credentials in the Hadoop configuration");
}
@Override
public void refresh() {}
@Override
public String toString() {
return getClass().getSimpleName();

View File

@ -538,6 +538,11 @@ public enum Statistic {
StoreStatisticNames.STORE_IO_THROTTLE_RATE,
"Rate of S3 request throttling",
TYPE_QUANTILE),
STORE_REGION_PROBE(
StoreStatisticNames.STORE_REGION_PROBE,
"Store Region Probe",
TYPE_DURATION
),
/*
* Delegation Token Operations.

View File

@ -21,7 +21,7 @@ package org.apache.hadoop.fs.s3a;
import javax.annotation.Nullable;
import java.io.IOException;
import com.amazonaws.auth.AWSCredentials;
import software.amazon.awssdk.auth.credentials.AwsCredentials;
import java.net.URI;
@ -44,12 +44,9 @@ import org.apache.hadoop.fs.s3a.auth.NoAwsCredentialsException;
* This credential provider must not fail in creation because that will
* break a chain of credential providers.
*
* @deprecated This class will be replaced by one that implements AWS SDK V2's AwsCredentialProvider
* as part of upgrading S3A to SDK V2. See HADOOP-18073.
*/
@InterfaceAudience.Public
@InterfaceStability.Stable
@Deprecated
public class TemporaryAWSCredentialsProvider extends AbstractSessionCredentialsProvider {
public static final String NAME
@ -92,7 +89,7 @@ public class TemporaryAWSCredentialsProvider extends AbstractSessionCredentialsP
* @throws NoAwsCredentialsException the credentials are actually empty.
*/
@Override
protected AWSCredentials createCredentials(Configuration config)
protected AwsCredentials createCredentials(Configuration config)
throws IOException {
MarshalledCredentials creds = MarshalledCredentialBinding.fromFileSystem(
getUri(), config);

View File

@ -18,22 +18,22 @@
package org.apache.hadoop.fs.s3a;
import com.amazonaws.services.s3.transfer.Upload;
import software.amazon.awssdk.transfer.s3.model.FileUpload;
/**
* Simple struct that contains information about a S3 upload.
*/
public class UploadInfo {
private final Upload upload;
private final FileUpload fileUpload;
private final long length;
public UploadInfo(Upload upload, long length) {
this.upload = upload;
public UploadInfo(FileUpload upload, long length) {
this.fileUpload = upload;
this.length = length;
}
public Upload getUpload() {
return upload;
public FileUpload getFileUpload() {
return fileUpload;
}
public long getLength() {

View File

@ -19,29 +19,28 @@
package org.apache.hadoop.fs.s3a;
import javax.annotation.Nullable;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.util.List;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.atomic.AtomicInteger;
import com.amazonaws.services.s3.model.AmazonS3Exception;
import com.amazonaws.services.s3.model.CompleteMultipartUploadRequest;
import com.amazonaws.services.s3.model.CompleteMultipartUploadResult;
import com.amazonaws.services.s3.model.InitiateMultipartUploadRequest;
import com.amazonaws.services.s3.model.MultipartUpload;
import com.amazonaws.services.s3.model.ObjectMetadata;
import com.amazonaws.services.s3.model.PartETag;
import com.amazonaws.services.s3.model.PutObjectRequest;
import com.amazonaws.services.s3.model.PutObjectResult;
import com.amazonaws.services.s3.model.SelectObjectContentRequest;
import com.amazonaws.services.s3.model.SelectObjectContentResult;
import com.amazonaws.services.s3.model.UploadPartRequest;
import com.amazonaws.services.s3.model.UploadPartResult;
import software.amazon.awssdk.core.sync.RequestBody;
import software.amazon.awssdk.services.s3.model.CompleteMultipartUploadRequest;
import software.amazon.awssdk.services.s3.model.CompleteMultipartUploadResponse;
import software.amazon.awssdk.services.s3.model.CompletedPart;
import software.amazon.awssdk.services.s3.model.CreateMultipartUploadRequest;
import software.amazon.awssdk.services.s3.model.MultipartUpload;
import software.amazon.awssdk.services.s3.model.PutObjectRequest;
import software.amazon.awssdk.services.s3.model.PutObjectResponse;
import software.amazon.awssdk.services.s3.model.SelectObjectContentRequest;
import software.amazon.awssdk.services.s3.model.SelectObjectContentResponseHandler;
import software.amazon.awssdk.services.s3.model.UploadPartRequest;
import software.amazon.awssdk.services.s3.model.UploadPartResponse;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;
@ -50,6 +49,8 @@ import org.apache.hadoop.fs.PathIOException;
import org.apache.hadoop.fs.s3a.api.RequestFactory;
import org.apache.hadoop.fs.s3a.impl.PutObjectOptions;
import org.apache.hadoop.fs.s3a.impl.StoreContext;
import org.apache.hadoop.fs.s3a.select.SelectEventStreamPublisher;
import org.apache.hadoop.fs.s3a.select.SelectObjectContentHelper;
import org.apache.hadoop.fs.s3a.statistics.S3AStatisticsContext;
import org.apache.hadoop.fs.s3a.select.SelectBinding;
import org.apache.hadoop.fs.statistics.DurationTrackerFactory;
@ -239,48 +240,24 @@ public class WriteOperationHelper implements WriteOperations {
/**
* Create a {@link PutObjectRequest} request against the specific key.
* @param destKey destination key
* @param inputStream source data.
* @param length size, if known. Use -1 for not known
* @param options options for the request
* @param isFile is data to be uploaded a file
* @return the request
*/
@Retries.OnceRaw
public PutObjectRequest createPutObjectRequest(String destKey,
InputStream inputStream,
long length,
final PutObjectOptions options) {
final PutObjectOptions options,
boolean isFile) {
activateAuditSpan();
ObjectMetadata objectMetadata = newObjectMetadata(length);
return getRequestFactory().newPutObjectRequest(
destKey,
objectMetadata,
options,
inputStream);
return getRequestFactory()
.newPutObjectRequestBuilder(destKey, options, length, false)
.build();
}
/**
* Create a {@link PutObjectRequest} request to upload a file.
* @param dest key to PUT to.
* @param sourceFile source file
* @param options options for the request
* @return the request
*/
@Retries.OnceRaw
public PutObjectRequest createPutObjectRequest(
String dest,
File sourceFile,
final PutObjectOptions options) {
activateAuditSpan();
final ObjectMetadata objectMetadata =
newObjectMetadata((int) sourceFile.length());
PutObjectRequest putObjectRequest = getRequestFactory().
newPutObjectRequest(dest,
objectMetadata,
options,
sourceFile);
return putObjectRequest;
}
/**
* Callback on a successful write.
@ -297,17 +274,6 @@ public class WriteOperationHelper implements WriteOperations {
LOG.debug("Write to {} failed", this, ex);
}
/**
* Create a new object metadata instance.
* Any standard metadata headers are added here, for example:
* encryption.
* @param length size, if known. Use -1 for not known
* @return a new metadata instance
*/
public ObjectMetadata newObjectMetadata(long length) {
return getRequestFactory().newObjectMetadata(length);
}
/**
* {@inheritDoc}
*/
@ -320,11 +286,11 @@ public class WriteOperationHelper implements WriteOperations {
try (AuditSpan span = activateAuditSpan()) {
return retry("initiate MultiPartUpload", destKey, true,
() -> {
final InitiateMultipartUploadRequest initiateMPURequest =
getRequestFactory().newMultipartUploadRequest(
final CreateMultipartUploadRequest.Builder initiateMPURequestBuilder =
getRequestFactory().newMultipartUploadRequestBuilder(
destKey, options);
return owner.initiateMultipartUpload(initiateMPURequest)
.getUploadId();
return owner.initiateMultipartUpload(initiateMPURequestBuilder.build())
.uploadId();
});
}
}
@ -345,10 +311,10 @@ public class WriteOperationHelper implements WriteOperations {
* @throws IOException on problems.
*/
@Retries.RetryTranslated
private CompleteMultipartUploadResult finalizeMultipartUpload(
private CompleteMultipartUploadResponse finalizeMultipartUpload(
String destKey,
String uploadId,
List<PartETag> partETags,
List<CompletedPart> partETags,
long length,
PutObjectOptions putOptions,
Retried retrying) throws IOException {
@ -357,18 +323,18 @@ public class WriteOperationHelper implements WriteOperations {
"No upload parts in multipart upload");
}
try (AuditSpan span = activateAuditSpan()) {
CompleteMultipartUploadResult uploadResult;
CompleteMultipartUploadResponse uploadResult;
uploadResult = invoker.retry("Completing multipart upload", destKey,
true,
retrying,
() -> {
final CompleteMultipartUploadRequest request =
getRequestFactory().newCompleteMultipartUploadRequest(
final CompleteMultipartUploadRequest.Builder requestBuilder =
getRequestFactory().newCompleteMultipartUploadRequestBuilder(
destKey, uploadId, partETags);
return writeOperationHelperCallbacks.completeMultipartUpload(request);
return writeOperationHelperCallbacks.completeMultipartUpload(requestBuilder.build());
});
owner.finishedWrite(destKey, length, uploadResult.getETag(),
uploadResult.getVersionId(),
owner.finishedWrite(destKey, length, uploadResult.eTag(),
uploadResult.versionId(),
putOptions);
return uploadResult;
}
@ -391,10 +357,10 @@ public class WriteOperationHelper implements WriteOperations {
* the retry count was exceeded
*/
@Retries.RetryTranslated
public CompleteMultipartUploadResult completeMPUwithRetries(
public CompleteMultipartUploadResponse completeMPUwithRetries(
String destKey,
String uploadId,
List<PartETag> partETags,
List<CompletedPart> partETags,
long length,
AtomicInteger errorCount,
PutObjectOptions putOptions)
@ -452,7 +418,7 @@ public class WriteOperationHelper implements WriteOperations {
@Retries.RetryTranslated
public void abortMultipartUpload(MultipartUpload upload)
throws IOException {
invoker.retry("Aborting multipart commit", upload.getKey(), true,
invoker.retry("Aborting multipart commit", upload.key(), true,
withinAuditSpan(getAuditSpan(),
() -> owner.abortMultipartUpload(upload)));
}
@ -477,7 +443,7 @@ public class WriteOperationHelper implements WriteOperations {
abortMultipartUpload(upload);
count++;
} catch (FileNotFoundException e) {
LOG.debug("Already aborted: {}", upload.getKey(), e);
LOG.debug("Already aborted: {}", upload.key(), e);
}
}
return count;
@ -506,45 +472,31 @@ public class WriteOperationHelper implements WriteOperations {
}
/**
* Create and initialize a part request of a multipart upload.
* Exactly one of: {@code uploadStream} or {@code sourceFile}
* must be specified.
* A subset of the file may be posted, by providing the starting point
* in {@code offset} and a length of block in {@code size} equal to
* or less than the remaining bytes.
* Create and initialize a part request builder of a multipart upload.
* The part number must be less than 10000.
 * Retry policy is once-translated; too much effort.
* @param destKey destination key of ongoing operation
* @param uploadId ID of ongoing upload
* @param partNumber current part number of the upload
* @param size amount of data
* @param uploadStream source of data to upload
* @param sourceFile optional source file.
* @param offset offset in file to start reading.
* @return the request.
* @return the request builder.
* @throws IllegalArgumentException if the parameters are invalid.
* @throws PathIOException if the part number is out of range.
*/
@Override
@Retries.OnceTranslated
public UploadPartRequest newUploadPartRequest(
public UploadPartRequest.Builder newUploadPartRequestBuilder(
String destKey,
String uploadId,
int partNumber,
long size,
InputStream uploadStream,
File sourceFile,
Long offset) throws IOException {
long size) throws IOException {
return once("upload part request", destKey,
withinAuditSpan(getAuditSpan(), () ->
getRequestFactory().newUploadPartRequest(
getRequestFactory().newUploadPartRequestBuilder(
destKey,
uploadId,
partNumber,
size,
uploadStream,
sourceFile,
offset)));
size)));
}
/**
@ -566,18 +518,20 @@ public class WriteOperationHelper implements WriteOperations {
* @param putObjectRequest the request
* @param putOptions put object options
* @param durationTrackerFactory factory for duration tracking
* @param uploadData data to be uploaded
* @param isFile is data to be uploaded a file
*
* @return the upload initiated
* @throws IOException on problems
*/
@Retries.RetryTranslated
public PutObjectResult putObject(PutObjectRequest putObjectRequest,
PutObjectOptions putOptions,
public PutObjectResponse putObject(PutObjectRequest putObjectRequest,
PutObjectOptions putOptions, S3ADataBlocks.BlockUploadData uploadData, boolean isFile,
DurationTrackerFactory durationTrackerFactory)
throws IOException {
return retry("Writing Object",
putObjectRequest.getKey(), true,
withinAuditSpan(getAuditSpan(), () ->
owner.putObjectDirect(putObjectRequest, putOptions, durationTrackerFactory)));
return retry("Writing Object", putObjectRequest.key(), true, withinAuditSpan(getAuditSpan(),
() -> owner.putObjectDirect(putObjectRequest, putOptions, uploadData, isFile,
durationTrackerFactory)));
}
/**
@ -613,10 +567,10 @@ public class WriteOperationHelper implements WriteOperations {
* the retry count was exceeded
*/
@Retries.RetryTranslated
public CompleteMultipartUploadResult commitUpload(
public CompleteMultipartUploadResponse commitUpload(
String destKey,
String uploadId,
List<PartETag> partETags,
List<CompletedPart> partETags,
long length)
throws IOException {
checkNotNull(uploadId);
@ -635,19 +589,21 @@ public class WriteOperationHelper implements WriteOperations {
* Upload part of a multi-partition file.
* @param request request
* @param durationTrackerFactory duration tracker factory for operation
* @param request the upload part request.
* @param body the request body.
* @return the result of the operation.
* @throws IOException on problems
*/
@Retries.RetryTranslated
public UploadPartResult uploadPart(UploadPartRequest request,
public UploadPartResponse uploadPart(UploadPartRequest request, RequestBody body,
final DurationTrackerFactory durationTrackerFactory)
throws IOException {
return retry("upload part #" + request.getPartNumber()
+ " upload ID " + request.getUploadId(),
request.getKey(),
return retry("upload part #" + request.partNumber()
+ " upload ID " + request.uploadId(),
request.key(),
true,
withinAuditSpan(getAuditSpan(),
() -> owner.uploadPart(request, durationTrackerFactory)));
() -> owner.uploadPart(request, body, durationTrackerFactory)));
}
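A hedged sketch of the new upload-part flow with the builder and RequestBody; helper, destKey, uploadId, partNumber, blockFile, blockSize, durationTrackerFactory and completedParts are assumptions standing in for the caller's state:

    UploadPartRequest request = helper.newUploadPartRequestBuilder(
        destKey, uploadId, partNumber, blockSize).build();
    UploadPartResponse response = helper.uploadPart(
        request, RequestBody.fromFile(blockFile), durationTrackerFactory);
    completedParts.add(CompletedPart.builder()
        .partNumber(partNumber)
        .eTag(response.eTag())
        .build());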
/**
@ -659,15 +615,9 @@ public class WriteOperationHelper implements WriteOperations {
return conf;
}
/**
* Create a S3 Select request for the destination path.
* This does not build the query.
* @param path pre-qualified path for query
* @return the request
*/
public SelectObjectContentRequest newSelectRequest(Path path) {
public SelectObjectContentRequest.Builder newSelectRequestBuilder(Path path) {
try (AuditSpan span = getAuditSpan()) {
return getRequestFactory().newSelectRequest(
return getRequestFactory().newSelectRequestBuilder(
storeContext.pathToKey(path));
}
}
@ -676,26 +626,27 @@ public class WriteOperationHelper implements WriteOperations {
* Execute an S3 Select operation.
* On a failure, the request is only logged at debug to avoid the
* select exception being printed.
* @param source source for selection
*
* @param source source for selection
* @param request Select request to issue.
* @param action the action for use in exception creation
* @param action the action for use in exception creation
* @return response
* @throws IOException failure
*/
@Retries.RetryTranslated
public SelectObjectContentResult select(
public SelectEventStreamPublisher select(
final Path source,
final SelectObjectContentRequest request,
final String action)
throws IOException {
// no setting of span here as the select binding is (statically) created
// without any span.
String bucketName = request.getBucketName();
String bucketName = request.bucket();
Preconditions.checkArgument(bucket.equals(bucketName),
"wrong bucket: %s", bucketName);
if (LOG.isDebugEnabled()) {
LOG.debug("Initiating select call {} {}",
source, request.getExpression());
source, request.expression());
LOG.debug(SelectBinding.toString(request));
}
return invoker.retry(
@ -706,8 +657,9 @@ public class WriteOperationHelper implements WriteOperations {
try (DurationInfo ignored =
new DurationInfo(LOG, "S3 Select operation")) {
try {
return writeOperationHelperCallbacks.selectObjectContent(request);
} catch (AmazonS3Exception e) {
return SelectObjectContentHelper.select(
writeOperationHelperCallbacks, source, request, action);
} catch (Throwable e) {
LOG.error("Failure of S3 Select request against {}",
source);
LOG.debug("S3 Select request against {}:\n{}",
@ -756,16 +708,18 @@ public class WriteOperationHelper implements WriteOperations {
/**
* Initiates a select request.
* @param request selectObjectContent request
* @param t selectObjectContent request handler
* @return selectObjectContentResult
*/
SelectObjectContentResult selectObjectContent(SelectObjectContentRequest request);
CompletableFuture<Void> selectObjectContent(SelectObjectContentRequest request,
SelectObjectContentResponseHandler t);
/**
* Initiates a complete multi-part upload request.
* @param request Complete multi-part upload request
* @return completeMultipartUploadResult
*/
CompleteMultipartUploadResult completeMultipartUpload(CompleteMultipartUploadRequest request);
CompleteMultipartUploadResponse completeMultipartUpload(CompleteMultipartUploadRequest request);
}
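These callbacks are expected to delegate straight to the underlying SDK V2 clients; a minimal sketch of an implementation, with the s3Client and s3AsyncClient fields assumed to exist in the owning class:

    WriteOperationHelperCallbacks callbacks = new WriteOperationHelperCallbacks() {
      @Override
      public CompletableFuture<Void> selectObjectContent(
          SelectObjectContentRequest request,
          SelectObjectContentResponseHandler handler) {
        return s3AsyncClient.selectObjectContent(request, handler);
      }
      @Override
      public CompleteMultipartUploadResponse completeMultipartUpload(
          CompleteMultipartUploadRequest request) {
        return s3Client.completeMultipartUpload(request);
      }
    };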

View File

@ -20,29 +20,28 @@ package org.apache.hadoop.fs.s3a;
import javax.annotation.Nullable;
import java.io.Closeable;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.util.List;
import java.util.concurrent.atomic.AtomicInteger;
import com.amazonaws.services.s3.model.CompleteMultipartUploadResult;
import com.amazonaws.services.s3.model.MultipartUpload;
import com.amazonaws.services.s3.model.ObjectMetadata;
import com.amazonaws.services.s3.model.PartETag;
import com.amazonaws.services.s3.model.PutObjectRequest;
import com.amazonaws.services.s3.model.PutObjectResult;
import com.amazonaws.services.s3.model.SelectObjectContentRequest;
import com.amazonaws.services.s3.model.SelectObjectContentResult;
import com.amazonaws.services.s3.model.UploadPartRequest;
import com.amazonaws.services.s3.model.UploadPartResult;
import software.amazon.awssdk.core.sync.RequestBody;
import software.amazon.awssdk.services.s3.model.CompleteMultipartUploadResponse;
import software.amazon.awssdk.services.s3.model.CompletedPart;
import software.amazon.awssdk.services.s3.model.MultipartUpload;
import software.amazon.awssdk.services.s3.model.PutObjectRequest;
import software.amazon.awssdk.services.s3.model.PutObjectResponse;
import software.amazon.awssdk.services.s3.model.SelectObjectContentRequest;
import software.amazon.awssdk.services.s3.model.UploadPartRequest;
import software.amazon.awssdk.services.s3.model.UploadPartResponse;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathIOException;
import org.apache.hadoop.fs.s3a.impl.PutObjectOptions;
import org.apache.hadoop.fs.statistics.DurationTrackerFactory;
import org.apache.hadoop.fs.store.audit.AuditSpan;
import org.apache.hadoop.fs.s3a.select.SelectEventStreamPublisher;
import org.apache.hadoop.fs.store.audit.AuditSpanSource;
import org.apache.hadoop.util.functional.CallableRaisingIOE;
@ -77,27 +76,15 @@ public interface WriteOperations extends AuditSpanSource, Closeable {
/**
* Create a {@link PutObjectRequest} request against the specific key.
* @param destKey destination key
* @param inputStream source data.
* @param length size, if known. Use -1 for not known
* @param options options for the request
* @param isFile is data to be uploaded a file
* @return the request
*/
PutObjectRequest createPutObjectRequest(String destKey,
InputStream inputStream,
long length,
@Nullable PutObjectOptions options);
/**
* Create a {@link PutObjectRequest} request to upload a file.
* @param dest key to PUT to.
* @param sourceFile source file
* @param options options for the request
* @return the request
*/
PutObjectRequest createPutObjectRequest(
String dest,
File sourceFile,
@Nullable PutObjectOptions options);
@Nullable PutObjectOptions options,
boolean isFile);
/**
* Callback on a successful write.
@ -111,15 +98,6 @@ public interface WriteOperations extends AuditSpanSource, Closeable {
*/
void writeFailed(Exception ex);
/**
* Create a new object metadata instance.
* Any standard metadata headers are added here, for example:
* encryption.
* @param length size, if known. Use -1 for not known
* @return a new metadata instance
*/
ObjectMetadata newObjectMetadata(long length);
/**
* Start the multipart upload process.
* Retry policy: retrying, translated.
@ -148,10 +126,10 @@ public interface WriteOperations extends AuditSpanSource, Closeable {
* the retry count was exceeded
*/
@Retries.RetryTranslated
CompleteMultipartUploadResult completeMPUwithRetries(
CompleteMultipartUploadResponse completeMPUwithRetries(
String destKey,
String uploadId,
List<PartETag> partETags,
List<CompletedPart> partETags,
long length,
AtomicInteger errorCount,
PutObjectOptions putOptions)
@ -213,31 +191,20 @@ public interface WriteOperations extends AuditSpanSource, Closeable {
throws IOException;
/**
* Create and initialize a part request of a multipart upload.
* Exactly one of: {@code uploadStream} or {@code sourceFile}
* must be specified.
* A subset of the file may be posted, by providing the starting point
* in {@code offset} and a length of block in {@code size} equal to
* or less than the remaining bytes.
* Create and initialize a part request builder of a multipart upload.
* @param destKey destination key of ongoing operation
* @param uploadId ID of ongoing upload
* @param partNumber current part number of the upload
* @param size amount of data
* @param uploadStream source of data to upload
* @param sourceFile optional source file.
* @param offset offset in file to start reading.
* @return the request.
* @return the request builder.
* @throws IllegalArgumentException if the parameters are invalid
* @throws PathIOException if the part number is out of range.
*/
UploadPartRequest newUploadPartRequest(
UploadPartRequest.Builder newUploadPartRequestBuilder(
String destKey,
String uploadId,
int partNumber,
long size,
InputStream uploadStream,
File sourceFile,
Long offset) throws IOException;
long size) throws IOException;
/**
* PUT an object directly (i.e. not via the transfer manager).
@ -246,12 +213,14 @@ public interface WriteOperations extends AuditSpanSource, Closeable {
* @param putObjectRequest the request
* @param putOptions put object options
* @param durationTrackerFactory factory for duration tracking
* @param uploadData data to be uploaded
* @param isFile is data to be uploaded a file
* @return the upload initiated
* @throws IOException on problems
*/
@Retries.RetryTranslated
PutObjectResult putObject(PutObjectRequest putObjectRequest,
PutObjectOptions putOptions,
PutObjectResponse putObject(PutObjectRequest putObjectRequest,
PutObjectOptions putOptions, S3ADataBlocks.BlockUploadData uploadData, boolean isFile,
DurationTrackerFactory durationTrackerFactory)
throws IOException;
@ -279,22 +248,23 @@ public interface WriteOperations extends AuditSpanSource, Closeable {
* the retry count was exceeded
*/
@Retries.RetryTranslated
CompleteMultipartUploadResult commitUpload(
CompleteMultipartUploadResponse commitUpload(
String destKey,
String uploadId,
List<PartETag> partETags,
List<CompletedPart> partETags,
long length)
throws IOException;
/**
* Upload part of a multi-partition file.
* @param request request
* @param request the upload part request.
* @param body the request body.
* @param durationTrackerFactory factory for duration tracking
* @return the result of the operation.
* @throws IOException on problems
*/
@Retries.RetryTranslated
UploadPartResult uploadPart(UploadPartRequest request,
UploadPartResponse uploadPart(UploadPartRequest request, RequestBody body,
DurationTrackerFactory durationTrackerFactory)
throws IOException;
@ -306,25 +276,32 @@ public interface WriteOperations extends AuditSpanSource, Closeable {
Configuration getConf();
/**
* Create a S3 Select request for the destination path.
* Get the audit span this object was created with.
* @return the audit span
*/
AuditSpan getAuditSpan();
/**
* Create a S3 Select request builder for the destination path.
* This does not build the query.
* @param path pre-qualified path for query
* @return the request
* @return the request builder
*/
SelectObjectContentRequest newSelectRequest(Path path);
SelectObjectContentRequest.Builder newSelectRequestBuilder(Path path);
/**
* Execute an S3 Select operation.
* On a failure, the request is only logged at debug to avoid the
* select exception being printed.
* @param source source for selection
*
* @param source source for selection
* @param request Select request to issue.
* @param action the action for use in exception creation
* @param action the action for use in exception creation
* @return response
* @throws IOException failure
*/
@Retries.RetryTranslated
SelectObjectContentResult select(
SelectEventStreamPublisher select(
Path source,
SelectObjectContentRequest request,
String action)

View File

@ -0,0 +1,74 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs.s3a.adapter;
import com.amazonaws.auth.AWSCredentials;
import com.amazonaws.auth.AWSCredentialsProvider;
import com.amazonaws.auth.AWSSessionCredentials;
import com.amazonaws.auth.AnonymousAWSCredentials;
import software.amazon.awssdk.auth.credentials.AnonymousCredentialsProvider;
import software.amazon.awssdk.auth.credentials.AwsBasicCredentials;
import software.amazon.awssdk.auth.credentials.AwsCredentials;
import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider;
import software.amazon.awssdk.auth.credentials.AwsSessionCredentials;
/**
* Adapts a V1 {@link AWSCredentialsProvider} to the V2 {@link AwsCredentialsProvider} interface.
* Implements both interfaces so can be used with either the V1 or V2 AWS SDK.
*/
final class V1ToV2AwsCredentialProviderAdapter implements V1V2AwsCredentialProviderAdapter {
private final AWSCredentialsProvider v1CredentialsProvider;
private V1ToV2AwsCredentialProviderAdapter(AWSCredentialsProvider v1CredentialsProvider) {
this.v1CredentialsProvider = v1CredentialsProvider;
}
@Override
public AwsCredentials resolveCredentials() {
AWSCredentials toAdapt = v1CredentialsProvider.getCredentials();
if (toAdapt instanceof AWSSessionCredentials) {
return AwsSessionCredentials.create(toAdapt.getAWSAccessKeyId(),
toAdapt.getAWSSecretKey(),
((AWSSessionCredentials) toAdapt).getSessionToken());
} else if (toAdapt instanceof AnonymousAWSCredentials) {
return AnonymousCredentialsProvider.create().resolveCredentials();
} else {
return AwsBasicCredentials.create(toAdapt.getAWSAccessKeyId(), toAdapt.getAWSSecretKey());
}
}
@Override
public AWSCredentials getCredentials() {
return v1CredentialsProvider.getCredentials();
}
@Override
public void refresh() {
v1CredentialsProvider.refresh();
}
/**
* @param v1CredentialsProvider V1 credential provider to adapt.
* @return A new instance of the credentials provider adapter.
*/
static V1ToV2AwsCredentialProviderAdapter create(AWSCredentialsProvider v1CredentialsProvider) {
return new V1ToV2AwsCredentialProviderAdapter(v1CredentialsProvider);
}
}

View File

@ -0,0 +1,36 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs.s3a.adapter;
import com.amazonaws.auth.AWSCredentialsProvider;
import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider;
public interface V1V2AwsCredentialProviderAdapter extends AWSCredentialsProvider,
AwsCredentialsProvider {
/**
* Creates a two-way adapter from a V1 {@link AWSCredentialsProvider} interface.
*
* @param v1CredentialsProvider V1 credentials provider.
* @return Two-way credential provider adapter.
*/
static V1V2AwsCredentialProviderAdapter adapt(AWSCredentialsProvider v1CredentialsProvider) {
return V1ToV2AwsCredentialProviderAdapter.create(v1CredentialsProvider);
}
}
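
As a hedged illustration of how the two-way adapter might be exercised, the sketch below wraps a simple V1 provider and reads the credentials through both interfaces. The driver class and the key values are hypothetical; only the adapt() factory above is assumed:

import com.amazonaws.auth.AWSStaticCredentialsProvider;
import com.amazonaws.auth.BasicAWSCredentials;
import software.amazon.awssdk.auth.credentials.AwsCredentials;
import org.apache.hadoop.fs.s3a.adapter.V1V2AwsCredentialProviderAdapter;

public class AdapterSketch {
  public static void main(String[] args) {
    // a V1 provider holding placeholder (non-functional) keys
    AWSStaticCredentialsProvider v1 = new AWSStaticCredentialsProvider(
        new BasicAWSCredentials("access-key", "secret-key"));

    // the adapter satisfies both the V1 and the V2 provider interfaces
    V1V2AwsCredentialProviderAdapter adapter =
        V1V2AwsCredentialProviderAdapter.adapt(v1);

    // V2 callers resolve credentials through resolveCredentials() ...
    AwsCredentials v2Credentials = adapter.resolveCredentials();
    // ... while existing V1 code keeps calling getCredentials()
    System.out.println(v2Credentials.accessKeyId()
        .equals(adapter.getCredentials().getAWSAccessKeyId()));
  }
}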

View File

@ -0,0 +1,27 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* Adapter classes for allowing V1 credential providers to be used with the AWS SDK V2.
*/
@InterfaceAudience.Private
@InterfaceStability.Unstable
package org.apache.hadoop.fs.s3a.adapter;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;

View File

@ -19,33 +19,27 @@
package org.apache.hadoop.fs.s3a.api;
import javax.annotation.Nullable;
import java.io.File;
import java.io.InputStream;
import java.util.List;
import java.util.Optional;
import com.amazonaws.services.s3.model.AbortMultipartUploadRequest;
import com.amazonaws.services.s3.model.CannedAccessControlList;
import com.amazonaws.services.s3.model.CompleteMultipartUploadRequest;
import com.amazonaws.services.s3.model.CopyObjectRequest;
import com.amazonaws.services.s3.model.DeleteObjectRequest;
import com.amazonaws.services.s3.model.DeleteObjectsRequest;
import com.amazonaws.services.s3.model.GetObjectMetadataRequest;
import com.amazonaws.services.s3.model.GetObjectRequest;
import com.amazonaws.services.s3.model.InitiateMultipartUploadRequest;
import com.amazonaws.services.s3.model.ListMultipartUploadsRequest;
import com.amazonaws.services.s3.model.ListNextBatchOfObjectsRequest;
import com.amazonaws.services.s3.model.ListObjectsRequest;
import com.amazonaws.services.s3.model.ListObjectsV2Request;
import com.amazonaws.services.s3.model.ObjectListing;
import com.amazonaws.services.s3.model.ObjectMetadata;
import com.amazonaws.services.s3.model.PartETag;
import com.amazonaws.services.s3.model.PutObjectRequest;
import com.amazonaws.services.s3.model.SSEAwsKeyManagementParams;
import com.amazonaws.services.s3.model.SSECustomerKey;
import com.amazonaws.services.s3.model.SelectObjectContentRequest;
import com.amazonaws.services.s3.model.StorageClass;
import com.amazonaws.services.s3.model.UploadPartRequest;
import software.amazon.awssdk.services.s3.model.AbortMultipartUploadRequest;
import software.amazon.awssdk.services.s3.model.CompleteMultipartUploadRequest;
import software.amazon.awssdk.services.s3.model.CompletedPart;
import software.amazon.awssdk.services.s3.model.CopyObjectRequest;
import software.amazon.awssdk.services.s3.model.CreateMultipartUploadRequest;
import software.amazon.awssdk.services.s3.model.DeleteObjectRequest;
import software.amazon.awssdk.services.s3.model.DeleteObjectsRequest;
import software.amazon.awssdk.services.s3.model.GetObjectRequest;
import software.amazon.awssdk.services.s3.model.HeadBucketRequest;
import software.amazon.awssdk.services.s3.model.HeadObjectRequest;
import software.amazon.awssdk.services.s3.model.HeadObjectResponse;
import software.amazon.awssdk.services.s3.model.ListMultipartUploadsRequest;
import software.amazon.awssdk.services.s3.model.ListObjectsRequest;
import software.amazon.awssdk.services.s3.model.ListObjectsV2Request;
import software.amazon.awssdk.services.s3.model.ObjectIdentifier;
import software.amazon.awssdk.services.s3.model.PutObjectRequest;
import software.amazon.awssdk.services.s3.model.SelectObjectContentRequest;
import software.amazon.awssdk.services.s3.model.StorageClass;
import software.amazon.awssdk.services.s3.model.UploadPartRequest;
import org.apache.hadoop.fs.PathIOException;
import org.apache.hadoop.fs.s3a.S3AEncryptionMethods;
@ -79,22 +73,7 @@ public interface RequestFactory {
* Get the canned ACL of this FS.
* @return an ACL, if any
*/
CannedAccessControlList getCannedACL();
/**
* Create the AWS SDK structure used to configure SSE,
* if the encryption secrets contain the information/settings for this.
* @return an optional set of KMS Key settings
*/
Optional<SSEAwsKeyManagementParams> generateSSEAwsKeyParams();
/**
* Create the SSE-C structure for the AWS SDK, if the encryption secrets
* contain the information/settings for this.
* This will contain a secret extracted from the bucket/configuration.
* @return an optional customer key.
*/
Optional<SSECustomerKey> generateSSECustomerKey();
String getCannedACL();
/**
* Get the encryption algorithm of this endpoint.
@ -115,79 +94,58 @@ public interface RequestFactory {
StorageClass getStorageClass();
/**
* Create a new object metadata instance.
* Any standard metadata headers are added here, for example:
* encryption.
*
* @param length length of data to set in header; Ignored if negative
* @return a new metadata instance
*/
ObjectMetadata newObjectMetadata(long length);
/**
* Create a copy request.
* Create a copy request builder.
* This includes the work of copying the relevant parts
* of the metadata from the source
* @param srcKey source
* @param dstKey destination
* @param srcom source object metadata.
* @return the request
* @return the request builder
*/
CopyObjectRequest newCopyObjectRequest(String srcKey,
CopyObjectRequest.Builder newCopyObjectRequestBuilder(String srcKey,
String dstKey,
ObjectMetadata srcom);
HeadObjectResponse srcom);
/**
* Create a putObject request.
* Adds the ACL and metadata
* @param key key of object
* @param metadata metadata header
* @param options options for the request
* @param srcfile source file
* @return the request
*/
PutObjectRequest newPutObjectRequest(String key,
ObjectMetadata metadata, PutObjectOptions options, File srcfile);
/**
* Create a {@link PutObjectRequest} request.
* Create a {@link PutObjectRequest} request builder.
* The metadata is assumed to have been configured with the size of the
* operation.
* @param key key of object
* @param metadata metadata header
* @param options options for the request
* @param inputStream source data.
* @return the request
* @param length length of object to be uploaded
* @param isDirectoryMarker true if object to be uploaded is a directory marker
* @return the request builder
*/
PutObjectRequest newPutObjectRequest(String key,
ObjectMetadata metadata,
PutObjectRequest.Builder newPutObjectRequestBuilder(String key,
PutObjectOptions options,
InputStream inputStream);
long length,
boolean isDirectoryMarker);
/**
* Create a {@link PutObjectRequest} request for creating
* an empty directory.
*
* @param directory destination directory.
* @return request for a zero byte upload.
* @return request builder for a zero byte upload.
*/
PutObjectRequest newDirectoryMarkerRequest(String directory);
PutObjectRequest.Builder newDirectoryMarkerRequest(String directory);
/**
* List all multipart uploads under a prefix.
* @param prefix prefix to list under
* @return the request.
* @return the request builder.
*/
ListMultipartUploadsRequest newListMultipartUploadsRequest(
ListMultipartUploadsRequest.Builder newListMultipartUploadsRequestBuilder(
@Nullable String prefix);
/**
* Abort a multipart upload.
* @param destKey destination object key
* @param uploadId ID of initiated upload
* @return the request.
* @return the request builder.
*/
AbortMultipartUploadRequest newAbortMultipartUploadRequest(
AbortMultipartUploadRequest.Builder newAbortMultipartUploadRequestBuilder(
String destKey,
String uploadId);
@ -195,10 +153,10 @@ public interface RequestFactory {
* Start a multipart upload.
* @param destKey destination object key
* @param options options for the request
* @return the request.
* @return the request builder.
* @throws PathIOException if multipart uploads are disabled
*/
InitiateMultipartUploadRequest newMultipartUploadRequest(
CreateMultipartUploadRequest.Builder newMultipartUploadRequestBuilder(
String destKey,
@Nullable PutObjectOptions options) throws PathIOException;
@ -207,107 +165,95 @@ public interface RequestFactory {
* @param destKey destination object key
* @param uploadId ID of initiated upload
* @param partETags ordered list of etags
* @return the request.
* @return the request builder.
*/
CompleteMultipartUploadRequest newCompleteMultipartUploadRequest(
CompleteMultipartUploadRequest.Builder newCompleteMultipartUploadRequestBuilder(
String destKey,
String uploadId,
List<PartETag> partETags);
List<CompletedPart> partETags);
/**
* Create a HEAD request.
* Create a HEAD object request builder.
* @param key key, may have trailing /
* @return the request.
* @return the request builder.
*/
GetObjectMetadataRequest newGetObjectMetadataRequest(String key);
HeadObjectRequest.Builder newHeadObjectRequestBuilder(String key);
/**
* Create a GET request.
* Create a HEAD bucket request builder.
* @param bucket bucket to get metadata for
* @return the request builder.
*/
HeadBucketRequest.Builder newHeadBucketRequestBuilder(String bucket);
/**
* Create a GET request builder.
* @param key object key
* @return the request.
* @return the request builder.
*/
GetObjectRequest newGetObjectRequest(String key);
GetObjectRequest.Builder newGetObjectRequestBuilder(String key);
/**
* Create and initialize a part request of a multipart upload.
* Exactly one of: {@code uploadStream} or {@code sourceFile}
* must be specified.
* A subset of the file may be posted, by providing the starting point
* in {@code offset} and a length of block in {@code size} equal to
* or less than the remaining bytes.
* @param destKey destination key of ongoing operation
* @param uploadId ID of ongoing upload
* @param partNumber current part number of the upload
* @param size amount of data
* @param uploadStream source of data to upload
* @param sourceFile optional source file.
* @param offset offset in file to start reading.
* @return the request.
* Create and initialize a part request builder of a multipart upload.
*
* @param destKey destination key of ongoing operation
* @param uploadId ID of ongoing upload
* @param partNumber current part number of the upload
* @param size amount of data
* @return the request builder.
* @throws PathIOException if the part number is out of range.
*/
UploadPartRequest newUploadPartRequest(
UploadPartRequest.Builder newUploadPartRequestBuilder(
String destKey,
String uploadId,
int partNumber,
long size,
InputStream uploadStream,
File sourceFile,
long offset) throws PathIOException;
long size) throws PathIOException;
/**
* Create a S3 Select request for the destination object.
* Create a S3 Select request builder for the destination object.
* This does not build the query.
* @param key object key
* @return the request
* @return the request builder
*/
SelectObjectContentRequest newSelectRequest(String key);
SelectObjectContentRequest.Builder newSelectRequestBuilder(String key);
/**
* Create the (legacy) V1 list request.
* Create the (legacy) V1 list request builder.
* @param key key to list under
* @param delimiter delimiter for keys
* @param maxKeys maximum number in a list page.
* @return the request
* @return the request builder.
*/
ListObjectsRequest newListObjectsV1Request(String key,
ListObjectsRequest.Builder newListObjectsV1RequestBuilder(String key,
String delimiter,
int maxKeys);
/**
* Create the next V1 page list request, following
* on from the previous response.
* @param prev previous response
* @return the request
*/
ListNextBatchOfObjectsRequest newListNextBatchOfObjectsRequest(
ObjectListing prev);
/**
* Create a V2 list request.
* Create a V2 list request builder.
* This will be recycled for any subsequent requests.
* @param key key to list under
* @param delimiter delimiter for keys
* @param maxKeys maximum number in a list page.
* @return the request
* @return the request builder.
*/
ListObjectsV2Request newListObjectsV2Request(String key,
ListObjectsV2Request.Builder newListObjectsV2RequestBuilder(String key,
String delimiter,
int maxKeys);
/**
* Create a request to delete a single object.
* Create a request builder to delete a single object.
* @param key object to delete
* @return the request
* @return the request builder.
*/
DeleteObjectRequest newDeleteObjectRequest(String key);
DeleteObjectRequest.Builder newDeleteObjectRequestBuilder(String key);
/**
* Bulk delete request.
* Create a request builder to delete objects in bulk.
* @param keysToDelete list of keys to delete.
* @return the request
* @return the request builder.
*/
DeleteObjectsRequest newBulkDeleteRequest(
List<DeleteObjectsRequest.KeyVersion> keysToDelete);
DeleteObjectsRequest.Builder newBulkDeleteRequestBuilder(
List<ObjectIdentifier> keysToDelete);
}
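
The factory now hands back SDK V2 request builders rather than fully constructed requests: callers apply any per-request settings and then call build(). For readers unfamiliar with the V2 style, a minimal, self-contained sketch of that builder pattern follows; the bucket, key and range are illustrative values, not anything S3A sets:

import software.amazon.awssdk.services.s3.model.GetObjectRequest;

public class BuilderPatternSketch {
  public static void main(String[] args) {
    // V2 request objects are immutable; they are assembled through a builder
    // and finalized with build()
    GetObjectRequest request = GetObjectRequest.builder()
        .bucket("example-bucket")
        .key("dir/file.bin")
        .range("bytes=0-1023")
        .build();
    System.out.println(request.key() + " " + request.range());
  }
}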

View File

@ -18,15 +18,9 @@
package org.apache.hadoop.fs.s3a.audit;
import com.amazonaws.AmazonWebServiceRequest;
import com.amazonaws.Request;
import com.amazonaws.Response;
import com.amazonaws.SdkBaseException;
import com.amazonaws.handlers.HandlerAfterAttemptContext;
import com.amazonaws.handlers.HandlerBeforeAttemptContext;
import com.amazonaws.http.HttpResponse;
import software.amazon.awssdk.core.SdkRequest;
import software.amazon.awssdk.core.interceptor.ExecutionInterceptor;
import org.apache.hadoop.fs.s3a.Retries;
/**
* Callbacks for audit spans. This is implemented
@ -37,10 +31,10 @@ import org.apache.hadoop.fs.s3a.Retries;
* detect this and raise an exception.
*
* Look at the documentation for
* {@code com.amazonaws.handlers.IRequestHandler2} for details
* {@code ExecutionInterceptor} for details
* on the callbacks.
*/
public interface AWSAuditEventCallbacks {
public interface AWSAuditEventCallbacks extends ExecutionInterceptor {
/**
* Return a span ID which must be unique for all spans within
@ -66,95 +60,8 @@ public interface AWSAuditEventCallbacks {
* It is not invoked on any AWS requests created in the SDK.
* Avoid raising exceptions or talking to any remote service;
* this callback is for annotation rather than validation.
* @param request request
* @param <T> type of request
* @return the request, possibly modified.
* @param builder the request builder.
*/
default <T extends AmazonWebServiceRequest> T requestCreated(T request) {
return request;
}
default void requestCreated(SdkRequest.Builder builder) {}
/**
* Preflight preparation of AWS request.
* @param request request
* @param <T> type of request
* @return an updated request.
* @throws AuditFailureException for generic audit failures
* @throws SdkBaseException for other reasons.
*/
@Retries.OnceRaw
default <T extends AmazonWebServiceRequest> T beforeExecution(T request)
throws AuditFailureException, SdkBaseException {
return request;
}
/**
* Callback after S3 responded to a request.
* @param request request
* @param response response.
* @throws AuditFailureException for generic audit failures
* @throws SdkBaseException for other reasons.
*/
default void afterResponse(Request<?> request,
Response<?> response)
throws AuditFailureException, SdkBaseException {
}
/**
* Callback after a request resulted in an error.
* @param request request
* @param response response.
* @param exception exception raised.
* @throws AuditFailureException for generic audit failures
* @throws SdkBaseException for other reasons.
*/
default void afterError(Request<?> request,
Response<?> response,
Exception exception)
throws AuditFailureException, SdkBaseException {
}
/**
* Request before marshalling.
* @param request request
* @return possibly modified request.
*/
default AmazonWebServiceRequest beforeMarshalling(
AmazonWebServiceRequest request) {
return request;
}
/**
* Request before marshalling.
* @param request request
*/
default void beforeRequest(Request<?> request) {
}
/**
* Before any attempt is made.
* @param context full context, including the request.
*/
default void beforeAttempt(HandlerBeforeAttemptContext context) {
}
/**
* After any attempt is made.
* @param context full context, including the request.
*/
default void afterAttempt(
HandlerAfterAttemptContext context) {
}
/**
* Before unmarshalling the response.
* @param request request made.
* @param httpResponse response received
* @return updated response.
*/
default HttpResponse beforeUnmarshalling(
final Request<?> request,
final HttpResponse httpResponse) {
return httpResponse;
}
}
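
As background for the interface above: an SDK V2 ExecutionInterceptor is implemented by overriding only the lifecycle callbacks of interest, all of which receive the shared ExecutionAttributes. The following minimal sketch is hypothetical and not part of the S3A code; it simply shows the shape of two such callbacks:

import software.amazon.awssdk.core.SdkRequest;
import software.amazon.awssdk.core.interceptor.Context;
import software.amazon.awssdk.core.interceptor.ExecutionAttributes;
import software.amazon.awssdk.core.interceptor.ExecutionInterceptor;

public class LoggingInterceptorSketch implements ExecutionInterceptor {

  @Override
  public void beforeExecution(Context.BeforeExecution context,
      ExecutionAttributes executionAttributes) {
    // called once per operation, before the request is marshalled
    System.out.println("executing "
        + context.request().getClass().getSimpleName());
  }

  @Override
  public SdkRequest modifyRequest(Context.ModifyRequest context,
      ExecutionAttributes executionAttributes) {
    // returning the request unchanged; a real interceptor could rebuild it here
    return context.request();
  }
}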

View File

@ -20,24 +20,23 @@ package org.apache.hadoop.fs.s3a.audit;
import java.util.List;
import com.amazonaws.AmazonWebServiceRequest;
import com.amazonaws.services.s3.model.AbortMultipartUploadRequest;
import com.amazonaws.services.s3.model.CompleteMultipartUploadRequest;
import com.amazonaws.services.s3.model.CopyPartRequest;
import com.amazonaws.services.s3.model.DeleteObjectRequest;
import com.amazonaws.services.s3.model.DeleteObjectsRequest;
import com.amazonaws.services.s3.model.GetBucketLocationRequest;
import com.amazonaws.services.s3.model.GetObjectMetadataRequest;
import com.amazonaws.services.s3.model.GetObjectRequest;
import com.amazonaws.services.s3.model.InitiateMultipartUploadRequest;
import com.amazonaws.services.s3.model.ListMultipartUploadsRequest;
import com.amazonaws.services.s3.model.ListNextBatchOfObjectsRequest;
import com.amazonaws.services.s3.model.ListObjectsRequest;
import com.amazonaws.services.s3.model.ListObjectsV2Request;
import com.amazonaws.services.s3.model.ObjectListing;
import com.amazonaws.services.s3.model.PutObjectRequest;
import com.amazonaws.services.s3.model.SelectObjectContentRequest;
import com.amazonaws.services.s3.model.UploadPartRequest;
import software.amazon.awssdk.core.SdkRequest;
import software.amazon.awssdk.services.s3.model.AbortMultipartUploadRequest;
import software.amazon.awssdk.services.s3.model.CompleteMultipartUploadRequest;
import software.amazon.awssdk.services.s3.model.CreateMultipartUploadRequest;
import software.amazon.awssdk.services.s3.model.DeleteObjectRequest;
import software.amazon.awssdk.services.s3.model.DeleteObjectsRequest;
import software.amazon.awssdk.services.s3.model.GetBucketLocationRequest;
import software.amazon.awssdk.services.s3.model.GetObjectRequest;
import software.amazon.awssdk.services.s3.model.HeadObjectRequest;
import software.amazon.awssdk.services.s3.model.ListMultipartUploadsRequest;
import software.amazon.awssdk.services.s3.model.ListObjectsRequest;
import software.amazon.awssdk.services.s3.model.ListObjectsV2Request;
import software.amazon.awssdk.services.s3.model.ObjectIdentifier;
import software.amazon.awssdk.services.s3.model.PutObjectRequest;
import software.amazon.awssdk.services.s3.model.SelectObjectContentRequest;
import software.amazon.awssdk.services.s3.model.UploadPartCopyRequest;
import software.amazon.awssdk.services.s3.model.UploadPartRequest;
import static org.apache.hadoop.fs.statistics.StoreStatisticNames.ACTION_HTTP_GET_REQUEST;
import static org.apache.hadoop.fs.statistics.StoreStatisticNames.ACTION_HTTP_HEAD_REQUEST;
@ -64,102 +63,85 @@ public class AWSRequestAnalyzer {
* read/write and path.
* @param request request.
* @return information about the request.
* @param <T> type of request.
*/
public <T extends AmazonWebServiceRequest> RequestInfo analyze(T request) {
public RequestInfo analyze(SdkRequest request) {
// this is where Scala's case statement would massively
// simplify life.
// Please Keep in Alphabetical Order.
if (request instanceof AbortMultipartUploadRequest) {
return writing(MULTIPART_UPLOAD_ABORTED,
((AbortMultipartUploadRequest) request).getKey(),
((AbortMultipartUploadRequest) request).key(),
0);
} else if (request instanceof CompleteMultipartUploadRequest) {
CompleteMultipartUploadRequest r
= (CompleteMultipartUploadRequest) request;
return writing(MULTIPART_UPLOAD_COMPLETED,
r.getKey(),
r.getPartETags().size());
r.key(),
r.multipartUpload().parts().size());
} else if (request instanceof CreateMultipartUploadRequest) {
return writing(MULTIPART_UPLOAD_STARTED,
((CreateMultipartUploadRequest) request).key(),
0);
} else if (request instanceof DeleteObjectRequest) {
// DeleteObject: single object
return writing(OBJECT_DELETE_REQUEST,
((DeleteObjectRequest) request).getKey(),
((DeleteObjectRequest) request).key(),
1);
} else if (request instanceof DeleteObjectsRequest) {
// DeleteObjects: bulk delete
// use first key as the path
DeleteObjectsRequest r = (DeleteObjectsRequest) request;
List<DeleteObjectsRequest.KeyVersion> keys
= r.getKeys();
List<ObjectIdentifier> objectIdentifiers
= r.delete().objects();
return writing(OBJECT_BULK_DELETE_REQUEST,
keys.isEmpty() ? null : keys.get(0).getKey(),
keys.size());
objectIdentifiers.isEmpty() ? null : objectIdentifiers.get(0).key(),
objectIdentifiers.size());
} else if (request instanceof GetBucketLocationRequest) {
GetBucketLocationRequest r = (GetBucketLocationRequest) request;
return reading(STORE_EXISTS_PROBE,
r.getBucketName(),
r.bucket(),
0);
} else if (request instanceof GetObjectMetadataRequest) {
return reading(ACTION_HTTP_HEAD_REQUEST,
((GetObjectMetadataRequest) request).getKey(), 0);
} else if (request instanceof GetObjectRequest) {
GetObjectRequest r = (GetObjectRequest) request;
long[] range = r.getRange();
long size = range == null
? -1
: range[1] - range[0];
return reading(ACTION_HTTP_GET_REQUEST,
r.getKey(),
size);
} else if (request instanceof InitiateMultipartUploadRequest) {
return writing(MULTIPART_UPLOAD_STARTED,
((InitiateMultipartUploadRequest) request).getKey(),
0);
r.key(),
sizeFromRangeHeader(r.range()));
} else if (request instanceof HeadObjectRequest) {
return reading(ACTION_HTTP_HEAD_REQUEST,
((HeadObjectRequest) request).key(), 0);
} else if (request instanceof ListMultipartUploadsRequest) {
ListMultipartUploadsRequest r
= (ListMultipartUploadsRequest) request;
return reading(MULTIPART_UPLOAD_LIST,
r.getPrefix(),
r.getMaxUploads());
r.prefix(),
r.maxUploads());
} else if (request instanceof ListObjectsRequest) {
ListObjectsRequest r = (ListObjectsRequest) request;
return reading(OBJECT_LIST_REQUEST,
r.getPrefix(),
r.getMaxKeys());
} else if (request instanceof ListNextBatchOfObjectsRequest) {
ListNextBatchOfObjectsRequest r = (ListNextBatchOfObjectsRequest) request;
ObjectListing l = r.getPreviousObjectListing();
String prefix = "";
int size = 0;
if (l != null) {
prefix = l.getPrefix();
size = l.getMaxKeys();
}
return reading(OBJECT_LIST_REQUEST,
prefix,
size);
r.prefix(),
r.maxKeys());
} else if (request instanceof ListObjectsV2Request) {
ListObjectsV2Request r = (ListObjectsV2Request) request;
return reading(OBJECT_LIST_REQUEST,
r.getPrefix(),
r.getMaxKeys());
r.prefix(),
r.maxKeys());
} else if (request instanceof PutObjectRequest) {
PutObjectRequest r = (PutObjectRequest) request;
return writing(OBJECT_PUT_REQUEST,
r.getKey(),
r.key(),
0);
} else if (request instanceof SelectObjectContentRequest) {
SelectObjectContentRequest r =
(SelectObjectContentRequest) request;
return reading(OBJECT_SELECT_REQUESTS,
r.getKey(),
r.key(),
1);
} else if (request instanceof UploadPartRequest) {
UploadPartRequest r = (UploadPartRequest) request;
return writing(MULTIPART_UPLOAD_PART_PUT,
r.getKey(),
r.getPartSize());
r.key(),
r.contentLength());
}
// no explicit support, return classname
return writing(request.getClass().getName(), null, 0);
@ -212,7 +194,7 @@ public class AWSRequestAnalyzer {
*/
public static boolean
isRequestNotAlwaysInSpan(final Object request) {
return request instanceof CopyPartRequest
return request instanceof UploadPartCopyRequest
|| request instanceof CompleteMultipartUploadRequest
|| request instanceof GetBucketLocationRequest;
}
@ -225,9 +207,9 @@ public class AWSRequestAnalyzer {
* @return true if the transfer manager creates them.
*/
public static boolean isRequestMultipartIO(final Object request) {
return request instanceof CopyPartRequest
return request instanceof UploadPartCopyRequest
|| request instanceof CompleteMultipartUploadRequest
|| request instanceof InitiateMultipartUploadRequest
|| request instanceof CreateMultipartUploadRequest
|| request instanceof UploadPartRequest;
}
@ -307,4 +289,23 @@ public class AWSRequestAnalyzer {
private static long toSafeLong(final Number size) {
return size != null ? size.longValue() : 0;
}
private static final String BYTES_PREFIX = "bytes=";
private static Number sizeFromRangeHeader(String rangeHeader) {
if (rangeHeader != null && rangeHeader.startsWith(BYTES_PREFIX)) {
String[] values = rangeHeader
.substring(BYTES_PREFIX.length())
.split("-");
if (values.length == 2) {
try {
long start = Long.parseUnsignedLong(values[0]);
long end = Long.parseUnsignedLong(values[1]);
return end - start;
} catch (NumberFormatException e) {
// malformed range header: fall through and report the size as unknown
}
}
}
return -1;
}
}
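
A hedged sketch of feeding a V2 request to the analyzer; the bucket and keys are illustrative, and the printed RequestInfo summarises the verb, the first key and the number of objects:

import software.amazon.awssdk.services.s3.model.Delete;
import software.amazon.awssdk.services.s3.model.DeleteObjectsRequest;
import software.amazon.awssdk.services.s3.model.ObjectIdentifier;

import org.apache.hadoop.fs.s3a.audit.AWSRequestAnalyzer;

public class AnalyzerSketch {
  public static void main(String[] args) {
    AWSRequestAnalyzer analyzer = new AWSRequestAnalyzer();
    // a bulk delete of two illustrative keys; the analyzer reports the first
    // key as the path and the number of object identifiers as the count
    DeleteObjectsRequest bulkDelete = DeleteObjectsRequest.builder()
        .bucket("example-bucket")
        .delete(Delete.builder()
            .objects(ObjectIdentifier.builder().key("dir/a").build(),
                     ObjectIdentifier.builder().key("dir/b").build())
            .build())
        .build();
    System.out.println(analyzer.analyze(bulkDelete));
  }
}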

View File

@ -23,7 +23,7 @@ import java.lang.reflect.Constructor;
import java.lang.reflect.InvocationTargetException;
import java.nio.file.AccessDeniedException;
import com.amazonaws.HandlerContextAware;
import software.amazon.awssdk.core.interceptor.ExecutionAttributes;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@ -34,10 +34,11 @@ import org.apache.hadoop.fs.s3a.audit.impl.LoggingAuditor;
import org.apache.hadoop.fs.s3a.audit.impl.NoopAuditManagerS3A;
import org.apache.hadoop.fs.statistics.impl.IOStatisticsStore;
import static java.util.Objects.requireNonNull;
import static org.apache.hadoop.fs.s3a.audit.S3AAuditConstants.AUDIT_ENABLED;
import static org.apache.hadoop.fs.s3a.audit.S3AAuditConstants.AUDIT_ENABLED_DEFAULT;
import static org.apache.hadoop.fs.s3a.audit.impl.S3AInternalAuditConstants.AUDIT_SPAN_HANDLER_CONTEXT;
import static org.apache.hadoop.fs.s3a.audit.impl.S3AInternalAuditConstants.AUDIT_SPAN_EXECUTION_ATTRIBUTE;
/**
* Support for integrating auditing within the S3A code.
@ -123,25 +124,24 @@ public final class AuditIntegration {
}
/**
* Get the span from a handler context.
* @param request request
* @param <T> type of request.
* Get the span from the execution attributes.
* @param executionAttributes the execution attributes
* @return the span callbacks or null
*/
public static <T extends HandlerContextAware> AWSAuditEventCallbacks
retrieveAttachedSpan(final T request) {
return request.getHandlerContext(AUDIT_SPAN_HANDLER_CONTEXT);
public static AuditSpanS3A
retrieveAttachedSpan(final ExecutionAttributes executionAttributes) {
return executionAttributes.getAttribute(AUDIT_SPAN_EXECUTION_ATTRIBUTE);
}
/**
* Attach a span to a handler context.
* @param request request
* Attach a span to the execution attributes.
* @param executionAttributes the execution attributes
* @param span span to attach
* @param <T> type of request.
*/
public static <T extends HandlerContextAware> void attachSpanToRequest(
final T request, final AWSAuditEventCallbacks span) {
request.addHandlerContext(AUDIT_SPAN_HANDLER_CONTEXT, span);
public static void attachSpanToRequest(
final ExecutionAttributes executionAttributes,
final AuditSpanS3A span) {
executionAttributes.putAttribute(AUDIT_SPAN_EXECUTION_ATTRIBUTE, span);
}
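
The execution attributes behave as a typed, per-request map which every interceptor callback in the same request/response cycle can read. A minimal sketch of the attach/retrieve round trip, using a hypothetical attribute in place of AUDIT_SPAN_EXECUTION_ATTRIBUTE:

import software.amazon.awssdk.core.interceptor.ExecutionAttribute;
import software.amazon.awssdk.core.interceptor.ExecutionAttributes;

public class ExecutionAttributeSketch {

  // a hypothetical attribute key; the audit code uses its own span attribute
  private static final ExecutionAttribute<String> SPAN_ID =
      new ExecutionAttribute<>("span.id");

  public static void main(String[] args) {
    ExecutionAttributes attributes = new ExecutionAttributes();
    // attach: typically done in an early callback such as beforeExecution()
    attributes.putAttribute(SPAN_ID, "span-0001");
    // retrieve: later callbacks in the same cycle see the same value
    System.out.println(attributes.getAttribute(SPAN_ID));
  }
}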
/**

View File

@ -21,8 +21,8 @@ package org.apache.hadoop.fs.s3a.audit;
import java.io.IOException;
import java.util.List;
import com.amazonaws.handlers.RequestHandler2;
import com.amazonaws.services.s3.transfer.internal.TransferStateChangeListener;
import software.amazon.awssdk.core.interceptor.ExecutionInterceptor;
import software.amazon.awssdk.transfer.s3.progress.TransferListener;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.fs.Path;
@ -32,6 +32,7 @@ import org.apache.hadoop.fs.store.audit.ActiveThreadSpanSource;
import org.apache.hadoop.fs.store.audit.AuditSpanSource;
import org.apache.hadoop.service.Service;
/**
* Interface for Audit Managers auditing operations through the
* AWS libraries.
@ -56,24 +57,24 @@ public interface AuditManagerS3A extends Service,
OperationAuditor getAuditor();
/**
* Create the request handler(s) for this audit service.
* The list returned is mutable; new handlers may be added.
* @return list of handlers for the SDK.
* Create the execution interceptor(s) for this audit service.
* The list returned is mutable; new interceptors may be added.
* @return list of interceptors for the SDK.
* @throws IOException failure.
*/
List<RequestHandler2> createRequestHandlers() throws IOException;
List<ExecutionInterceptor> createExecutionInterceptors() throws IOException;
/**
* Return a transfer state change callback which
* Return a transfer callback which
* fixes the active span context to be that in which
* the state change listener was created.
* the transfer listener was created.
* This can be used to audit the creation of the multipart
* upload initiation request which the transfer manager
* makes when a file to be copied is split up.
* This must be invoked/used within the active span.
* @return a state change listener.
* @return a transfer listener.
*/
TransferStateChangeListener createStateChangeListener();
TransferListener createTransferListener();
/**
* Check for permission to access a path.

View File

@ -66,13 +66,20 @@ public final class S3AAuditConstants {
"org.apache.hadoop.fs.s3a.audit.impl.NoopAuditor";
/**
* List of extra AWS SDK request handlers: {@value}.
* These are added to the SDK request chain <i>after</i>
* any audit service.
* Deprecated list of extra AWS SDK request handlers: {@value}.
* Use {@link #AUDIT_EXECUTION_INTERCEPTORS} instead.
*/
public static final String AUDIT_REQUEST_HANDLERS =
"fs.s3a.audit.request.handlers";
/**
* List of extra AWS SDK execution interceptors: {@value}.
* These are added to the SDK request chain <i>after</i>
* any audit service.
*/
public static final String AUDIT_EXECUTION_INTERCEPTORS =
"fs.s3a.audit.execution.interceptors";
/**
* Should operations outside spans be rejected?
* This is for testing coverage of the span code; if used

View File

@ -25,16 +25,14 @@ import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.atomic.AtomicInteger;
import com.amazonaws.AmazonWebServiceRequest;
import com.amazonaws.HandlerContextAware;
import com.amazonaws.Request;
import com.amazonaws.Response;
import com.amazonaws.SdkBaseException;
import com.amazonaws.handlers.HandlerAfterAttemptContext;
import com.amazonaws.handlers.HandlerBeforeAttemptContext;
import com.amazonaws.handlers.RequestHandler2;
import com.amazonaws.http.HttpResponse;
import com.amazonaws.services.s3.transfer.internal.TransferStateChangeListener;
import software.amazon.awssdk.core.SdkRequest;
import software.amazon.awssdk.core.SdkResponse;
import software.amazon.awssdk.core.interceptor.Context;
import software.amazon.awssdk.core.interceptor.ExecutionAttributes;
import software.amazon.awssdk.core.interceptor.ExecutionInterceptor;
import software.amazon.awssdk.http.SdkHttpRequest;
import software.amazon.awssdk.http.SdkHttpResponse;
import software.amazon.awssdk.transfer.s3.progress.TransferListener;
import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@ -56,6 +54,7 @@ import org.apache.hadoop.fs.s3a.audit.AuditSpanS3A;
import org.apache.hadoop.fs.s3a.audit.OperationAuditor;
import org.apache.hadoop.fs.s3a.audit.OperationAuditorOptions;
import org.apache.hadoop.fs.s3a.audit.S3AAuditConstants;
import org.apache.hadoop.fs.s3a.impl.V2Migration;
import org.apache.hadoop.fs.store.LogExactlyOnce;
import org.apache.hadoop.fs.statistics.impl.IOStatisticsStore;
import org.apache.hadoop.service.CompositeService;
@ -66,6 +65,7 @@ import static org.apache.hadoop.fs.s3a.Statistic.AUDIT_FAILURE;
import static org.apache.hadoop.fs.s3a.Statistic.AUDIT_REQUEST_EXECUTION;
import static org.apache.hadoop.fs.s3a.audit.AuditIntegration.attachSpanToRequest;
import static org.apache.hadoop.fs.s3a.audit.AuditIntegration.retrieveAttachedSpan;
import static org.apache.hadoop.fs.s3a.audit.S3AAuditConstants.AUDIT_EXECUTION_INTERCEPTORS;
import static org.apache.hadoop.fs.s3a.audit.S3AAuditConstants.AUDIT_REQUEST_HANDLERS;
/**
@ -82,10 +82,11 @@ import static org.apache.hadoop.fs.s3a.audit.S3AAuditConstants.AUDIT_REQUEST_HAN
* will deactivate the wrapped span and then
* switch the active span to the unbounded span.
*
* The inner class {@link AWSAuditEventCallbacks} is returned
* as a request handler in {@link #createRequestHandlers()};
* this forwards all requests to the outer {@code ActiveAuditManagerS3A},
* which then locates the active span and forwards the request.
* This class also implements {@link ExecutionInterceptor} and
* returns itself in {@link #createExecutionInterceptors()};
* once registered with the S3 client, the implemented methods
* are called at the corresponding points of the SDK request lifecycle;
* they locate the active span and forward each callback to it.
* If any such invocation raises an {@link AuditFailureException}
* then the IOStatistics counter for {@code AUDIT_FAILURE}
* is incremented.
@ -390,25 +391,32 @@ public final class ActiveAuditManagerS3A
}
/**
* Return a request handler for the AWS SDK which
* Return a list of execution interceptors for the AWS SDK which
* relays to this class.
* @return a request handler.
* @return a list of execution interceptors.
*/
@Override
public List<RequestHandler2> createRequestHandlers()
public List<ExecutionInterceptor> createExecutionInterceptors()
throws IOException {
// wire up the AWS SDK To call back into this class when
// preparing to make S3 calls.
List<RequestHandler2> requestHandlers = new ArrayList<>();
requestHandlers.add(new SdkRequestHandler());
// now look for any more handlers
final Class<?>[] handlers = getConfig().getClasses(AUDIT_REQUEST_HANDLERS);
List<ExecutionInterceptor> executionInterceptors = new ArrayList<>();
executionInterceptors.add(this);
final String handlers = getConfig().get(AUDIT_REQUEST_HANDLERS);
if (handlers != null) {
for (Class<?> handler : handlers) {
V2Migration.v1RequestHandlersUsed();
}
// TODO: should we remove this and use Global/Service interceptors, see:
// https://sdk.amazonaws.com/java/api/latest/software/amazon/awssdk/core/interceptor/ExecutionInterceptor.html
final Class<?>[] interceptors = getConfig().getClasses(AUDIT_EXECUTION_INTERCEPTORS);
if (interceptors != null) {
for (Class<?> handler : interceptors) {
try {
Constructor<?> ctor = handler.getConstructor();
requestHandlers.add((RequestHandler2)ctor.newInstance());
executionInterceptors.add((ExecutionInterceptor) ctor.newInstance());
} catch (ExceptionInInitializerError e) {
throw FutureIO.unwrapInnerException(e);
} catch (Exception e) {
@ -416,13 +424,18 @@ public final class ActiveAuditManagerS3A
}
}
}
return requestHandlers;
return executionInterceptors;
}
@Override
public TransferStateChangeListener createStateChangeListener() {
public TransferListener createTransferListener() {
final WrappingAuditSpan span = activeSpan();
return (transfer, state) -> switchToActiveSpan(span);
return new TransferListener() {
@Override
public void transferInitiated(Context.TransferInitiated context) {
switchToActiveSpan(span);
}
};
}
@Override
@ -434,20 +447,18 @@ public final class ActiveAuditManagerS3A
}
/**
* Attach a reference to the active thread span, then
* invoke the same callback on that active thread.
* Audit the creation of a request and retrieve
* a reference to the active thread span.
*/
@Override
public <T extends AmazonWebServiceRequest> T requestCreated(
final T request) {
public void requestCreated(final SdkRequest.Builder builder) {
AuditSpanS3A span = getActiveAuditSpan();
if (LOG.isTraceEnabled()) {
LOG.trace("Created Request {} in span {}",
analyzer.analyze(request), span);
analyzer.analyze(builder.build()), span);
}
attachSpanToRequest(request, span);
try {
return span.requestCreated(request);
span.requestCreated(builder);
} catch (AuditFailureException e) {
ioStatisticsStore.incrementCounter(AUDIT_FAILURE.getSymbol());
throw e;
@ -463,14 +474,13 @@ public final class ActiveAuditManagerS3A
* {@inheritDoc}
*/
@Override
public <T extends AmazonWebServiceRequest> T beforeExecution(
final T request) {
public void beforeExecution(Context.BeforeExecution context,
ExecutionAttributes executionAttributes) {
ioStatisticsStore.incrementCounter(AUDIT_REQUEST_EXECUTION.getSymbol());
// identify the span and invoke the callback
AuditSpanS3A span = getActiveAuditSpan();
attachSpanToRequest(executionAttributes, span);
try {
return extractAndActivateSpanFromRequest(request)
.beforeExecution(request);
span.beforeExecution(context, executionAttributes);
} catch (AuditFailureException e) {
ioStatisticsStore.incrementCounter(AUDIT_FAILURE.getSymbol());
throw e;
@ -479,16 +489,14 @@ public final class ActiveAuditManagerS3A
/**
* Forward to active span.
* @param request request
* @param response response.
* {@inheritDoc}
*/
@Override
public void afterResponse(final Request<?> request,
final Response<?> response)
throws AuditFailureException, SdkBaseException {
public void afterExecution(Context.AfterExecution context,
ExecutionAttributes executionAttributes) {
try {
extractAndActivateSpanFromRequest(request)
.afterResponse(request, response);
extractAndActivateSpanFromRequest(context.request(), executionAttributes)
.afterExecution(context, executionAttributes);
} catch (AuditFailureException e) {
ioStatisticsStore.incrementCounter(AUDIT_FAILURE.getSymbol());
throw e;
@ -496,18 +504,19 @@ public final class ActiveAuditManagerS3A
}
/**
* Get the active span from the handler context,
* Get the active span from the execution attributes,
* falling back to the active thread span if there
* is nothing in the context.
* Provided the span is a wrapped span, the
* is nothing in the attributes.
* Provided the span is a wrapped span, the span is
* activated.
* @param request request
* @param <T> type of request.
* @return the callbacks
* @param executionAttributes the execution attributes
* @return the active span
*/
private <T extends HandlerContextAware> AWSAuditEventCallbacks
extractAndActivateSpanFromRequest(final T request) {
AWSAuditEventCallbacks span;
span = retrieveAttachedSpan(request);
private AuditSpanS3A extractAndActivateSpanFromRequest(
final SdkRequest request,
final ExecutionAttributes executionAttributes) {
AuditSpanS3A span = retrieveAttachedSpan(executionAttributes);
if (span == null) {
// no span is attached. Not unusual for the copy operations,
// or for calls to GetBucketLocation made by the AWS client
@ -530,18 +539,16 @@ public final class ActiveAuditManagerS3A
/**
* Forward to active span.
* @param request request
* @param response response.
* @param exception exception raised.
* @param context execution context
* @param executionAttributes the execution attributes
* {@inheritDoc}
*/
@Override
public void afterError(final Request<?> request,
final Response<?> response,
final Exception exception)
throws AuditFailureException, SdkBaseException {
public void onExecutionFailure(Context.FailedExecution context,
ExecutionAttributes executionAttributes) {
try {
extractAndActivateSpanFromRequest(request)
.afterError(request, response, exception);
extractAndActivateSpanFromRequest(context.request(), executionAttributes).onExecutionFailure(
context, executionAttributes);
} catch (AuditFailureException e) {
ioStatisticsStore.incrementCounter(AUDIT_FAILURE.getSymbol());
throw e;
@ -549,11 +556,12 @@ public final class ActiveAuditManagerS3A
}
@Override
public AmazonWebServiceRequest beforeMarshalling(
final AmazonWebServiceRequest request) {
public SdkRequest modifyRequest(Context.ModifyRequest context,
ExecutionAttributes executionAttributes) {
try {
return extractAndActivateSpanFromRequest(request)
.beforeMarshalling(request);
return extractAndActivateSpanFromRequest(context.request(),
executionAttributes)
.modifyRequest(context, executionAttributes);
} catch (AuditFailureException e) {
ioStatisticsStore.incrementCounter(AUDIT_FAILURE.getSymbol());
throw e;
@ -561,10 +569,12 @@ public final class ActiveAuditManagerS3A
}
@Override
public void beforeRequest(final Request<?> request) {
public void beforeMarshalling(Context.BeforeMarshalling context,
ExecutionAttributes executionAttributes) {
try {
extractAndActivateSpanFromRequest(request)
.beforeRequest(request);
extractAndActivateSpanFromRequest(context.request(),
executionAttributes)
.beforeMarshalling(context, executionAttributes);
} catch (AuditFailureException e) {
ioStatisticsStore.incrementCounter(AUDIT_FAILURE.getSymbol());
throw e;
@ -572,10 +582,12 @@ public final class ActiveAuditManagerS3A
}
@Override
public void beforeAttempt(final HandlerBeforeAttemptContext context) {
public void afterMarshalling(Context.AfterMarshalling context,
ExecutionAttributes executionAttributes) {
try {
extractAndActivateSpanFromRequest(context.getRequest())
.beforeAttempt(context);
extractAndActivateSpanFromRequest(context.request(),
executionAttributes)
.afterMarshalling(context, executionAttributes);
} catch (AuditFailureException e) {
ioStatisticsStore.incrementCounter(AUDIT_FAILURE.getSymbol());
throw e;
@ -583,10 +595,12 @@ public final class ActiveAuditManagerS3A
}
@Override
public void afterAttempt(final HandlerAfterAttemptContext context) {
public SdkHttpRequest modifyHttpRequest(Context.ModifyHttpRequest context,
ExecutionAttributes executionAttributes) {
try {
extractAndActivateSpanFromRequest(context.getRequest())
.afterAttempt(context);
return extractAndActivateSpanFromRequest(context.request(),
executionAttributes)
.modifyHttpRequest(context, executionAttributes);
} catch (AuditFailureException e) {
ioStatisticsStore.incrementCounter(AUDIT_FAILURE.getSymbol());
throw e;
@ -594,73 +608,80 @@ public final class ActiveAuditManagerS3A
}
@Override
public HttpResponse beforeUnmarshalling(final Request<?> request,
final HttpResponse httpResponse) {
public void beforeTransmission(Context.BeforeTransmission context,
ExecutionAttributes executionAttributes) {
try {
extractAndActivateSpanFromRequest(request.getOriginalRequest())
.beforeUnmarshalling(request, httpResponse);
extractAndActivateSpanFromRequest(context.request(),
executionAttributes)
.beforeTransmission(context, executionAttributes);
} catch (AuditFailureException e) {
ioStatisticsStore.incrementCounter(AUDIT_FAILURE.getSymbol());
throw e;
}
return httpResponse;
}
/**
* Callbacks from the AWS SDK; all forward to the ActiveAuditManagerS3A.
* We need a separate class because the SDK requires the handler list
* to be list of {@code RequestHandler2} instances.
*/
private class SdkRequestHandler extends RequestHandler2 {
@Override
public AmazonWebServiceRequest beforeExecution(
final AmazonWebServiceRequest request) {
return ActiveAuditManagerS3A.this.beforeExecution(request);
@Override
public void afterTransmission(Context.AfterTransmission context,
ExecutionAttributes executionAttributes) {
try {
extractAndActivateSpanFromRequest(context.request(),
executionAttributes)
.afterTransmission(context, executionAttributes);
} catch (AuditFailureException e) {
ioStatisticsStore.incrementCounter(AUDIT_FAILURE.getSymbol());
throw e;
}
}
@Override
public void afterResponse(final Request<?> request,
final Response<?> response) {
ActiveAuditManagerS3A.this.afterResponse(request, response);
@Override
public SdkHttpResponse modifyHttpResponse(Context.ModifyHttpResponse context,
ExecutionAttributes executionAttributes) {
try {
return extractAndActivateSpanFromRequest(context.request(),
executionAttributes)
.modifyHttpResponse(context, executionAttributes);
} catch (AuditFailureException e) {
ioStatisticsStore.incrementCounter(AUDIT_FAILURE.getSymbol());
throw e;
}
}
@Override
public void afterError(final Request<?> request,
final Response<?> response,
final Exception e) {
ActiveAuditManagerS3A.this.afterError(request, response, e);
@Override
public void beforeUnmarshalling(Context.BeforeUnmarshalling context,
ExecutionAttributes executionAttributes) {
try {
extractAndActivateSpanFromRequest(context.request(),
executionAttributes)
.beforeUnmarshalling(context, executionAttributes);
} catch (AuditFailureException e) {
ioStatisticsStore.incrementCounter(AUDIT_FAILURE.getSymbol());
throw e;
}
}
@Override
public AmazonWebServiceRequest beforeMarshalling(
final AmazonWebServiceRequest request) {
return ActiveAuditManagerS3A.this.beforeMarshalling(request);
@Override
public void afterUnmarshalling(Context.AfterUnmarshalling context,
ExecutionAttributes executionAttributes) {
try {
extractAndActivateSpanFromRequest(context.request(),
executionAttributes)
.afterUnmarshalling(context, executionAttributes);
} catch (AuditFailureException e) {
ioStatisticsStore.incrementCounter(AUDIT_FAILURE.getSymbol());
throw e;
}
}
@Override
public void beforeRequest(final Request<?> request) {
ActiveAuditManagerS3A.this.beforeRequest(request);
}
@Override
public void beforeAttempt(
final HandlerBeforeAttemptContext context) {
ActiveAuditManagerS3A.this.beforeAttempt(context);
}
@Override
public HttpResponse beforeUnmarshalling(
final Request<?> request,
final HttpResponse httpResponse) {
return ActiveAuditManagerS3A.this.beforeUnmarshalling(request,
httpResponse);
}
@Override
public void afterAttempt(
final HandlerAfterAttemptContext context) {
ActiveAuditManagerS3A.this.afterAttempt(context);
@Override
public SdkResponse modifyResponse(Context.ModifyResponse context,
ExecutionAttributes executionAttributes) {
try {
return extractAndActivateSpanFromRequest(context.request(),
executionAttributes)
.modifyResponse(context, executionAttributes);
} catch (AuditFailureException e) {
ioStatisticsStore.incrementCounter(AUDIT_FAILURE.getSymbol());
throw e;
}
}
@ -748,9 +769,8 @@ public final class ActiveAuditManagerS3A
* {@inheritDoc}
*/
@Override
public <T extends AmazonWebServiceRequest> T requestCreated(
final T request) {
return span.requestCreated(request);
public void requestCreated(final SdkRequest.Builder builder) {
span.requestCreated(builder);
}
/**
@ -774,79 +794,132 @@ public final class ActiveAuditManagerS3A
/**
* Forward to the inner span.
* @param request request
* @param <T> type of request
* @return an updated request.
* {@inheritDoc}
*/
@Override
public <T extends AmazonWebServiceRequest> T beforeExecution(
final T request) {
return span.beforeExecution(request);
public void beforeExecution(Context.BeforeExecution context,
ExecutionAttributes executionAttributes) {
span.beforeExecution(context, executionAttributes);
}
/**
* Forward to the inner span.
* @param request request
* @param response response.
* {@inheritDoc}
*/
@Override
public void afterResponse(final Request<?> request,
final Response<?> response) {
span.afterResponse(request, response);
public void afterExecution(Context.AfterExecution context,
ExecutionAttributes executionAttributes) {
span.afterExecution(context, executionAttributes);
}
/**
* Forward to the inner span.
* @param request request
* @param response response.
* @param exception exception raised.
* {@inheritDoc}
*/
@Override
public void afterError(final Request<?> request,
final Response<?> response,
final Exception exception) {
span.afterError(request, response, exception);
public void onExecutionFailure(Context.FailedExecution context,
ExecutionAttributes executionAttributes) {
span.onExecutionFailure(context, executionAttributes);
}
/**
* Forward to the inner span.
* @param request request
* @return request to marshall
* {@inheritDoc}
*/
@Override
public AmazonWebServiceRequest beforeMarshalling(
final AmazonWebServiceRequest request) {
return span.beforeMarshalling(request);
public void beforeMarshalling(Context.BeforeMarshalling context,
ExecutionAttributes executionAttributes) {
span.beforeMarshalling(context, executionAttributes);
}
/**
* Forward to the inner span.
* @param request request
* {@inheritDoc}
*/
@Override
public void beforeRequest(final Request<?> request) {
span.beforeRequest(request);
public SdkRequest modifyRequest(Context.ModifyRequest context,
ExecutionAttributes executionAttributes) {
return span.modifyRequest(context, executionAttributes);
}
/**
* Forward to the inner span.
* @param context full context, including the request.
* {@inheritDoc}
*/
@Override
public void beforeAttempt(
final HandlerBeforeAttemptContext context) {
span.beforeAttempt(context);
public void afterMarshalling(Context.AfterMarshalling context,
ExecutionAttributes executionAttributes) {
span.afterMarshalling(context, executionAttributes);
}
/**
* Forward to the inner span.
*
* @param context full context, including the request.
* {@inheritDoc}
*/
@Override
public void afterAttempt(
final HandlerAfterAttemptContext context) {
span.afterAttempt(context);
public SdkHttpRequest modifyHttpRequest(Context.ModifyHttpRequest context,
ExecutionAttributes executionAttributes) {
return span.modifyHttpRequest(context, executionAttributes);
}
/**
* Forward to the inner span.
* {@inheritDoc}
*/
@Override
public void beforeTransmission(Context.BeforeTransmission context,
ExecutionAttributes executionAttributes) {
span.beforeTransmission(context, executionAttributes);
}
/**
* Forward to the inner span.
* {@inheritDoc}
*/
@Override
public void afterTransmission(Context.AfterTransmission context,
ExecutionAttributes executionAttributes) {
span.afterTransmission(context, executionAttributes);
}
/**
* Forward to the inner span.
* {@inheritDoc}
*/
@Override
public SdkHttpResponse modifyHttpResponse(Context.ModifyHttpResponse context,
ExecutionAttributes executionAttributes) {
return span.modifyHttpResponse(context, executionAttributes);
}
/**
* Forward to the inner span.
* {@inheritDoc}
*/
@Override
public void beforeUnmarshalling(Context.BeforeUnmarshalling context,
ExecutionAttributes executionAttributes) {
span.beforeUnmarshalling(context, executionAttributes);
}
/**
* Forward to the inner span.
* {@inheritDoc}
*/
@Override
public void afterUnmarshalling(Context.AfterUnmarshalling context,
ExecutionAttributes executionAttributes) {
span.afterUnmarshalling(context, executionAttributes);
}
/**
* Forward to the inner span.
* {@inheritDoc}
*/
@Override
public SdkResponse modifyResponse(Context.ModifyResponse context,
ExecutionAttributes executionAttributes) {
return span.modifyResponse(context, executionAttributes);
}
@Override
@ -859,5 +932,4 @@ public final class ActiveAuditManagerS3A
return sb.toString();
}
}
}

View File

@ -24,10 +24,14 @@ import java.util.Collection;
import java.util.HashMap;
import java.util.Map;
import com.amazonaws.AmazonWebServiceRequest;
import com.amazonaws.services.s3.model.DeleteObjectRequest;
import com.amazonaws.services.s3.model.DeleteObjectsRequest;
import com.amazonaws.services.s3.model.GetObjectRequest;
import software.amazon.awssdk.awscore.AwsExecutionAttribute;
import software.amazon.awssdk.core.SdkRequest;
import software.amazon.awssdk.core.interceptor.Context;
import software.amazon.awssdk.core.interceptor.ExecutionAttributes;
import software.amazon.awssdk.http.SdkHttpRequest;
import software.amazon.awssdk.services.s3.model.DeleteObjectRequest;
import software.amazon.awssdk.services.s3.model.DeleteObjectsRequest;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@ -258,21 +262,22 @@ public class LoggingAuditor
/**
* Attach Range of data for GetObject Request.
* @param request given get object request
* @param request the sdk request to be modified
* @param executionAttributes execution attributes for this request
*/
private void attachRangeFromRequest(AmazonWebServiceRequest request) {
if (request instanceof GetObjectRequest) {
long[] rangeValue = ((GetObjectRequest) request).getRange();
if (rangeValue == null || rangeValue.length == 0) {
return;
private void attachRangeFromRequest(SdkHttpRequest request,
ExecutionAttributes executionAttributes) {
String operationName = executionAttributes.getAttribute(AwsExecutionAttribute.OPERATION_NAME);
if (operationName != null && operationName.equals("GetObject")) {
if (request.headers() != null && request.headers().get("Range") != null) {
String[] rangeHeader = request.headers().get("Range").get(0).split("=");
// only set header if range unit is bytes
if (rangeHeader[0].equals("bytes")) {
referrer.set(AuditConstants.PARAM_RANGE, rangeHeader[1]);
}
}
if (rangeValue.length != 2) {
WARN_INCORRECT_RANGE.warn("Expected range to contain 0 or 2 elements."
+ " Got {} elements. Ignoring.", rangeValue.length);
return;
}
String combinedRangeValue = String.format("%d-%d", rangeValue[0], rangeValue[1]);
referrer.set(AuditConstants.PARAM_RANGE, combinedRangeValue);
}
}
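
Tracing the parsing above with illustrative values: a Range header of bytes=0-1023 splits on '=' into the unit and the byte range, and only the byte range is recorded as the referrer attribute:

String headerValue = "bytes=0-1023";            // illustrative Range header value
String[] rangeHeader = headerValue.split("=");  // ["bytes", "0-1023"]
if (rangeHeader[0].equals("bytes")) {
  // this is the value recorded under AuditConstants.PARAM_RANGE
  System.out.println(rangeHeader[1]);
}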
@ -346,64 +351,78 @@ public class LoggingAuditor
referrer.set(key, value);
}
/**
* Before execution, the logging auditor always builds
* the referrer header, saves to the outer class
* (where {@link #getLastHeader()} can retrieve it,
* Before transmitting a request, the logging auditor
* always builds the referrer header, saves to the outer
* class (where {@link #getLastHeader()} can retrieve it),
* and logs at debug.
* If configured to add the header to the S3 logs, it will
* be set as the HTTP referrer.
* @param request request
* @param <T> type of request.
* @return the request with any extra headers.
* @param context The current state of the execution,
* including the SDK and current HTTP request.
* @param executionAttributes A mutable set of attributes scoped
* to one specific request/response
* cycle that can be used to give data
* to future lifecycle methods.
* @return The potentially-modified HTTP request that should be
* sent to the service. Must not be null.
*/
@Override
public <T extends AmazonWebServiceRequest> T beforeExecution(
final T request) {
public SdkHttpRequest modifyHttpRequest(Context.ModifyHttpRequest context,
ExecutionAttributes executionAttributes) {
SdkHttpRequest httpRequest = context.httpRequest();
SdkRequest sdkRequest = context.request();
// attach range for GetObject requests
attachRangeFromRequest(request);
attachRangeFromRequest(httpRequest, executionAttributes);
// for delete op, attach the number of files to delete
attachDeleteKeySizeAttribute(request);
attachDeleteKeySizeAttribute(sdkRequest);
// build the referrer header
final String header = referrer.buildHttpReferrer();
// update the outer class's field.
setLastHeader(header);
if (headerEnabled) {
// add the referrer header
request.putCustomRequestHeader(HEADER_REFERRER,
header);
httpRequest = httpRequest.toBuilder()
.appendHeader(HEADER_REFERRER, header)
.build();
}
if (LOG.isDebugEnabled()) {
LOG.debug("[{}] {} Executing {} with {}; {}",
currentThreadID(),
getSpanId(),
getOperationName(),
analyzer.analyze(request),
analyzer.analyze(context.request()),
header);
}
// now see if the request is actually a blocked multipart request
if (!isMultipartUploadEnabled && isRequestMultipartIO(request)) {
if (!isMultipartUploadEnabled && isRequestMultipartIO(sdkRequest)) {
throw new AuditOperationRejectedException("Multipart IO request "
+ request + " rejected " + header);
+ sdkRequest + " rejected " + header);
}
return request;
return httpRequest;
}
/**
* For delete requests, attach delete key size as a referrer attribute.
*
* @param request the request object.
* @param <T> type of the request.
*/
private <T extends AmazonWebServiceRequest> void attachDeleteKeySizeAttribute(T request) {
private void attachDeleteKeySizeAttribute(SdkRequest request) {
if (request instanceof DeleteObjectsRequest) {
int keySize = ((DeleteObjectsRequest) request).getKeys().size();
this.set(DELETE_KEYS_SIZE, String.valueOf(keySize));
int keySize = ((DeleteObjectsRequest) request).delete().objects().size();
referrer.set(DELETE_KEYS_SIZE, String.valueOf(keySize));
} else if (request instanceof DeleteObjectRequest) {
String key = ((DeleteObjectRequest) request).getKey();
String key = ((DeleteObjectRequest) request).key();
if (key != null && key.length() > 0) {
this.set(DELETE_KEYS_SIZE, "1");
referrer.set(DELETE_KEYS_SIZE, "1");
}
}
}
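A sketch of the request shapes this method inspects, assuming illustrative bucket/key names and the usual software.amazon.awssdk.services.s3.model imports (not part of the commit):

    // bulk delete: delete().objects().size() == 2, so DELETE_KEYS_SIZE is "2"
    DeleteObjectsRequest bulkDelete = DeleteObjectsRequest.builder()
        .bucket("example-bucket")
        .delete(Delete.builder()
            .objects(
                ObjectIdentifier.builder().key("dir/file1").build(),
                ObjectIdentifier.builder().key("dir/file2").build())
            .build())
        .build();

    // single delete: key() is non-empty, so DELETE_KEYS_SIZE is "1"
    DeleteObjectRequest singleDelete = DeleteObjectRequest.builder()
        .bucket("example-bucket")
        .key("dir/file3")
        .build();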
@ -460,15 +479,13 @@ public class LoggingAuditor
}
@Override
public <T extends AmazonWebServiceRequest> T requestCreated(
final T request) {
public void requestCreated(final SdkRequest.Builder builder) {
String error = "Creating a request outside an audit span "
+ analyzer.analyze(request);
+ analyzer.analyze(builder.build());
LOG.info(error);
if (LOG.isDebugEnabled()) {
LOG.debug(error, new AuditFailureException("unaudited"));
}
return request;
}
/**
@ -476,20 +493,22 @@ public class LoggingAuditor
* increment the failure count.
* Some requests (e.g. copy part) are not expected in spans due
* to how they are executed; these do not trigger failures.
* @param request request
* @param <T> type of request
* @return an updated request.
* @throws AuditFailureException if failure is enabled.
* @param context The current state of the execution, including
* the unmodified SDK request from the service
* client call.
* @param executionAttributes A mutable set of attributes scoped
* to one specific request/response
* cycle that can be used to give data
* to future lifecycle methods.
*/
@Override
public <T extends AmazonWebServiceRequest> T beforeExecution(
final T request) {
public void beforeExecution(Context.BeforeExecution context,
ExecutionAttributes executionAttributes) {
String error = "executing a request outside an audit span "
+ analyzer.analyze(request);
+ analyzer.analyze(context.request());
final String unaudited = getSpanId() + " "
+ UNAUDITED_OPERATION + " " + error;
if (isRequestNotAlwaysInSpan(request)) {
if (isRequestNotAlwaysInSpan(context.request())) {
// can get by auditing during a copy, so don't overreact
LOG.debug(unaudited);
} else {
@ -500,7 +519,7 @@ public class LoggingAuditor
}
}
// now hand off to the superclass for its normal preparation
return super.beforeExecution(request);
super.beforeExecution(context, executionAttributes);
}
}
}

View File

@ -24,9 +24,8 @@ import java.util.ArrayList;
import java.util.List;
import java.util.UUID;
import com.amazonaws.handlers.RequestHandler2;
import com.amazonaws.services.s3.transfer.Transfer;
import com.amazonaws.services.s3.transfer.internal.TransferStateChangeListener;
import software.amazon.awssdk.core.interceptor.ExecutionInterceptor;
import software.amazon.awssdk.transfer.s3.progress.TransferListener;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.conf.Configuration;
@ -39,6 +38,7 @@ import org.apache.hadoop.fs.s3a.audit.OperationAuditor;
import org.apache.hadoop.fs.s3a.audit.OperationAuditorOptions;
import org.apache.hadoop.service.CompositeService;
import static org.apache.hadoop.fs.statistics.impl.IOStatisticsBinding.iostatisticsStore;
/**
@ -121,17 +121,13 @@ public class NoopAuditManagerS3A extends CompositeService
}
@Override
public List<RequestHandler2> createRequestHandlers() throws IOException {
public List<ExecutionInterceptor> createExecutionInterceptors() throws IOException {
return new ArrayList<>();
}
@Override
public TransferStateChangeListener createStateChangeListener() {
return new TransferStateChangeListener() {
public void transferStateChanged(final Transfer transfer,
final Transfer.TransferState state) {
}
};
public TransferListener createTransferListener() {
return new TransferListener() {};
}
/**

View File

@ -18,10 +18,10 @@
package org.apache.hadoop.fs.s3a.audit.impl;
import com.amazonaws.handlers.HandlerContextKey;
import software.amazon.awssdk.core.interceptor.ExecutionAttribute;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.fs.s3a.audit.AWSAuditEventCallbacks;
import org.apache.hadoop.fs.s3a.audit.AuditSpanS3A;
/**
* Internal constants; not intended for public use, or
@ -34,11 +34,11 @@ public final class S3AInternalAuditConstants {
}
/**
* Handler key for audit span callbacks.
* This is used to bind the handler in the AWS code.
   * Execution attribute for audit span callbacks.
* This is used to retrieve the span in the AWS code.
*/
public static final HandlerContextKey<AWSAuditEventCallbacks>
AUDIT_SPAN_HANDLER_CONTEXT =
new HandlerContextKey<>(
"org.apache.hadoop.fs.s3a.audit.AWSAuditEventCallbacks");
public static final ExecutionAttribute<AuditSpanS3A>
AUDIT_SPAN_EXECUTION_ATTRIBUTE =
new ExecutionAttribute<>(
"org.apache.hadoop.fs.s3a.audit.AuditSpanS3A");
}
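A minimal sketch of how this attribute is intended to be used from an execution interceptor, assuming an AuditSpanS3A named span is in scope and ExecutionAttributes is imported (illustrative, not part of the commit):

    ExecutionAttributes attributes = new ExecutionAttributes();
    // store the span so later lifecycle callbacks can find it
    attributes.putAttribute(
        S3AInternalAuditConstants.AUDIT_SPAN_EXECUTION_ATTRIBUTE, span);
    // ...and retrieve it again inside a later interceptor method
    AuditSpanS3A retrieved =
        attributes.getAttribute(S3AInternalAuditConstants.AUDIT_SPAN_EXECUTION_ATTRIBUTE);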

View File

@ -21,7 +21,7 @@ package org.apache.hadoop.fs.s3a.auth;
import javax.annotation.Nullable;
import java.net.URI;
import com.amazonaws.auth.AWSCredentialsProvider;
import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider;
import org.apache.hadoop.conf.Configuration;
@ -29,12 +29,9 @@ import org.apache.hadoop.conf.Configuration;
* Base class for AWS credential providers which
* take a URI and config in their constructor.
*
* @deprecated This class will be replaced by one that implements AWS SDK V2's AwsCredentialProvider
* as part of upgrading S3A to SDK V2. See HADOOP-18073.
*/
@Deprecated
public abstract class AbstractAWSCredentialProvider
implements AWSCredentialsProvider {
implements AwsCredentialsProvider {
private final URI binding;
@ -65,10 +62,4 @@ public abstract class AbstractAWSCredentialProvider
return binding;
}
/**
* Refresh is a no-op by default.
*/
@Override
public void refresh() {
}
}

View File

@ -23,8 +23,8 @@ import java.net.URI;
import java.io.IOException;
import java.util.concurrent.atomic.AtomicBoolean;
import com.amazonaws.SdkBaseException;
import com.amazonaws.auth.AWSCredentials;
import software.amazon.awssdk.core.exception.SdkException;
import software.amazon.awssdk.auth.credentials.AwsCredentials;
import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting;
import org.apache.hadoop.classification.InterfaceAudience;
@ -33,19 +33,17 @@ import org.apache.hadoop.fs.s3a.CredentialInitializationException;
import org.apache.hadoop.fs.s3a.Invoker;
import org.apache.hadoop.fs.s3a.Retries;
/**
* Base class for session credential support.
*
* @deprecated This class will be replaced by one that implements AWS SDK V2's AwsCredentialProvider
* as part of upgrading S3A to SDK V2. See HADOOP-18073.
*/
@InterfaceAudience.Private
@Deprecated
public abstract class AbstractSessionCredentialsProvider
extends AbstractAWSCredentialProvider {
/** Credentials, created in {@link #init()}. */
private volatile AWSCredentials awsCredentials;
private volatile AwsCredentials awsCredentials;
/** Atomic flag for on-demand initialization. */
private final AtomicBoolean initialized = new AtomicBoolean(false);
@ -105,7 +103,7 @@ public abstract class AbstractSessionCredentialsProvider
* @return the credentials
* @throws IOException on any failure.
*/
protected abstract AWSCredentials createCredentials(Configuration config)
protected abstract AwsCredentials createCredentials(Configuration config)
throws IOException;
/**
@ -115,10 +113,10 @@ public abstract class AbstractSessionCredentialsProvider
* is thrown here before any attempt to return the credentials
* is made.
* @return credentials, if set.
* @throws SdkBaseException if one was raised during init
* @throws SdkException if one was raised during init
* @throws CredentialInitializationException on other failures.
*/
public AWSCredentials getCredentials() throws SdkBaseException {
public AwsCredentials resolveCredentials() throws SdkException {
// do an on-demand init then raise an AWS SDK exception if
// there was a failure.
try {
@ -126,8 +124,8 @@ public abstract class AbstractSessionCredentialsProvider
init();
}
} catch (IOException e) {
if (e.getCause() instanceof SdkBaseException) {
throw (SdkBaseException) e.getCause();
if (e.getCause() instanceof SdkException) {
throw (SdkException) e.getCause();
} else {
throw new CredentialInitializationException(e.getMessage(), e);
}
@ -165,15 +163,16 @@ public abstract class AbstractSessionCredentialsProvider
* This will be interpreted as "this provider has no credentials to offer",
* rather than an explicit error or anonymous access.
*/
protected static final class NoCredentials implements AWSCredentials {
protected static final class NoCredentials implements AwsCredentials {
@Override
public String getAWSAccessKeyId() {
public String accessKeyId() {
return null;
}
@Override
public String getAWSSecretKey() {
public String secretAccessKey() {
return null;
}
}
}
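For comparison, a minimal sketch of the V2 provider contract these classes now implement; AwsCredentialsProvider is a functional interface exposing only resolveCredentials(), and the key values below are placeholders (not part of the commit):

    AwsCredentialsProvider fixedProvider = () ->
        AwsBasicCredentials.create("placeholder-access-key", "placeholder-secret-key");
    AwsCredentials resolved = fixedProvider.resolveCredentials();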

View File

@ -26,12 +26,14 @@ import java.util.Arrays;
import java.util.Locale;
import java.util.concurrent.TimeUnit;
import com.amazonaws.auth.AWSCredentials;
import com.amazonaws.auth.AWSCredentialsProvider;
import com.amazonaws.auth.EnvironmentVariableCredentialsProvider;
import com.amazonaws.auth.STSAssumeRoleSessionCredentialsProvider;
import com.amazonaws.services.securitytoken.AWSSecurityTokenServiceClientBuilder;
import com.amazonaws.services.securitytoken.model.AWSSecurityTokenServiceException;
import software.amazon.awssdk.auth.credentials.AwsCredentials;
import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider;
import software.amazon.awssdk.auth.credentials.EnvironmentVariableCredentialsProvider;
import software.amazon.awssdk.core.exception.SdkClientException;
import software.amazon.awssdk.services.sts.StsClient;
import software.amazon.awssdk.services.sts.auth.StsAssumeRoleCredentialsProvider;
import software.amazon.awssdk.services.sts.model.AssumeRoleRequest;
import software.amazon.awssdk.services.sts.model.StsException;
import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting;
import org.apache.hadoop.thirdparty.com.google.common.collect.Sets;
import org.slf4j.Logger;
@ -51,7 +53,7 @@ import org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider;
import org.apache.hadoop.security.UserGroupInformation;
import static org.apache.hadoop.fs.s3a.Constants.*;
import static org.apache.hadoop.fs.s3a.S3AUtils.buildAWSProviderList;
import static org.apache.hadoop.fs.s3a.auth.AwsCredentialListProvider.buildAWSProviderList;
/**
* Support IAM Assumed roles by instantiating an instance of
@ -61,13 +63,10 @@ import static org.apache.hadoop.fs.s3a.S3AUtils.buildAWSProviderList;
*
* Classname is used in configuration files; do not move.
*
* @deprecated This class will be replaced by one that implements AWS SDK V2's AwsCredentialProvider
* as part of upgrading S3A to SDK V2. See HADOOP-18073.
*/
@InterfaceAudience.Public
@InterfaceStability.Evolving
@Deprecated
public class AssumedRoleCredentialProvider implements AWSCredentialsProvider,
public class AssumedRoleCredentialProvider implements AwsCredentialsProvider,
Closeable {
private static final Logger LOG =
@ -78,7 +77,7 @@ public class AssumedRoleCredentialProvider implements AWSCredentialsProvider,
public static final String E_NO_ROLE = "Unset property "
+ ASSUMED_ROLE_ARN;
private final STSAssumeRoleSessionCredentialsProvider stsProvider;
private final StsAssumeRoleCredentialsProvider stsProvider;
private final String sessionName;
@ -90,15 +89,17 @@ public class AssumedRoleCredentialProvider implements AWSCredentialsProvider,
private final Invoker invoker;
private final StsClient stsClient;
/**
* Instantiate.
* This calls {@link #getCredentials()} to fail fast on the inner
* This calls {@link #resolveCredentials()} to fail fast on the inner
* role credential retrieval.
* @param fsUri possibly null URI of the filesystem.
* @param conf configuration
* @throws IOException on IO problems and some parameter checking
* @throws IllegalArgumentException invalid parameters
* @throws AWSSecurityTokenServiceException problems getting credentials
* @throws StsException problems getting credentials
*/
public AssumedRoleCredentialProvider(@Nullable URI fsUri, Configuration conf)
throws IOException {
@ -125,29 +126,31 @@ public class AssumedRoleCredentialProvider implements AWSCredentialsProvider,
String policy = conf.getTrimmed(ASSUMED_ROLE_POLICY, "");
LOG.debug("{}", this);
STSAssumeRoleSessionCredentialsProvider.Builder builder
= new STSAssumeRoleSessionCredentialsProvider.Builder(arn, sessionName);
builder.withRoleSessionDurationSeconds((int) duration);
AssumeRoleRequest.Builder requestBuilder =
AssumeRoleRequest.builder().roleArn(arn).roleSessionName(sessionName)
.durationSeconds((int) duration);
if (StringUtils.isNotEmpty(policy)) {
LOG.debug("Scope down policy {}", policy);
builder.withScopeDownPolicy(policy);
requestBuilder.policy(policy);
}
String endpoint = conf.getTrimmed(ASSUMED_ROLE_STS_ENDPOINT, "");
String region = conf.getTrimmed(ASSUMED_ROLE_STS_ENDPOINT_REGION,
ASSUMED_ROLE_STS_ENDPOINT_REGION_DEFAULT);
AWSSecurityTokenServiceClientBuilder stsbuilder =
stsClient =
STSClientFactory.builder(
conf,
fsUri != null ? fsUri.getHost() : "",
credentialsToSTS,
endpoint,
region);
// the STS client is not tracked for a shutdown in close(), because it
// (currently) throws an UnsupportedOperationException in shutdown().
builder.withStsClient(stsbuilder.build());
region).build();
//now build the provider
stsProvider = builder.build();
stsProvider = StsAssumeRoleCredentialsProvider.builder()
.refreshRequest(requestBuilder.build())
.stsClient(stsClient).build();
// to handle STS throttling by the AWS account, we
// need to retry
@ -155,21 +158,21 @@ public class AssumedRoleCredentialProvider implements AWSCredentialsProvider,
// and force in a fail-fast check just to keep the stack traces less
// convoluted
getCredentials();
resolveCredentials();
}
/**
* Get credentials.
* @return the credentials
* @throws AWSSecurityTokenServiceException if none could be obtained.
* @throws StsException if none could be obtained.
*/
@Override
@Retries.RetryRaw
public AWSCredentials getCredentials() {
public AwsCredentials resolveCredentials() {
try {
return invoker.retryUntranslated("getCredentials",
true,
stsProvider::getCredentials);
stsProvider::resolveCredentials);
} catch (IOException e) {
// this is in the signature of retryUntranslated;
      // it's hard to see how this could be raised, but for
@ -178,24 +181,19 @@ public class AssumedRoleCredentialProvider implements AWSCredentialsProvider,
throw new CredentialInitializationException(
"getCredentials failed: " + e,
e);
} catch (AWSSecurityTokenServiceException e) {
} catch (SdkClientException e) {
LOG.error("Failed to get credentials for role {}",
arn, e);
throw e;
}
}
@Override
public void refresh() {
stsProvider.refresh();
}
/**
* Propagate the close() call to the inner stsProvider.
*/
@Override
public void close() {
S3AUtils.closeAutocloseables(LOG, stsProvider, credentialsToSTS);
S3AUtils.closeAutocloseables(LOG, stsProvider, credentialsToSTS, stsClient);
}
@Override

View File

@ -0,0 +1,283 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs.s3a.auth;
import java.io.IOException;
import java.lang.reflect.Modifier;
import java.net.URI;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import javax.annotation.Nullable;
import com.amazonaws.auth.AWSCredentialsProvider;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider;
import software.amazon.awssdk.auth.credentials.EnvironmentVariableCredentialsProvider;
import org.apache.hadoop.classification.VisibleForTesting;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.s3a.AWSCredentialProviderList;
import org.apache.hadoop.fs.s3a.Constants;
import org.apache.hadoop.fs.s3a.S3AUtils;
import org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider;
import org.apache.hadoop.fs.s3a.TemporaryAWSCredentialsProvider;
import org.apache.hadoop.fs.s3native.S3xLoginHelper;
import static org.apache.hadoop.fs.s3a.Constants.AWS_AUTH_CLASS_PREFIX;
import static org.apache.hadoop.fs.s3a.Constants.AWS_CREDENTIALS_PROVIDER;
/**
* This class provides methods to create the list of AWS credential providers.
*/
public final class AwsCredentialListProvider {
private AwsCredentialListProvider() {
}
private static final Logger LOG = LoggerFactory.getLogger(AwsCredentialListProvider.class);
public static final String NOT_AWS_PROVIDER =
"does not implement AWSCredentialsProvider";
public static final String NOT_AWS_V2_PROVIDER =
"does not implement AwsCredentialsProvider";
public static final String ABSTRACT_PROVIDER =
"is abstract and therefore cannot be created";
/**
* Error message when the AWS provider list built up contains a forbidden
* entry.
*/
@VisibleForTesting
public static final String E_FORBIDDEN_AWS_PROVIDER
= "AWS provider class cannot be used";
/**
* The standard AWS provider list for AWS connections.
*/
public static final List<Class<?>>
STANDARD_AWS_PROVIDERS = Collections.unmodifiableList(
Arrays.asList(
TemporaryAWSCredentialsProvider.class,
SimpleAWSCredentialsProvider.class,
EnvironmentVariableCredentialsProvider.class,
IAMInstanceCredentialsProvider.class));
/**
* Create the AWS credentials from the providers, the URI and
* the key {@link Constants#AWS_CREDENTIALS_PROVIDER} in the configuration.
* @param binding Binding URI -may be null
* @param conf filesystem configuration
* @return a credentials provider list
* @throws IOException Problems loading the providers (including reading
* secrets from credential files).
*/
public static AWSCredentialProviderList createAWSCredentialProviderSet(
@Nullable URI binding,
Configuration conf) throws IOException {
// this will reject any user:secret entries in the URI
S3xLoginHelper.rejectSecretsInURIs(binding);
AWSCredentialProviderList credentials =
buildAWSProviderList(binding,
conf,
AWS_CREDENTIALS_PROVIDER,
STANDARD_AWS_PROVIDERS,
new HashSet<>());
// make sure the logging message strips out any auth details
LOG.debug("For URI {}, using credentials {}",
binding, credentials);
return credentials;
}
/**
* Load list of AWS credential provider/credential provider factory classes.
* @param conf configuration
* @param key key
* @param defaultValue list of default values
* @return the list of classes, possibly empty
* @throws IOException on a failure to load the list.
*/
private static List<Class<?>> loadAWSProviderClasses(Configuration conf,
String key,
Class<?>... defaultValue) throws IOException {
try {
return Arrays.asList(conf.getClasses(key, defaultValue));
} catch (RuntimeException e) {
Throwable c = e.getCause() != null ? e.getCause() : e;
throw new IOException("From option " + key + ' ' + c, c);
}
}
/**
   * Maps V1 credential providers to either their equivalent SDK V2 class or a Hadoop provider.
*/
private static Map<String, Class> initCredentialProvidersMap() {
Map<String, Class> v1v2CredentialProviderMap = new HashMap<>();
v1v2CredentialProviderMap.put("EnvironmentVariableCredentialsProvider",
EnvironmentVariableCredentialsProvider.class);
v1v2CredentialProviderMap.put("EC2ContainerCredentialsProviderWrapper",
IAMInstanceCredentialsProvider.class);
v1v2CredentialProviderMap.put("InstanceProfileCredentialsProvider",
IAMInstanceCredentialsProvider.class);
return v1v2CredentialProviderMap;
}
/**
* Load list of AWS credential provider/credential provider factory classes;
* support a forbidden list to prevent loops, mandate full secrets, etc.
* @param binding Binding URI -may be null
* @param conf configuration
* @param key key
* @param forbidden a possibly empty set of forbidden classes.
* @param defaultValues list of default providers.
* @return the list of classes, possibly empty
* @throws IOException on a failure to load the list.
*/
public static AWSCredentialProviderList buildAWSProviderList(
@Nullable final URI binding,
final Configuration conf,
final String key,
final List<Class<?>> defaultValues,
final Set<Class<?>> forbidden) throws IOException {
// build up the base provider
List<Class<?>> awsClasses = loadAWSProviderClasses(conf,
key,
defaultValues.toArray(new Class[defaultValues.size()]));
Map<String, Class> v1v2CredentialProviderMap = initCredentialProvidersMap();
// and if the list is empty, switch back to the defaults.
// this is to address the issue that configuration.getClasses()
// doesn't return the default if the config value is just whitespace.
if (awsClasses.isEmpty()) {
awsClasses = defaultValues;
}
// iterate through, checking for blacklists and then instantiating
// each provider
AWSCredentialProviderList providers = new AWSCredentialProviderList();
for (Class<?> aClass : awsClasses) {
if (forbidden.contains(aClass)) {
throw new IOException(E_FORBIDDEN_AWS_PROVIDER
+ " in option " + key + ": " + aClass);
}
if (v1v2CredentialProviderMap.containsKey(aClass.getSimpleName()) &&
aClass.getName().contains(AWS_AUTH_CLASS_PREFIX)){
providers.add(createAWSV2CredentialProvider(conf,
v1v2CredentialProviderMap.get(aClass.getSimpleName()), binding));
} else if (AWSCredentialsProvider.class.isAssignableFrom(aClass)) {
providers.add(createAWSV1CredentialProvider(conf,
aClass, binding));
} else {
providers.add(createAWSV2CredentialProvider(conf, aClass, binding));
}
}
return providers;
}
/**
* Create an AWS credential provider from its class by using reflection. The
* class must implement one of the following means of construction, which are
* attempted in order:
*
* <ol>
* <li>a public constructor accepting java.net.URI and
* org.apache.hadoop.conf.Configuration</li>
* <li>a public constructor accepting
* org.apache.hadoop.conf.Configuration</li>
* <li>a public static method named getInstance that accepts no
* arguments and returns an instance of
* com.amazonaws.auth.AWSCredentialsProvider, or</li>
* <li>a public default constructor.</li>
* </ol>
*
* @param conf configuration
* @param credClass credential class
* @param uri URI of the FS
* @return the instantiated class
* @throws IOException on any instantiation failure.
*/
private static AWSCredentialsProvider createAWSV1CredentialProvider(Configuration conf,
Class<?> credClass, @Nullable URI uri) throws IOException {
AWSCredentialsProvider credentials = null;
String className = credClass.getName();
if (!AWSCredentialsProvider.class.isAssignableFrom(credClass)) {
throw new IOException("Class " + credClass + " " + NOT_AWS_PROVIDER);
}
if (Modifier.isAbstract(credClass.getModifiers())) {
throw new IOException("Class " + credClass + " " + ABSTRACT_PROVIDER);
}
LOG.debug("Credential provider class is {}", className);
credentials =
S3AUtils.getInstanceFromReflection(credClass, conf, uri, AWSCredentialsProvider.class,
"getInstance", AWS_CREDENTIALS_PROVIDER);
return credentials;
}
/**
* Create an AWS credential provider from its class by using reflection. The
* class must implement one of the following means of construction, which are
* attempted in order:
*
* <ol>
* <li>a public constructor accepting java.net.URI and
* org.apache.hadoop.conf.Configuration</li>
* <li>a public constructor accepting
* org.apache.hadoop.conf.Configuration</li>
   * <li>a public static method named create that accepts no
* arguments and returns an instance of
* software.amazon.awssdk.auth.credentials.AwsCredentialsProvider, or</li>
* <li>a public default constructor.</li>
* </ol>
*
* @param conf configuration
* @param credClass credential class
* @param uri URI of the FS
* @return the instantiated class
* @throws IOException on any instantiation failure.
*/
private static AwsCredentialsProvider createAWSV2CredentialProvider(Configuration conf,
Class<?> credClass, @Nullable URI uri) throws IOException {
AwsCredentialsProvider credentials = null;
String className = credClass.getName();
if (!AwsCredentialsProvider.class.isAssignableFrom(credClass)) {
throw new IOException("Class " + credClass + " " + NOT_AWS_V2_PROVIDER);
}
if (Modifier.isAbstract(credClass.getModifiers())) {
throw new IOException("Class " + credClass + " " + ABSTRACT_PROVIDER);
}
LOG.debug("Credential provider class is {}", className);
credentials =
S3AUtils.getInstanceFromReflection(credClass, conf, uri, AwsCredentialsProvider.class,
"create", AWS_CREDENTIALS_PROVIDER);
return credentials;
}
}
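A hedged usage sketch (not part of the commit): building the provider chain for an illustrative bucket URI; the calling code is assumed to handle the IOException declared by createAWSCredentialProviderSet.

    Configuration conf = new Configuration();
    conf.set(AWS_CREDENTIALS_PROVIDER,
        "org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider");
    AWSCredentialProviderList providers =
        AwsCredentialListProvider.createAWSCredentialProviderSet(
            URI.create("s3a://example-bucket/"), conf);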

View File

@ -21,10 +21,11 @@ package org.apache.hadoop.fs.s3a.auth;
import java.io.Closeable;
import java.io.IOException;
import com.amazonaws.AmazonClientException;
import com.amazonaws.auth.AWSCredentials;
import com.amazonaws.auth.AWSCredentialsProvider;
import com.amazonaws.auth.EC2ContainerCredentialsProviderWrapper;
import software.amazon.awssdk.auth.credentials.AwsCredentials;
import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider;
import software.amazon.awssdk.auth.credentials.ContainerCredentialsProvider;
import software.amazon.awssdk.auth.credentials.InstanceProfileCredentialsProvider;
import software.amazon.awssdk.core.exception.SdkClientException;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
@ -41,17 +42,14 @@ import org.apache.hadoop.classification.InterfaceStability;
* <p>
* It is implicitly public; marked evolving as we can change its semantics.
*
* @deprecated This class will be replaced by one that implements AWS SDK V2's AwsCredentialProvider
* as part of upgrading S3A to SDK V2. See HADOOP-18073.
*/
@InterfaceAudience.Public
@InterfaceStability.Evolving
@Deprecated
public class IAMInstanceCredentialsProvider
implements AWSCredentialsProvider, Closeable {
implements AwsCredentialsProvider, Closeable {
private final AWSCredentialsProvider provider =
new EC2ContainerCredentialsProviderWrapper();
private final AwsCredentialsProvider containerCredentialsProvider =
ContainerCredentialsProvider.builder().build();
public IAMInstanceCredentialsProvider() {
}
@ -63,19 +61,29 @@ public class IAMInstanceCredentialsProvider
* @throws NoAwsCredentialsException on auth failure to indicate non-recoverable.
*/
@Override
public AWSCredentials getCredentials() {
public AwsCredentials resolveCredentials() {
try {
return provider.getCredentials();
} catch (AmazonClientException e) {
return getCredentials();
} catch (SdkClientException e) {
throw new NoAwsCredentialsException("IAMInstanceCredentialsProvider",
e.getMessage(),
e);
}
}
@Override
public void refresh() {
provider.refresh();
/**
* First try {@link ContainerCredentialsProvider}, which will throw an exception if credentials
* cannot be retrieved from the container. Then resolve credentials
* using {@link InstanceProfileCredentialsProvider}.
*
* @return credentials
*/
private AwsCredentials getCredentials() {
try {
return containerCredentialsProvider.resolveCredentials();
} catch (SdkClientException e) {
return InstanceProfileCredentialsProvider.create().resolveCredentials();
}
}
@Override

View File

@ -24,15 +24,13 @@ import java.util.Date;
import java.util.Map;
import java.util.concurrent.TimeUnit;
import com.amazonaws.ClientConfiguration;
import com.amazonaws.SdkClientException;
import com.amazonaws.auth.AWSCredentials;
import com.amazonaws.auth.AWSCredentialsProvider;
import com.amazonaws.auth.AWSSessionCredentials;
import com.amazonaws.auth.BasicAWSCredentials;
import com.amazonaws.auth.BasicSessionCredentials;
import com.amazonaws.services.securitytoken.AWSSecurityTokenService;
import com.amazonaws.services.securitytoken.model.Credentials;
import software.amazon.awssdk.auth.credentials.AwsBasicCredentials;
import software.amazon.awssdk.auth.credentials.AwsCredentials;
import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider;
import software.amazon.awssdk.auth.credentials.AwsSessionCredentials;
import software.amazon.awssdk.core.exception.SdkException;
import software.amazon.awssdk.services.sts.StsClient;
import software.amazon.awssdk.services.sts.model.Credentials;
import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@ -77,10 +75,10 @@ public final class MarshalledCredentialBinding {
public static MarshalledCredentials fromSTSCredentials(
final Credentials credentials) {
MarshalledCredentials marshalled = new MarshalledCredentials(
credentials.getAccessKeyId(),
credentials.getSecretAccessKey(),
credentials.getSessionToken());
Date date = credentials.getExpiration();
credentials.accessKeyId(),
credentials.secretAccessKey(),
credentials.sessionToken());
Date date = Date.from(credentials.expiration());
marshalled.setExpiration(date != null ? date.getTime() : 0);
return marshalled;
}
@ -91,11 +89,11 @@ public final class MarshalledCredentialBinding {
* @return a set of marshalled credentials.
*/
public static MarshalledCredentials fromAWSCredentials(
final AWSSessionCredentials credentials) {
final AwsSessionCredentials credentials) {
return new MarshalledCredentials(
credentials.getAWSAccessKeyId(),
credentials.getAWSSecretKey(),
credentials.getSessionToken());
credentials.accessKeyId(),
credentials.secretAccessKey(),
credentials.sessionToken());
}
/**
@ -156,7 +154,7 @@ public final class MarshalledCredentialBinding {
* @throws NoAuthWithAWSException validation failure
* @throws NoAwsCredentialsException the credentials are actually empty.
*/
public static AWSCredentials toAWSCredentials(
public static AwsCredentials toAWSCredentials(
final MarshalledCredentials marshalled,
final MarshalledCredentials.CredentialTypeRequired typeRequired,
final String component)
@ -173,46 +171,49 @@ public final class MarshalledCredentialBinding {
final String secretKey = marshalled.getSecretKey();
if (marshalled.hasSessionToken()) {
// a session token was supplied, so return session credentials
return new BasicSessionCredentials(accessKey, secretKey,
return AwsSessionCredentials.create(accessKey, secretKey,
marshalled.getSessionToken());
} else {
// these are full credentials
return new BasicAWSCredentials(accessKey, secretKey);
return AwsBasicCredentials.create(accessKey, secretKey);
}
}
/**
* Request a set of credentials from an STS endpoint.
* @param parentCredentials the parent credentials needed to talk to STS
* @param awsConf AWS client configuration
   * @param configuration Hadoop configuration used to configure the STS client
* @param stsEndpoint an endpoint, use "" for none
* @param stsRegion region; use if the endpoint isn't the AWS default.
* @param duration duration of the credentials in seconds. Minimum value: 900.
* @param invoker invoker to use for retrying the call.
* @param bucket bucket name.
* @return the credentials
* @throws IOException on a failure of the request
*/
@Retries.RetryTranslated
public static MarshalledCredentials requestSessionCredentials(
final AWSCredentialsProvider parentCredentials,
final ClientConfiguration awsConf,
final AwsCredentialsProvider parentCredentials,
final Configuration configuration,
final String stsEndpoint,
final String stsRegion,
final int duration,
final Invoker invoker) throws IOException {
final Invoker invoker,
final String bucket) throws IOException {
try {
final AWSSecurityTokenService tokenService =
final StsClient tokenService =
STSClientFactory.builder(parentCredentials,
awsConf,
configuration,
stsEndpoint.isEmpty() ? null : stsEndpoint,
stsRegion)
stsRegion,
bucket)
.build();
try (STSClientFactory.STSClient stsClient = STSClientFactory.createClientConnection(
tokenService, invoker)) {
return fromSTSCredentials(stsClient.requestSessionCredentials(duration,
TimeUnit.SECONDS));
}
} catch (SdkClientException e) {
} catch (SdkException e) {
if (stsRegion.isEmpty()) {
LOG.error("Region must be provided when requesting session credentials.",
e);

View File

@ -21,7 +21,7 @@ package org.apache.hadoop.fs.s3a.auth;
import java.io.IOException;
import java.net.URI;
import com.amazonaws.auth.AWSCredentials;
import software.amazon.awssdk.auth.credentials.AwsCredentials;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
@ -40,7 +40,6 @@ import static org.apache.hadoop.fs.s3a.auth.MarshalledCredentialBinding.toAWSCre
*/
@InterfaceAudience.Private
@InterfaceStability.Unstable
@SuppressWarnings("deprecation")
public class MarshalledCredentialProvider extends
AbstractSessionCredentialsProvider {
@ -85,7 +84,7 @@ public class MarshalledCredentialProvider extends
* @throws IOException on a failure
*/
@Override
protected AWSCredentials createCredentials(final Configuration config)
protected AwsCredentials createCredentials(final Configuration config)
throws IOException {
return toAWSCredentials(credentials, typeRequired, component);
}

View File

@ -21,7 +21,7 @@ package org.apache.hadoop.fs.s3a.auth;
import org.apache.hadoop.fs.s3a.CredentialInitializationException;
/**
* A specific subclass of {@code AmazonClientException} which is
* A specific subclass of {@code SdkException} which is
* used in the S3A retry policy to fail fast when there is any
* authentication problem.
*/

View File

@ -20,16 +20,23 @@ package org.apache.hadoop.fs.s3a.auth;
import java.io.Closeable;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.concurrent.TimeUnit;
import com.amazonaws.ClientConfiguration;
import com.amazonaws.auth.AWSCredentialsProvider;
import com.amazonaws.client.builder.AwsClientBuilder;
import com.amazonaws.services.securitytoken.AWSSecurityTokenService;
import com.amazonaws.services.securitytoken.AWSSecurityTokenServiceClientBuilder;
import com.amazonaws.services.securitytoken.model.AssumeRoleRequest;
import com.amazonaws.services.securitytoken.model.Credentials;
import com.amazonaws.services.securitytoken.model.GetSessionTokenRequest;
import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider;
import software.amazon.awssdk.core.client.config.ClientOverrideConfiguration;
import software.amazon.awssdk.core.retry.RetryPolicy;
import software.amazon.awssdk.http.apache.ApacheHttpClient;
import software.amazon.awssdk.http.apache.ProxyConfiguration;
import software.amazon.awssdk.regions.Region;
import software.amazon.awssdk.services.sts.StsClient;
import software.amazon.awssdk.services.sts.StsClientBuilder;
import software.amazon.awssdk.services.sts.model.AssumeRoleRequest;
import software.amazon.awssdk.services.sts.model.Credentials;
import software.amazon.awssdk.services.sts.model.GetSessionTokenRequest;
import software.amazon.awssdk.thirdparty.org.apache.http.client.utils.URIBuilder;
import org.apache.hadoop.fs.s3a.impl.AWSClientConfig;
import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions;
import org.slf4j.Logger;
@ -38,13 +45,12 @@ import org.slf4j.LoggerFactory;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.s3a.Constants;
import org.apache.hadoop.fs.s3a.Invoker;
import org.apache.hadoop.fs.s3a.Retries;
import org.apache.hadoop.fs.s3a.S3AUtils;
import static org.apache.commons.lang3.StringUtils.isEmpty;
import static org.apache.commons.lang3.StringUtils.isNotEmpty;
import static org.apache.hadoop.fs.s3a.Constants.AWS_SERVICE_IDENTIFIER_STS;
import static org.apache.hadoop.fs.s3a.auth.delegation.DelegationConstants.*;
/**
@ -71,17 +77,15 @@ public class STSClientFactory {
* @return the builder to call {@code build()}
* @throws IOException problem reading proxy secrets
*/
public static AWSSecurityTokenServiceClientBuilder builder(
public static StsClientBuilder builder(
final Configuration conf,
final String bucket,
final AWSCredentialsProvider credentials) throws IOException {
final ClientConfiguration awsConf = S3AUtils.createAwsConf(conf, bucket,
Constants.AWS_SERVICE_IDENTIFIER_STS);
final AwsCredentialsProvider credentials) throws IOException {
String endpoint = conf.getTrimmed(DELEGATION_TOKEN_ENDPOINT,
DEFAULT_DELEGATION_TOKEN_ENDPOINT);
String region = conf.getTrimmed(DELEGATION_TOKEN_REGION,
DEFAULT_DELEGATION_TOKEN_REGION);
return builder(credentials, awsConf, endpoint, region);
return builder(credentials, conf, endpoint, region, bucket);
}
/**
@ -96,64 +100,89 @@ public class STSClientFactory {
* @return the builder to call {@code build()}
* @throws IOException problem reading proxy secrets
*/
public static AWSSecurityTokenServiceClientBuilder builder(
public static StsClientBuilder builder(
final Configuration conf,
final String bucket,
final AWSCredentialsProvider credentials,
final AwsCredentialsProvider credentials,
final String stsEndpoint,
final String stsRegion) throws IOException {
final ClientConfiguration awsConf = S3AUtils.createAwsConf(conf, bucket,
Constants.AWS_SERVICE_IDENTIFIER_STS);
return builder(credentials, awsConf, stsEndpoint, stsRegion);
return builder(credentials, conf, stsEndpoint, stsRegion, bucket);
}
/**
* Create the builder ready for any final configuration options.
* Picks up connection settings from the Hadoop configuration, including
* proxy secrets.
* @param awsConf AWS configuration.
   * @param conf Hadoop configuration providing the connection settings.
* @param credentials AWS credential chain to use
* @param stsEndpoint optional endpoint "https://sns.us-west-1.amazonaws.com"
* @param stsRegion the region, e.g "us-west-1". Must be set if endpoint is.
* @param bucket bucket name
* @return the builder to call {@code build()}
* @throws IOException problem reading proxy secrets
*/
public static AWSSecurityTokenServiceClientBuilder builder(
final AWSCredentialsProvider credentials,
final ClientConfiguration awsConf,
final String stsEndpoint,
final String stsRegion) {
final AWSSecurityTokenServiceClientBuilder builder
= AWSSecurityTokenServiceClientBuilder.standard();
public static StsClientBuilder builder(final AwsCredentialsProvider credentials,
final Configuration conf, final String stsEndpoint, final String stsRegion,
final String bucket) throws IOException {
final StsClientBuilder stsClientBuilder = StsClient.builder();
Preconditions.checkArgument(credentials != null, "No credentials");
builder.withClientConfiguration(awsConf);
builder.withCredentials(credentials);
final ClientOverrideConfiguration.Builder clientOverrideConfigBuilder =
AWSClientConfig.createClientConfigBuilder(conf, AWS_SERVICE_IDENTIFIER_STS);
final ApacheHttpClient.Builder httpClientBuilder =
AWSClientConfig.createHttpClientBuilder(conf);
final RetryPolicy.Builder retryPolicyBuilder = AWSClientConfig.createRetryPolicyBuilder(conf);
final ProxyConfiguration proxyConfig = AWSClientConfig.createProxyConfiguration(conf, bucket);
clientOverrideConfigBuilder.retryPolicy(retryPolicyBuilder.build());
httpClientBuilder.proxyConfiguration(proxyConfig);
stsClientBuilder.httpClientBuilder(httpClientBuilder)
.overrideConfiguration(clientOverrideConfigBuilder.build())
.credentialsProvider(credentials);
boolean destIsStandardEndpoint = STS_STANDARD.equals(stsEndpoint);
if (isNotEmpty(stsEndpoint) && !destIsStandardEndpoint) {
Preconditions.checkArgument(
isNotEmpty(stsRegion),
"STS endpoint is set to %s but no signing region was provided",
stsEndpoint);
Preconditions.checkArgument(isNotEmpty(stsRegion),
"STS endpoint is set to %s but no signing region was provided", stsEndpoint);
LOG.debug("STS Endpoint={}; region='{}'", stsEndpoint, stsRegion);
builder.withEndpointConfiguration(
new AwsClientBuilder.EndpointConfiguration(stsEndpoint, stsRegion));
stsClientBuilder.endpointOverride(getSTSEndpoint(stsEndpoint)).region(Region.of(stsRegion));
} else {
Preconditions.checkArgument(isEmpty(stsRegion),
"STS signing region set set to %s but no STS endpoint specified",
stsRegion);
"STS signing region set set to %s but no STS endpoint specified", stsRegion);
}
return builder;
return stsClientBuilder;
}
/**
   * Given an endpoint string, create the endpoint URI.
*
* @param endpoint possibly null endpoint.
* @return an endpoint uri
*/
private static URI getSTSEndpoint(String endpoint) {
try {
return new URIBuilder().setScheme("https").setHost(endpoint).build();
} catch (URISyntaxException e) {
throw new IllegalArgumentException(e);
}
}
/**
* Create an STS Client instance.
* @param tokenService STS instance
* @param stsClient STS instance
* @param invoker invoker to use
* @return an STS client bonded to that interface.
*/
public static STSClient createClientConnection(
final AWSSecurityTokenService tokenService,
final StsClient stsClient,
final Invoker invoker) {
return new STSClient(tokenService, invoker);
return new STSClient(stsClient, invoker);
}
/**
@ -161,21 +190,19 @@ public class STSClientFactory {
*/
public static final class STSClient implements Closeable {
private final AWSSecurityTokenService tokenService;
private final StsClient stsClient;
private final Invoker invoker;
private STSClient(final AWSSecurityTokenService tokenService,
private STSClient(final StsClient stsClient,
final Invoker invoker) {
this.tokenService = tokenService;
this.stsClient = stsClient;
this.invoker = invoker;
}
@Override
public void close() throws IOException {
// Since we are not using AbstractAWSSecurityTokenService, we
// don't need to worry about catching UnsupportedOperationException.
tokenService.shutdown();
stsClient.close();
}
/**
@ -192,13 +219,13 @@ public class STSClientFactory {
final TimeUnit timeUnit) throws IOException {
int durationSeconds = (int) timeUnit.toSeconds(duration);
LOG.debug("Requesting session token of duration {}", duration);
final GetSessionTokenRequest request = new GetSessionTokenRequest();
request.setDurationSeconds(durationSeconds);
final GetSessionTokenRequest request =
GetSessionTokenRequest.builder().durationSeconds(durationSeconds).build();
return invoker.retry("request session credentials", "",
true,
() ->{
LOG.info("Requesting Amazon STS Session credentials");
return tokenService.getSessionToken(request).getCredentials();
return stsClient.getSessionToken(request).credentials();
});
}
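A hedged end-to-end sketch of this factory (not part of the commit): the bucket name is illustrative, conf/credentials/invoker are assumed to already be in scope, and the IOException raised by the calls is left to the caller.

    StsClient sts = STSClientFactory.builder(conf, "example-bucket", credentials).build();
    try (STSClientFactory.STSClient stsClient =
             STSClientFactory.createClientConnection(sts, invoker)) {
      Credentials session = stsClient.requestSessionCredentials(1, TimeUnit.HOURS);
      // session.accessKeyId(), session.secretAccessKey(), session.sessionToken()
    }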
@ -222,15 +249,14 @@ public class STSClientFactory {
final TimeUnit timeUnit) throws IOException {
LOG.debug("Requesting role {} with duration {}; policy = {}",
roleARN, duration, policy);
AssumeRoleRequest request = new AssumeRoleRequest();
request.setDurationSeconds((int) timeUnit.toSeconds(duration));
request.setRoleArn(roleARN);
request.setRoleSessionName(sessionName);
AssumeRoleRequest.Builder requestBuilder =
AssumeRoleRequest.builder().durationSeconds((int) timeUnit.toSeconds(duration))
.roleArn(roleARN).roleSessionName(sessionName);
if (isNotEmpty(policy)) {
request.setPolicy(policy);
requestBuilder.policy(policy);
}
return invoker.retry("request role credentials", "", true,
() -> tokenService.assumeRole(request).getCredentials());
() -> stsClient.assumeRole(requestBuilder.build()).credentials());
}
}
}

View File

@ -0,0 +1,114 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs.s3a.auth;
import java.io.IOException;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import software.amazon.awssdk.auth.signer.Aws4Signer;
import software.amazon.awssdk.auth.signer.Aws4UnsignedPayloadSigner;
import software.amazon.awssdk.auth.signer.AwsS3V4Signer;
import software.amazon.awssdk.core.signer.NoOpSigner;
import software.amazon.awssdk.core.signer.Signer;
import org.apache.hadoop.fs.s3a.S3AUtils;
/**
* Signer factory used to register and create signers.
*/
public final class SignerFactory {
private static final Logger LOG = LoggerFactory.getLogger(SignerFactory.class);
public static final String VERSION_FOUR_SIGNER = "AWS4SignerType";
public static final String VERSION_FOUR_UNSIGNED_PAYLOAD_SIGNER = "AWS4UnsignedPayloadSignerType";
public static final String NO_OP_SIGNER = "NoOpSignerType";
private static final String S3_V4_SIGNER = "AWSS3V4SignerType";
private static final Map<String, Class<? extends Signer>> SIGNERS
= new ConcurrentHashMap<>();
static {
// Register the standard signer types.
SIGNERS.put(VERSION_FOUR_SIGNER, Aws4Signer.class);
SIGNERS.put(VERSION_FOUR_UNSIGNED_PAYLOAD_SIGNER, Aws4UnsignedPayloadSigner.class);
SIGNERS.put(NO_OP_SIGNER, NoOpSigner.class);
SIGNERS.put(S3_V4_SIGNER, AwsS3V4Signer.class);
}
private SignerFactory() {
}
/**
* Register an implementation class for the given signer type.
*
* @param signerType The name of the signer type to register.
* @param signerClass The class implementing the given signature protocol.
*/
public static void registerSigner(
final String signerType,
final Class<? extends Signer> signerClass) {
if (signerType == null) {
throw new IllegalArgumentException("signerType cannot be null");
}
if (signerClass == null) {
throw new IllegalArgumentException("signerClass cannot be null");
}
SIGNERS.put(signerType, signerClass);
}
/**
* Check if the signer has already been registered.
* @param signerType signer to get
*/
public static void verifySignerRegistered(String signerType) {
Class<? extends Signer> signerClass = SIGNERS.get(signerType);
if (signerClass == null) {
throw new IllegalArgumentException("unknown signer type: " + signerType);
}
}
/**
* Create an instance of the given signer.
*
* @param signerType The signer type.
* @param configKey Config key used to configure the signer.
* @return The new signer instance.
* @throws IOException on any problem.
*/
public static Signer createSigner(String signerType, String configKey) throws IOException {
Class<?> signerClass = SIGNERS.get(signerType);
String className = signerClass.getName();
LOG.debug("Signer class is {}", className);
Signer signer =
S3AUtils.getInstanceFromReflection(signerClass, null, null, Signer.class, "create",
configKey);
return signer;
}
}
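A hedged usage sketch (not part of the commit): registering an illustrative custom signer class and creating it by type. MyCustomSigner is a placeholder for a class implementing Signer with a public static create() factory or a suitable constructor, and the caller handles the IOException from createSigner.

    SignerFactory.registerSigner("MyCustomSignerType", MyCustomSigner.class);
    SignerFactory.verifySignerRegistered("MyCustomSignerType");
    Signer signer = SignerFactory.createSigner("MyCustomSignerType", "fs.s3a.custom.signers");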

View File

@ -22,14 +22,12 @@ import java.io.IOException;
import java.util.LinkedList;
import java.util.List;
import com.amazonaws.auth.Signer;
import com.amazonaws.auth.SignerFactory;
import software.amazon.awssdk.core.signer.Signer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.s3a.auth.delegation.DelegationTokenProvider;
import org.apache.hadoop.fs.s3a.impl.V2Migration;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.util.ReflectionUtils;
@ -71,8 +69,6 @@ public class SignerManager implements Closeable {
return;
}
V2Migration.v1CustomSignerUsed();
for (String customSigner : customSigners) {
String[] parts = customSigner.split(":");
if (!(parts.length == 1 || parts.length == 2 || parts.length == 3)) {
@ -119,7 +115,7 @@ public class SignerManager implements Closeable {
private static void maybeRegisterSigner(String signerName,
String signerClassName, Configuration conf) {
try {
SignerFactory.getSignerByTypeAndService(signerName, null);
SignerFactory.verifySignerRegistered(signerName);
} catch (IllegalArgumentException e) {
// Signer is not registered with the AWS SDK.
// Load the class and register the signer.

View File

@ -20,9 +20,6 @@ package org.apache.hadoop.fs.s3a.auth.delegation;
import java.util.Optional;
import com.amazonaws.services.s3.model.SSEAwsKeyManagementParams;
import com.amazonaws.services.s3.model.SSECustomerKey;
import org.apache.hadoop.fs.s3a.S3AEncryptionMethods;
/**
@ -35,37 +32,30 @@ public final class EncryptionSecretOperations {
private EncryptionSecretOperations() {
}
/**
* Create SSE-C client side key encryption options on demand.
* @return an optional key to attach to a request.
  /**
* Gets the SSE-C client side key if present.
*
* @param secrets source of the encryption secrets.
* @return an optional key to attach to a request.
*/
public static Optional<SSECustomerKey> createSSECustomerKey(
final EncryptionSecrets secrets) {
if (secrets.hasEncryptionKey() &&
secrets.getEncryptionMethod() == S3AEncryptionMethods.SSE_C) {
return Optional.of(new SSECustomerKey(secrets.getEncryptionKey()));
public static Optional<String> getSSECustomerKey(final EncryptionSecrets secrets) {
if (secrets.hasEncryptionKey() && secrets.getEncryptionMethod() == S3AEncryptionMethods.SSE_C) {
return Optional.of(secrets.getEncryptionKey());
} else {
return Optional.empty();
}
}
/**
* Create SSE-KMS options for a request, iff the encryption is SSE-KMS.
* @return an optional SSE-KMS param to attach to a request.
   * Gets the SSE-KMS key if present; otherwise S3 falls back to the AWS-managed key.
*
* @param secrets source of the encryption secrets.
* @return an optional key to attach to a request.
*/
public static Optional<SSEAwsKeyManagementParams> createSSEAwsKeyManagementParams(
final EncryptionSecrets secrets) {
//Use specified key, otherwise default to default master aws/s3 key by AWS
if (secrets.getEncryptionMethod() == S3AEncryptionMethods.SSE_KMS) {
if (secrets.hasEncryptionKey()) {
return Optional.of(new SSEAwsKeyManagementParams(
secrets.getEncryptionKey()));
} else {
return Optional.of(new SSEAwsKeyManagementParams());
}
public static Optional<String> getSSEAwsKMSKey(final EncryptionSecrets secrets) {
if (secrets.getEncryptionMethod() == S3AEncryptionMethods.SSE_KMS
&& secrets.hasEncryptionKey()) {
return Optional.of(secrets.getEncryptionKey());
} else {
return Optional.empty();
}

View File

@ -23,7 +23,7 @@ import java.util.Optional;
import java.util.UUID;
import java.util.concurrent.TimeUnit;
import com.amazonaws.services.securitytoken.model.Credentials;
import software.amazon.awssdk.services.sts.model.Credentials;
import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting;
import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions;
import org.slf4j.Logger;

View File

@ -26,21 +26,18 @@ import java.util.Optional;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
import com.amazonaws.ClientConfiguration;
import com.amazonaws.auth.AWSCredentials;
import com.amazonaws.auth.AWSSessionCredentials;
import com.amazonaws.services.securitytoken.AWSSecurityTokenService;
import software.amazon.awssdk.auth.credentials.AwsCredentials;
import software.amazon.awssdk.auth.credentials.AwsSessionCredentials;
import software.amazon.awssdk.services.sts.StsClient;
import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.s3a.AWSCredentialProviderList;
import org.apache.hadoop.fs.s3a.Constants;
import org.apache.hadoop.fs.s3a.Invoker;
import org.apache.hadoop.fs.s3a.Retries;
import org.apache.hadoop.fs.s3a.S3ARetryPolicy;
import org.apache.hadoop.fs.s3a.S3AUtils;
import org.apache.hadoop.fs.s3a.auth.MarshalledCredentialProvider;
import org.apache.hadoop.fs.s3a.auth.MarshalledCredentials;
import org.apache.hadoop.fs.s3a.auth.RoleModel;
@ -50,8 +47,8 @@ import org.apache.hadoop.io.Text;
import static org.apache.hadoop.fs.s3a.Constants.AWS_CREDENTIALS_PROVIDER;
import static org.apache.hadoop.fs.s3a.Invoker.once;
import static org.apache.hadoop.fs.s3a.S3AUtils.STANDARD_AWS_PROVIDERS;
import static org.apache.hadoop.fs.s3a.S3AUtils.buildAWSProviderList;
import static org.apache.hadoop.fs.s3a.auth.AwsCredentialListProvider.STANDARD_AWS_PROVIDERS;
import static org.apache.hadoop.fs.s3a.auth.AwsCredentialListProvider.buildAWSProviderList;
import static org.apache.hadoop.fs.s3a.auth.MarshalledCredentialBinding.fromAWSCredentials;
import static org.apache.hadoop.fs.s3a.auth.MarshalledCredentialBinding.fromSTSCredentials;
import static org.apache.hadoop.fs.s3a.auth.delegation.DelegationConstants.*;
@ -292,23 +289,22 @@ public class SessionTokenBinding extends AbstractDelegationTokenBinding {
// chain.
// As no codepath (session propagation, STS creation) will work,
// throw this.
final AWSCredentials parentCredentials = once("get credentials",
final AwsCredentials parentCredentials = once("get credentials",
"",
() -> parentAuthChain.getCredentials());
hasSessionCreds = parentCredentials instanceof AWSSessionCredentials;
() -> parentAuthChain.resolveCredentials());
hasSessionCreds = parentCredentials instanceof AwsSessionCredentials;
if (!hasSessionCreds) {
LOG.debug("Creating STS client for {}", getDescription());
invoker = new Invoker(new S3ARetryPolicy(conf), LOG_EVENT);
ClientConfiguration awsConf =
S3AUtils.createAwsConf(conf, uri.getHost(),
Constants.AWS_SERVICE_IDENTIFIER_STS);
AWSSecurityTokenService tokenService =
StsClient tokenService =
STSClientFactory.builder(parentAuthChain,
awsConf,
conf,
endpoint,
region)
region,
uri.getHost())
.build();
stsClient = Optional.of(
STSClientFactory.createClientConnection(tokenService, invoker));
@ -374,11 +370,11 @@ public class SessionTokenBinding extends AbstractDelegationTokenBinding {
+ " -duration unknown", getCanonicalUri());
}
origin += " " + CREDENTIALS_CONVERTED_TO_DELEGATION_TOKEN;
final AWSCredentials awsCredentials
= parentAuthChain.getCredentials();
if (awsCredentials instanceof AWSSessionCredentials) {
final AwsCredentials awsCredentials
= parentAuthChain.resolveCredentials();
if (awsCredentials instanceof AwsSessionCredentials) {
marshalledCredentials = fromAWSCredentials(
(AWSSessionCredentials) awsCredentials);
(AwsSessionCredentials) awsCredentials);
} else {
throw new DelegationTokenIOException(
"AWS Authentication chain is no longer supplying session secrets");

View File

@ -27,10 +27,11 @@ import java.util.Date;
import java.util.List;
import java.util.UUID;
import com.amazonaws.services.s3.model.MultipartUpload;
import software.amazon.awssdk.services.s3.model.MultipartUpload;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.commons.lang3.tuple.Pair;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.classification.VisibleForTesting;
@ -975,7 +976,7 @@ public abstract class AbstractS3ACommitter extends PathOutputCommitter
.executeWith(commitContext.getOuterSubmitter())
.suppressExceptions(suppressExceptions)
.run(u -> commitContext.abortMultipartCommit(
u.getKey(), u.getUploadId()));
u.key(), u.uploadId()));
} else {
LOG.info("No pending uploads were found");
}
@ -1300,8 +1301,8 @@ public abstract class AbstractS3ACommitter extends PathOutputCommitter
DateFormat df = DateFormat.getDateTimeInstance();
pending.forEach(u ->
LOG.info("[{}] {}",
df.format(u.getInitiated()),
u.getKey()));
df.format(Date.from(u.initiated())),
u.key()));
if (shouldAbortUploadsInCleanup()) {
LOG.warn("This committer will abort these uploads in job cleanup");
}

View File

@ -21,7 +21,7 @@ package org.apache.hadoop.fs.s3a.commit;
import java.io.IOException;
import java.util.List;
import com.amazonaws.services.s3.model.PartETag;
import software.amazon.awssdk.services.s3.model.CompletedPart;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.fs.statistics.IOStatistics;
@ -76,7 +76,7 @@ public class PutTracker {
* @throws IOException I/O problem or validation failure.
*/
public boolean aboutToComplete(String uploadId,
List<PartETag> parts,
List<CompletedPart> parts,
long bytesWritten,
final IOStatistics iostatistics)
throws IOException {

View File

@ -31,9 +31,10 @@ import java.util.Iterator;
import java.util.List;
import java.util.Map;
import com.amazonaws.services.s3.model.PartETag;
import software.amazon.awssdk.services.s3.model.CompletedPart;
import com.fasterxml.jackson.annotation.JsonProperty;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
@ -215,13 +216,13 @@ public class SinglePendingCommit extends PersistentCommitData<SinglePendingCommi
* @param parts ordered list of etags.
* @throws ValidationFailure if the data is invalid
*/
public void bindCommitData(List<PartETag> parts) throws ValidationFailure {
public void bindCommitData(List<CompletedPart> parts) throws ValidationFailure {
etags = new ArrayList<>(parts.size());
int counter = 1;
for (PartETag part : parts) {
verify(part.getPartNumber() == counter,
"Expected part number %s but got %s", counter, part.getPartNumber());
etags.add(part.getETag());
for (CompletedPart part : parts) {
verify(part.partNumber() == counter,
"Expected part number %s but got %s", counter, part.partNumber());
etags.add(part.eTag());
counter++;
}
}
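A minimal sketch of how such a CompletedPart list is typically consumed when completing a multipart upload with the V2 client; it assumes an existing S3Client and is illustrative only:

import java.util.List;
import software.amazon.awssdk.services.s3.S3Client;
import software.amazon.awssdk.services.s3.model.CompleteMultipartUploadRequest;
import software.amazon.awssdk.services.s3.model.CompleteMultipartUploadResponse;
import software.amazon.awssdk.services.s3.model.CompletedMultipartUpload;
import software.amazon.awssdk.services.s3.model.CompletedPart;

/** Minimal sketch: complete a multipart upload from an ordered CompletedPart list. */
final class CompleteUploadSketch {
  static CompleteMultipartUploadResponse complete(S3Client s3, String bucket, String key,
      String uploadId, List<CompletedPart> parts) {
    // Part numbers must form the same 1..n sequence validated in bindCommitData above.
    return s3.completeMultipartUpload(CompleteMultipartUploadRequest.builder()
        .bucket(bucket)
        .key(key)
        .uploadId(uploadId)
        .multipartUpload(CompletedMultipartUpload.builder().parts(parts).build())
        .build());
  }
}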

View File

@ -21,6 +21,8 @@ package org.apache.hadoop.fs.s3a.commit.impl;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.nio.file.Files;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
@ -32,13 +34,15 @@ import java.util.stream.IntStream;
import javax.annotation.Nullable;
import com.amazonaws.services.s3.model.MultipartUpload;
import com.amazonaws.services.s3.model.PartETag;
import com.amazonaws.services.s3.model.UploadPartRequest;
import com.amazonaws.services.s3.model.UploadPartResult;
import software.amazon.awssdk.core.sync.RequestBody;
import software.amazon.awssdk.services.s3.model.CompletedPart;
import software.amazon.awssdk.services.s3.model.MultipartUpload;
import software.amazon.awssdk.services.s3.model.UploadPartRequest;
import software.amazon.awssdk.services.s3.model.UploadPartResponse;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.commons.lang3.tuple.Pair;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
@ -155,9 +159,9 @@ public class CommitOperations extends AbstractStoreOperation
* @param tagIds list of tags
* @return same list, now in numbered tuples
*/
public static List<PartETag> toPartEtags(List<String> tagIds) {
public static List<CompletedPart> toPartEtags(List<String> tagIds) {
return IntStream.range(0, tagIds.size())
.mapToObj(i -> new PartETag(i + 1, tagIds.get(i)))
.mapToObj(i -> CompletedPart.builder().partNumber(i + 1).eTag(tagIds.get(i)).build())
.collect(Collectors.toList());
}
@ -566,26 +570,30 @@ public class CommitOperations extends AbstractStoreOperation
numParts, length));
}
List<PartETag> parts = new ArrayList<>((int) numParts);
List<CompletedPart> parts = new ArrayList<>((int) numParts);
LOG.debug("File size is {}, number of parts to upload = {}",
length, numParts);
for (int partNumber = 1; partNumber <= numParts; partNumber += 1) {
progress.progress();
long size = Math.min(length - offset, uploadPartSize);
UploadPartRequest part;
part = writeOperations.newUploadPartRequest(
destKey,
uploadId,
partNumber,
(int) size,
null,
localFile,
offset);
part.setLastPart(partNumber == numParts);
UploadPartResult partResult = writeOperations.uploadPart(part, statistics);
offset += uploadPartSize;
parts.add(partResult.getPartETag());
// Open the file to upload.
try (InputStream fileStream = Files.newInputStream(localFile.toPath())) {
for (int partNumber = 1; partNumber <= numParts; partNumber += 1) {
progress.progress();
long size = Math.min(length - offset, uploadPartSize);
UploadPartRequest part = writeOperations.newUploadPartRequestBuilder(
destKey,
uploadId,
partNumber,
size).build();
// Read from the file input stream at current position.
RequestBody body = RequestBody.fromInputStream(fileStream, size);
UploadPartResponse response = writeOperations.uploadPart(part, body, statistics);
offset += uploadPartSize;
parts.add(CompletedPart.builder()
.partNumber(partNumber)
.eTag(response.eTag())
.build());
}
}
commitData.bindCommitData(parts);
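The loop above follows the V2 pattern of separating the part request from its payload. A minimal sketch of a single part upload against the raw V2 client, assuming an open S3Client; names are illustrative:

import java.io.InputStream;
import software.amazon.awssdk.core.sync.RequestBody;
import software.amazon.awssdk.services.s3.S3Client;
import software.amazon.awssdk.services.s3.model.CompletedPart;
import software.amazon.awssdk.services.s3.model.UploadPartRequest;

/** Minimal sketch: upload one part from a stream and record its CompletedPart. */
final class UploadPartSketch {
  static CompletedPart uploadOnePart(S3Client s3, String bucket, String key,
      String uploadId, int partNumber, InputStream data, long size) {
    UploadPartRequest request = UploadPartRequest.builder()
        .bucket(bucket).key(key)
        .uploadId(uploadId)
        .partNumber(partNumber)
        .contentLength(size)
        .build();
    // V2 separates the request from the payload: the body is passed explicitly.
    String etag = s3.uploadPart(request, RequestBody.fromInputStream(data, size)).eTag();
    return CompletedPart.builder().partNumber(partNumber).eTag(etag).build();
  }
}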

View File

@ -20,19 +20,22 @@ package org.apache.hadoop.fs.s3a.commit.magic;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import com.amazonaws.services.s3.model.PartETag;
import com.amazonaws.services.s3.model.PutObjectRequest;
import software.amazon.awssdk.services.s3.model.CompletedPart;
import software.amazon.awssdk.services.s3.model.PutObjectRequest;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.s3a.Retries;
import org.apache.hadoop.fs.s3a.S3ADataBlocks;
import org.apache.hadoop.fs.s3a.WriteOperationHelper;
import org.apache.hadoop.fs.s3a.commit.PutTracker;
import org.apache.hadoop.fs.s3a.commit.files.SinglePendingCommit;
@ -125,7 +128,7 @@ public class MagicCommitTracker extends PutTracker {
*/
@Override
public boolean aboutToComplete(String uploadId,
List<PartETag> parts,
List<CompletedPart> parts,
long bytesWritten,
final IOStatistics iostatistics)
throws IOException {
@ -144,10 +147,9 @@ public class MagicCommitTracker extends PutTracker {
headers.put(X_HEADER_MAGIC_MARKER, Long.toString(bytesWritten));
PutObjectRequest originalDestPut = writer.createPutObjectRequest(
originalDestKey,
new ByteArrayInputStream(EMPTY),
0,
new PutObjectOptions(true, null, headers));
upload(originalDestPut);
new PutObjectOptions(true, null, headers), false);
upload(originalDestPut, new ByteArrayInputStream(EMPTY));
// build the commit summary
SinglePendingCommit commitData = new SinglePendingCommit();
@ -170,22 +172,22 @@ public class MagicCommitTracker extends PutTracker {
path, pendingPartKey, commitData);
PutObjectRequest put = writer.createPutObjectRequest(
pendingPartKey,
new ByteArrayInputStream(bytes),
bytes.length, null);
upload(put);
bytes.length, null, false);
upload(put, new ByteArrayInputStream(bytes));
return false;
}
/**
* PUT an object.
* @param request the request
* @param inputStream input stream of data to be uploaded
* @throws IOException on problems
*/
@Retries.RetryTranslated
private void upload(PutObjectRequest request) throws IOException {
trackDurationOfInvocation(trackerStatistics,
COMMITTER_MAGIC_MARKER_PUT.getSymbol(), () ->
writer.putObject(request, PutObjectOptions.keepingDirs(), null));
private void upload(PutObjectRequest request, InputStream inputStream) throws IOException {
trackDurationOfInvocation(trackerStatistics, COMMITTER_MAGIC_MARKER_PUT.getSymbol(),
() -> writer.putObject(request, PutObjectOptions.keepingDirs(),
new S3ADataBlocks.BlockUploadData(inputStream), false, null));
}
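A minimal sketch of the raw V2 PUT that the helper above ultimately issues, with the payload passed separately from the request; it assumes an existing S3Client and is illustrative only:

import software.amazon.awssdk.core.sync.RequestBody;
import software.amazon.awssdk.services.s3.S3Client;
import software.amazon.awssdk.services.s3.model.PutObjectRequest;
import software.amazon.awssdk.services.s3.model.PutObjectResponse;

/** Minimal sketch: a raw V2 PUT, with the payload supplied as a RequestBody. */
final class MarkerPutSketch {
  static PutObjectResponse putMarker(S3Client s3, String bucket, String key, byte[] bytes) {
    PutObjectRequest request = PutObjectRequest.builder()
        .bucket(bucket)
        .key(key)
        .contentLength((long) bytes.length)
        .build();
    return s3.putObject(request, RequestBody.fromBytes(bytes));
  }
}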
@Override

View File

@ -0,0 +1,43 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs.s3a.impl;
/**
* Enum to map AWS SDK V1 Acl values to SDK V2.
*/
public enum AWSCannedACL {
Private("private"),
PublicRead("public-read"),
PublicReadWrite("public-read-write"),
AuthenticatedRead("authenticated-read"),
AwsExecRead("aws-exec-read"),
BucketOwnerRead("bucket-owner-read"),
BucketOwnerFullControl("bucket-owner-full-control"),
LogDeliveryWrite("log-delivery-write");
private final String value;
AWSCannedACL(String value){
this.value = value;
}
public String toString() {
return this.value;
}
}
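A minimal sketch of mapping these values onto the V2 model enum, assuming ObjectCannedACL is the target type; values it does not define (such as log-delivery-write) resolve to UNKNOWN_TO_SDK_VERSION:

import software.amazon.awssdk.services.s3.model.ObjectCannedACL;

/** Minimal sketch: translate the canned-ACL string above into the V2 model enum. */
final class CannedAclSketch {
  static ObjectCannedACL toV2(AWSCannedACL acl) {
    // fromValue matches on the wire value, e.g. "bucket-owner-full-control";
    // unrecognised values come back as UNKNOWN_TO_SDK_VERSION rather than throwing.
    return ObjectCannedACL.fromValue(acl.toString());
  }
}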
}

View File

@ -0,0 +1,379 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs.s3a.impl;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.time.Duration;
import java.util.concurrent.TimeUnit;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import software.amazon.awssdk.core.client.config.ClientOverrideConfiguration;
import software.amazon.awssdk.core.client.config.SdkAdvancedClientOption;
import software.amazon.awssdk.core.retry.RetryPolicy;
import software.amazon.awssdk.http.apache.ApacheHttpClient;
import software.amazon.awssdk.http.apache.ProxyConfiguration;
import software.amazon.awssdk.http.nio.netty.NettyNioAsyncHttpClient;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.s3a.S3AUtils;
import org.apache.hadoop.fs.s3a.auth.SignerFactory;
import org.apache.hadoop.util.VersionInfo;
import org.apache.http.client.utils.URIBuilder;
import static org.apache.hadoop.fs.s3a.Constants.AWS_SERVICE_IDENTIFIER_S3;
import static org.apache.hadoop.fs.s3a.Constants.AWS_SERVICE_IDENTIFIER_STS;
import static org.apache.hadoop.fs.s3a.Constants.DEFAULT_ESTABLISH_TIMEOUT;
import static org.apache.hadoop.fs.s3a.Constants.DEFAULT_MAXIMUM_CONNECTIONS;
import static org.apache.hadoop.fs.s3a.Constants.DEFAULT_MAX_ERROR_RETRIES;
import static org.apache.hadoop.fs.s3a.Constants.DEFAULT_REQUEST_TIMEOUT;
import static org.apache.hadoop.fs.s3a.Constants.DEFAULT_SOCKET_TIMEOUT;
import static org.apache.hadoop.fs.s3a.Constants.ESTABLISH_TIMEOUT;
import static org.apache.hadoop.fs.s3a.Constants.MAXIMUM_CONNECTIONS;
import static org.apache.hadoop.fs.s3a.Constants.MAX_ERROR_RETRIES;
import static org.apache.hadoop.fs.s3a.Constants.PROXY_DOMAIN;
import static org.apache.hadoop.fs.s3a.Constants.PROXY_HOST;
import static org.apache.hadoop.fs.s3a.Constants.PROXY_PASSWORD;
import static org.apache.hadoop.fs.s3a.Constants.PROXY_PORT;
import static org.apache.hadoop.fs.s3a.Constants.PROXY_SECURED;
import static org.apache.hadoop.fs.s3a.Constants.PROXY_USERNAME;
import static org.apache.hadoop.fs.s3a.Constants.PROXY_WORKSTATION;
import static org.apache.hadoop.fs.s3a.Constants.REQUEST_TIMEOUT;
import static org.apache.hadoop.fs.s3a.Constants.SIGNING_ALGORITHM;
import static org.apache.hadoop.fs.s3a.Constants.SIGNING_ALGORITHM_S3;
import static org.apache.hadoop.fs.s3a.Constants.SIGNING_ALGORITHM_STS;
import static org.apache.hadoop.fs.s3a.Constants.SOCKET_TIMEOUT;
import static org.apache.hadoop.fs.s3a.Constants.USER_AGENT_PREFIX;
/**
* Methods for configuring the S3 client.
* These methods are used when creating and configuring
* {@link software.amazon.awssdk.services.s3.S3Client} which communicates with the S3 service.
*/
public final class AWSClientConfig {
private static final Logger LOG = LoggerFactory.getLogger(AWSClientConfig.class);
private AWSClientConfig() {
}
public static ClientOverrideConfiguration.Builder createClientConfigBuilder(Configuration conf,
String awsServiceIdentifier) throws IOException {
ClientOverrideConfiguration.Builder overrideConfigBuilder =
ClientOverrideConfiguration.builder();
initRequestTimeout(conf, overrideConfigBuilder);
initUserAgent(conf, overrideConfigBuilder);
String signer = conf.getTrimmed(SIGNING_ALGORITHM, "");
if (!signer.isEmpty()) {
LOG.debug("Signer override = {}", signer);
overrideConfigBuilder.putAdvancedOption(SdkAdvancedClientOption.SIGNER,
SignerFactory.createSigner(signer, SIGNING_ALGORITHM));
}
initSigner(conf, overrideConfigBuilder, awsServiceIdentifier);
return overrideConfigBuilder;
}
/**
* Configures the http client.
*
* @param conf The Hadoop configuration
* @return Http client builder
* @throws IOException on any problem
*/
public static ApacheHttpClient.Builder createHttpClientBuilder(Configuration conf)
throws IOException {
ApacheHttpClient.Builder httpClientBuilder =
ApacheHttpClient.builder();
httpClientBuilder.maxConnections(S3AUtils.intOption(conf, MAXIMUM_CONNECTIONS,
DEFAULT_MAXIMUM_CONNECTIONS, 1));
int connectionEstablishTimeout =
S3AUtils.intOption(conf, ESTABLISH_TIMEOUT, DEFAULT_ESTABLISH_TIMEOUT, 0);
int socketTimeout = S3AUtils.intOption(conf, SOCKET_TIMEOUT, DEFAULT_SOCKET_TIMEOUT, 0);
httpClientBuilder.connectionTimeout(Duration.ofSeconds(connectionEstablishTimeout));
httpClientBuilder.socketTimeout(Duration.ofSeconds(socketTimeout));
NetworkBinding.bindSSLChannelMode(conf, httpClientBuilder);
return httpClientBuilder;
}
/**
* Configures the async http client.
*
* @param conf The Hadoop configuration
* @return Http client builder
*/
public static NettyNioAsyncHttpClient.Builder createAsyncHttpClientBuilder(Configuration conf) {
NettyNioAsyncHttpClient.Builder httpClientBuilder =
NettyNioAsyncHttpClient.builder();
httpClientBuilder.maxConcurrency(S3AUtils.intOption(conf, MAXIMUM_CONNECTIONS,
DEFAULT_MAXIMUM_CONNECTIONS, 1));
int connectionEstablishTimeout =
S3AUtils.intOption(conf, ESTABLISH_TIMEOUT, DEFAULT_ESTABLISH_TIMEOUT, 0);
int socketTimeout = S3AUtils.intOption(conf, SOCKET_TIMEOUT, DEFAULT_SOCKET_TIMEOUT, 0);
httpClientBuilder.connectionTimeout(Duration.ofSeconds(connectionEstablishTimeout));
httpClientBuilder.readTimeout(Duration.ofSeconds(socketTimeout));
httpClientBuilder.writeTimeout(Duration.ofSeconds(socketTimeout));
// TODO: Don't think you can set a socket factory for the netty client.
// NetworkBinding.bindSSLChannelMode(conf, awsConf);
return httpClientBuilder;
}
/**
* Configures the retry policy.
*
* @param conf The Hadoop configuration
* @return Retry policy builder
*/
public static RetryPolicy.Builder createRetryPolicyBuilder(Configuration conf) {
RetryPolicy.Builder retryPolicyBuilder = RetryPolicy.builder();
retryPolicyBuilder.numRetries(S3AUtils.intOption(conf, MAX_ERROR_RETRIES,
DEFAULT_MAX_ERROR_RETRIES, 0));
return retryPolicyBuilder;
}
/**
* Configures the proxy.
*
* @param conf The Hadoop configuration
* @param bucket Optional bucket to use to look up per-bucket proxy secrets
* @return Proxy configuration
* @throws IOException on any IO problem
*/
public static ProxyConfiguration createProxyConfiguration(Configuration conf,
String bucket) throws IOException {
ProxyConfiguration.Builder proxyConfigBuilder = ProxyConfiguration.builder();
String proxyHost = conf.getTrimmed(PROXY_HOST, "");
int proxyPort = conf.getInt(PROXY_PORT, -1);
if (!proxyHost.isEmpty()) {
if (proxyPort >= 0) {
String scheme = conf.getBoolean(PROXY_SECURED, false) ? "https" : "http";
proxyConfigBuilder.endpoint(buildURI(scheme, proxyHost, proxyPort));
} else {
if (conf.getBoolean(PROXY_SECURED, false)) {
LOG.warn("Proxy host set without port. Using HTTPS default 443");
proxyConfigBuilder.endpoint(buildURI("https", proxyHost, 443));
} else {
LOG.warn("Proxy host set without port. Using HTTP default 80");
proxyConfigBuilder.endpoint(buildURI("http", proxyHost, 80));
}
}
final String proxyUsername = S3AUtils.lookupPassword(bucket, conf, PROXY_USERNAME,
null, null);
final String proxyPassword = S3AUtils.lookupPassword(bucket, conf, PROXY_PASSWORD,
null, null);
if ((proxyUsername == null) != (proxyPassword == null)) {
String msg = "Proxy error: " + PROXY_USERNAME + " or " +
PROXY_PASSWORD + " set without the other.";
LOG.error(msg);
throw new IllegalArgumentException(msg);
}
proxyConfigBuilder.username(proxyUsername);
proxyConfigBuilder.password(proxyPassword);
proxyConfigBuilder.ntlmDomain(conf.getTrimmed(PROXY_DOMAIN));
proxyConfigBuilder.ntlmWorkstation(conf.getTrimmed(PROXY_WORKSTATION));
if (LOG.isDebugEnabled()) {
LOG.debug("Using proxy server {}:{} as user {} with password {} on "
+ "domain {} as workstation {}", proxyHost, proxyPort, proxyUsername, proxyPassword,
PROXY_DOMAIN, PROXY_WORKSTATION);
}
} else if (proxyPort >= 0) {
String msg =
"Proxy error: " + PROXY_PORT + " set without " + PROXY_HOST;
LOG.error(msg);
throw new IllegalArgumentException(msg);
}
return proxyConfigBuilder.build();
}
/**
* Configures the proxy for the async http client.
*
* @param conf The Hadoop configuration
* @param bucket Optional bucket to use to look up per-bucket proxy secrets
* @return Proxy configuration
* @throws IOException on any IO problem
*/
public static software.amazon.awssdk.http.nio.netty.ProxyConfiguration
createAsyncProxyConfiguration(Configuration conf,
String bucket) throws IOException {
software.amazon.awssdk.http.nio.netty.ProxyConfiguration.Builder proxyConfigBuilder =
software.amazon.awssdk.http.nio.netty.ProxyConfiguration.builder();
String proxyHost = conf.getTrimmed(PROXY_HOST, "");
int proxyPort = conf.getInt(PROXY_PORT, -1);
if (!proxyHost.isEmpty()) {
if (proxyPort >= 0) {
String scheme = conf.getBoolean(PROXY_SECURED, false) ? "https" : "http";
proxyConfigBuilder.host(proxyHost);
proxyConfigBuilder.port(proxyPort);
proxyConfigBuilder.scheme(scheme);
} else {
if (conf.getBoolean(PROXY_SECURED, false)) {
LOG.warn("Proxy host set without port. Using HTTPS default 443");
proxyConfigBuilder.host(proxyHost);
proxyConfigBuilder.port(443);
proxyConfigBuilder.scheme("https");
} else {
LOG.warn("Proxy host set without port. Using HTTP default 80");
proxyConfigBuilder.host(proxyHost);
proxyConfigBuilder.port(80);
proxyConfigBuilder.scheme("http");
}
}
final String proxyUsername = S3AUtils.lookupPassword(bucket, conf, PROXY_USERNAME,
null, null);
final String proxyPassword = S3AUtils.lookupPassword(bucket, conf, PROXY_PASSWORD,
null, null);
if ((proxyUsername == null) != (proxyPassword == null)) {
String msg = "Proxy error: " + PROXY_USERNAME + " or " +
PROXY_PASSWORD + " set without the other.";
LOG.error(msg);
throw new IllegalArgumentException(msg);
}
proxyConfigBuilder.username(proxyUsername);
proxyConfigBuilder.password(proxyPassword);
// TODO: check NTLM support
// proxyConfigBuilder.ntlmDomain(conf.getTrimmed(PROXY_DOMAIN));
// proxyConfigBuilder.ntlmWorkstation(conf.getTrimmed(PROXY_WORKSTATION));
if (LOG.isDebugEnabled()) {
LOG.debug("Using proxy server {}:{} as user {} with password {} on "
+ "domain {} as workstation {}", proxyHost, proxyPort, proxyUsername, proxyPassword,
PROXY_DOMAIN, PROXY_WORKSTATION);
}
} else if (proxyPort >= 0) {
String msg =
"Proxy error: " + PROXY_PORT + " set without " + PROXY_HOST;
LOG.error(msg);
throw new IllegalArgumentException(msg);
} else {
return null;
}
return proxyConfigBuilder.build();
}
/**
* Builds a URI, throwing an IllegalArgumentException in case of errors.
*
* @param scheme proxy scheme, "http" or "https"
* @param host proxy host
* @param port proxy port
* @return uri with scheme, host and port
*/
private static URI buildURI(String scheme, String host, int port) {
try {
return new URIBuilder().setScheme(scheme).setHost(host).setPort(port).build();
} catch (URISyntaxException e) {
String msg =
"Proxy error: incorrect " + PROXY_HOST + " or " + PROXY_PORT;
LOG.error(msg);
throw new IllegalArgumentException(msg);
}
}
/**
* Initializes the User-Agent header to send in HTTP requests to AWS
* services. We always include the Hadoop version number. The user also
* may set an optional custom prefix to put in front of the Hadoop version
* number. The AWS SDK internally appends its own information, which seems
* to include the AWS SDK version, OS and JVM version.
*
* @param conf Hadoop configuration
* @param clientConfig AWS SDK configuration to update
*/
private static void initUserAgent(Configuration conf,
ClientOverrideConfiguration.Builder clientConfig) {
String userAgent = "Hadoop " + VersionInfo.getVersion();
String userAgentPrefix = conf.getTrimmed(USER_AGENT_PREFIX, "");
if (!userAgentPrefix.isEmpty()) {
userAgent = userAgentPrefix + ", " + userAgent;
}
LOG.debug("Using User-Agent: {}", userAgent);
clientConfig.putAdvancedOption(SdkAdvancedClientOption.USER_AGENT_PREFIX, userAgent);
}
private static void initSigner(Configuration conf,
ClientOverrideConfiguration.Builder clientConfig, String awsServiceIdentifier)
throws IOException {
String configKey = null;
switch (awsServiceIdentifier) {
case AWS_SERVICE_IDENTIFIER_S3:
configKey = SIGNING_ALGORITHM_S3;
break;
case AWS_SERVICE_IDENTIFIER_STS:
configKey = SIGNING_ALGORITHM_STS;
break;
default:
// Nothing to do. The original signer override is already set up.
}
if (configKey != null) {
String signerOverride = conf.getTrimmed(configKey, "");
if (!signerOverride.isEmpty()) {
LOG.debug("Signer override for {}} = {}", awsServiceIdentifier, signerOverride);
clientConfig.putAdvancedOption(SdkAdvancedClientOption.SIGNER,
SignerFactory.createSigner(signerOverride, configKey));
}
}
}
/**
* Configures request timeout.
*
* @param conf Hadoop configuration
* @param clientConfig AWS SDK configuration to update
*/
private static void initRequestTimeout(Configuration conf,
ClientOverrideConfiguration.Builder clientConfig) {
long requestTimeoutMillis = conf.getTimeDuration(REQUEST_TIMEOUT,
DEFAULT_REQUEST_TIMEOUT, TimeUnit.SECONDS, TimeUnit.MILLISECONDS);
if (requestTimeoutMillis > Integer.MAX_VALUE) {
LOG.debug("Request timeout is too high({} ms). Setting to {} ms instead",
requestTimeoutMillis, Integer.MAX_VALUE);
requestTimeoutMillis = Integer.MAX_VALUE;
}
if (requestTimeoutMillis > 0) {
clientConfig.apiCallAttemptTimeout(Duration.ofMillis(requestTimeoutMillis));
}
}
}
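A minimal sketch of how these builders might be combined into an S3Client; region and credential resolution are omitted (they are handled elsewhere), so this is illustrative only:

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import software.amazon.awssdk.services.s3.S3Client;

import static org.apache.hadoop.fs.s3a.Constants.AWS_SERVICE_IDENTIFIER_S3;

/** Minimal sketch: wire the builders above into an S3Client (region/credentials omitted). */
final class S3ClientWiringSketch {
  static S3Client build(Configuration conf, String bucket) throws IOException {
    return S3Client.builder()
        .httpClientBuilder(
            AWSClientConfig.createHttpClientBuilder(conf)
                .proxyConfiguration(AWSClientConfig.createProxyConfiguration(conf, bucket)))
        .overrideConfiguration(
            AWSClientConfig.createClientConfigBuilder(conf, AWS_SERVICE_IDENTIFIER_S3)
                .retryPolicy(AWSClientConfig.createRetryPolicyBuilder(conf).build())
                .build())
        .build();
  }
}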

View File

@ -0,0 +1,98 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs.s3a.impl;
/**
* Common S3 HTTP header values used throughout the Amazon Web Services S3 Java client.
*/
public interface AWSHeaders {
/*
* Standard HTTP Headers
*/
String CACHE_CONTROL = "Cache-Control";
String CONTENT_DISPOSITION = "Content-Disposition";
String CONTENT_ENCODING = "Content-Encoding";
String CONTENT_LENGTH = "Content-Length";
String CONTENT_RANGE = "Content-Range";
String CONTENT_MD5 = "Content-MD5";
String CONTENT_TYPE = "Content-Type";
String CONTENT_LANGUAGE = "Content-Language";
String DATE = "Date";
String ETAG = "ETag";
String LAST_MODIFIED = "Last-Modified";
/*
* Amazon HTTP Headers used by S3A
*/
/** S3's version ID header */
String S3_VERSION_ID = "x-amz-version-id";
/** Header describing what class of storage a user wants */
String STORAGE_CLASS = "x-amz-storage-class";
/** Header describing what archive tier the object is in, if any */
String ARCHIVE_STATUS = "x-amz-archive-status";
/** Header for optional server-side encryption algorithm */
String SERVER_SIDE_ENCRYPTION = "x-amz-server-side-encryption";
/** Range header for the get object request */
String RANGE = "Range";
/**
* Encrypted symmetric key header that is used in the Encryption Only (EO) envelope
* encryption mechanism.
*/
@Deprecated
String CRYPTO_KEY = "x-amz-key";
/** JSON-encoded description of encryption materials used during encryption */
String MATERIALS_DESCRIPTION = "x-amz-matdesc";
/** Header for the optional restore information of an object */
String RESTORE = "x-amz-restore";
/**
* Key wrapping algorithm such as "AESWrap" and "RSA/ECB/OAEPWithSHA-256AndMGF1Padding".
*/
String CRYPTO_KEYWRAP_ALGORITHM = "x-amz-wrap-alg";
/**
* Content encryption algorithm, such as "AES/GCM/NoPadding".
*/
String CRYPTO_CEK_ALGORITHM = "x-amz-cek-alg";
/**
* Headers in request indicating that the requester must be charged for data
* transfer.
*/
String REQUESTER_PAYS_HEADER = "x-amz-request-payer";
/** Header for the replication status of an Amazon S3 Object.*/
String OBJECT_REPLICATION_STATUS = "x-amz-replication-status";
String OBJECT_LOCK_MODE = "x-amz-object-lock-mode";
String OBJECT_LOCK_RETAIN_UNTIL_DATE = "x-amz-object-lock-retain-until-date";
String OBJECT_LOCK_LEGAL_HOLD_STATUS = "x-amz-object-lock-legal-hold";
}
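A minimal sketch of reading one of these raw headers off a V2 response, assuming the caller sits in the same package as AWSHeaders; the V2 responses expose the underlying HTTP response directly:

import java.util.Optional;
import software.amazon.awssdk.services.s3.model.HeadObjectResponse;

/** Minimal sketch: read a raw header off a V2 response using the constants above. */
final class RawHeaderSketch {
  static Optional<String> versionId(HeadObjectResponse response) {
    // firstMatchingHeader avoids the Map/List indirection of headers().get(...).get(0).
    return response.sdkHttpResponse().firstMatchingHeader(AWSHeaders.S3_VERSION_ID);
  }
}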

View File

@ -20,8 +20,9 @@ package org.apache.hadoop.fs.s3a.impl;
import java.util.List;
import com.amazonaws.SdkClientException;
import com.amazonaws.services.s3.model.DeleteObjectsRequest;
import software.amazon.awssdk.core.exception.SdkClientException;
import software.amazon.awssdk.services.s3.model.DeleteObjectsRequest;
import software.amazon.awssdk.services.s3.model.ObjectIdentifier;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@ -30,6 +31,7 @@ import org.apache.hadoop.fs.s3a.S3AStorageStatistics;
import org.apache.hadoop.fs.s3a.Statistic;
import org.apache.hadoop.fs.s3a.statistics.S3AStatisticsContext;
import static org.apache.hadoop.fs.s3a.S3AUtils.isThrottleException;
import static org.apache.hadoop.fs.s3a.Statistic.IGNORED_ERRORS;
import static org.apache.hadoop.fs.s3a.Statistic.STORE_IO_THROTTLED;
@ -113,15 +115,15 @@ public class BulkDeleteRetryHandler extends AbstractStoreOperation {
* @param deleteRequest request which failed.
*/
private void onDeleteThrottled(final DeleteObjectsRequest deleteRequest) {
final List<DeleteObjectsRequest.KeyVersion> keys = deleteRequest.getKeys();
final List<ObjectIdentifier> keys = deleteRequest.delete().objects();
final int size = keys.size();
incrementStatistic(STORE_IO_THROTTLED, size);
instrumentation.addValueToQuantiles(STORE_IO_THROTTLE_RATE, size);
THROTTLE_LOG.info(
"Bulk delete {} keys throttled -first key = {}; last = {}",
size,
keys.get(0).getKey(),
keys.get(size - 1).getKey());
keys.get(0).key(),
keys.get(size - 1).key());
}
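A minimal sketch of building the V2 bulk-delete request whose keys are unpacked above; ObjectIdentifier and Delete replace the V1 KeyVersion list, which is why the handler now reads delete().objects():

import java.util.List;
import java.util.stream.Collectors;
import software.amazon.awssdk.services.s3.model.Delete;
import software.amazon.awssdk.services.s3.model.DeleteObjectsRequest;
import software.amazon.awssdk.services.s3.model.ObjectIdentifier;

/** Minimal sketch: build the V2 bulk-delete request whose keys are unpacked above. */
final class BulkDeleteRequestSketch {
  static DeleteObjectsRequest of(String bucket, List<String> keys) {
    List<ObjectIdentifier> ids = keys.stream()
        .map(k -> ObjectIdentifier.builder().key(k).build())
        .collect(Collectors.toList());
    // V1's request.getKeys() becomes request.delete().objects() in V2.
    return DeleteObjectsRequest.builder()
        .bucket(bucket)
        .delete(Delete.builder().objects(ids).build())
        .build();
  }
}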
/**

View File

@ -20,11 +20,12 @@ package org.apache.hadoop.fs.s3a.impl;
import java.util.Locale;
import com.amazonaws.services.s3.model.CopyObjectRequest;
import com.amazonaws.services.s3.model.GetObjectMetadataRequest;
import com.amazonaws.services.s3.model.GetObjectRequest;
import com.amazonaws.services.s3.model.ObjectMetadata;
import com.amazonaws.services.s3.transfer.model.CopyResult;
import software.amazon.awssdk.services.s3.model.CopyObjectRequest;
import software.amazon.awssdk.services.s3.model.CopyObjectResponse;
import software.amazon.awssdk.services.s3.model.GetObjectRequest;
import software.amazon.awssdk.services.s3.model.GetObjectResponse;
import software.amazon.awssdk.services.s3.model.HeadObjectRequest;
import software.amazon.awssdk.services.s3.model.HeadObjectResponse;
import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@ -211,11 +212,24 @@ public abstract class ChangeDetectionPolicy {
* null if the attribute is unavailable (such as when the policy says to use
* versionId but object versioning is not enabled for the bucket).
*/
public abstract String getRevisionId(ObjectMetadata objectMetadata,
public abstract String getRevisionId(HeadObjectResponse objectMetadata,
String uri);
/**
* Like {{@link #getRevisionId(ObjectMetadata, String)}}, but retrieves the
* Like {{@link #getRevisionId(HeadObjectResponse, String)}}, but retrieves the
* revision identifier from {@link GetObjectResponse}.
*
* @param getObjectResponse the response instance
* @param uri the URI of the object
* @return the revisionId string as interpreted by this policy, or potentially
* null if the attribute is unavailable (such as when the policy says to use
* versionId but object versioning is not enabled for the bucket).
*/
public abstract String getRevisionId(GetObjectResponse getObjectResponse,
String uri);
/**
* Like {{@link #getRevisionId(HeadObjectResponse, String)}}, but retrieves the
* revision identifier from {@link S3ObjectAttributes}.
*
* @param s3Attributes the object attributes
@ -226,44 +240,44 @@ public abstract class ChangeDetectionPolicy {
public abstract String getRevisionId(S3ObjectAttributes s3Attributes);
/**
* Like {{@link #getRevisionId(ObjectMetadata, String)}}, but retrieves the
* revision identifier from {@link CopyResult}.
* Like {{@link #getRevisionId(HeadObjectResponse, String)}}, but retrieves the
* revision identifier from {@link CopyObjectResponse}.
*
* @param copyResult the copy result
* @param copyObjectResponse the copy response
* @return the revisionId string as interpreted by this policy, or potentially
* null if the attribute is unavailable (such as when the policy says to use
* versionId but object versioning is not enabled for the bucket).
*/
public abstract String getRevisionId(CopyResult copyResult);
public abstract String getRevisionId(CopyObjectResponse copyObjectResponse);
/**
* Applies the given {@link #getRevisionId(ObjectMetadata, String) revisionId}
* as a server-side qualification on the {@code GetObjectRequest}.
* Applies the given {@link #getRevisionId(HeadObjectResponse, String) revisionId}
* as a server-side qualification on the {@code GetObjectRequest.Builder}.
*
* @param request the request
* @param revisionId the revision id
*/
public abstract void applyRevisionConstraint(GetObjectRequest request,
public abstract void applyRevisionConstraint(GetObjectRequest.Builder request,
String revisionId);
/**
* Applies the given {@link #getRevisionId(ObjectMetadata, String) revisionId}
* Applies the given {@link #getRevisionId(HeadObjectResponse, String) revisionId}
* as a server-side qualification on the {@code CopyObjectRequest}.
*
* @param request the request
* @param requestBuilder the copy object request builder
* @param revisionId the revision id
*/
public abstract void applyRevisionConstraint(CopyObjectRequest request,
public abstract void applyRevisionConstraint(CopyObjectRequest.Builder requestBuilder,
String revisionId);
/**
* Applies the given {@link #getRevisionId(ObjectMetadata, String) revisionId}
* Applies the given {@link #getRevisionId(HeadObjectResponse, String) revisionId}
* as a server-side qualification on the {@code GetObjectMetadataRequest}.
*
* @param request the request
* @param requestBuilder the head object request builder
* @param revisionId the revision id
*/
public abstract void applyRevisionConstraint(GetObjectMetadataRequest request,
public abstract void applyRevisionConstraint(HeadObjectRequest.Builder requestBuilder,
String revisionId);
/**
@ -323,7 +337,7 @@ public abstract class ChangeDetectionPolicy {
}
/**
* Change detection policy based on {@link ObjectMetadata#getETag() eTag}.
* Change detection policy based on {@link HeadObjectResponse#eTag() eTag}.
*/
static class ETagChangeDetectionPolicy extends ChangeDetectionPolicy {
@ -332,8 +346,13 @@ public abstract class ChangeDetectionPolicy {
}
@Override
public String getRevisionId(ObjectMetadata objectMetadata, String uri) {
return objectMetadata.getETag();
public String getRevisionId(GetObjectResponse objectMetadata, String uri) {
return objectMetadata.eTag();
}
@Override
public String getRevisionId(HeadObjectResponse objectMetadata, String uri) {
return objectMetadata.eTag();
}
@Override
@ -342,34 +361,34 @@ public abstract class ChangeDetectionPolicy {
}
@Override
public String getRevisionId(CopyResult copyResult) {
return copyResult.getETag();
public String getRevisionId(CopyObjectResponse copyObjectResponse) {
return copyObjectResponse.copyObjectResult().eTag();
}
@Override
public void applyRevisionConstraint(GetObjectRequest request,
public void applyRevisionConstraint(GetObjectRequest.Builder builder,
String revisionId) {
if (revisionId != null) {
LOG.debug("Restricting get request to etag {}", revisionId);
request.withMatchingETagConstraint(revisionId);
builder.ifMatch(revisionId);
} else {
LOG.debug("No etag revision ID to use as a constraint");
}
}
@Override
public void applyRevisionConstraint(CopyObjectRequest request,
public void applyRevisionConstraint(CopyObjectRequest.Builder requestBuilder,
String revisionId) {
if (revisionId != null) {
LOG.debug("Restricting copy request to etag {}", revisionId);
request.withMatchingETagConstraint(revisionId);
requestBuilder.copySourceIfMatch(revisionId);
} else {
LOG.debug("No etag revision ID to use as a constraint");
}
}
@Override
public void applyRevisionConstraint(GetObjectMetadataRequest request,
public void applyRevisionConstraint(HeadObjectRequest.Builder requestBuilder,
String revisionId) {
LOG.debug("Unable to restrict HEAD request to etag; will check later");
}
@ -388,7 +407,7 @@ public abstract class ChangeDetectionPolicy {
/**
* Change detection policy based on
* {@link ObjectMetadata#getVersionId() versionId}.
* {@link HeadObjectResponse#versionId() versionId}.
*/
static class VersionIdChangeDetectionPolicy extends
ChangeDetectionPolicy {
@ -398,8 +417,16 @@ public abstract class ChangeDetectionPolicy {
}
@Override
public String getRevisionId(ObjectMetadata objectMetadata, String uri) {
String versionId = objectMetadata.getVersionId();
public String getRevisionId(HeadObjectResponse objectMetadata, String uri) {
return logIfNull(objectMetadata.versionId(), uri);
}
@Override
public String getRevisionId(GetObjectResponse getObjectResponse, String uri) {
return logIfNull(getObjectResponse.versionId(), uri);
}
private String logIfNull(String versionId, String uri) {
if (versionId == null) {
// this policy doesn't work if the bucket doesn't have object versioning
// enabled (which isn't by default)
@ -419,38 +446,38 @@ public abstract class ChangeDetectionPolicy {
}
@Override
public String getRevisionId(CopyResult copyResult) {
return copyResult.getVersionId();
public String getRevisionId(CopyObjectResponse copyObjectResponse) {
return copyObjectResponse.versionId();
}
@Override
public void applyRevisionConstraint(GetObjectRequest request,
public void applyRevisionConstraint(GetObjectRequest.Builder builder,
String revisionId) {
if (revisionId != null) {
LOG.debug("Restricting get request to version {}", revisionId);
request.withVersionId(revisionId);
builder.versionId(revisionId);
} else {
LOG.debug("No version ID to use as a constraint");
}
}
@Override
public void applyRevisionConstraint(CopyObjectRequest request,
public void applyRevisionConstraint(CopyObjectRequest.Builder requestBuilder,
String revisionId) {
if (revisionId != null) {
LOG.debug("Restricting copy request to version {}", revisionId);
request.withSourceVersionId(revisionId);
requestBuilder.sourceVersionId(revisionId);
} else {
LOG.debug("No version ID to use as a constraint");
}
}
@Override
public void applyRevisionConstraint(GetObjectMetadataRequest request,
public void applyRevisionConstraint(HeadObjectRequest.Builder requestBuilder,
String revisionId) {
if (revisionId != null) {
LOG.debug("Restricting metadata request to version {}", revisionId);
request.withVersionId(revisionId);
requestBuilder.versionId(revisionId);
} else {
LOG.debug("No version ID to use as a constraint");
}
@ -482,7 +509,13 @@ public abstract class ChangeDetectionPolicy {
}
@Override
public String getRevisionId(final ObjectMetadata objectMetadata,
public String getRevisionId(final GetObjectResponse objectMetadata,
final String uri) {
return null;
}
@Override
public String getRevisionId(final HeadObjectResponse objectMetadata,
final String uri) {
return null;
}
@ -493,24 +526,24 @@ public abstract class ChangeDetectionPolicy {
}
@Override
public String getRevisionId(CopyResult copyResult) {
public String getRevisionId(CopyObjectResponse copyObjectResponse) {
return null;
}
@Override
public void applyRevisionConstraint(final GetObjectRequest request,
public void applyRevisionConstraint(final GetObjectRequest.Builder builder,
final String revisionId) {
}
@Override
public void applyRevisionConstraint(CopyObjectRequest request,
public void applyRevisionConstraint(CopyObjectRequest.Builder requestBuilder,
String revisionId) {
}
@Override
public void applyRevisionConstraint(GetObjectMetadataRequest request,
public void applyRevisionConstraint(HeadObjectRequest.Builder requestBuilder,
String revisionId) {
}

View File

@ -18,14 +18,14 @@
package org.apache.hadoop.fs.s3a.impl;
import com.amazonaws.AmazonServiceException;
import com.amazonaws.SdkBaseException;
import com.amazonaws.services.s3.model.CopyObjectRequest;
import com.amazonaws.services.s3.model.GetObjectMetadataRequest;
import com.amazonaws.services.s3.model.GetObjectRequest;
import com.amazonaws.services.s3.model.ObjectMetadata;
import com.amazonaws.services.s3.model.S3Object;
import com.amazonaws.services.s3.transfer.model.CopyResult;
import software.amazon.awssdk.awscore.exception.AwsServiceException;
import software.amazon.awssdk.core.exception.SdkException;
import software.amazon.awssdk.services.s3.model.CopyObjectRequest;
import software.amazon.awssdk.services.s3.model.CopyObjectResponse;
import software.amazon.awssdk.services.s3.model.GetObjectRequest;
import software.amazon.awssdk.services.s3.model.GetObjectResponse;
import software.amazon.awssdk.services.s3.model.HeadObjectRequest;
import software.amazon.awssdk.services.s3.model.HeadObjectResponse;
import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@ -39,6 +39,7 @@ import org.apache.hadoop.fs.s3a.RemoteFileChangedException;
import org.apache.hadoop.fs.s3a.S3ObjectAttributes;
import org.apache.hadoop.fs.s3a.statistics.ChangeTrackerStatistics;
import static org.apache.hadoop.fs.s3a.impl.InternalConstants.SC_412_PRECONDITION_FAILED;
import static org.apache.hadoop.thirdparty.com.google.common.base.Preconditions.checkNotNull;
/**
@ -55,8 +56,6 @@ public class ChangeTracker {
private static final Logger LOG =
LoggerFactory.getLogger(ChangeTracker.class);
/** {@code 412 Precondition Failed} (HTTP/1.1 - RFC 2616) */
public static final int SC_PRECONDITION_FAILED = 412;
public static final String CHANGE_REPORTED_BY_S3 = "Change reported by S3";
/** Policy to use. */
@ -117,15 +116,15 @@ public class ChangeTracker {
/**
* Apply any revision control set by the policy if it is to be
* enforced on the server.
* @param request request to modify
* @param builder request builder to modify
* @return true iff a constraint was added.
*/
public boolean maybeApplyConstraint(
final GetObjectRequest request) {
final GetObjectRequest.Builder builder) {
if (policy.getMode() == ChangeDetectionPolicy.Mode.Server
&& revisionId != null) {
policy.applyRevisionConstraint(request, revisionId);
policy.applyRevisionConstraint(builder, revisionId);
return true;
}
return false;
@ -134,26 +133,26 @@ public class ChangeTracker {
/**
* Apply any revision control set by the policy if it is to be
* enforced on the server.
* @param request request to modify
* @param requestBuilder copy object request builder to modify
* @return true iff a constraint was added.
*/
public boolean maybeApplyConstraint(
final CopyObjectRequest request) {
final CopyObjectRequest.Builder requestBuilder) {
if (policy.getMode() == ChangeDetectionPolicy.Mode.Server
&& revisionId != null) {
policy.applyRevisionConstraint(request, revisionId);
policy.applyRevisionConstraint(requestBuilder, revisionId);
return true;
}
return false;
}
public boolean maybeApplyConstraint(
final GetObjectMetadataRequest request) {
final HeadObjectRequest.Builder requestBuilder) {
if (policy.getMode() == ChangeDetectionPolicy.Mode.Server
&& revisionId != null) {
policy.applyRevisionConstraint(request, revisionId);
policy.applyRevisionConstraint(requestBuilder, revisionId);
return true;
}
return false;
@ -168,7 +167,7 @@ public class ChangeTracker {
* @throws PathIOException raised on failure
* @throws RemoteFileChangedException if the remote file has changed.
*/
public void processResponse(final S3Object object,
public void processResponse(final GetObjectResponse object,
final String operation,
final long pos) throws PathIOException {
if (object == null) {
@ -191,24 +190,24 @@ public class ChangeTracker {
}
}
processMetadata(object.getObjectMetadata(), operation);
processMetadata(object, operation);
}
/**
* Process the response from the server for validation against the
* change policy.
* @param copyResult result of a copy operation
* @param copyObjectResponse response of a copy operation
* @throws PathIOException raised on failure
* @throws RemoteFileChangedException if the remote file has changed.
*/
public void processResponse(final CopyResult copyResult)
public void processResponse(final CopyObjectResponse copyObjectResponse)
throws PathIOException {
// ETag (sometimes, depending on encryption and/or multipart) is not the
// same on the copied object as the original. Version Id seems to never
// be the same on the copy. As such, there isn't really anything that
// can be verified on the response, except that a revision ID is present
// if required.
String newRevisionId = policy.getRevisionId(copyResult);
String newRevisionId = policy.getRevisionId(copyObjectResponse);
LOG.debug("Copy result {}: {}", policy.getSource(), newRevisionId);
if (newRevisionId == null && policy.isRequireVersion()) {
throw new NoVersionAttributeException(uri, String.format(
@ -227,13 +226,11 @@ public class ChangeTracker {
* generated (e.g. "copy", "read", "select").
* @throws RemoteFileChangedException if the remote file has changed.
*/
public void processException(SdkBaseException e, String operation) throws
public void processException(SdkException e, String operation) throws
RemoteFileChangedException {
if (e instanceof AmazonServiceException) {
AmazonServiceException serviceException = (AmazonServiceException) e;
// This isn't really going to be hit due to
// https://github.com/aws/aws-sdk-java/issues/1644
if (serviceException.getStatusCode() == SC_PRECONDITION_FAILED) {
if (e instanceof AwsServiceException) {
AwsServiceException serviceException = (AwsServiceException)e;
if (serviceException.statusCode() == SC_412_PRECONDITION_FAILED) {
versionMismatches.versionMismatchError();
throw new RemoteFileChangedException(uri, operation, String.format(
RemoteFileChangedException.PRECONDITIONS_FAILED
@ -254,12 +251,26 @@ public class ChangeTracker {
* @throws PathIOException raised on failure
* @throws RemoteFileChangedException if the remote file has changed.
*/
public void processMetadata(final ObjectMetadata metadata,
public void processMetadata(final HeadObjectResponse metadata,
final String operation) throws PathIOException {
final String newRevisionId = policy.getRevisionId(metadata, uri);
processNewRevision(newRevisionId, operation, -1);
}
/**
* Process the response from server for validation against the change
* policy.
* @param getObjectResponse response returned from server
* @param operation operation in progress
* @throws PathIOException raised on failure
* @throws RemoteFileChangedException if the remote file has changed.
*/
public void processMetadata(final GetObjectResponse getObjectResponse,
final String operation) throws PathIOException {
final String newRevisionId = policy.getRevisionId(getObjectResponse, uri);
processNewRevision(newRevisionId, operation, -1);
}
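A minimal sketch of how a reader might drive this tracker against the V2 builder-based API, assuming an existing S3Client and tracker in the same package; names are illustrative:

import org.apache.hadoop.fs.PathIOException;
import software.amazon.awssdk.core.ResponseInputStream;
import software.amazon.awssdk.services.s3.S3Client;
import software.amazon.awssdk.services.s3.model.GetObjectRequest;
import software.amazon.awssdk.services.s3.model.GetObjectResponse;

/** Minimal sketch: apply the tracker's constraint to a GET and validate the response. */
final class ChangeTrackerUsageSketch {
  static ResponseInputStream<GetObjectResponse> open(S3Client s3, ChangeTracker tracker,
      String bucket, String key) throws PathIOException {
    GetObjectRequest.Builder builder = GetObjectRequest.builder().bucket(bucket).key(key);
    // Adds ifMatch/versionId server-side constraints when the policy requires them.
    tracker.maybeApplyConstraint(builder);
    ResponseInputStream<GetObjectResponse> stream = s3.getObject(builder.build());
    // Validates the revision on the response (eTag or versionId, per policy).
    tracker.processResponse(stream.response(), "read", 0);
    return stream;
  }
}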
/**
* Validate a revision from the server against our expectations.
* @param newRevisionId new revision.

View File

@ -21,8 +21,8 @@ package org.apache.hadoop.fs.s3a.impl;
import javax.net.ssl.HostnameVerifier;
import java.io.IOException;
import com.amazonaws.ClientConfiguration;
import com.amazonaws.thirdparty.apache.http.conn.ssl.SSLConnectionSocketFactory;
import software.amazon.awssdk.http.apache.ApacheHttpClient;
import software.amazon.awssdk.thirdparty.org.apache.http.conn.ssl.SSLConnectionSocketFactory;
import org.apache.hadoop.security.ssl.DelegatingSSLSocketFactory;
@ -35,13 +35,12 @@ public class ConfigureShadedAWSSocketFactory implements
NetworkBinding.ConfigureAWSSocketFactory {
@Override
public void configureSocketFactory(final ClientConfiguration awsConf,
public void configureSocketFactory(final ApacheHttpClient.Builder httpClientBuilder,
final DelegatingSSLSocketFactory.SSLChannelMode channelMode)
throws IOException {
DelegatingSSLSocketFactory.initializeDefaultFactory(channelMode);
awsConf.getApacheHttpClientConfig().setSslSocketFactory(
new SSLConnectionSocketFactory(
DelegatingSSLSocketFactory.getDefaultFactory(),
(HostnameVerifier) null));
httpClientBuilder.socketFactory(new SSLConnectionSocketFactory(
DelegatingSSLSocketFactory.getDefaultFactory(),
(HostnameVerifier) null));
}
}

View File

@ -1,80 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs.s3a.impl;
import com.amazonaws.SdkBaseException;
import com.amazonaws.services.s3.transfer.Copy;
import com.amazonaws.services.s3.transfer.model.CopyResult;
/**
* Extracts the outcome of a TransferManager-executed copy operation.
*/
public final class CopyOutcome {
/**
* Result of a successful copy.
*/
private final CopyResult copyResult;
/** the copy was interrupted. */
private final InterruptedException interruptedException;
/**
* The copy raised an AWS Exception of some form.
*/
private final SdkBaseException awsException;
public CopyOutcome(CopyResult copyResult,
InterruptedException interruptedException,
SdkBaseException awsException) {
this.copyResult = copyResult;
this.interruptedException = interruptedException;
this.awsException = awsException;
}
public CopyResult getCopyResult() {
return copyResult;
}
public InterruptedException getInterruptedException() {
return interruptedException;
}
public SdkBaseException getAwsException() {
return awsException;
}
/**
* Calls {@code Copy.waitForCopyResult()} to await the result, converts
* it to a copy outcome.
* Exceptions caught and
* @param copy the copy operation.
* @return the outcome.
*/
public static CopyOutcome waitForCopy(Copy copy) {
try {
CopyResult result = copy.waitForCopyResult();
return new CopyOutcome(result, null, null);
} catch (SdkBaseException e) {
return new CopyOutcome(null, null, e);
} catch (InterruptedException e) {
return new CopyOutcome(null, e, null);
}
}
}

View File

@ -24,7 +24,7 @@ import java.util.List;
import java.util.concurrent.CompletableFuture;
import java.util.stream.Collectors;
import com.amazonaws.services.s3.model.DeleteObjectsRequest;
import software.amazon.awssdk.services.s3.model.ObjectIdentifier;
import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.ListeningExecutorService;
import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.MoreExecutors;
import org.slf4j.Logger;
@ -40,6 +40,7 @@ import org.apache.hadoop.fs.s3a.S3ALocatedFileStatus;
import org.apache.hadoop.fs.s3a.Tristate;
import org.apache.hadoop.util.DurationInfo;
import static org.apache.hadoop.fs.store.audit.AuditingFunctions.callableWithinAuditSpan;
import static org.apache.hadoop.thirdparty.com.google.common.base.Preconditions.checkArgument;
import static org.apache.hadoop.fs.s3a.impl.CallableSupplier.maybeAwaitCompletion;
@ -386,9 +387,9 @@ public class DeleteOperation extends ExecutingStoreOperation<Boolean> {
"Delete page of %d keys", keyList.size())) {
if (!keyList.isEmpty()) {
// first delete the files.
List<DeleteObjectsRequest.KeyVersion> files = keyList.stream()
List<ObjectIdentifier> files = keyList.stream()
.filter(e -> !e.isDirMarker)
.map(e -> e.keyVersion)
.map(e -> e.objectIdentifier)
.collect(Collectors.toList());
LOG.debug("Deleting of {} file objects", files.size());
Invoker.once("Remove S3 Files",
@ -398,9 +399,9 @@ public class DeleteOperation extends ExecutingStoreOperation<Boolean> {
false
));
// now the dirs
List<DeleteObjectsRequest.KeyVersion> dirs = keyList.stream()
List<ObjectIdentifier> dirs = keyList.stream()
.filter(e -> e.isDirMarker)
.map(e -> e.keyVersion)
.map(e -> e.objectIdentifier)
.collect(Collectors.toList());
LOG.debug("Deleting of {} directory markers", dirs.size());
// This is invoked with deleteFakeDir.
@ -422,17 +423,17 @@ public class DeleteOperation extends ExecutingStoreOperation<Boolean> {
* to choose which statistics to update.
*/
private static final class DeleteEntry {
private final DeleteObjectsRequest.KeyVersion keyVersion;
private final ObjectIdentifier objectIdentifier;
private final boolean isDirMarker;
private DeleteEntry(final String key, final boolean isDirMarker) {
this.keyVersion = new DeleteObjectsRequest.KeyVersion(key);
this.objectIdentifier = ObjectIdentifier.builder().key(key).build();
this.isDirMarker = isDirMarker;
}
public String getKey() {
return keyVersion.getKey();
return objectIdentifier.key();
}
@Override

View File

@ -18,9 +18,9 @@
package org.apache.hadoop.fs.s3a.impl;
import com.amazonaws.AmazonServiceException;
import software.amazon.awssdk.awscore.exception.AwsServiceException;
import static org.apache.hadoop.fs.s3a.impl.InternalConstants.SC_404;
import static org.apache.hadoop.fs.s3a.impl.InternalConstants.SC_404_NOT_FOUND;
/**
* Translate from AWS SDK-wrapped exceptions into IOExceptions with
@ -49,9 +49,9 @@ public class ErrorTranslation {
* @return true if the status code and error code mean that the
* remote bucket is unknown.
*/
public static boolean isUnknownBucket(AmazonServiceException e) {
return e.getStatusCode() == SC_404
&& AwsErrorCodes.E_NO_SUCH_BUCKET.equals(e.getErrorCode());
public static boolean isUnknownBucket(AwsServiceException e) {
return e.statusCode() == SC_404_NOT_FOUND
&& AwsErrorCodes.E_NO_SUCH_BUCKET.equals(e.awsErrorDetails().errorCode());
}
/**
@ -62,8 +62,8 @@ public class ErrorTranslation {
* @return true if the status code and error code mean that the
* HEAD request returned 404 but the bucket was there.
*/
public static boolean isObjectNotFound(AmazonServiceException e) {
return e.getStatusCode() == SC_404 && !isUnknownBucket(e);
public static boolean isObjectNotFound(AwsServiceException e) {
return e.statusCode() == SC_404_NOT_FOUND && !isUnknownBucket(e);
}
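A minimal sketch of these helpers in use, probing a bucket with the V2 client and classifying the service exception via statusCode() and awsErrorDetails().errorCode(); it assumes an existing S3Client and is illustrative only:

import software.amazon.awssdk.awscore.exception.AwsServiceException;
import software.amazon.awssdk.services.s3.S3Client;
import software.amazon.awssdk.services.s3.model.HeadBucketRequest;

/** Minimal sketch: classify a failed bucket probe with the helpers above. */
final class BucketProbeSketch {
  static boolean bucketExists(S3Client s3, String bucket) {
    try {
      s3.headBucket(HeadBucketRequest.builder().bucket(bucket).build());
      return true;
    } catch (AwsServiceException e) {
      // 404 + NoSuchBucket means the bucket itself is missing.
      if (ErrorTranslation.isUnknownBucket(e)) {
        return false;
      }
      throw e;
    }
  }
}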
/**

View File

@ -23,13 +23,15 @@ import java.io.FileNotFoundException;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.TreeMap;
import com.amazonaws.services.s3.Headers;
import com.amazonaws.services.s3.model.ObjectMetadata;
import software.amazon.awssdk.services.s3.model.CopyObjectRequest;
import software.amazon.awssdk.services.s3.model.HeadBucketResponse;
import software.amazon.awssdk.services.s3.model.HeadObjectResponse;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@ -75,50 +77,50 @@ public class HeaderProcessing extends AbstractStoreOperation {
* Standard HTTP header found on some S3 objects: {@value}.
*/
public static final String XA_CACHE_CONTROL =
XA_HEADER_PREFIX + Headers.CACHE_CONTROL;
XA_HEADER_PREFIX + AWSHeaders.CACHE_CONTROL;
/**
* Standard HTTP header found on some S3 objects: {@value}.
*/
public static final String XA_CONTENT_DISPOSITION =
XA_HEADER_PREFIX + Headers.CONTENT_DISPOSITION;
XA_HEADER_PREFIX + AWSHeaders.CONTENT_DISPOSITION;
/**
* Content encoding; can be configured: {@value}.
*/
public static final String XA_CONTENT_ENCODING =
XA_HEADER_PREFIX + Headers.CONTENT_ENCODING;
XA_HEADER_PREFIX + AWSHeaders.CONTENT_ENCODING;
/**
* Standard HTTP header found on some S3 objects: {@value}.
*/
public static final String XA_CONTENT_LANGUAGE =
XA_HEADER_PREFIX + Headers.CONTENT_LANGUAGE;
XA_HEADER_PREFIX + AWSHeaders.CONTENT_LANGUAGE;
/**
* Length XAttr: {@value}.
*/
public static final String XA_CONTENT_LENGTH =
XA_HEADER_PREFIX + Headers.CONTENT_LENGTH;
XA_HEADER_PREFIX + AWSHeaders.CONTENT_LENGTH;
/**
* Standard HTTP header found on some S3 objects: {@value}.
*/
public static final String XA_CONTENT_MD5 =
XA_HEADER_PREFIX + Headers.CONTENT_MD5;
XA_HEADER_PREFIX + AWSHeaders.CONTENT_MD5;
/**
* Content range: {@value}.
* This is returned on GET requests with ranges.
*/
public static final String XA_CONTENT_RANGE =
XA_HEADER_PREFIX + Headers.CONTENT_RANGE;
XA_HEADER_PREFIX + AWSHeaders.CONTENT_RANGE;
/**
* Content type: may be set when uploading.
* {@value}.
*/
public static final String XA_CONTENT_TYPE =
XA_HEADER_PREFIX + Headers.CONTENT_TYPE;
XA_HEADER_PREFIX + AWSHeaders.CONTENT_TYPE;
/**
* Etag Header {@value}.
@ -126,14 +128,14 @@ public class HeaderProcessing extends AbstractStoreOperation {
* it can be retrieved via {@code getFileChecksum(path)} if
* the S3A connector is enabled.
*/
public static final String XA_ETAG = XA_HEADER_PREFIX + Headers.ETAG;
public static final String XA_ETAG = XA_HEADER_PREFIX + AWSHeaders.ETAG;
/**
* last modified XAttr: {@value}.
*/
public static final String XA_LAST_MODIFIED =
XA_HEADER_PREFIX + Headers.LAST_MODIFIED;
XA_HEADER_PREFIX + AWSHeaders.LAST_MODIFIED;
/* AWS Specific Headers. May not be found on other S3 endpoints. */
@ -144,50 +146,50 @@ public class HeaderProcessing extends AbstractStoreOperation {
* Value {@value}.
*/
public static final String XA_ARCHIVE_STATUS =
XA_HEADER_PREFIX + Headers.ARCHIVE_STATUS;
XA_HEADER_PREFIX + AWSHeaders.ARCHIVE_STATUS;
/**
* Object legal hold status. {@value}.
*/
public static final String XA_OBJECT_LOCK_LEGAL_HOLD_STATUS =
XA_HEADER_PREFIX + Headers.OBJECT_LOCK_LEGAL_HOLD_STATUS;
XA_HEADER_PREFIX + AWSHeaders.OBJECT_LOCK_LEGAL_HOLD_STATUS;
/**
* Object lock mode. {@value}.
*/
public static final String XA_OBJECT_LOCK_MODE =
XA_HEADER_PREFIX + Headers.OBJECT_LOCK_MODE;
XA_HEADER_PREFIX + AWSHeaders.OBJECT_LOCK_MODE;
/**
* ISO8601 expiry date of object lock hold. {@value}.
*/
public static final String XA_OBJECT_LOCK_RETAIN_UNTIL_DATE =
XA_HEADER_PREFIX + Headers.OBJECT_LOCK_RETAIN_UNTIL_DATE;
XA_HEADER_PREFIX + AWSHeaders.OBJECT_LOCK_RETAIN_UNTIL_DATE;
/**
* Replication status for cross-region replicated objects. {@value}.
*/
public static final String XA_OBJECT_REPLICATION_STATUS =
XA_HEADER_PREFIX + Headers.OBJECT_REPLICATION_STATUS;
XA_HEADER_PREFIX + AWSHeaders.OBJECT_REPLICATION_STATUS;
/**
* Version ID; empty for non-versioned buckets/data. {@value}.
*/
public static final String XA_S3_VERSION_ID =
XA_HEADER_PREFIX + Headers.S3_VERSION_ID;
XA_HEADER_PREFIX + AWSHeaders.S3_VERSION_ID;
/**
* The server-side encryption algorithm to use
* with AWS-managed keys: {@value}.
*/
public static final String XA_SERVER_SIDE_ENCRYPTION =
XA_HEADER_PREFIX + Headers.SERVER_SIDE_ENCRYPTION;
XA_HEADER_PREFIX + AWSHeaders.SERVER_SIDE_ENCRYPTION;
/**
* Storage Class XAttr: {@value}.
*/
public static final String XA_STORAGE_CLASS =
XA_HEADER_PREFIX + Headers.STORAGE_CLASS;
XA_HEADER_PREFIX + AWSHeaders.STORAGE_CLASS;
/**
* HTTP Referrer for logs: {@value}.
@ -275,9 +277,28 @@ public class HeaderProcessing extends AbstractStoreOperation {
final Statistic statistic) throws IOException {
StoreContext context = getStoreContext();
String objectKey = context.pathToKey(path);
ObjectMetadata md;
String symbol = statistic.getSymbol();
S3AStatisticsContext instrumentation = context.getInstrumentation();
Map<String, byte[]> headers = new TreeMap<>();
HeadObjectResponse md;
// Attempting to get metadata for the root, so use head bucket.
if (objectKey.isEmpty()) {
HeadBucketResponse headBucketResponse =
trackDuration(instrumentation, symbol, () -> callbacks.getBucketMetadata());
if (headBucketResponse.sdkHttpResponse() != null
&& headBucketResponse.sdkHttpResponse().headers() != null
&& headBucketResponse.sdkHttpResponse().headers().get(AWSHeaders.CONTENT_TYPE) != null) {
maybeSetHeader(headers, XA_CONTENT_TYPE,
headBucketResponse.sdkHttpResponse().headers().get(AWSHeaders.CONTENT_TYPE).get(0));
}
maybeSetHeader(headers, XA_CONTENT_LENGTH, 0);
return headers;
}
try {
md = trackDuration(instrumentation, symbol, () ->
callbacks.getObjectMetadata(objectKey));
@ -287,59 +308,64 @@ public class HeaderProcessing extends AbstractStoreOperation {
callbacks.getObjectMetadata(objectKey + "/"));
}
// all user metadata
Map<String, String> rawHeaders = md.getUserMetadata();
Map<String, byte[]> headers = new TreeMap<>();
Map<String, String> rawHeaders = md.metadata();
rawHeaders.forEach((key, value) ->
headers.put(XA_HEADER_PREFIX + key, encodeBytes(value)));
// and add the usual content length &c, if set
maybeSetHeader(headers, XA_CACHE_CONTROL,
md.getCacheControl());
md.cacheControl());
maybeSetHeader(headers, XA_CONTENT_DISPOSITION,
md.getContentDisposition());
md.contentDisposition());
maybeSetHeader(headers, XA_CONTENT_ENCODING,
md.getContentEncoding());
md.contentEncoding());
maybeSetHeader(headers, XA_CONTENT_LANGUAGE,
md.getContentLanguage());
md.contentLanguage());
// If CSE is enabled, use the unencrypted content length.
if (md.getUserMetaDataOf(Headers.CRYPTO_CEK_ALGORITHM) != null
&& md.getUserMetaDataOf(Headers.UNENCRYPTED_CONTENT_LENGTH) != null) {
maybeSetHeader(headers, XA_CONTENT_LENGTH,
md.getUserMetaDataOf(Headers.UNENCRYPTED_CONTENT_LENGTH));
} else {
maybeSetHeader(headers, XA_CONTENT_LENGTH,
md.getContentLength());
// TODO: CSE is not supported yet, add these headers in during CSE work.
// if (md.getUserMetaDataOf(Headers.CRYPTO_CEK_ALGORITHM) != null
// && md.getUserMetaDataOf(Headers.UNENCRYPTED_CONTENT_LENGTH) != null) {
// maybeSetHeader(headers, XA_CONTENT_LENGTH,
// md.getUserMetaDataOf(Headers.UNENCRYPTED_CONTENT_LENGTH));
// } else {
// maybeSetHeader(headers, XA_CONTENT_LENGTH,
// md.contentLength());
// }
// maybeSetHeader(headers, XA_CONTENT_MD5,
// md.getContentMD5());
// TODO: Add back in else block during CSE work.
maybeSetHeader(headers, XA_CONTENT_LENGTH,
md.contentLength());
if (md.sdkHttpResponse() != null && md.sdkHttpResponse().headers() != null
&& md.sdkHttpResponse().headers().get("Content-Range") != null) {
maybeSetHeader(headers, XA_CONTENT_RANGE,
md.sdkHttpResponse().headers().get("Content-Range").get(0));
}
maybeSetHeader(headers, XA_CONTENT_MD5,
md.getContentMD5());
maybeSetHeader(headers, XA_CONTENT_RANGE,
md.getContentRange());
maybeSetHeader(headers, XA_CONTENT_TYPE,
md.getContentType());
md.contentType());
maybeSetHeader(headers, XA_ETAG,
md.getETag());
md.eTag());
maybeSetHeader(headers, XA_LAST_MODIFIED,
md.getLastModified());
Date.from(md.lastModified()));
// AWS custom headers
maybeSetHeader(headers, XA_ARCHIVE_STATUS,
md.getArchiveStatus());
md.archiveStatus());
maybeSetHeader(headers, XA_OBJECT_LOCK_LEGAL_HOLD_STATUS,
md.getObjectLockLegalHoldStatus());
md.objectLockLegalHoldStatus());
maybeSetHeader(headers, XA_OBJECT_LOCK_MODE,
md.getObjectLockMode());
md.objectLockMode());
maybeSetHeader(headers, XA_OBJECT_LOCK_RETAIN_UNTIL_DATE,
md.getObjectLockRetainUntilDate());
md.objectLockRetainUntilDate());
maybeSetHeader(headers, XA_OBJECT_REPLICATION_STATUS,
md.getReplicationStatus());
md.replicationStatus());
maybeSetHeader(headers, XA_S3_VERSION_ID,
md.getVersionId());
md.versionId());
maybeSetHeader(headers, XA_SERVER_SIDE_ENCRYPTION,
md.getSSEAlgorithm());
md.serverSideEncryptionAsString());
maybeSetHeader(headers, XA_STORAGE_CLASS,
md.getStorageClass());
maybeSetHeader(headers, XA_STORAGE_CLASS,
md.getReplicationStatus());
md.storageClassAsString());
return headers;
}
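The headers assembled above surface as XAttrs on the S3A filesystem, so a client can read them back through the standard FileSystem API. A minimal sketch, assuming XA_HEADER_PREFIX resolves to "header." and using an illustrative bucket and path:
import java.nio.charset.StandardCharsets;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
public final class XAttrProbe {
  public static void main(String[] args) throws Exception {
    // illustrative object; any path in an S3A bucket will do
    Path path = new Path("s3a://example-bucket/data/file.csv");
    try (FileSystem fs = FileSystem.get(path.toUri(), new Configuration())) {
      // XA_CONTENT_LENGTH is assumed to expand to "header.Content-Length"
      byte[] value = fs.getXAttr(path, "header.Content-Length");
      System.out.println("Content-Length = "
          + new String(value, StandardCharsets.UTF_8));
    }
  }
}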
@ -458,70 +484,51 @@ public class HeaderProcessing extends AbstractStoreOperation {
}
/**
* Creates a copy of the passed {@link ObjectMetadata}.
* Does so without using the {@link ObjectMetadata#clone()} method,
* to avoid copying unnecessary headers.
* Creates a copy of the passed metadata.
* This operation does not copy the {@code X_HEADER_MAGIC_MARKER}
* header to avoid confusion. If a marker file is renamed,
* it loses information about any remapped file.
* If new fields are added to ObjectMetadata which are not
* present in the user metadata headers, they will not be picked
* up or cloned unless this operation is updated.
* @param source the {@link ObjectMetadata} to copy
* @param source the source metadata to copy
* @param dest the metadata to update; this is the return value.
* @param copyObjectRequestBuilder CopyObjectRequest builder
*/
public static void cloneObjectMetadata(ObjectMetadata source,
ObjectMetadata dest) {
public static void cloneObjectMetadata(HeadObjectResponse source,
Map<String, String> dest, CopyObjectRequest.Builder copyObjectRequestBuilder) {
// Possibly null attributes
// Allowing nulls to pass breaks it during later use
if (source.getCacheControl() != null) {
dest.setCacheControl(source.getCacheControl());
if (source.cacheControl() != null) {
copyObjectRequestBuilder.cacheControl(source.cacheControl());
}
if (source.getContentDisposition() != null) {
dest.setContentDisposition(source.getContentDisposition());
if (source.contentDisposition() != null) {
copyObjectRequestBuilder.contentDisposition(source.contentDisposition());
}
if (source.getContentEncoding() != null) {
dest.setContentEncoding(source.getContentEncoding());
if (source.contentEncoding() != null) {
copyObjectRequestBuilder.contentEncoding(source.contentEncoding());
}
if (source.getContentMD5() != null) {
dest.setContentMD5(source.getContentMD5());
if (source.contentType() != null) {
copyObjectRequestBuilder.contentType(source.contentType());
}
if (source.getContentType() != null) {
dest.setContentType(source.getContentType());
if (source.serverSideEncryption() != null) {
copyObjectRequestBuilder.serverSideEncryption(source.serverSideEncryption());
}
if (source.getExpirationTime() != null) {
dest.setExpirationTime(source.getExpirationTime());
if (source.sseCustomerAlgorithm() != null) {
copyObjectRequestBuilder.copySourceSSECustomerAlgorithm(source.sseCustomerAlgorithm());
}
if (source.getExpirationTimeRuleId() != null) {
dest.setExpirationTimeRuleId(source.getExpirationTimeRuleId());
}
if (source.getHttpExpiresDate() != null) {
dest.setHttpExpiresDate(source.getHttpExpiresDate());
}
if (source.getLastModified() != null) {
dest.setLastModified(source.getLastModified());
}
if (source.getOngoingRestore() != null) {
dest.setOngoingRestore(source.getOngoingRestore());
}
if (source.getRestoreExpirationTime() != null) {
dest.setRestoreExpirationTime(source.getRestoreExpirationTime());
}
if (source.getSSEAlgorithm() != null) {
dest.setSSEAlgorithm(source.getSSEAlgorithm());
}
if (source.getSSECustomerAlgorithm() != null) {
dest.setSSECustomerAlgorithm(source.getSSECustomerAlgorithm());
}
if (source.getSSECustomerKeyMd5() != null) {
dest.setSSECustomerKeyMd5(source.getSSECustomerKeyMd5());
if (source.sseCustomerKeyMD5() != null) {
copyObjectRequestBuilder.copySourceSSECustomerKeyMD5(source.sseCustomerKeyMD5());
}
// copy user metadata except the magic marker header.
source.getUserMetadata().entrySet().stream()
source.metadata().entrySet().stream()
.filter(e -> !e.getKey().equals(X_HEADER_MAGIC_MARKER))
.forEach(e -> dest.addUserMetadata(e.getKey(), e.getValue()));
.forEach(e -> dest.put(e.getKey(), e.getValue()));
}
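A minimal sketch of how a copy operation might drive this method, assuming srcMetadata is a HeadObjectResponse from an earlier HEAD call and the bucket/key names are illustrative:
Map<String, String> destMetadata = new HashMap<>();
CopyObjectRequest.Builder copyBuilder = CopyObjectRequest.builder()
    .sourceBucket("example-bucket").sourceKey("src/key")
    .destinationBucket("example-bucket").destinationKey("dest/key");
// fills destMetadata (minus the magic marker header) and applies the
// standard headers/SSE-C settings straight onto the builder
HeaderProcessing.cloneObjectMetadata(srcMetadata, destMetadata, copyBuilder);
CopyObjectRequest copyRequest = copyBuilder
    .metadata(destMetadata)
    .metadataDirective(MetadataDirective.REPLACE)
    .build();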
public interface HeaderProcessingCallbacks {
@ -534,6 +541,15 @@ public class HeaderProcessing extends AbstractStoreOperation {
* @throws IOException IO and object access problems.
*/
@Retries.RetryTranslated
ObjectMetadata getObjectMetadata(String key) throws IOException;
HeadObjectResponse getObjectMetadata(String key) throws IOException;
/**
* Retrieve the bucket metadata.
*
* @return metadata
* @throws IOException IO and object access problems.
*/
@Retries.RetryTranslated
HeadBucketResponse getBucketMetadata() throws IOException;
}
}

View File

@ -110,11 +110,58 @@ public final class InternalConstants {
S3A_OPENFILE_KEYS = Collections.unmodifiableSet(keys);
}
/** 200 status code: OK. */
public static final int SC_200_OK = 200;
/** 301 status code: Moved Permanently. */
public static final int SC_301_MOVED_PERMANENTLY = 301;
/** 307 status code: Temporary Redirect. */
public static final int SC_307_TEMPORARY_REDIRECT = 307;
/** 400 status code: Bad Request. */
public static final int SC_400_BAD_REQUEST = 400;
/** 401 status code: Unauthorized. */
public static final int SC_401_UNAUTHORIZED = 401;
/** 403 status code: Forbidden. */
public static final int SC_403_FORBIDDEN = 403;
/** 403 error code. */
public static final int SC_403 = 403;
@Deprecated
public static final int SC_403 = SC_403_FORBIDDEN;
/** 404 status code: Not Found. */
public static final int SC_404_NOT_FOUND = 404;
/** 404 error code. */
public static final int SC_404 = 404;
@Deprecated
public static final int SC_404 = SC_404_NOT_FOUND;
/** 405 status code: Method Not Allowed. */
public static final int SC_405_METHOD_NOT_ALLOWED = 405;
/** 410 status code: Gone. */
public static final int SC_410_GONE = 410;
/** 412 status code: Precondition Failed. */
public static final int SC_412_PRECONDITION_FAILED = 412;
/** 416 status code: Range Not Satisfiable. */
public static final int SC_416_RANGE_NOT_SATISFIABLE = 416;
/** 443 status code: No Response (unofficial). */
public static final int SC_443_NO_RESPONSE = 443;
/** 444 status code: No Response (unofficial). */
public static final int SC_444_NO_RESPONSE = 444;
/** 500 status code: Internal Server Error. */
public static final int SC_500_INTERNAL_SERVER_ERROR = 500;
/** 503 status code: Service Unavailable. */
public static final int SC_503_SERVICE_UNAVAILABLE = 503;
/** Name of the log for throttling events. Value: {@value}. */
public static final String THROTTLE_LOG_NAME =

View File

@ -22,26 +22,32 @@ import java.io.IOException;
import java.nio.file.AccessDeniedException;
import java.util.List;
import com.amazonaws.services.s3.model.MultiObjectDeleteException;
import software.amazon.awssdk.services.s3.model.S3Error;
import software.amazon.awssdk.services.s3.model.S3Exception;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.fs.s3a.AWSS3IOException;
import org.apache.hadoop.fs.s3a.S3AFileSystem;
import static org.apache.hadoop.fs.s3a.impl.InternalConstants.SC_200_OK;
/**
* Support for Multi Object Deletion.
* This used to be a complex piece of code, as it was required to
* update S3Guard.
* Now all that is left is the exception extraction for better
* reporting.
* Exception raised in {@link S3AFileSystem#deleteObjects} when
* one or more of the keys could not be deleted.
*
* Used to reproduce the behaviour of SDK v1 for partial failures
* on DeleteObjects. In SDK v2, the errors are returned as part of
* the response objects.
*/
public final class MultiObjectDeleteSupport {
@InterfaceAudience.Public
@InterfaceStability.Unstable
public class MultiObjectDeleteException extends S3Exception {
private static final Logger LOG = LoggerFactory.getLogger(
MultiObjectDeleteSupport.class);
private MultiObjectDeleteSupport() {
}
MultiObjectDeleteException.class);
/**
* This is the exception exit code if access was denied on a delete.
@ -49,6 +55,17 @@ public final class MultiObjectDeleteSupport {
*/
public static final String ACCESS_DENIED = "AccessDenied";
private final List<S3Error> errors;
public MultiObjectDeleteException(List<S3Error> errors) {
super(builder().message(errors.toString()).statusCode(SC_200_OK));
this.errors = errors;
}
public List<S3Error> errors() {
return errors;
}
/**
* A {@code MultiObjectDeleteException} is raised if one or more
* paths listed in a bulk DELETE operation failed.
@ -58,29 +75,23 @@ public final class MultiObjectDeleteSupport {
* the causes, otherwise grabs the status code and uses it in the
* returned exception.
* @param message text for the exception
* @param deleteException the delete exception to translate
* @return an IOE with more detail.
*/
public static IOException translateDeleteException(
final String message,
final MultiObjectDeleteException deleteException) {
List<MultiObjectDeleteException.DeleteError> errors
= deleteException.getErrors();
public IOException translateException(final String message) {
LOG.info("Bulk delete operation failed to delete all objects;"
+ " failure count = {}",
errors.size());
errors().size());
final StringBuilder result = new StringBuilder(
errors.size() * 256);
errors().size() * 256);
result.append(message).append(": ");
String exitCode = "";
for (MultiObjectDeleteException.DeleteError error :
deleteException.getErrors()) {
String code = error.getCode();
String item = String.format("%s: %s%s: %s%n", code, error.getKey(),
(error.getVersionId() != null
? (" (" + error.getVersionId() + ")")
for (S3Error error : errors()) {
String code = error.code();
String item = String.format("%s: %s%s: %s%n", code, error.key(),
(error.versionId() != null
? (" (" + error.versionId() + ")")
: ""),
error.getMessage());
error.message());
LOG.info(item);
result.append(item);
if (exitCode == null || exitCode.isEmpty() || ACCESS_DENIED.equals(code)) {
@ -89,9 +100,9 @@ public final class MultiObjectDeleteSupport {
}
if (ACCESS_DENIED.equals(exitCode)) {
return (IOException) new AccessDeniedException(result.toString())
.initCause(deleteException);
.initCause(this);
} else {
return new AWSS3IOException(result.toString(), deleteException);
return new AWSS3IOException(result.toString(), this);
}
}
}
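A minimal sketch of the intended usage, assuming deleteResponse is the SDK v2 DeleteObjectsResponse for an illustrative request; in v2 the per-key failures come back in the response rather than as a thrown exception:
DeleteObjectsResponse deleteResponse = s3Client.deleteObjects(deleteRequest);
if (!deleteResponse.errors().isEmpty()) {
  // rebuild the v1-style exception, then turn it into the most specific
  // IOException (AccessDeniedException if any key was denied)
  throw new MultiObjectDeleteException(deleteResponse.errors())
      .translateException("bulk delete under " + path);
}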

View File

@ -23,9 +23,9 @@ import java.lang.reflect.InvocationTargetException;
import java.net.URI;
import java.net.URISyntaxException;
import com.amazonaws.ClientConfiguration;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import software.amazon.awssdk.http.apache.ApacheHttpClient;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.net.NetUtils;
@ -56,13 +56,12 @@ public final class NetworkBinding {
* so as to avoid
* @param conf the {@link Configuration} used to get the client specified
* value of {@code SSL_CHANNEL_MODE}
* @param awsConf the {@code ClientConfiguration} to set the
* SSLConnectionSocketFactory for.
* @param httpClientBuilder the http client builder.
* @throws IOException if there is an error while initializing the
* {@code SSLSocketFactory} other than classloader problems.
*/
public static void bindSSLChannelMode(Configuration conf,
ClientConfiguration awsConf) throws IOException {
ApacheHttpClient.Builder httpClientBuilder) throws IOException {
// Validate that SSL_CHANNEL_MODE is set to a valid value.
String channelModeString = conf.getTrimmed(
@ -89,7 +88,7 @@ public final class NetworkBinding {
(Class<? extends ConfigureAWSSocketFactory>) Class.forName(BINDING_CLASSNAME);
clazz.getConstructor()
.newInstance()
.configureSocketFactory(awsConf, channelMode);
.configureSocketFactory(httpClientBuilder, channelMode);
} catch (ClassNotFoundException | NoSuchMethodException |
IllegalAccessException | InstantiationException |
InvocationTargetException | LinkageError e) {
@ -103,7 +102,7 @@ public final class NetworkBinding {
* works with the shaded AWS libraries to exist in their own class.
*/
interface ConfigureAWSSocketFactory {
void configureSocketFactory(ClientConfiguration awsConf,
void configureSocketFactory(ApacheHttpClient.Builder httpClientBuilder,
DelegatingSSLSocketFactory.SSLChannelMode channelMode)
throws IOException;
}

View File

@ -22,10 +22,9 @@ import java.io.IOException;
import java.io.InterruptedIOException;
import java.util.List;
import com.amazonaws.AmazonClientException;
import com.amazonaws.services.s3.model.DeleteObjectsRequest;
import com.amazonaws.services.s3.model.MultiObjectDeleteException;
import com.amazonaws.services.s3.transfer.model.CopyResult;
import software.amazon.awssdk.awscore.exception.AwsServiceException;
import software.amazon.awssdk.services.s3.model.CopyObjectResponse;
import software.amazon.awssdk.services.s3.model.ObjectIdentifier;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.InvalidRequestException;
@ -127,7 +126,7 @@ public interface OperationCallbacks {
* @throws IOException Other IO problems
*/
@Retries.RetryTranslated
CopyResult copyFile(String srcKey,
CopyObjectResponse copyFile(String srcKey,
String destKey,
S3ObjectAttributes srcAttributes,
S3AReadOpContext readContext)
@ -142,14 +141,14 @@ public interface OperationCallbacks {
* a mistaken attempt to delete the root directory.
* @throws MultiObjectDeleteException one or more of the keys could not
* be deleted in a multiple object delete operation.
* @throws AmazonClientException amazon-layer failure.
* @throws AwsServiceException amazon-layer failure.
* @throws IOException other IO Exception.
*/
@Retries.RetryRaw
void removeKeys(
List<DeleteObjectsRequest.KeyVersion> keysToDelete,
List<ObjectIdentifier> keysToDelete,
boolean deleteFakeDir)
throws MultiObjectDeleteException, AmazonClientException,
throws MultiObjectDeleteException, AwsServiceException,
IOException;
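A minimal sketch of queueing keys for that call with the SDK v2 type, assuming callbacks is an OperationCallbacks instance and the key names are illustrative:
List<ObjectIdentifier> keysToDelete = new ArrayList<>();
keysToDelete.add(ObjectIdentifier.builder().key("dir/file1").build());
keysToDelete.add(ObjectIdentifier.builder()
    .key("dir/file2").versionId("version-id").build());
callbacks.removeKeys(keysToDelete, false);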
/**

View File

@ -0,0 +1,26 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs.s3a.impl;
/**
* Interface for progress listeners to implement.
*/
public interface ProgressListener {
default void progressChanged(ProgressListenerEvent eventType, long bytesTransferred) {};
}
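A minimal, purely illustrative sketch of an implementation that accumulates the bytes reported by transfer events:
class CountingProgressListener implements ProgressListener {
  private final java.util.concurrent.atomic.AtomicLong bytes =
      new java.util.concurrent.atomic.AtomicLong();
  @Override
  public void progressChanged(ProgressListenerEvent eventType, long bytesTransferred) {
    if (eventType == ProgressListenerEvent.REQUEST_BYTE_TRANSFER_EVENT) {
      bytes.addAndGet(bytesTransferred);
    }
  }
  long totalBytes() {
    return bytes.get();
  }
}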

View File

@ -0,0 +1,29 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs.s3a.impl;
/**
* Enum for progress listener events.
*/
public enum ProgressListenerEvent {
REQUEST_BYTE_TRANSFER_EVENT,
TRANSFER_PART_STARTED_EVENT,
TRANSFER_PART_COMPLETED_EVENT,
TRANSFER_PART_FAILED_EVENT;
}

View File

@ -25,9 +25,8 @@ import java.util.Map;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.atomic.AtomicLong;
import com.amazonaws.AmazonClientException;
import com.amazonaws.SdkBaseException;
import com.amazonaws.services.s3.model.DeleteObjectsRequest;
import software.amazon.awssdk.core.exception.SdkException;
import software.amazon.awssdk.services.s3.model.ObjectIdentifier;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@ -122,7 +121,7 @@ public class RenameOperation extends ExecutingStoreOperation<Long> {
/**
* list of keys to delete on the next (bulk) delete call.
*/
private final List<DeleteObjectsRequest.KeyVersion> keysToDelete =
private final List<ObjectIdentifier> keysToDelete =
new ArrayList<>();
/**
@ -199,7 +198,7 @@ public class RenameOperation extends ExecutingStoreOperation<Long> {
*/
private void queueToDelete(Path path, String key) {
LOG.debug("Queueing to delete {}", path);
keysToDelete.add(new DeleteObjectsRequest.KeyVersion(key));
keysToDelete.add(ObjectIdentifier.builder().key(key).build());
}
/**
@ -268,7 +267,7 @@ public class RenameOperation extends ExecutingStoreOperation<Long> {
} else {
recursiveDirectoryRename();
}
} catch (AmazonClientException | IOException ex) {
} catch (SdkException | IOException ex) {
// rename failed.
// block for all ongoing copies to complete, successfully or not
try {
@ -572,7 +571,7 @@ public class RenameOperation extends ExecutingStoreOperation<Long> {
*/
@Retries.RetryTranslated
private void removeSourceObjects(
final List<DeleteObjectsRequest.KeyVersion> keys)
final List<ObjectIdentifier> keys)
throws IOException {
// remove the keys
@ -580,9 +579,9 @@ public class RenameOperation extends ExecutingStoreOperation<Long> {
// who is trying to debug why objects are no longer there.
if (LOG.isDebugEnabled()) {
LOG.debug("Initiating delete operation for {} objects", keys.size());
for (DeleteObjectsRequest.KeyVersion key : keys) {
LOG.debug(" {} {}", key.getKey(),
key.getVersion() != null ? key.getVersion() : "");
for (ObjectIdentifier objectIdentifier : keys) {
LOG.debug(" {} {}", objectIdentifier.key(),
objectIdentifier.versionId() != null ? objectIdentifier.versionId() : "");
}
}
@ -619,10 +618,10 @@ public class RenameOperation extends ExecutingStoreOperation<Long> {
protected IOException convertToIOException(final Exception ex) {
if (ex instanceof IOException) {
return (IOException) ex;
} else if (ex instanceof SdkBaseException) {
} else if (ex instanceof SdkException) {
return translateException("rename " + sourcePath + " to " + destPath,
sourcePath.toString(),
(SdkBaseException) ex);
(SdkException) ex);
} else {
// should never happen, but for completeness
return new IOException(ex);

View File

@ -18,38 +18,36 @@
package org.apache.hadoop.fs.s3a.impl;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Base64;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import javax.annotation.Nullable;
import com.amazonaws.AmazonWebServiceRequest;
import com.amazonaws.services.s3.model.AbortMultipartUploadRequest;
import com.amazonaws.services.s3.model.CannedAccessControlList;
import com.amazonaws.services.s3.model.CompleteMultipartUploadRequest;
import com.amazonaws.services.s3.model.CopyObjectRequest;
import com.amazonaws.services.s3.model.DeleteObjectRequest;
import com.amazonaws.services.s3.model.DeleteObjectsRequest;
import com.amazonaws.services.s3.model.GetObjectMetadataRequest;
import com.amazonaws.services.s3.model.GetObjectRequest;
import com.amazonaws.services.s3.model.InitiateMultipartUploadRequest;
import com.amazonaws.services.s3.model.ListMultipartUploadsRequest;
import com.amazonaws.services.s3.model.ListNextBatchOfObjectsRequest;
import com.amazonaws.services.s3.model.ListObjectsRequest;
import com.amazonaws.services.s3.model.ListObjectsV2Request;
import com.amazonaws.services.s3.model.ObjectListing;
import com.amazonaws.services.s3.model.ObjectMetadata;
import com.amazonaws.services.s3.model.PartETag;
import com.amazonaws.services.s3.model.PutObjectRequest;
import com.amazonaws.services.s3.model.SSEAwsKeyManagementParams;
import com.amazonaws.services.s3.model.SSECustomerKey;
import com.amazonaws.services.s3.model.SelectObjectContentRequest;
import com.amazonaws.services.s3.model.StorageClass;
import com.amazonaws.services.s3.model.UploadPartRequest;
import software.amazon.awssdk.core.SdkRequest;
import software.amazon.awssdk.services.s3.model.AbortMultipartUploadRequest;
import software.amazon.awssdk.services.s3.model.CompleteMultipartUploadRequest;
import software.amazon.awssdk.services.s3.model.CompletedMultipartUpload;
import software.amazon.awssdk.services.s3.model.CompletedPart;
import software.amazon.awssdk.services.s3.model.CopyObjectRequest;
import software.amazon.awssdk.services.s3.model.DeleteObjectRequest;
import software.amazon.awssdk.services.s3.model.DeleteObjectsRequest;
import software.amazon.awssdk.services.s3.model.GetObjectRequest;
import software.amazon.awssdk.services.s3.model.CreateMultipartUploadRequest;
import software.amazon.awssdk.services.s3.model.HeadBucketRequest;
import software.amazon.awssdk.services.s3.model.HeadObjectRequest;
import software.amazon.awssdk.services.s3.model.HeadObjectResponse;
import software.amazon.awssdk.services.s3.model.ListMultipartUploadsRequest;
import software.amazon.awssdk.services.s3.model.ListObjectsRequest;
import software.amazon.awssdk.services.s3.model.ListObjectsV2Request;
import software.amazon.awssdk.services.s3.model.MetadataDirective;
import software.amazon.awssdk.services.s3.model.ObjectIdentifier;
import software.amazon.awssdk.services.s3.model.PutObjectRequest;
import software.amazon.awssdk.services.s3.model.SelectObjectContentRequest;
import software.amazon.awssdk.services.s3.model.ServerSideEncryption;
import software.amazon.awssdk.services.s3.model.StorageClass;
import software.amazon.awssdk.services.s3.model.UploadPartRequest;
import software.amazon.awssdk.utils.Md5Utils;
import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@ -79,8 +77,8 @@ import static org.apache.hadoop.thirdparty.com.google.common.base.Preconditions.
* This is where audit span information is added to the requests,
* until it is done in the AWS SDK itself.
*
* All created requests will be passed through
* {@link PrepareRequest#prepareRequest(AmazonWebServiceRequest)} before
* All created request builders will be passed to
* {@link PrepareRequest#prepareRequest(SdkRequest.Builder)} before
* being returned to the caller.
*/
public class RequestFactoryImpl implements RequestFactory {
@ -101,7 +99,7 @@ public class RequestFactoryImpl implements RequestFactory {
/**
* ACL For new objects.
*/
private final CannedAccessControlList cannedACL;
private final String cannedACL;
/**
* Max number of multipart entries allowed in a large
@ -147,14 +145,15 @@ public class RequestFactoryImpl implements RequestFactory {
/**
* Preflight preparation of AWS request.
* @param <T> web service request
* @return prepared entry.
* @param <T> web service request builder
* @return prepared builder.
*/
@Retries.OnceRaw
private <T extends AmazonWebServiceRequest> T prepareRequest(T t) {
return requestPreparer != null
? requestPreparer.prepareRequest(t)
: t;
private <T extends SdkRequest.Builder> T prepareRequest(T t) {
if (requestPreparer != null) {
requestPreparer.prepareRequest(t);
}
return t;
}
/**
@ -162,7 +161,7 @@ public class RequestFactoryImpl implements RequestFactory {
* @return an ACL, if any
*/
@Override
public CannedAccessControlList getCannedACL() {
public String getCannedACL() {
return cannedACL;
}
@ -174,29 +173,6 @@ public class RequestFactoryImpl implements RequestFactory {
return bucket;
}
/**
* Create the AWS SDK structure used to configure SSE,
* if the encryption secrets contain the information/settings for this.
* @return an optional set of KMS Key settings
*/
@Override
public Optional<SSEAwsKeyManagementParams> generateSSEAwsKeyParams() {
return EncryptionSecretOperations.createSSEAwsKeyManagementParams(
encryptionSecrets);
}
/**
* Create the SSE-C structure for the AWS SDK, if the encryption secrets
* contain the information/settings for this.
* This will contain a secret extracted from the bucket/configuration.
* @return an optional customer key.
*/
@Override
public Optional<SSECustomerKey> generateSSECustomerKey() {
return EncryptionSecretOperations.createSSECustomerKey(
encryptionSecrets);
}
/**
* Get the encryption algorithm of this endpoint.
* @return the encryption algorithm.
@ -227,309 +203,323 @@ public class RequestFactoryImpl implements RequestFactory {
/**
* Sets server side encryption parameters to the part upload
* request when encryption is enabled.
* @param request upload part request
* @param builder upload part request builder
*/
protected void setOptionalUploadPartRequestParameters(
UploadPartRequest request) {
generateSSECustomerKey().ifPresent(request::setSSECustomerKey);
protected void uploadPartEncryptionParameters(
UploadPartRequest.Builder builder) {
// need to set key to get objects encrypted with SSE_C
EncryptionSecretOperations.getSSECustomerKey(encryptionSecrets).ifPresent(base64customerKey -> {
builder.sseCustomerAlgorithm(ServerSideEncryption.AES256.name())
.sseCustomerKey(base64customerKey)
.sseCustomerKeyMD5(Md5Utils.md5AsBase64(Base64.getDecoder().decode(base64customerKey)));
});
}
/**
* Sets server side encryption parameters on the GET object metadata
* request when encryption is enabled.
* @param request the get object metadata request
*/
protected void setOptionalGetObjectMetadataParameters(
GetObjectMetadataRequest request) {
generateSSECustomerKey().ifPresent(request::setSSECustomerKey);
}
private CopyObjectRequest.Builder buildCopyObjectRequest() {
/**
* Set the optional parameters when initiating the request (encryption,
* headers, storage, etc).
* @param request request to patch.
*/
protected void setOptionalMultipartUploadRequestParameters(
InitiateMultipartUploadRequest request) {
generateSSEAwsKeyParams().ifPresent(request::setSSEAwsKeyManagementParams);
generateSSECustomerKey().ifPresent(request::setSSECustomerKey);
}
CopyObjectRequest.Builder copyObjectRequestBuilder = CopyObjectRequest.builder();
/**
* Set the optional parameters for a PUT request.
* @param request request to patch.
*/
protected void setOptionalPutRequestParameters(PutObjectRequest request) {
generateSSEAwsKeyParams().ifPresent(request::setSSEAwsKeyManagementParams);
generateSSECustomerKey().ifPresent(request::setSSECustomerKey);
}
/**
* Set the optional metadata for an object being created or copied.
* @param metadata to update.
* @param isDirectoryMarker is this for a directory marker?
*/
protected void setOptionalObjectMetadata(ObjectMetadata metadata,
boolean isDirectoryMarker) {
final S3AEncryptionMethods algorithm
= getServerSideEncryptionAlgorithm();
if (S3AEncryptionMethods.SSE_S3 == algorithm) {
metadata.setSSEAlgorithm(algorithm.getMethod());
if (contentEncoding != null) {
copyObjectRequestBuilder.contentEncoding(contentEncoding);
}
if (contentEncoding != null && !isDirectoryMarker) {
metadata.setContentEncoding(contentEncoding);
}
}
/**
* Create a new object metadata instance.
* Any standard metadata headers are added here, for example:
* encryption.
*
* @param length length of data to set in header; Ignored if negative
* @return a new metadata instance
*/
@Override
public ObjectMetadata newObjectMetadata(long length) {
return createObjectMetadata(length, false);
}
/**
* Create a new object metadata instance.
* Any standard metadata headers are added here, for example:
* encryption.
*
* @param length length of data to set in header; Ignored if negative
* @param isDirectoryMarker is this for a directory marker?
* @return a new metadata instance
*/
private ObjectMetadata createObjectMetadata(long length, boolean isDirectoryMarker) {
final ObjectMetadata om = new ObjectMetadata();
setOptionalObjectMetadata(om, isDirectoryMarker);
if (length >= 0) {
om.setContentLength(length);
}
return om;
return copyObjectRequestBuilder;
}
@Override
public CopyObjectRequest newCopyObjectRequest(String srcKey,
public CopyObjectRequest.Builder newCopyObjectRequestBuilder(String srcKey,
String dstKey,
ObjectMetadata srcom) {
CopyObjectRequest copyObjectRequest =
new CopyObjectRequest(getBucket(), srcKey, getBucket(), dstKey);
ObjectMetadata dstom = newObjectMetadata(srcom.getContentLength());
HeaderProcessing.cloneObjectMetadata(srcom, dstom);
setOptionalObjectMetadata(dstom, false);
copyEncryptionParameters(srcom, copyObjectRequest);
copyObjectRequest.setCannedAccessControlList(cannedACL);
copyObjectRequest.setNewObjectMetadata(dstom);
Optional.ofNullable(srcom.getStorageClass())
.ifPresent(copyObjectRequest::setStorageClass);
return prepareRequest(copyObjectRequest);
HeadObjectResponse srcom) {
CopyObjectRequest.Builder copyObjectRequestBuilder = buildCopyObjectRequest();
Map<String, String> dstom = new HashMap<>();
HeaderProcessing.cloneObjectMetadata(srcom, dstom, copyObjectRequestBuilder);
copyEncryptionParameters(srcom, copyObjectRequestBuilder);
copyObjectRequestBuilder
.metadata(dstom)
.metadataDirective(MetadataDirective.REPLACE)
.acl(cannedACL);
if (srcom.storageClass() != null) {
copyObjectRequestBuilder.storageClass(srcom.storageClass());
}
copyObjectRequestBuilder.destinationBucket(getBucket())
.destinationKey(dstKey).sourceBucket(getBucket()).sourceKey(srcKey);
return prepareRequest(copyObjectRequestBuilder);
}
/**
* Propagate the encryption parameters from the source object if set;
* otherwise, use the current filesystem encryption settings.
* @param copyObjectRequestBuilder copy object request builder.
* @param srcom source object metadata.
* @param copyObjectRequest copy object request body.
*/
protected void copyEncryptionParameters(
ObjectMetadata srcom,
CopyObjectRequest copyObjectRequest) {
String sourceKMSId = srcom.getSSEAwsKmsKeyId();
protected void copyEncryptionParameters(HeadObjectResponse srcom,
CopyObjectRequest.Builder copyObjectRequestBuilder) {
final S3AEncryptionMethods algorithm = getServerSideEncryptionAlgorithm();
String sourceKMSId = srcom.ssekmsKeyId();
if (isNotEmpty(sourceKMSId)) {
// source KMS ID is propagated
LOG.debug("Propagating SSE-KMS settings from source {}",
sourceKMSId);
copyObjectRequest.setSSEAwsKeyManagementParams(
new SSEAwsKeyManagementParams(sourceKMSId));
copyObjectRequestBuilder.ssekmsKeyId(sourceKMSId);
return;
}
switch (getServerSideEncryptionAlgorithm()) {
case SSE_S3:
/* no-op; this is set in destination object metadata */
break;
case SSE_C:
generateSSECustomerKey().ifPresent(customerKey -> {
copyObjectRequest.setSourceSSECustomerKey(customerKey);
copyObjectRequest.setDestinationSSECustomerKey(customerKey);
});
break;
case SSE_KMS:
generateSSEAwsKeyParams().ifPresent(
copyObjectRequest::setSSEAwsKeyManagementParams);
break;
default:
if (S3AEncryptionMethods.SSE_S3 == algorithm) {
copyObjectRequestBuilder.serverSideEncryption(algorithm.getMethod());
} else if (S3AEncryptionMethods.SSE_KMS == algorithm) {
copyObjectRequestBuilder.serverSideEncryption(ServerSideEncryption.AWS_KMS);
// Set the KMS key if present, else S3 uses AWS managed key.
EncryptionSecretOperations.getSSEAwsKMSKey(encryptionSecrets)
.ifPresent(kmsKey -> copyObjectRequestBuilder.ssekmsKeyId(kmsKey));
} else if (S3AEncryptionMethods.SSE_C == algorithm) {
EncryptionSecretOperations.getSSECustomerKey(encryptionSecrets)
.ifPresent(base64customerKey -> {
copyObjectRequestBuilder.copySourceSSECustomerAlgorithm(
ServerSideEncryption.AES256.name()).copySourceSSECustomerKey(base64customerKey)
.copySourceSSECustomerKeyMD5(
Md5Utils.md5AsBase64(Base64.getDecoder().decode(base64customerKey)))
.sseCustomerAlgorithm(ServerSideEncryption.AES256.name())
.sseCustomerKey(base64customerKey).sseCustomerKeyMD5(
Md5Utils.md5AsBase64(Base64.getDecoder().decode(base64customerKey)));
});
}
}
/**
* Create a putObject request.
* Adds the ACL, storage class and metadata
* @param key key of object
* @param metadata metadata header
* @param options options for the request, including headers
* @param srcfile source file
* @return the request
* @param length length of object to be uploaded
* @param isDirectoryMarker true if object to be uploaded is a directory marker
* @return the request builder
*/
@Override
public PutObjectRequest newPutObjectRequest(String key,
ObjectMetadata metadata,
public PutObjectRequest.Builder newPutObjectRequestBuilder(String key,
final PutObjectOptions options,
File srcfile) {
Preconditions.checkNotNull(srcfile);
PutObjectRequest putObjectRequest = new PutObjectRequest(getBucket(), key,
srcfile);
maybeSetMetadata(options, metadata);
setOptionalPutRequestParameters(putObjectRequest);
putObjectRequest.setCannedAcl(cannedACL);
if (storageClass != null) {
putObjectRequest.setStorageClass(storageClass);
}
putObjectRequest.setMetadata(metadata);
return prepareRequest(putObjectRequest);
}
long length,
boolean isDirectoryMarker) {
/**
* Create a {@link PutObjectRequest} request.
* The metadata is assumed to have been configured with the size of the
* operation.
* @param key key of object
* @param metadata metadata header
* @param options options for the request
* @param inputStream source data.
* @return the request
*/
@Override
public PutObjectRequest newPutObjectRequest(String key,
ObjectMetadata metadata,
@Nullable final PutObjectOptions options,
InputStream inputStream) {
Preconditions.checkNotNull(inputStream);
Preconditions.checkArgument(isNotEmpty(key), "Null/empty key");
maybeSetMetadata(options, metadata);
PutObjectRequest putObjectRequest = new PutObjectRequest(getBucket(), key,
inputStream, metadata);
setOptionalPutRequestParameters(putObjectRequest);
putObjectRequest.setCannedAcl(cannedACL);
if (storageClass != null) {
putObjectRequest.setStorageClass(storageClass);
PutObjectRequest.Builder putObjectRequestBuilder =
buildPutObjectRequest(length, isDirectoryMarker);
putObjectRequestBuilder.bucket(getBucket()).key(key);
if (options != null) {
putObjectRequestBuilder.metadata(options.getHeaders());
}
putEncryptionParameters(putObjectRequestBuilder);
if (storageClass != null) {
putObjectRequestBuilder.storageClass(storageClass);
}
return prepareRequest(putObjectRequestBuilder);
}
private PutObjectRequest.Builder buildPutObjectRequest(long length, boolean isDirectoryMarker) {
PutObjectRequest.Builder putObjectRequestBuilder = PutObjectRequest.builder();
putObjectRequestBuilder.acl(cannedACL);
if (length >= 0) {
putObjectRequestBuilder.contentLength(length);
}
if (contentEncoding != null && !isDirectoryMarker) {
putObjectRequestBuilder.contentEncoding(contentEncoding);
}
return putObjectRequestBuilder;
}
private void putEncryptionParameters(PutObjectRequest.Builder putObjectRequestBuilder) {
final S3AEncryptionMethods algorithm
= getServerSideEncryptionAlgorithm();
if (S3AEncryptionMethods.SSE_S3 == algorithm) {
putObjectRequestBuilder.serverSideEncryption(algorithm.getMethod());
} else if (S3AEncryptionMethods.SSE_KMS == algorithm) {
putObjectRequestBuilder.serverSideEncryption(ServerSideEncryption.AWS_KMS);
// Set the KMS key if present, else S3 uses AWS managed key.
EncryptionSecretOperations.getSSEAwsKMSKey(encryptionSecrets)
.ifPresent(kmsKey -> putObjectRequestBuilder.ssekmsKeyId(kmsKey));
} else if (S3AEncryptionMethods.SSE_C == algorithm) {
EncryptionSecretOperations.getSSECustomerKey(encryptionSecrets)
.ifPresent(base64customerKey -> {
putObjectRequestBuilder.sseCustomerAlgorithm(ServerSideEncryption.AES256.name())
.sseCustomerKey(base64customerKey).sseCustomerKeyMD5(
Md5Utils.md5AsBase64(Base64.getDecoder().decode(base64customerKey)));
});
}
return prepareRequest(putObjectRequest);
}
@Override
public PutObjectRequest newDirectoryMarkerRequest(String directory) {
public PutObjectRequest.Builder newDirectoryMarkerRequest(String directory) {
String key = directory.endsWith("/")
? directory
: (directory + "/");
// an input stream which is always empty
final InputStream inputStream = new InputStream() {
@Override
public int read() throws IOException {
return -1;
}
};
// preparation happens in here
final ObjectMetadata metadata = createObjectMetadata(0L, true);
metadata.setContentType(HeaderProcessing.CONTENT_TYPE_X_DIRECTORY);
PutObjectRequest.Builder putObjectRequestBuilder = buildPutObjectRequest(0L, true);
PutObjectRequest putObjectRequest = new PutObjectRequest(getBucket(), key,
inputStream, metadata);
setOptionalPutRequestParameters(putObjectRequest);
putObjectRequest.setCannedAcl(cannedACL);
return prepareRequest(putObjectRequest);
putObjectRequestBuilder.bucket(getBucket()).key(key)
.contentType(HeaderProcessing.CONTENT_TYPE_X_DIRECTORY);
putEncryptionParameters(putObjectRequestBuilder);
return prepareRequest(putObjectRequestBuilder);
}
@Override
public ListMultipartUploadsRequest
newListMultipartUploadsRequest(String prefix) {
ListMultipartUploadsRequest request = new ListMultipartUploadsRequest(
getBucket());
public ListMultipartUploadsRequest.Builder
newListMultipartUploadsRequestBuilder(String prefix) {
ListMultipartUploadsRequest.Builder requestBuilder = ListMultipartUploadsRequest.builder();
requestBuilder.bucket(getBucket());
if (prefix != null) {
request.setPrefix(prefix);
requestBuilder.prefix(prefix);
}
return prepareRequest(request);
return prepareRequest(requestBuilder);
}
@Override
public AbortMultipartUploadRequest newAbortMultipartUploadRequest(
public AbortMultipartUploadRequest.Builder newAbortMultipartUploadRequestBuilder(
String destKey,
String uploadId) {
return prepareRequest(new AbortMultipartUploadRequest(getBucket(),
destKey,
uploadId));
AbortMultipartUploadRequest.Builder requestBuilder =
AbortMultipartUploadRequest.builder().bucket(getBucket()).key(destKey).uploadId(uploadId);
return prepareRequest(requestBuilder);
}
private void multipartUploadEncryptionParameters(
CreateMultipartUploadRequest.Builder mpuRequestBuilder) {
final S3AEncryptionMethods algorithm = getServerSideEncryptionAlgorithm();
if (S3AEncryptionMethods.SSE_S3 == algorithm) {
mpuRequestBuilder.serverSideEncryption(algorithm.getMethod());
} else if (S3AEncryptionMethods.SSE_KMS == algorithm) {
mpuRequestBuilder.serverSideEncryption(ServerSideEncryption.AWS_KMS);
// Set the KMS key if present, else S3 uses AWS managed key.
EncryptionSecretOperations.getSSEAwsKMSKey(encryptionSecrets)
.ifPresent(kmsKey -> mpuRequestBuilder.ssekmsKeyId(kmsKey));
} else if (S3AEncryptionMethods.SSE_C == algorithm) {
EncryptionSecretOperations.getSSECustomerKey(encryptionSecrets)
.ifPresent(base64customerKey -> {
mpuRequestBuilder.sseCustomerAlgorithm(ServerSideEncryption.AES256.name())
.sseCustomerKey(base64customerKey).sseCustomerKeyMD5(
Md5Utils.md5AsBase64(Base64.getDecoder().decode(base64customerKey)));
});
}
}
@Override
public InitiateMultipartUploadRequest newMultipartUploadRequest(
public CreateMultipartUploadRequest.Builder newMultipartUploadRequestBuilder(
final String destKey,
@Nullable final PutObjectOptions options) throws PathIOException {
if (!isMultipartUploadEnabled) {
throw new PathIOException(destKey, "Multipart uploads are disabled.");
}
final ObjectMetadata objectMetadata = newObjectMetadata(-1);
maybeSetMetadata(options, objectMetadata);
final InitiateMultipartUploadRequest initiateMPURequest =
new InitiateMultipartUploadRequest(getBucket(),
destKey,
objectMetadata);
initiateMPURequest.setCannedACL(getCannedACL());
if (getStorageClass() != null) {
initiateMPURequest.withStorageClass(getStorageClass());
CreateMultipartUploadRequest.Builder requestBuilder = CreateMultipartUploadRequest.builder();
if (contentEncoding != null) {
requestBuilder.contentEncoding(contentEncoding);
}
setOptionalMultipartUploadRequestParameters(initiateMPURequest);
return prepareRequest(initiateMPURequest);
if (options != null) {
requestBuilder.metadata(options.getHeaders());
}
requestBuilder.bucket(getBucket()).key(destKey).acl(cannedACL);
multipartUploadEncryptionParameters(requestBuilder);
if (storageClass != null) {
requestBuilder.storageClass(storageClass);
}
return prepareRequest(requestBuilder);
}
@Override
public CompleteMultipartUploadRequest newCompleteMultipartUploadRequest(
public CompleteMultipartUploadRequest.Builder newCompleteMultipartUploadRequestBuilder(
String destKey,
String uploadId,
List<PartETag> partETags) {
List<CompletedPart> partETags) {
// a copy of the list is required, so that the AWS SDK doesn't
// attempt to sort an unmodifiable list.
return prepareRequest(new CompleteMultipartUploadRequest(bucket,
destKey, uploadId, new ArrayList<>(partETags)));
CompleteMultipartUploadRequest.Builder requestBuilder =
CompleteMultipartUploadRequest.builder().bucket(bucket).key(destKey).uploadId(uploadId)
.multipartUpload(CompletedMultipartUpload.builder().parts(partETags).build());
return prepareRequest(requestBuilder);
}
@Override
public GetObjectMetadataRequest newGetObjectMetadataRequest(String key) {
GetObjectMetadataRequest request =
new GetObjectMetadataRequest(getBucket(), key);
//SSE-C requires to be filled in if enabled for object metadata
setOptionalGetObjectMetadataParameters(request);
return prepareRequest(request);
public HeadObjectRequest.Builder newHeadObjectRequestBuilder(String key) {
HeadObjectRequest.Builder headObjectRequestBuilder =
HeadObjectRequest.builder().bucket(getBucket()).key(key);
// need to set key to get metadata for objects encrypted with SSE_C
EncryptionSecretOperations.getSSECustomerKey(encryptionSecrets).ifPresent(base64customerKey -> {
headObjectRequestBuilder.sseCustomerAlgorithm(ServerSideEncryption.AES256.name())
.sseCustomerKey(base64customerKey)
.sseCustomerKeyMD5(Md5Utils.md5AsBase64(Base64.getDecoder().decode(base64customerKey)));
});
return prepareRequest(headObjectRequestBuilder);
}
@Override
public GetObjectRequest newGetObjectRequest(String key) {
GetObjectRequest request = new GetObjectRequest(bucket, key);
generateSSECustomerKey().ifPresent(request::setSSECustomerKey);
public HeadBucketRequest.Builder newHeadBucketRequestBuilder(String bucketName) {
return prepareRequest(request);
HeadBucketRequest.Builder headBucketRequestBuilder =
HeadBucketRequest.builder().bucket(bucketName);
return prepareRequest(headBucketRequestBuilder);
}
@Override
public UploadPartRequest newUploadPartRequest(
public GetObjectRequest.Builder newGetObjectRequestBuilder(String key) {
GetObjectRequest.Builder builder = GetObjectRequest.builder()
.bucket(bucket)
.key(key);
// need to set key to get objects encrypted with SSE_C
EncryptionSecretOperations.getSSECustomerKey(encryptionSecrets).ifPresent(base64customerKey -> {
builder.sseCustomerAlgorithm(ServerSideEncryption.AES256.name())
.sseCustomerKey(base64customerKey)
.sseCustomerKeyMD5(Md5Utils.md5AsBase64(Base64.getDecoder().decode(base64customerKey)));
});
return prepareRequest(builder);
}
@Override
public UploadPartRequest.Builder newUploadPartRequestBuilder(
String destKey,
String uploadId,
int partNumber,
long size,
InputStream uploadStream,
File sourceFile,
long offset) throws PathIOException {
long size) throws PathIOException {
checkNotNull(uploadId);
// exactly one source must be set; xor verifies this
checkArgument((uploadStream != null) ^ (sourceFile != null),
"Data source");
checkArgument(size >= 0, "Invalid partition size %s", size);
checkArgument(partNumber > 0,
"partNumber must be between 1 and %s inclusive, but is %s",
DEFAULT_UPLOAD_PART_COUNT_LIMIT, partNumber);
multipartPartCountLimit, partNumber);
LOG.debug("Creating part upload request for {} #{} size {}",
uploadId, partNumber, size);
@ -539,88 +529,76 @@ public class RequestFactoryImpl implements RequestFactory {
throw new PathIOException(destKey,
String.format(pathErrorMsg, partNumber, multipartPartCountLimit));
}
UploadPartRequest request = new UploadPartRequest()
.withBucketName(getBucket())
.withKey(destKey)
.withUploadId(uploadId)
.withPartNumber(partNumber)
.withPartSize(size);
if (uploadStream != null) {
// there's an upload stream. Bind to it.
request.setInputStream(uploadStream);
} else {
checkArgument(sourceFile.exists(),
"Source file does not exist: %s", sourceFile);
checkArgument(sourceFile.isFile(),
"Source is not a file: %s", sourceFile);
checkArgument(offset >= 0, "Invalid offset %s", offset);
long length = sourceFile.length();
checkArgument(offset == 0 || offset < length,
"Offset %s beyond length of file %s", offset, length);
request.setFile(sourceFile);
request.setFileOffset(offset);
}
setOptionalUploadPartRequestParameters(request);
return prepareRequest(request);
UploadPartRequest.Builder builder = UploadPartRequest.builder()
.bucket(getBucket())
.key(destKey)
.uploadId(uploadId)
.partNumber(partNumber)
.contentLength(size);
uploadPartEncryptionParameters(builder);
return prepareRequest(builder);
}
@Override
public SelectObjectContentRequest newSelectRequest(String key) {
SelectObjectContentRequest request = new SelectObjectContentRequest();
request.setBucketName(bucket);
request.setKey(key);
generateSSECustomerKey().ifPresent(request::setSSECustomerKey);
return prepareRequest(request);
public SelectObjectContentRequest.Builder newSelectRequestBuilder(String key) {
SelectObjectContentRequest.Builder requestBuilder =
SelectObjectContentRequest.builder().bucket(bucket).key(key);
EncryptionSecretOperations.getSSECustomerKey(encryptionSecrets).ifPresent(base64customerKey -> {
requestBuilder.sseCustomerAlgorithm(ServerSideEncryption.AES256.name())
.sseCustomerKey(base64customerKey)
.sseCustomerKeyMD5(Md5Utils.md5AsBase64(Base64.getDecoder().decode(base64customerKey)));
});
return prepareRequest(requestBuilder);
}
@Override
public ListObjectsRequest newListObjectsV1Request(
public ListObjectsRequest.Builder newListObjectsV1RequestBuilder(
final String key,
final String delimiter,
final int maxKeys) {
ListObjectsRequest request = new ListObjectsRequest()
.withBucketName(bucket)
.withMaxKeys(maxKeys)
.withPrefix(key);
ListObjectsRequest.Builder requestBuilder =
ListObjectsRequest.builder().bucket(bucket).maxKeys(maxKeys).prefix(key);
if (delimiter != null) {
request.setDelimiter(delimiter);
requestBuilder.delimiter(delimiter);
}
return prepareRequest(request);
return prepareRequest(requestBuilder);
}
@Override
public ListNextBatchOfObjectsRequest newListNextBatchOfObjectsRequest(
ObjectListing prev) {
return prepareRequest(new ListNextBatchOfObjectsRequest(prev));
}
@Override
public ListObjectsV2Request newListObjectsV2Request(
public ListObjectsV2Request.Builder newListObjectsV2RequestBuilder(
final String key,
final String delimiter,
final int maxKeys) {
final ListObjectsV2Request request = new ListObjectsV2Request()
.withBucketName(bucket)
.withMaxKeys(maxKeys)
.withPrefix(key);
final ListObjectsV2Request.Builder requestBuilder = ListObjectsV2Request.builder()
.bucket(bucket)
.maxKeys(maxKeys)
.prefix(key);
if (delimiter != null) {
request.setDelimiter(delimiter);
requestBuilder.delimiter(delimiter);
}
return prepareRequest(request);
return prepareRequest(requestBuilder);
}
@Override
public DeleteObjectRequest newDeleteObjectRequest(String key) {
return prepareRequest(new DeleteObjectRequest(bucket, key));
public DeleteObjectRequest.Builder newDeleteObjectRequestBuilder(String key) {
return prepareRequest(DeleteObjectRequest.builder().bucket(bucket).key(key));
}
@Override
public DeleteObjectsRequest newBulkDeleteRequest(
List<DeleteObjectsRequest.KeyVersion> keysToDelete) {
return prepareRequest(
new DeleteObjectsRequest(bucket)
.withKeys(keysToDelete)
.withQuiet(true));
public DeleteObjectsRequest.Builder newBulkDeleteRequestBuilder(
List<ObjectIdentifier> keysToDelete) {
return prepareRequest(DeleteObjectsRequest
.builder()
.bucket(bucket)
.delete(d -> d.objects(keysToDelete).quiet(true)));
}
@Override
@ -628,23 +606,6 @@ public class RequestFactoryImpl implements RequestFactory {
encryptionSecrets = secrets;
}
/**
* Set the metadata from the options if the options are not
* null and the metadata contains headers.
* @param options options for the request
* @param objectMetadata metadata to patch
*/
private void maybeSetMetadata(
@Nullable PutObjectOptions options,
final ObjectMetadata objectMetadata) {
if (options != null) {
Map<String, String> headers = options.getHeaders();
if (headers != null) {
objectMetadata.setUserMetadata(headers);
}
}
}
/**
* Create a builder.
* @return new builder.
@ -671,7 +632,7 @@ public class RequestFactoryImpl implements RequestFactory {
/**
* ACL For new objects.
*/
private CannedAccessControlList cannedACL = null;
private String cannedACL = null;
/** Content Encoding. */
private String contentEncoding;
@ -754,7 +715,7 @@ public class RequestFactoryImpl implements RequestFactory {
* @return the builder
*/
public RequestFactoryBuilder withCannedACL(
final CannedAccessControlList value) {
final String value) {
cannedACL = value;
return this;
}
@ -806,11 +767,9 @@ public class RequestFactoryImpl implements RequestFactory {
/**
* Post-creation preparation of AWS request.
* @param t request
* @param <T> request type.
* @return prepared entry.
* @param t request builder
*/
@Retries.OnceRaw
<T extends AmazonWebServiceRequest> T prepareRequest(T t);
void prepareRequest(SdkRequest.Builder t);
}
}
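A minimal sketch of wiring a PrepareRequest hook into the factory; the hook now mutates the SdkRequest.Builder in place rather than returning it. This assumes the builder exposes withBucket/withRequestPreparer setters and that LOG is an SLF4J logger; the bucket name is illustrative:
RequestFactoryImpl.PrepareRequest preparer = builder ->
    LOG.debug("preparing {}", builder.getClass().getSimpleName());
RequestFactory factory = RequestFactoryImpl.builder()
    .withBucket("example-bucket")
    .withRequestPreparer(preparer)
    .build();
// every builder produced by the factory now passes through the preparer
HeadObjectRequest.Builder head = factory.newHeadObjectRequestBuilder("some/key");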

View File

@ -34,10 +34,12 @@ import java.util.Objects;
import java.util.Set;
import java.util.concurrent.CompletableFuture;
import com.amazonaws.services.s3.model.CompleteMultipartUploadResult;
import com.amazonaws.services.s3.model.PartETag;
import com.amazonaws.services.s3.model.UploadPartRequest;
import com.amazonaws.services.s3.model.UploadPartResult;
import software.amazon.awssdk.core.sync.RequestBody;
import software.amazon.awssdk.services.s3.model.CompleteMultipartUploadResponse;
import software.amazon.awssdk.services.s3.model.CompletedPart;
import software.amazon.awssdk.services.s3.model.UploadPartRequest;
import software.amazon.awssdk.services.s3.model.UploadPartResponse;
import org.apache.hadoop.thirdparty.com.google.common.base.Charsets;
import org.apache.commons.lang3.StringUtils;
@ -152,18 +154,18 @@ class S3AMultipartUploader extends AbstractMultipartUploader {
Charsets.UTF_8);
return context.submit(new CompletableFuture<>(),
() -> {
UploadPartRequest request = writeOperations.newUploadPartRequest(key,
uploadIdString, partNumber, (int) lengthInBytes, inputStream,
null, 0L);
UploadPartResult result = writeOperations.uploadPart(request, statistics);
UploadPartRequest request = writeOperations.newUploadPartRequestBuilder(key,
uploadIdString, partNumber, lengthInBytes).build();
RequestBody body = RequestBody.fromInputStream(inputStream, lengthInBytes);
UploadPartResponse response = writeOperations.uploadPart(request, body, statistics);
statistics.partPut(lengthInBytes);
String eTag = result.getETag();
String eTag = response.eTag();
return BBPartHandle.from(
ByteBuffer.wrap(
buildPartHandlePayload(
filePath.toUri().toString(),
uploadIdString,
result.getPartNumber(),
partNumber,
eTag,
lengthInBytes)));
});
@ -188,7 +190,7 @@ class S3AMultipartUploader extends AbstractMultipartUploader {
String uploadIdStr = new String(uploadIdBytes, 0, uploadIdBytes.length,
Charsets.UTF_8);
ArrayList<PartETag> eTags = new ArrayList<>();
ArrayList<CompletedPart> eTags = new ArrayList<>();
eTags.ensureCapacity(handles.size());
long totalLength = 0;
// built up to identify duplicates -if the size of this set is
@ -201,7 +203,8 @@ class S3AMultipartUploader extends AbstractMultipartUploader {
payload.validate(uploadIdStr, filePath);
ids.add(payload.getPartNumber());
totalLength += payload.getLen();
eTags.add(new PartETag(handle.getKey(), payload.getEtag()));
eTags.add(
CompletedPart.builder().partNumber(handle.getKey()).eTag(payload.getEtag()).build());
}
Preconditions.checkArgument(ids.size() == count,
"Duplicate PartHandles");
@ -210,7 +213,7 @@ class S3AMultipartUploader extends AbstractMultipartUploader {
long finalLen = totalLength;
return context.submit(new CompletableFuture<>(),
trackDurationOfCallable(statistics, MULTIPART_UPLOAD_COMPLETED.getSymbol(), () -> {
CompleteMultipartUploadResult result =
CompleteMultipartUploadResponse result =
writeOperations.commitUpload(
key,
uploadIdStr,
@ -218,7 +221,7 @@ class S3AMultipartUploader extends AbstractMultipartUploader {
finalLen
);
byte[] eTag = result.getETag().getBytes(Charsets.UTF_8);
byte[] eTag = result.eTag().getBytes(Charsets.UTF_8);
statistics.uploadCompleted();
return (PathHandle) () -> ByteBuffer.wrap(eTag);
}));
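A minimal sketch of the v1-to-v2 shift shown above: PartETag gives way to CompletedPart, and the parts are attached through CompletedMultipartUpload. The part number, etag, bucket and key are illustrative:
String uploadId = "upload-id-from-createMultipartUpload"; // illustrative
CompletedPart part = CompletedPart.builder()
    .partNumber(1)
    .eTag("\"9b2cf535f27731c974343645a3985328\"")
    .build();
CompleteMultipartUploadRequest complete = CompleteMultipartUploadRequest.builder()
    .bucket("example-bucket")
    .key("path/to/object")
    .uploadId(uploadId)
    .multipartUpload(CompletedMultipartUpload.builder().parts(part).build())
    .build();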

View File

@ -18,12 +18,10 @@
package org.apache.hadoop.fs.s3a.impl;
import java.io.Closeable;
import java.io.InputStream;
import java.util.concurrent.atomic.AtomicBoolean;
import javax.annotation.Nullable;
import com.amazonaws.internal.SdkFilterInputStream;
import software.amazon.awssdk.http.Abortable;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@ -31,23 +29,18 @@ import org.apache.hadoop.classification.VisibleForTesting;
import org.apache.hadoop.fs.s3a.statistics.S3AInputStreamStatistics;
import org.apache.hadoop.util.functional.CallableRaisingIOE;
import static java.util.Objects.requireNonNull;
import static org.apache.hadoop.fs.s3a.impl.InternalConstants.DRAIN_BUFFER_SIZE;
import static org.apache.hadoop.fs.statistics.impl.IOStatisticsBinding.invokeTrackingDuration;
import static org.apache.hadoop.io.IOUtils.cleanupWithLogger;
/**
* Drains/aborts s3 or other AWS SDK streams.
* It is callable, so it can be passed directly to a submitter
* for async invocation.
* A request object may be passed in; it will be implicitly
* cached until this object is GCd.
* This is because in some versions of the AWS SDK, the S3Object
* has a finalize() method which releases the http connection,
* even when the stream is still open.
* See HADOOP-17338 for details.
*/
public class SDKStreamDrainer implements CallableRaisingIOE<Boolean> {
public class SDKStreamDrainer<TStream extends InputStream & Abortable>
implements CallableRaisingIOE<Boolean> {
private static final Logger LOG = LoggerFactory.getLogger(
SDKStreamDrainer.class);
@ -58,17 +51,9 @@ public class SDKStreamDrainer implements CallableRaisingIOE<Boolean> {
private final String uri;
/**
* Request object; usually S3Object
* Never used, but needed to keep the http connection
* open long enough for draining to take place.
* Stream from the getObject response for draining and closing.
*/
@Nullable
private final Closeable requestObject;
/**
* Stream from the {@link #requestObject} for draining and closing.
*/
private final SdkFilterInputStream sdkStream;
private final TStream sdkStream;
/**
* Should the request be aborted?
@ -118,7 +103,6 @@ public class SDKStreamDrainer implements CallableRaisingIOE<Boolean> {
/**
* Prepare to drain the stream.
* @param uri URI for messages
* @param requestObject http request object; needed to avoid GC issues.
* @param sdkStream stream to close.
* @param shouldAbort force an abort; used if explicitly requested.
* @param streamStatistics stats to update
@ -126,14 +110,12 @@ public class SDKStreamDrainer implements CallableRaisingIOE<Boolean> {
* @param remaining remaining bytes
*/
public SDKStreamDrainer(final String uri,
@Nullable final Closeable requestObject,
final SdkFilterInputStream sdkStream,
final TStream sdkStream,
final boolean shouldAbort,
final int remaining,
final S3AInputStreamStatistics streamStatistics,
final String reason) {
this.uri = uri;
this.requestObject = requestObject;
this.sdkStream = requireNonNull(sdkStream);
this.shouldAbort = shouldAbort;
this.remaining = remaining;
@ -233,7 +215,6 @@ public class SDKStreamDrainer implements CallableRaisingIOE<Boolean> {
LOG.debug("Closing stream");
sdkStream.close();
cleanupWithLogger(LOG, requestObject);
// this MUST come after the close, so that if the IO operations fail
// and an abort is triggered, the initial attempt's statistics
// aren't collected.
@ -255,8 +236,6 @@ public class SDKStreamDrainer implements CallableRaisingIOE<Boolean> {
LOG.warn("When aborting {} stream after failing to close it for {}",
uri, reason, e);
thrown = e;
} finally {
cleanupWithLogger(LOG, requestObject);
}
streamStatistics.streamClose(true, remaining);
@ -269,11 +248,7 @@ public class SDKStreamDrainer implements CallableRaisingIOE<Boolean> {
return uri;
}
public Object getRequestObject() {
return requestObject;
}
public SdkFilterInputStream getSdkStream() {
public TStream getSdkStream() {
return sdkStream;
}
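A minimal illustration (not part of this commit) of the drain-or-abort behaviour the reworked drainer applies: with SDK V2 the response stream itself implements Abortable, so no separate request object needs to be retained for connection lifetime. The helper name and buffer size below are invented for the sketch.

import java.io.IOException;
import java.io.InputStream;
import software.amazon.awssdk.http.Abortable;

public final class DrainOrAbortSketch {
  private static final int DRAIN_BUFFER = 16384;

  // Either drain the remaining bytes so the connection can be reused, or abort it outright.
  public static <S extends InputStream & Abortable> void drainOrAbort(
      S stream, boolean shouldAbort, long remaining) throws IOException {
    if (shouldAbort) {
      stream.abort();   // V2 streams expose Abortable directly; no S3Object wrapper involved.
      return;
    }
    byte[] buffer = new byte[DRAIN_BUFFER];
    long drained = 0;
    while (drained < remaining) {
      int read = stream.read(buffer);
      if (read < 0) {
        break;
      }
      drained += read;
    }
    stream.close();
  }
}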

View File

@ -47,6 +47,9 @@ public final class V2Migration {
private static final LogExactlyOnce WARN_OF_CUSTOM_SIGNER =
new LogExactlyOnce(SDK_V2_UPGRADE_LOG);
private static final LogExactlyOnce WARN_OF_REQUEST_HANDLERS =
new LogExactlyOnce(SDK_V2_UPGRADE_LOG);
private static final LogExactlyOnce WARN_ON_GET_OBJECT_METADATA =
new LogExactlyOnce(SDK_V2_UPGRADE_LOG);
@ -87,6 +90,15 @@ public final class V2Migration {
+ "once S3A is upgraded to SDK V2");
}
/**
* Warns on use of request handlers.
*/
public static void v1RequestHandlersUsed() {
WARN_OF_REQUEST_HANDLERS.warn(
"The request handler interface has changed in AWS SDK V2, use exception interceptors "
+ "once S3A is upgraded to SDK V2");
}
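For context, a hedged sketch (not from this commit) of the SDK V2 ExecutionInterceptor that takes over from V1 request handlers; the class and header names are illustrative only.

import software.amazon.awssdk.core.interceptor.Context;
import software.amazon.awssdk.core.interceptor.ExecutionAttributes;
import software.amazon.awssdk.core.interceptor.ExecutionInterceptor;
import software.amazon.awssdk.http.SdkHttpRequest;

public class LoggingInterceptorSketch implements ExecutionInterceptor {
  @Override
  public SdkHttpRequest modifyHttpRequest(Context.ModifyHttpRequest context,
      ExecutionAttributes executionAttributes) {
    // Rough equivalent of a V1 RequestHandler2.beforeRequest hook:
    // inspect or adjust the outgoing HTTP request before it is signed and sent.
    SdkHttpRequest request = context.httpRequest();
    return request.toBuilder()
        .putHeader("x-sketch-header", "example")
        .build();
  }
}

Such an interceptor would typically be registered via ClientOverrideConfiguration.builder().addExecutionInterceptor(...) when the client is built.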
/**
* Warns on use of getObjectMetadata.
*/

View File

@ -19,15 +19,11 @@
package org.apache.hadoop.fs.s3a.prefetch;
import java.io.IOException;
import java.io.InputStream;
import java.util.IdentityHashMap;
import java.util.Map;
import com.amazonaws.services.s3.model.GetObjectRequest;
import com.amazonaws.services.s3.model.S3Object;
import com.amazonaws.services.s3.model.S3ObjectInputStream;
import software.amazon.awssdk.core.ResponseInputStream;
import software.amazon.awssdk.services.s3.model.GetObjectRequest;
import software.amazon.awssdk.services.s3.model.GetObjectResponse;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@ -35,12 +31,14 @@ import org.apache.hadoop.fs.impl.prefetch.Validate;
import org.apache.hadoop.fs.s3a.Invoker;
import org.apache.hadoop.fs.s3a.S3AInputStream;
import org.apache.hadoop.fs.s3a.S3AReadOpContext;
import org.apache.hadoop.fs.s3a.S3AUtils;
import org.apache.hadoop.fs.s3a.S3ObjectAttributes;
import org.apache.hadoop.fs.s3a.impl.ChangeTracker;
import org.apache.hadoop.fs.s3a.impl.SDKStreamDrainer;
import org.apache.hadoop.fs.s3a.statistics.S3AInputStreamStatistics;
import org.apache.hadoop.fs.statistics.DurationTracker;
/**
* Encapsulates low level interactions with S3 object on AWS.
*/
@ -74,12 +72,6 @@ public class S3ARemoteObject {
*/
private final ChangeTracker changeTracker;
/**
* Maps a stream returned by openForRead() to the associated S3 object.
* That allows us to close the object when closing the stream.
*/
private final Map<InputStream, S3Object> s3Objects;
/**
* uri of the object being read.
*/
@ -123,7 +115,6 @@ public class S3ARemoteObject {
this.client = client;
this.streamStatistics = streamStatistics;
this.changeTracker = changeTracker;
this.s3Objects = new IdentityHashMap<>();
this.uri = this.getPath();
}
@ -187,21 +178,23 @@ public class S3ARemoteObject {
* @throws IllegalArgumentException if offset is greater than or equal to file size.
* @throws IllegalArgumentException if size is greater than the remaining bytes.
*/
public InputStream openForRead(long offset, int size) throws IOException {
public ResponseInputStream<GetObjectResponse> openForRead(long offset, int size)
throws IOException {
Validate.checkNotNegative(offset, "offset");
Validate.checkLessOrEqual(offset, "offset", size(), "size()");
Validate.checkLessOrEqual(size, "size", size() - offset, "size() - offset");
streamStatistics.streamOpened();
final GetObjectRequest request =
client.newGetRequest(s3Attributes.getKey())
.withRange(offset, offset + size - 1);
changeTracker.maybeApplyConstraint(request);
final GetObjectRequest request = client
.newGetRequestBuilder(s3Attributes.getKey())
.range(S3AUtils.formatRange(offset, offset + size - 1))
.applyMutation(changeTracker::maybeApplyConstraint)
.build();
String operation = String.format(
"%s %s at %d", S3AInputStream.OPERATION_OPEN, uri, offset);
DurationTracker tracker = streamStatistics.initiateGetRequest();
S3Object object = null;
ResponseInputStream<GetObjectResponse> object = null;
try {
object = Invoker.once(operation, uri, () -> client.getObject(request));
@ -212,27 +205,14 @@ public class S3ARemoteObject {
tracker.close();
}
changeTracker.processResponse(object, operation, offset);
InputStream stream = object.getObjectContent();
synchronized (s3Objects) {
s3Objects.put(stream, object);
}
return stream;
changeTracker.processResponse(object.response(), operation, offset);
return object;
}
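A minimal sketch (not part of this commit) of the underlying SDK V2 ranged GET that openForRead now issues; the formatRange helper referenced above is assumed to produce the standard "bytes=start-end" header value used here, and bucket/key are placeholders.

import software.amazon.awssdk.core.ResponseInputStream;
import software.amazon.awssdk.services.s3.S3Client;
import software.amazon.awssdk.services.s3.model.GetObjectRequest;
import software.amazon.awssdk.services.s3.model.GetObjectResponse;

public class RangedGetSketch {
  public static ResponseInputStream<GetObjectResponse> openRange(
      S3Client s3, String bucket, String key, long offset, int size) {
    // The V2 range field takes the raw HTTP header value, e.g. "bytes=0-1023".
    GetObjectRequest request = GetObjectRequest.builder()
        .bucket(bucket)
        .key(key)
        .range(String.format("bytes=%d-%d", offset, offset + size - 1))
        .build();
    // The returned stream carries both the payload and the GetObjectResponse metadata.
    return s3.getObject(request);
  }
}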
void close(InputStream inputStream, int numRemainingBytes) {
S3Object obj;
synchronized (s3Objects) {
obj = s3Objects.remove(inputStream);
if (obj == null) {
throw new IllegalArgumentException("inputStream not found");
}
}
void close(ResponseInputStream<GetObjectResponse> inputStream, int numRemainingBytes) {
SDKStreamDrainer drainer = new SDKStreamDrainer(
uri,
obj,
(S3ObjectInputStream)inputStream,
inputStream,
false,
numRemainingBytes,
streamStatistics,

View File

@ -22,7 +22,6 @@ package org.apache.hadoop.fs.s3a.prefetch;
import java.io.Closeable;
import java.io.EOFException;
import java.io.IOException;
import java.io.InputStream;
import java.net.SocketTimeoutException;
import java.nio.ByteBuffer;
@ -33,6 +32,9 @@ import org.apache.hadoop.fs.impl.prefetch.Validate;
import org.apache.hadoop.fs.s3a.Invoker;
import org.apache.hadoop.fs.s3a.statistics.S3AInputStreamStatistics;
import software.amazon.awssdk.core.ResponseInputStream;
import software.amazon.awssdk.services.s3.model.GetObjectResponse;
import static org.apache.hadoop.fs.statistics.StreamStatisticNames.STREAM_READ_REMOTE_BLOCK_READ;
import static org.apache.hadoop.fs.statistics.impl.IOStatisticsBinding.trackDurationOfOperation;
@ -144,7 +146,8 @@ public class S3ARemoteObjectReader implements Closeable {
return;
}
InputStream inputStream = remoteObject.openForRead(offset, readSize);
ResponseInputStream<GetObjectResponse> inputStream =
remoteObject.openForRead(offset, readSize);
int numRemainingBytes = readSize;
byte[] bytes = new byte[READ_BUFFER_SIZE];

View File

@ -33,7 +33,7 @@ import java.util.Scanner;
import java.util.concurrent.TimeUnit;
import java.util.stream.Collectors;
import com.amazonaws.services.s3.model.MultipartUpload;
import software.amazon.awssdk.services.s3.model.MultipartUpload;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@ -694,11 +694,11 @@ public abstract class S3GuardTool extends Configured implements Tool,
count++;
if (mode == Mode.ABORT || mode == Mode.LIST || verbose) {
println(out, "%s%s %s", mode == Mode.ABORT ? "Deleting: " : "",
upload.getKey(), upload.getUploadId());
upload.key(), upload.uploadId());
}
if (mode == Mode.ABORT) {
writeOperationHelper
.abortMultipartUpload(upload.getKey(), upload.getUploadId(),
.abortMultipartUpload(upload.key(), upload.uploadId(),
true, LOG_EVENT);
}
}
@ -726,7 +726,7 @@ public abstract class S3GuardTool extends Configured implements Tool,
return true;
}
Date ageDate = new Date(System.currentTimeMillis() - msec);
return ageDate.compareTo(u.getInitiated()) >= 0;
return ageDate.compareTo(Date.from(u.initiated())) >= 0;
}
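For illustration, a hedged sketch (not from this commit) of the same age check written directly against the java.time.Instant that V2's MultipartUpload.initiated() returns, avoiding the Date round-trip; the helper name is invented.

import java.time.Duration;
import java.time.Instant;
import software.amazon.awssdk.services.s3.model.MultipartUpload;

public final class UploadAgeSketch {
  // True if the upload was initiated at or before (now - age).
  public static boolean olderThan(MultipartUpload upload, Duration age) {
    Instant cutoff = Instant.now().minus(age);
    return !upload.initiated().isAfter(cutoff);
  }
}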
private void processArgs(List<String> args, PrintStream out)

View File

@ -0,0 +1,156 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs.s3a.select;
import java.util.Enumeration;
import java.util.NoSuchElementException;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.LinkedBlockingQueue;
import org.reactivestreams.Subscriber;
import org.reactivestreams.Subscription;
import software.amazon.awssdk.core.async.SdkPublisher;
import software.amazon.awssdk.core.exception.SdkException;
/**
* Implements the {@link Enumeration} interface by subscribing to a
* {@link SdkPublisher} instance. The enumeration will buffer a fixed
* number of elements and only request new ones from the publisher
* when they are consumed. Calls to {@link #hasMoreElements()} and
* {@link #nextElement()} may block while waiting for new elements.
* @param <T> the type of element.
*/
public final class BlockingEnumeration<T> implements Enumeration<T> {
private static final class Signal<T> {
private final T element;
private final Throwable error;
Signal(T element) {
this.element = element;
this.error = null;
}
Signal(Throwable error) {
this.element = null;
this.error = error;
}
}
private final Signal<T> END_SIGNAL = new Signal<>((Throwable)null);
private final CompletableFuture<Subscription> subscription = new CompletableFuture<>();
private final BlockingQueue<Signal<T>> signalQueue;
private final int bufferSize;
private Signal<T> current = null;
/**
* Create an enumeration with a fixed buffer size and an
* optional injected first element.
* @param publisher the publisher feeding the enumeration.
* @param bufferSize the buffer size.
* @param firstElement (optional) first element the enumeration will return.
*/
public BlockingEnumeration(SdkPublisher<T> publisher,
final int bufferSize,
final T firstElement) {
this.signalQueue = new LinkedBlockingQueue<>();
this.bufferSize = bufferSize;
if (firstElement != null) {
this.current = new Signal<>(firstElement);
}
publisher.subscribe(new EnumerationSubscriber());
}
/**
* Create an enumeration with a fixed buffer size.
* @param publisher the publisher feeding the enumeration.
* @param bufferSize the buffer size.
*/
public BlockingEnumeration(SdkPublisher<T> publisher,
final int bufferSize) {
this(publisher, bufferSize, null);
}
@Override
public boolean hasMoreElements() {
if (current == null) {
try {
current = signalQueue.take();
} catch (InterruptedException e) {
current = new Signal<>(e);
subscription.thenAccept(Subscription::cancel);
Thread.currentThread().interrupt();
}
}
if (current.error != null) {
Throwable error = current.error;
current = END_SIGNAL;
if (error instanceof Error) {
throw (Error)error;
} else if (error instanceof SdkException) {
throw (SdkException)error;
} else {
throw SdkException.create("Unexpected error", error);
}
}
return current != END_SIGNAL;
}
@Override
public T nextElement() {
if (!hasMoreElements()) {
throw new NoSuchElementException();
}
T element = current.element;
current = null;
subscription.thenAccept(s -> s.request(1));
return element;
}
private final class EnumerationSubscriber implements Subscriber<T> {
@Override
public void onSubscribe(Subscription s) {
long request = bufferSize;
if (current != null) {
request--;
}
if (request > 0) {
s.request(request);
}
subscription.complete(s);
}
@Override
public void onNext(T t) {
signalQueue.add(new Signal<>(t));
}
@Override
public void onError(Throwable t) {
signalQueue.add(new Signal<>(t));
}
@Override
public void onComplete() {
signalQueue.add(END_SIGNAL);
}
}
}
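A short usage sketch (not part of this commit) showing how the enumeration above can be consumed; the SdkPublisher is assumed to come from some async SDK response, and the buffer size of 16 is arbitrary.

import java.util.Enumeration;
import org.apache.hadoop.fs.s3a.select.BlockingEnumeration;
import software.amazon.awssdk.core.async.SdkPublisher;

public final class BlockingEnumerationUsageSketch {
  // Consume an async publisher synchronously, requesting at most 16 elements ahead.
  public static void printAll(SdkPublisher<String> publisher) {
    Enumeration<String> lines = new BlockingEnumeration<>(publisher, 16);
    while (lines.hasMoreElements()) {
      System.out.println(lines.nextElement());
    }
  }
}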

View File

@ -21,13 +21,13 @@ package org.apache.hadoop.fs.s3a.select;
import java.io.IOException;
import java.util.Locale;
import com.amazonaws.services.s3.model.CSVInput;
import com.amazonaws.services.s3.model.CSVOutput;
import com.amazonaws.services.s3.model.ExpressionType;
import com.amazonaws.services.s3.model.InputSerialization;
import com.amazonaws.services.s3.model.OutputSerialization;
import com.amazonaws.services.s3.model.QuoteFields;
import com.amazonaws.services.s3.model.SelectObjectContentRequest;
import software.amazon.awssdk.services.s3.model.CSVInput;
import software.amazon.awssdk.services.s3.model.CSVOutput;
import software.amazon.awssdk.services.s3.model.ExpressionType;
import software.amazon.awssdk.services.s3.model.InputSerialization;
import software.amazon.awssdk.services.s3.model.OutputSerialization;
import software.amazon.awssdk.services.s3.model.QuoteFields;
import software.amazon.awssdk.services.s3.model.SelectObjectContentRequest;
import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@ -145,9 +145,9 @@ public class SelectBinding {
Preconditions.checkState(isEnabled(),
"S3 Select is not enabled for %s", path);
SelectObjectContentRequest request = operations.newSelectRequest(path);
SelectObjectContentRequest.Builder request = operations.newSelectRequestBuilder(path);
buildRequest(request, expression, builderOptions);
return request;
return request.build();
}
/**
@ -175,14 +175,14 @@ public class SelectBinding {
}
boolean sqlInErrors = builderOptions.getBoolean(SELECT_ERRORS_INCLUDE_SQL,
errorsIncludeSql);
String expression = request.getExpression();
String expression = request.expression();
final String errorText = sqlInErrors ? expression : "Select";
if (sqlInErrors) {
LOG.info("Issuing SQL request {}", expression);
}
SelectEventStreamPublisher selectPublisher = operations.select(path, request, errorText);
return new SelectInputStream(readContext,
objectAttributes,
operations.select(path, request, errorText));
objectAttributes, selectPublisher);
}
/**
@ -197,14 +197,14 @@ public class SelectBinding {
* <li>The default values in {@link SelectConstants}</li>
* </ol>
*
* @param request request to build up
* @param requestBuilder request to build up
* @param expression SQL expression
* @param builderOptions the options which came in from the openFile builder.
* @throws IllegalArgumentException if an option is somehow invalid.
* @throws IOException if an option is somehow invalid.
*/
void buildRequest(
final SelectObjectContentRequest request,
final SelectObjectContentRequest.Builder requestBuilder,
final String expression,
final Configuration builderOptions)
throws IllegalArgumentException, IOException {
@ -213,7 +213,6 @@ public class SelectBinding {
final Configuration ownerConf = operations.getConf();
String inputFormat = builderOptions.get(SELECT_INPUT_FORMAT,
SELECT_FORMAT_CSV).toLowerCase(Locale.ENGLISH);
Preconditions.checkArgument(SELECT_FORMAT_CSV.equals(inputFormat),
@ -224,34 +223,24 @@ public class SelectBinding {
Preconditions.checkArgument(SELECT_FORMAT_CSV.equals(outputFormat),
"Unsupported output format %s", outputFormat);
request.setExpressionType(ExpressionType.SQL);
request.setExpression(expandBackslashChars(expression));
InputSerialization inputSerialization = buildCsvInputRequest(ownerConf,
builderOptions);
String compression = opt(builderOptions,
ownerConf,
SELECT_INPUT_COMPRESSION,
COMPRESSION_OPT_NONE,
true).toUpperCase(Locale.ENGLISH);
if (isNotEmpty(compression)) {
inputSerialization.setCompressionType(compression);
}
request.setInputSerialization(inputSerialization);
request.setOutputSerialization(buildCSVOutput(ownerConf, builderOptions));
requestBuilder.expressionType(ExpressionType.SQL);
requestBuilder.expression(expandBackslashChars(expression));
requestBuilder.inputSerialization(
buildCsvInput(ownerConf, builderOptions));
requestBuilder.outputSerialization(
buildCSVOutput(ownerConf, builderOptions));
}
/**
* Build the CSV input request.
* Build the CSV input format for a request.
* @param ownerConf FS owner configuration
* @param builderOptions options on the specific request
* @return the constructed request
* @return the input format
* @throws IllegalArgumentException argument failure
* @throws IOException validation failure
*/
public InputSerialization buildCsvInputRequest(
public InputSerialization buildCsvInput(
final Configuration ownerConf,
final Configuration builderOptions)
throws IllegalArgumentException, IOException {
@ -283,28 +272,35 @@ public class SelectBinding {
CSV_INPUT_QUOTE_ESCAPE_CHARACTER_DEFAULT);
// CSV input
CSVInput csv = new CSVInput();
csv.setFieldDelimiter(fieldDelimiter);
csv.setRecordDelimiter(recordDelimiter);
csv.setComments(commentMarker);
csv.setQuoteCharacter(quoteCharacter);
CSVInput.Builder csvBuilder = CSVInput.builder()
.fieldDelimiter(fieldDelimiter)
.recordDelimiter(recordDelimiter)
.comments(commentMarker)
.quoteCharacter(quoteCharacter);
if (StringUtils.isNotEmpty(quoteEscapeCharacter)) {
csv.setQuoteEscapeCharacter(quoteEscapeCharacter);
csvBuilder.quoteEscapeCharacter(quoteEscapeCharacter);
}
csv.setFileHeaderInfo(headerInfo);
InputSerialization inputSerialization = new InputSerialization();
inputSerialization.setCsv(csv);
return inputSerialization;
csvBuilder.fileHeaderInfo(headerInfo);
InputSerialization.Builder inputSerialization =
InputSerialization.builder()
.csv(csvBuilder.build());
String compression = opt(builderOptions,
ownerConf,
SELECT_INPUT_COMPRESSION,
COMPRESSION_OPT_NONE,
true).toUpperCase(Locale.ENGLISH);
if (isNotEmpty(compression)) {
inputSerialization.compressionType(compression);
}
return inputSerialization.build();
}
/**
* Build CSV output for a request.
* Build CSV output format for a request.
* @param ownerConf FS owner configuration
* @param builderOptions options on the specific request
* @return the constructed request
* @return the output format
* @throws IllegalArgumentException argument failure
* @throws IOException validation failure
*/
@ -333,21 +329,19 @@ public class SelectBinding {
CSV_OUTPUT_QUOTE_FIELDS,
CSV_OUTPUT_QUOTE_FIELDS_ALWAYS).toUpperCase(Locale.ENGLISH);
// output is CSV, always
OutputSerialization outputSerialization
= new OutputSerialization();
CSVOutput csvOut = new CSVOutput();
csvOut.setQuoteCharacter(quoteCharacter);
csvOut.setQuoteFields(
QuoteFields.fromValue(quoteFields));
csvOut.setFieldDelimiter(fieldDelimiter);
csvOut.setRecordDelimiter(recordDelimiter);
CSVOutput.Builder csvOutputBuilder = CSVOutput.builder()
.quoteCharacter(quoteCharacter)
.quoteFields(QuoteFields.fromValue(quoteFields))
.fieldDelimiter(fieldDelimiter)
.recordDelimiter(recordDelimiter);
if (!quoteEscapeCharacter.isEmpty()) {
csvOut.setQuoteEscapeCharacter(quoteEscapeCharacter);
csvOutputBuilder.quoteEscapeCharacter(quoteEscapeCharacter);
}
outputSerialization.setCsv(csvOut);
return outputSerialization;
// output is CSV, always
return OutputSerialization.builder()
.csv(csvOutputBuilder.build())
.build();
}
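To show the pieces together, a hedged sketch (not from this commit) of a complete SDK V2 SelectObjectContentRequest assembled from CSV input and output serialization builders like those above; delimiters, header handling and names are example values only.

import software.amazon.awssdk.services.s3.model.CSVInput;
import software.amazon.awssdk.services.s3.model.CSVOutput;
import software.amazon.awssdk.services.s3.model.ExpressionType;
import software.amazon.awssdk.services.s3.model.FileHeaderInfo;
import software.amazon.awssdk.services.s3.model.InputSerialization;
import software.amazon.awssdk.services.s3.model.OutputSerialization;
import software.amazon.awssdk.services.s3.model.SelectObjectContentRequest;

public class SelectRequestSketch {
  public static SelectObjectContentRequest csvSelect(String bucket, String key, String sql) {
    // Every model object is immutable in V2 and assembled through builders.
    InputSerialization input = InputSerialization.builder()
        .csv(CSVInput.builder()
            .fieldDelimiter(",")
            .recordDelimiter("\n")
            .fileHeaderInfo(FileHeaderInfo.USE)
            .build())
        .build();
    OutputSerialization output = OutputSerialization.builder()
        .csv(CSVOutput.builder()
            .fieldDelimiter(",")
            .recordDelimiter("\n")
            .build())
        .build();
    return SelectObjectContentRequest.builder()
        .bucket(bucket)
        .key(key)
        .expressionType(ExpressionType.SQL)
        .expression(sql)
        .inputSerialization(input)
        .outputSerialization(output)
        .build();
  }
}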
/**
@ -359,18 +353,18 @@ public class SelectBinding {
public static String toString(final SelectObjectContentRequest request) {
StringBuilder sb = new StringBuilder();
sb.append("SelectObjectContentRequest{")
.append("bucket name=").append(request.getBucketName())
.append("; key=").append(request.getKey())
.append("; expressionType=").append(request.getExpressionType())
.append("; expression=").append(request.getExpression());
InputSerialization input = request.getInputSerialization();
.append("bucket name=").append(request.bucket())
.append("; key=").append(request.key())
.append("; expressionType=").append(request.expressionType())
.append("; expression=").append(request.expression());
InputSerialization input = request.inputSerialization();
if (input != null) {
sb.append("; Input")
.append(input.toString());
} else {
sb.append("; Input Serialization: none");
}
OutputSerialization out = request.getOutputSerialization();
OutputSerialization out = request.outputSerialization();
if (out != null) {
sb.append("; Output")
.append(out.toString());

Some files were not shown because too many files have changed in this diff.