HADOOP-13727. S3A: Reduce high number of connections to EC2 Instance Metadata Service caused by InstanceProfileCredentialsProvider. Contributed by Chris Nauroth.

This commit is contained in:
Chris Nauroth 2016-10-24 21:22:34 -07:00
parent 0a166b1347
commit d8fa1cfa67
8 changed files with 621 additions and 193 deletions

View File

@ -41,10 +41,10 @@
<property>
<name>hadoop.http.filter.initializers</name>
<value>org.apache.hadoop.http.lib.StaticUserWebFilter</value>
<description>A comma separated list of class names. Each class in the list
must extend org.apache.hadoop.http.FilterInitializer. The corresponding
Filter will be initialized. Then, the Filter will be applied to all user
facing jsp and servlet web pages. The ordering of the list defines the
<description>A comma separated list of class names. Each class in the list
must extend org.apache.hadoop.http.FilterInitializer. The corresponding
Filter will be initialized. Then, the Filter will be applied to all user
facing jsp and servlet web pages. The ordering of the list defines the
ordering of the filters.</description>
</property>
@ -76,14 +76,14 @@
<name>hadoop.security.group.mapping</name>
<value>org.apache.hadoop.security.JniBasedUnixGroupsMappingWithFallback</value>
<description>
Class for user to group mapping (get groups for a given user) for ACL.
Class for user to group mapping (get groups for a given user) for ACL.
The default implementation,
org.apache.hadoop.security.JniBasedUnixGroupsMappingWithFallback,
will determine if the Java Native Interface (JNI) is available. If JNI is
available the implementation will use the API within hadoop to resolve a
list of groups for a user. If JNI is not available then the shell
implementation, ShellBasedUnixGroupsMapping, is used. This implementation
shells out to the Linux/Unix environment with the
org.apache.hadoop.security.JniBasedUnixGroupsMappingWithFallback,
will determine if the Java Native Interface (JNI) is available. If JNI is
available the implementation will use the API within hadoop to resolve a
list of groups for a user. If JNI is not available then the shell
implementation, ShellBasedUnixGroupsMapping, is used. This implementation
shells out to the Linux/Unix environment with the
<code>bash -c groups</code> command to resolve a list of groups for a user.
</description>
</property>
@ -481,10 +481,10 @@
<property>
<name>hadoop.rpc.protection</name>
<value>authentication</value>
<description>A comma-separated list of protection values for secured sasl
<description>A comma-separated list of protection values for secured sasl
connections. Possible values are authentication, integrity and privacy.
authentication means authentication only and no integrity or privacy;
integrity implies authentication and integrity are enabled; and privacy
authentication means authentication only and no integrity or privacy;
integrity implies authentication and integrity are enabled; and privacy
implies all of authentication, integrity and privacy are enabled.
hadoop.security.saslproperties.resolver.class can be used to override
the hadoop.rpc.protection for a connection at the server side.
@ -494,10 +494,10 @@
<property>
<name>hadoop.security.saslproperties.resolver.class</name>
<value></value>
<description>SaslPropertiesResolver used to resolve the QOP used for a
connection. If not specified, the full set of values specified in
hadoop.rpc.protection is used while determining the QOP used for the
connection. If a class is specified, then the QOP values returned by
<description>SaslPropertiesResolver used to resolve the QOP used for a
connection. If not specified, the full set of values specified in
hadoop.rpc.protection is used while determining the QOP used for the
connection. If a class is specified, then the QOP values returned by
the class will be used while determining the QOP used for the connection.
</description>
</property>
@ -566,7 +566,7 @@
page size (4096 on Intel x86), and it determines how much data is
buffered during read and write operations.</description>
</property>
<property>
<name>io.bytes.per.checksum</name>
<value>512</value>
@ -599,7 +599,7 @@
either by by name or the full pathname. In the former case, the
library is located by the dynamic linker, usually searching the
directories specified in the environment variable LD_LIBRARY_PATH.
The value of "system-native" indicates that the default system
library should be used. To indicate that the algorithm should
operate entirely in Java, specify "java-builtin".</description>
@ -709,8 +709,8 @@
<description>Number of minutes between trash checkpoints.
Should be smaller or equal to fs.trash.interval. If zero,
the value is set to the value of fs.trash.interval.
Every time the checkpointer runs it creates a new checkpoint
out of current and removes checkpoints created more than
Every time the checkpointer runs it creates a new checkpoint
out of current and removes checkpoints created more than
fs.trash.interval minutes ago.
</description>
</property>
@ -735,7 +735,7 @@
<name>fs.AbstractFileSystem.har.impl</name>
<value>org.apache.hadoop.fs.HarFs</value>
<description>The AbstractFileSystem for har: uris.</description>
</property>
</property>
<property>
<name>fs.AbstractFileSystem.hdfs.impl</name>
@ -806,7 +806,7 @@
<property>
<name>fs.s3n.maxRetries</name>
<value>4</value>
<description>The maximum number of retries for reading or writing files to S3,
<description>The maximum number of retries for reading or writing files to S3,
before we signal failure to the application.
</description>
</property>
@ -895,15 +895,37 @@
com.amazonaws.auth.AWSCredentialsProvider.
These are loaded and queried in sequence for a valid set of credentials.
Each listed class must provide either an accessible constructor accepting
java.net.URI and org.apache.hadoop.conf.Configuration, or an accessible
default constructor.
Each listed class must implement one of the following means of
construction, which are attempted in order:
1. a public constructor accepting java.net.URI and
org.apache.hadoop.conf.Configuration,
2. a public static method named getInstance that accepts no
arguments and returns an instance of
com.amazonaws.auth.AWSCredentialsProvider, or
3. a public default constructor.
Specifying org.apache.hadoop.fs.s3a.AnonymousAWSCredentialsProvider allows
anonymous access to a publicly accessible S3 bucket without any credentials.
Please note that allowing anonymous access to an S3 bucket compromises
security and therefore is unsuitable for most use cases. It can be useful
for accessing public data sets without requiring AWS credentials.
If unspecified, then the default list of credential provider classes,
queried in sequence, is:
1. org.apache.hadoop.fs.s3a.BasicAWSCredentialsProvider: supports static
configuration of AWS access key ID and secret access key. See also
fs.s3a.access.key and fs.s3a.secret.key.
2. com.amazonaws.auth.EnvironmentVariableCredentialsProvider: supports
configuration of AWS access key ID and secret access key in
environment variables named AWS_ACCESS_KEY_ID and
AWS_SECRET_ACCESS_KEY, as documented in the AWS SDK.
3. org.apache.hadoop.fs.s3a.SharedInstanceProfileCredentialsProvider:
a shared instance of
com.amazonaws.auth.InstanceProfileCredentialsProvider from the AWS
SDK, which supports use of instance profile credentials if running
in an EC2 VM. Using this shared instance potentially reduces load
on the EC2 instance metadata service for multi-threaded
applications.
</description>
</property>
@ -1007,7 +1029,7 @@
<property>
<name>fs.s3a.paging.maximum</name>
<value>5000</value>
<description>How many keys to request from S3 when doing
<description>How many keys to request from S3 when doing
directory listings at a time.</description>
</property>
@ -1106,7 +1128,7 @@
<property>
<name>fs.s3a.buffer.dir</name>
<value>${hadoop.tmp.dir}/s3a</value>
<description>Comma separated list of directories that will be used to buffer file
<description>Comma separated list of directories that will be used to buffer file
uploads to.</description>
</property>
@ -1197,7 +1219,7 @@
<property>
<name>io.seqfile.compress.blocksize</name>
<value>1000000</value>
<description>The minimum block size for compression in block compressed
<description>The minimum block size for compression in block compressed
SequenceFiles.
</description>
</property>
@ -1213,7 +1235,7 @@
<property>
<name>io.seqfile.sorter.recordlimit</name>
<value>1000000</value>
<description>The limit on number of records to be kept in memory in a spill
<description>The limit on number of records to be kept in memory in a spill
in SequenceFiles.Sorter
</description>
</property>
@ -1291,7 +1313,7 @@
<property>
<name>ipc.client.connect.timeout</name>
<value>20000</value>
<description>Indicates the number of milliseconds a client will wait for the
<description>Indicates the number of milliseconds a client will wait for the
socket to establish a server connection.
</description>
</property>
@ -1388,10 +1410,10 @@
<property>
<name>hadoop.security.impersonation.provider.class</name>
<value></value>
<description>A class which implements ImpersonationProvider interface, used to
authorize whether one user can impersonate a specific user.
If not specified, the DefaultImpersonationProvider will be used.
If a class is specified, then that class will be used to determine
<description>A class which implements ImpersonationProvider interface, used to
authorize whether one user can impersonate a specific user.
If not specified, the DefaultImpersonationProvider will be used.
If a class is specified, then that class will be used to determine
the impersonation capability.
</description>
</property>
@ -1453,7 +1475,7 @@
<property>
<name>net.topology.script.number.args</name>
<value>100</value>
<description> The max number of args that the script configured with
<description> The max number of args that the script configured with
net.topology.script.file.name should be run with. Each arg is an
IP address.
</description>
@ -1467,7 +1489,7 @@
org.apache.hadoop.net.TableMapping. The file format is a two column text
file, with columns separated by whitespace. The first column is a DNS or
IP address and the second column specifies the rack where the address maps.
If no entry corresponding to a host in the cluster is found, then
If no entry corresponding to a host in the cluster is found, then
/default-rack is assumed.
</description>
</property>
@ -1983,14 +2005,14 @@
<name>nfs.exports.allowed.hosts</name>
<value>* rw</value>
<description>
By default, the export can be mounted by any client. The value string
contains machine name and access privilege, separated by whitespace
characters. The machine name format can be a single host, a Java regular
expression, or an IPv4 address. The access privilege uses rw or ro to
specify read/write or read-only access of the machines to exports. If the
By default, the export can be mounted by any client. The value string
contains machine name and access privilege, separated by whitespace
characters. The machine name format can be a single host, a Java regular
expression, or an IPv4 address. The access privilege uses rw or ro to
specify read/write or read-only access of the machines to exports. If the
access privilege is not provided, the default is read-only. Entries are separated by ";".
For example: "192.168.0.0/22 rw ; host.*\.example\.com ; host1.test.org ro;".
Only the NFS gateway needs to restart after this property is updated.
Only the NFS gateway needs to restart after this property is updated.
</description>
</property>
@ -2044,7 +2066,7 @@
<name>hadoop.security.crypto.codec.classes.aes.ctr.nopadding</name>
<value>org.apache.hadoop.crypto.OpensslAesCtrCryptoCodec, org.apache.hadoop.crypto.JceAesCtrCryptoCodec</value>
<description>
Comma-separated list of crypto codec implementations for AES/CTR/NoPadding.
Comma-separated list of crypto codec implementations for AES/CTR/NoPadding.
The first implementation will be used if available, others are fallbacks.
</description>
</property>
@ -2061,7 +2083,7 @@
<name>hadoop.security.crypto.jce.provider</name>
<value></value>
<description>
The JCE provider name used in CryptoCodec.
The JCE provider name used in CryptoCodec.
</description>
</property>
@ -2069,7 +2091,7 @@
<name>hadoop.security.crypto.buffer.size</name>
<value>8192</value>
<description>
The buffer size used by CryptoInputStream and CryptoOutputStream.
The buffer size used by CryptoInputStream and CryptoOutputStream.
</description>
</property>
@ -2077,7 +2099,7 @@
<name>hadoop.security.java.secure.random.algorithm</name>
<value>SHA1PRNG</value>
<description>
The java secure random algorithm.
The java secure random algorithm.
</description>
</property>
@ -2085,7 +2107,7 @@
<name>hadoop.security.secure.random.impl</name>
<value></value>
<description>
Implementation of secure random.
Implementation of secure random.
</description>
</property>
@ -2156,7 +2178,7 @@
<value>0</value>
<description>The maximum number of concurrent connections a server is allowed
to accept. If this limit is exceeded, incoming connections will first fill
the listen queue and then may go to an OS-specific listen overflow queue.
the listen queue and then may go to an OS-specific listen overflow queue.
The client may fail or timeout, but the server can avoid running out of file
descriptors using this feature. 0 means no limit.
</description>

View File

@ -22,6 +22,7 @@
import com.amazonaws.auth.AWSCredentials;
import com.amazonaws.auth.AWSCredentialsProvider;
import com.amazonaws.auth.AnonymousAWSCredentials;
import com.google.common.annotations.VisibleForTesting;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
@ -151,6 +152,16 @@ public AWSCredentials getCredentials() {
}
/**
* Returns the underlying list of providers.
*
* @return providers
*/
@VisibleForTesting
List<AWSCredentialsProvider> getProviders() {
return providers;
}
/**
* Verify that the provider list is not empty.
* @throws AmazonClientException if there are no providers.

View File

@ -40,6 +40,9 @@
import java.io.EOFException;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.lang.reflect.Constructor;
import java.lang.reflect.Method;
import java.lang.reflect.Modifier;
import java.net.URI;
import java.nio.file.AccessDeniedException;
import java.util.Date;
@ -66,6 +69,8 @@ public final class S3AUtils {
= "instantiation exception";
static final String NOT_AWS_PROVIDER =
"does not implement AWSCredentialsProvider";
static final String ABSTRACT_PROVIDER =
"is abstract and therefore cannot be created";
static final String ENDPOINT_KEY = "Endpoint";
private S3AUtils() {
@ -305,9 +310,15 @@ public static AWSCredentialProviderList createAWSCredentialProviderSet(
credentials.add(new BasicAWSCredentialsProvider(
creds.getUser(), creds.getPassword()));
credentials.add(new EnvironmentVariableCredentialsProvider());
credentials.add(new InstanceProfileCredentialsProvider());
credentials.add(
SharedInstanceProfileCredentialsProvider.getInstance());
} else {
for (Class<?> aClass : awsClasses) {
if (aClass == InstanceProfileCredentialsProvider.class) {
LOG.debug("Found {}, but will use {} instead.", aClass.getName(),
SharedInstanceProfileCredentialsProvider.class.getName());
aClass = SharedInstanceProfileCredentialsProvider.class;
}
credentials.add(createAWSCredentialProvider(conf,
aClass,
fsURI));
@ -317,7 +328,19 @@ public static AWSCredentialProviderList createAWSCredentialProviderSet(
}
/**
* Create an AWS credential provider.
* Create an AWS credential provider from its class by using reflection. The
* class must implement one of the following means of construction, which are
* attempted in order:
*
* <ol>
* <li>a public constructor accepting java.net.URI and
* org.apache.hadoop.conf.Configuration</li>
* <li>a public static method named getInstance that accepts no
* arguments and returns an instance of
* com.amazonaws.auth.AWSCredentialsProvider, or</li>
* <li>a public default constructor.</li>
* </ol>
*
* @param conf configuration
* @param credClass credential class
* @param uri URI of the FS
@ -328,32 +351,54 @@ static AWSCredentialsProvider createAWSCredentialProvider(
Configuration conf,
Class<?> credClass,
URI uri) throws IOException {
AWSCredentialsProvider credentials;
AWSCredentialsProvider credentials = null;
String className = credClass.getName();
if (!AWSCredentialsProvider.class.isAssignableFrom(credClass)) {
throw new IOException("Class " + credClass + " " + NOT_AWS_PROVIDER);
}
try {
LOG.debug("Credential provider class is {}", className);
try {
credentials =
(AWSCredentialsProvider) credClass.getDeclaredConstructor(
URI.class, Configuration.class).newInstance(uri, conf);
} catch (NoSuchMethodException | SecurityException e) {
credentials =
(AWSCredentialsProvider) credClass.getDeclaredConstructor()
.newInstance();
}
} catch (NoSuchMethodException | SecurityException e) {
throw new IOException(String.format("%s " + CONSTRUCTOR_EXCEPTION
+". A class specified in %s must provide an accessible constructor "
+ "accepting URI and Configuration, or an accessible default "
+ "constructor.", className, AWS_CREDENTIALS_PROVIDER), e);
} catch (ReflectiveOperationException | IllegalArgumentException e) {
throw new IOException(className + " " + INSTANTIATION_EXCEPTION +".", e);
if (Modifier.isAbstract(credClass.getModifiers())) {
throw new IOException("Class " + credClass + " " + ABSTRACT_PROVIDER);
}
LOG.debug("Credential provider class is {}", className);
try {
// new X(uri, conf)
Constructor cons = getConstructor(credClass, URI.class,
Configuration.class);
if (cons != null) {
credentials = (AWSCredentialsProvider)cons.newInstance(uri, conf);
return credentials;
}
// X.getInstance()
Method factory = getFactoryMethod(credClass, AWSCredentialsProvider.class,
"getInstance");
if (factory != null) {
credentials = (AWSCredentialsProvider)factory.invoke(null);
return credentials;
}
// new X()
cons = getConstructor(credClass);
if (cons != null) {
credentials = (AWSCredentialsProvider)cons.newInstance();
return credentials;
}
// no supported constructor or factory method found
throw new IOException(String.format("%s " + CONSTRUCTOR_EXCEPTION
+ ". A class specified in %s must provide a public constructor "
+ "accepting URI and Configuration, or a public factory method named "
+ "getInstance that accepts no arguments, or a public default "
+ "constructor.", className, AWS_CREDENTIALS_PROVIDER));
} catch (ReflectiveOperationException | IllegalArgumentException e) {
// supported constructor or factory method found, but the call failed
throw new IOException(className + " " + INSTANTIATION_EXCEPTION +".", e);
} finally {
if (credentials != null) {
LOG.debug("Using {} for {}.", credentials, uri);
}
}
LOG.debug("Using {} for {}.", credentials, uri);
return credentials;
}
/**
@ -499,4 +544,47 @@ public static int ensureOutputParameterInRange(String name, long size) {
return (int)size;
}
}
/**
* Returns the public constructor of {@code cl} specified by the list of
* {@code args} or {@code null} if {@code cl} has no public constructor that
* matches that specification.
* @param cl class
* @param args constructor argument types
* @return constructor or null
*/
private static Constructor<?> getConstructor(Class<?> cl, Class<?>... args) {
try {
Constructor cons = cl.getDeclaredConstructor(args);
return Modifier.isPublic(cons.getModifiers()) ? cons : null;
} catch (NoSuchMethodException | SecurityException e) {
return null;
}
}
/**
* Returns the public static method of {@code cl} that accepts no arguments
* and returns {@code returnType} specified by {@code methodName} or
* {@code null} if {@code cl} has no public static method that matches that
* specification.
* @param cl class
* @param returnType return type
* @param methodName method name
* @return method or null
*/
private static Method getFactoryMethod(Class<?> cl, Class<?> returnType,
String methodName) {
try {
Method m = cl.getDeclaredMethod(methodName);
if (Modifier.isPublic(m.getModifiers()) &&
Modifier.isStatic(m.getModifiers()) &&
returnType.isAssignableFrom(m.getReturnType())) {
return m;
} else {
return null;
}
} catch (NoSuchMethodException | SecurityException e) {
return null;
}
}
}

View File

@ -0,0 +1,67 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs.s3a;
import com.amazonaws.auth.InstanceProfileCredentialsProvider;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
/**
* A subclass of {@link InstanceProfileCredentialsProvider} that enforces
* instantiation of only a single instance.
* This credential provider calls the EC2 instance metadata service to obtain
* credentials. For highly multi-threaded applications, it's possible that
* multiple instances call the service simultaneously and overwhelm it with
* load. The service handles this by throttling the client with an HTTP 429
* response or forcibly terminating the connection. Forcing use of a single
* instance reduces load on the metadata service by allowing all threads to
* share the credentials. The base class is thread-safe, and there is nothing
* that varies in the credentials across different instances of
* {@link S3AFileSystem} connecting to different buckets, so sharing a singleton
* instance is safe.
*
* As of AWS SDK 1.11.39, the SDK code internally enforces a singleton. After
* Hadoop upgrades to that version or higher, it's likely that we can remove
* this class.
*/
@InterfaceAudience.Private
@InterfaceStability.Stable
public final class SharedInstanceProfileCredentialsProvider
extends InstanceProfileCredentialsProvider {
private static final SharedInstanceProfileCredentialsProvider INSTANCE =
new SharedInstanceProfileCredentialsProvider();
/**
* Returns the singleton instance.
*
* @return singleton instance
*/
public static SharedInstanceProfileCredentialsProvider getInstance() {
return INSTANCE;
}
/**
* Default constructor, defined explicitly as private to enforce singleton.
*/
private SharedInstanceProfileCredentialsProvider() {
super();
}
}

View File

@ -272,15 +272,37 @@ of `com.amazonaws.auth.AWSCredentialsProvider` may also be used.
com.amazonaws.auth.AWSCredentialsProvider.
These are loaded and queried in sequence for a valid set of credentials.
Each listed class must provide either an accessible constructor accepting
java.net.URI and org.apache.hadoop.conf.Configuration, or an accessible
default constructor.
Each listed class must implement one of the following means of
construction, which are attempted in order:
1. a public constructor accepting java.net.URI and
org.apache.hadoop.conf.Configuration,
2. a public static method named getInstance that accepts no
arguments and returns an instance of
com.amazonaws.auth.AWSCredentialsProvider, or
3. a public default constructor.
Specifying org.apache.hadoop.fs.s3a.AnonymousAWSCredentialsProvider allows
anonymous access to a publicly accessible S3 bucket without any credentials.
Please note that allowing anonymous access to an S3 bucket compromises
security and therefore is unsuitable for most use cases. It can be useful
for accessing public data sets without requiring AWS credentials.
If unspecified, then the default list of credential provider classes,
queried in sequence, is:
1. org.apache.hadoop.fs.s3a.BasicAWSCredentialsProvider: supports
static configuration of AWS access key ID and secret access key.
See also fs.s3a.access.key and fs.s3a.secret.key.
2. com.amazonaws.auth.EnvironmentVariableCredentialsProvider: supports
configuration of AWS access key ID and secret access key in
environment variables named AWS_ACCESS_KEY_ID and
AWS_SECRET_ACCESS_KEY, as documented in the AWS SDK.
3. org.apache.hadoop.fs.s3a.SharedInstanceProfileCredentialsProvider:
a shared instance of
com.amazonaws.auth.InstanceProfileCredentialsProvider from the AWS
SDK, which supports use of instance profile credentials if running
in an EC2 VM. Using this shared instance potentially reduces load
on the EC2 instance metadata service for multi-threaded
applications.
</description>
</property>
@ -353,12 +375,13 @@ AWS Credential Providers are classes which can be used by the Amazon AWS SDK to
obtain an AWS login from a different source in the system, including environment
variables, JVM properties and configuration files.
There are three AWS Credential Providers inside the `hadoop-aws` JAR:
There are four AWS Credential Providers inside the `hadoop-aws` JAR:
| classname | description |
|-----------|-------------|
| `org.apache.hadoop.fs.s3a.TemporaryAWSCredentialsProvider`| Session Credentials |
| `org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider`| Simple name/secret credentials |
| `org.apache.hadoop.fs.s3a.SharedInstanceProfileCredentialsProvider`| Shared instance of EC2 Metadata Credentials, which can reduce load on the EC2 instance metadata service. (See below.) |
| `org.apache.hadoop.fs.s3a.AnonymousAWSCredentialsProvider`| Anonymous Login |
There are also many in the Amazon SDKs, in particular two which are automatically
@ -370,6 +393,25 @@ set up in the authentication chain:
| `com.amazonaws.auth.EnvironmentVariableCredentialsProvider`| AWS Environment Variables |
*EC2 Metadata Credentials with `SharedInstanceProfileCredentialsProvider`*
Applications running in EC2 may associate an IAM role with the VM and query the
[EC2 Instance Metadata Service](http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ec2-instance-metadata.html)
for credentials to access S3. Within the AWS SDK, this functionality is
provided by `InstanceProfileCredentialsProvider`. Heavily multi-threaded
applications may trigger a high volume of calls to the instance metadata service
and trigger throttling: either an HTTP 429 response or a forcible close of the
connection.
To mitigate against this problem, `hadoop-aws` ships with a variant of
`InstanceProfileCredentialsProvider` called
`SharedInstanceProfileCredentialsProvider`. Using this ensures that all
instances of S3A reuse the same instance profile credentials instead of issuing
a large volume of redundant metadata service calls. If
`fs.s3a.aws.credentials.provider` refers to
`com.amazonaws.auth.InstanceProfileCredentialsProvider`, S3A automatically uses
`org.apache.hadoop.fs.s3a.SharedInstanceProfileCredentialsProvider` instead.
*Session Credentials with `TemporaryAWSCredentialsProvider`*
[Temporary Security Credentials](http://docs.aws.amazon.com/IAM/latest/UserGuide/id_credentials_temp.html)
@ -468,7 +510,7 @@ This means that the default S3A authentication chain can be defined as
<value>
org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider,
com.amazonaws.auth.EnvironmentVariableCredentialsProvider,
com.amazonaws.auth.InstanceProfileCredentialsProvider
org.apache.hadoop.fs.s3a.SharedInstanceProfileCredentialsProvider
</value>
</property>

View File

@ -28,7 +28,6 @@
import org.apache.hadoop.fs.Path;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.ExpectedException;
import org.junit.rules.Timeout;
import com.amazonaws.auth.AWSCredentials;
@ -41,12 +40,10 @@
import static org.apache.hadoop.fs.s3a.Constants.*;
import static org.apache.hadoop.fs.s3a.S3ATestConstants.*;
import static org.apache.hadoop.fs.s3a.S3AUtils.*;
import static org.junit.Assert.*;
/**
* Tests for {@link Constants#AWS_CREDENTIALS_PROVIDER} logic.
*
* Integration tests for {@link Constants#AWS_CREDENTIALS_PROVIDER} logic.
*/
public class ITestS3AAWSCredentialsProvider {
private static final Logger LOG =
@ -55,21 +52,6 @@ public class ITestS3AAWSCredentialsProvider {
@Rule
public Timeout testTimeout = new Timeout(1 * 60 * 1000);
@Rule
public ExpectedException exception = ExpectedException.none();
/**
* Declare what exception to raise, and the text which must be found
* in it.
* @param exceptionClass class of exception
* @param text text in exception
*/
private void expectException(Class<? extends Throwable> exceptionClass,
String text) {
exception.expect(exceptionClass);
exception.expectMessage(text);
}
@Test
public void testBadConfiguration() throws IOException {
Configuration conf = new Configuration();
@ -154,97 +136,4 @@ public void testAnonymousProvider() throws Exception {
assertNotNull(stat);
assertEquals(testFile, stat.getPath());
}
/**
* A credential provider whose constructor signature doesn't match.
*/
static class ConstructorSignatureErrorProvider
implements AWSCredentialsProvider {
@SuppressWarnings("unused")
public ConstructorSignatureErrorProvider(String str) {
}
@Override
public AWSCredentials getCredentials() {
return null;
}
@Override
public void refresh() {
}
}
/**
* A credential provider whose constructor raises an NPE.
*/
static class ConstructorFailureProvider
implements AWSCredentialsProvider {
@SuppressWarnings("unused")
public ConstructorFailureProvider() {
throw new NullPointerException("oops");
}
@Override
public AWSCredentials getCredentials() {
return null;
}
@Override
public void refresh() {
}
}
@Test
public void testProviderWrongClass() throws Exception {
expectProviderInstantiationFailure(this.getClass().getName(),
NOT_AWS_PROVIDER);
}
@Test
public void testProviderNotAClass() throws Exception {
expectProviderInstantiationFailure("NoSuchClass",
"ClassNotFoundException");
}
private void expectProviderInstantiationFailure(String option,
String expectedErrorText) throws IOException {
Configuration conf = new Configuration();
conf.set(AWS_CREDENTIALS_PROVIDER, option);
Path testFile = new Path(
conf.getTrimmed(KEY_CSVTEST_FILE, DEFAULT_CSVTEST_FILE));
expectException(IOException.class, expectedErrorText);
URI uri = testFile.toUri();
S3AUtils.createAWSCredentialProviderSet(uri, conf, uri);
}
@Test
public void testProviderConstructorError() throws Exception {
expectProviderInstantiationFailure(
ConstructorSignatureErrorProvider.class.getName(),
CONSTRUCTOR_EXCEPTION);
}
@Test
public void testProviderFailureError() throws Exception {
expectProviderInstantiationFailure(
ConstructorFailureProvider.class.getName(),
INSTANTIATION_EXCEPTION);
}
@Test
public void testInstantiationChain() throws Throwable {
Configuration conf = new Configuration();
conf.set(AWS_CREDENTIALS_PROVIDER,
TemporaryAWSCredentialsProvider.NAME
+ ", \t" + SimpleAWSCredentialsProvider.NAME
+ " ,\n " + AnonymousAWSCredentialsProvider.NAME);
Path testFile = new Path(
conf.getTrimmed(KEY_CSVTEST_FILE, DEFAULT_CSVTEST_FILE));
URI uri = testFile.toUri();
S3AUtils.createAWSCredentialProviderSet(uri, conf, uri);
}
}

View File

@ -28,6 +28,7 @@
import java.io.IOException;
import java.net.URI;
import java.util.List;
import static org.apache.hadoop.fs.contract.ContractTestUtils.skip;
import static org.apache.hadoop.fs.s3a.S3ATestConstants.*;
@ -36,7 +37,7 @@
/**
* Utilities for the S3A tests.
*/
public class S3ATestUtils {
public final class S3ATestUtils {
/**
* Value to set a system property to (in maven) to declare that
@ -130,7 +131,7 @@ public static FileContext createTestFileContext(Configuration conf)
throw new AssumptionViolatedException("No test filesystem in "
+ TEST_FS_S3A_NAME);
}
FileContext fc = FileContext.getFileContext(testURI,conf);
FileContext fc = FileContext.getFileContext(testURI, conf);
return fc;
}
@ -446,7 +447,7 @@ public boolean diffLessThanOrEquals(MetricDiff that) {
}
/**
* Get the statistic
* Get the statistic.
* @return the statistic
*/
public Statistic getStatistic() {
@ -461,4 +462,39 @@ public long getStartingValue() {
return startingValue;
}
}
/**
* Asserts that {@code obj} is an instance of {@code expectedClass} using a
* descriptive assertion message.
* @param expectedClass class
* @param obj object to check
*/
public static void assertInstanceOf(Class<?> expectedClass, Object obj) {
Assert.assertTrue(String.format("Expected instance of class %s, but is %s.",
expectedClass, obj.getClass()),
expectedClass.isAssignableFrom(obj.getClass()));
}
/**
* Builds a comma-separated list of class names.
* @param classes list of classes
* @return comma-separated list of class names
*/
public static <T extends Class<?>> String buildClassListString(
List<T> classes) {
StringBuilder sb = new StringBuilder();
for (int i = 0; i < classes.size(); ++i) {
if (i > 0) {
sb.append(',');
}
sb.append(classes.get(i).getName());
}
return sb.toString();
}
/**
* This class should not be instantiated.
*/
private S3ATestUtils() {
}
}

View File

@ -0,0 +1,273 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs.s3a;
import static org.apache.hadoop.fs.s3a.Constants.*;
import static org.apache.hadoop.fs.s3a.S3ATestConstants.*;
import static org.apache.hadoop.fs.s3a.S3ATestUtils.*;
import static org.apache.hadoop.fs.s3a.S3AUtils.*;
import static org.junit.Assert.*;
import java.io.IOException;
import java.net.URI;
import java.util.Arrays;
import java.util.List;
import com.amazonaws.auth.AWSCredentials;
import com.amazonaws.auth.AWSCredentialsProvider;
import com.amazonaws.auth.EnvironmentVariableCredentialsProvider;
import com.amazonaws.auth.InstanceProfileCredentialsProvider;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.ExpectedException;
/**
* Unit tests for {@link Constants#AWS_CREDENTIALS_PROVIDER} logic.
*/
public class TestS3AAWSCredentialsProvider {
@Rule
public ExpectedException exception = ExpectedException.none();
@Test
public void testProviderWrongClass() throws Exception {
expectProviderInstantiationFailure(this.getClass().getName(),
NOT_AWS_PROVIDER);
}
@Test
public void testProviderAbstractClass() throws Exception {
expectProviderInstantiationFailure(AbstractProvider.class.getName(),
ABSTRACT_PROVIDER);
}
@Test
public void testProviderNotAClass() throws Exception {
expectProviderInstantiationFailure("NoSuchClass",
"ClassNotFoundException");
}
@Test
public void testProviderConstructorError() throws Exception {
expectProviderInstantiationFailure(
ConstructorSignatureErrorProvider.class.getName(),
CONSTRUCTOR_EXCEPTION);
}
@Test
public void testProviderFailureError() throws Exception {
expectProviderInstantiationFailure(
ConstructorFailureProvider.class.getName(),
INSTANTIATION_EXCEPTION);
}
@Test
public void testInstantiationChain() throws Throwable {
Configuration conf = new Configuration();
conf.set(AWS_CREDENTIALS_PROVIDER,
TemporaryAWSCredentialsProvider.NAME
+ ", \t" + SimpleAWSCredentialsProvider.NAME
+ " ,\n " + AnonymousAWSCredentialsProvider.NAME);
Path testFile = new Path(
conf.getTrimmed(KEY_CSVTEST_FILE, DEFAULT_CSVTEST_FILE));
URI uri = testFile.toUri();
AWSCredentialProviderList list = S3AUtils.createAWSCredentialProviderSet(
uri, conf, uri);
List<Class<? extends AWSCredentialsProvider>> expectedClasses =
Arrays.asList(
TemporaryAWSCredentialsProvider.class,
SimpleAWSCredentialsProvider.class,
AnonymousAWSCredentialsProvider.class);
assertCredentialProviders(expectedClasses, list);
}
@Test
public void testDefaultChain() throws Exception {
URI uri1 = new URI("s3a://bucket1"), uri2 = new URI("s3a://bucket2");
Configuration conf = new Configuration();
AWSCredentialProviderList list1 = S3AUtils.createAWSCredentialProviderSet(
uri1, conf, uri1);
AWSCredentialProviderList list2 = S3AUtils.createAWSCredentialProviderSet(
uri2, conf, uri2);
List<Class<? extends AWSCredentialsProvider>> expectedClasses =
Arrays.asList(
BasicAWSCredentialsProvider.class,
EnvironmentVariableCredentialsProvider.class,
SharedInstanceProfileCredentialsProvider.class);
assertCredentialProviders(expectedClasses, list1);
assertCredentialProviders(expectedClasses, list2);
assertSameInstanceProfileCredentialsProvider(list1.getProviders().get(2),
list2.getProviders().get(2));
}
@Test
public void testConfiguredChain() throws Exception {
URI uri1 = new URI("s3a://bucket1"), uri2 = new URI("s3a://bucket2");
Configuration conf = new Configuration();
List<Class<? extends AWSCredentialsProvider>> expectedClasses =
Arrays.asList(
EnvironmentVariableCredentialsProvider.class,
SharedInstanceProfileCredentialsProvider.class,
AnonymousAWSCredentialsProvider.class);
conf.set(AWS_CREDENTIALS_PROVIDER, buildClassListString(expectedClasses));
AWSCredentialProviderList list1 = S3AUtils.createAWSCredentialProviderSet(
uri1, conf, uri1);
AWSCredentialProviderList list2 = S3AUtils.createAWSCredentialProviderSet(
uri2, conf, uri2);
assertCredentialProviders(expectedClasses, list1);
assertCredentialProviders(expectedClasses, list2);
assertSameInstanceProfileCredentialsProvider(list1.getProviders().get(1),
list2.getProviders().get(1));
}
@Test
public void testConfiguredChainUsesSharedInstanceProfile() throws Exception {
URI uri1 = new URI("s3a://bucket1"), uri2 = new URI("s3a://bucket2");
Configuration conf = new Configuration();
List<Class<? extends AWSCredentialsProvider>> expectedClasses =
Arrays.<Class<? extends AWSCredentialsProvider>>asList(
InstanceProfileCredentialsProvider.class);
conf.set(AWS_CREDENTIALS_PROVIDER, buildClassListString(expectedClasses));
AWSCredentialProviderList list1 = S3AUtils.createAWSCredentialProviderSet(
uri1, conf, uri1);
AWSCredentialProviderList list2 = S3AUtils.createAWSCredentialProviderSet(
uri2, conf, uri2);
assertCredentialProviders(expectedClasses, list1);
assertCredentialProviders(expectedClasses, list2);
assertSameInstanceProfileCredentialsProvider(list1.getProviders().get(0),
list2.getProviders().get(0));
}
/**
* A credential provider declared as abstract, so it cannot be instantiated.
*/
static abstract class AbstractProvider implements AWSCredentialsProvider {
}
/**
* A credential provider whose constructor signature doesn't match.
*/
static class ConstructorSignatureErrorProvider
implements AWSCredentialsProvider {
@SuppressWarnings("unused")
public ConstructorSignatureErrorProvider(String str) {
}
@Override
public AWSCredentials getCredentials() {
return null;
}
@Override
public void refresh() {
}
}
/**
* A credential provider whose constructor raises an NPE.
*/
static class ConstructorFailureProvider
implements AWSCredentialsProvider {
@SuppressWarnings("unused")
public ConstructorFailureProvider() {
throw new NullPointerException("oops");
}
@Override
public AWSCredentials getCredentials() {
return null;
}
@Override
public void refresh() {
}
}
/**
* Declare what exception to raise, and the text which must be found
* in it.
* @param exceptionClass class of exception
* @param text text in exception
*/
private void expectException(Class<? extends Throwable> exceptionClass,
String text) {
exception.expect(exceptionClass);
exception.expectMessage(text);
}
private void expectProviderInstantiationFailure(String option,
String expectedErrorText) throws IOException {
Configuration conf = new Configuration();
conf.set(AWS_CREDENTIALS_PROVIDER, option);
Path testFile = new Path(
conf.getTrimmed(KEY_CSVTEST_FILE, DEFAULT_CSVTEST_FILE));
expectException(IOException.class, expectedErrorText);
URI uri = testFile.toUri();
S3AUtils.createAWSCredentialProviderSet(uri, conf, uri);
}
/**
* Asserts expected provider classes in list.
* @param expectedClasses expected provider classes
* @param list providers to check
*/
private static void assertCredentialProviders(
List<Class<? extends AWSCredentialsProvider>> expectedClasses,
AWSCredentialProviderList list) {
assertNotNull(list);
List<AWSCredentialsProvider> providers = list.getProviders();
assertEquals(expectedClasses.size(), providers.size());
for (int i = 0; i < expectedClasses.size(); ++i) {
Class<? extends AWSCredentialsProvider> expectedClass =
expectedClasses.get(i);
AWSCredentialsProvider provider = providers.get(i);
assertNotNull(
String.format("At position %d, expected class is %s, but found null.",
i, expectedClass), provider);
assertTrue(
String.format("At position %d, expected class is %s, but found %s.",
i, expectedClass, provider.getClass()),
expectedClass.isAssignableFrom(provider.getClass()));
}
}
/**
* Asserts that two different references point to the same shared instance of
* InstanceProfileCredentialsProvider using a descriptive assertion message.
* @param provider1 provider to check
* @param provider2 provider to check
*/
private static void assertSameInstanceProfileCredentialsProvider(
AWSCredentialsProvider provider1, AWSCredentialsProvider provider2) {
assertNotNull(provider1);
assertInstanceOf(InstanceProfileCredentialsProvider.class, provider1);
assertNotNull(provider2);
assertInstanceOf(InstanceProfileCredentialsProvider.class, provider2);
assertSame("Expected all usage of InstanceProfileCredentialsProvider to "
+ "share a singleton instance, but found unique instances.",
provider1, provider2);
}
}