HADOOP-12963 Allow using path style addressing for accessing the s3 endpoint. (Stephen Montgomery via stevel)

This commit is contained in:
Steve Loughran 2016-04-14 12:44:55 +01:00
parent a59e05ad0a
commit e124c3a2ae
5 changed files with 75 additions and 6 deletions

View File

@ -760,6 +760,13 @@
</description>
</property>
<property>
<name>fs.s3a.path.style.access</name>
<description>Enable S3 path style access ie disabling the default virtual hosting behaviour.
Useful for S3A-compliant storage providers as it removes the need to set up DNS for virtual hosting.
</description>
</property>
<property>
<name>fs.s3a.proxy.host</name>
<description>Hostname of the (optional) proxy server for S3 connections.</description>

View File

@ -28,13 +28,17 @@ public class Constants {
// number of simultaneous connections to s3
public static final String MAXIMUM_CONNECTIONS = "fs.s3a.connection.maximum";
public static final int DEFAULT_MAXIMUM_CONNECTIONS = 15;
// connect to s3 over ssl?
public static final String SECURE_CONNECTIONS = "fs.s3a.connection.ssl.enabled";
public static final boolean DEFAULT_SECURE_CONNECTIONS = true;
//use a custom endpoint?
public static final String ENDPOINT = "fs.s3a.endpoint";
//Enable path style access? Overrides default virtual hosting
public static final String PATH_STYLE_ACCESS = "fs.s3a.path.style.access";
//connect to s3 through a proxy server?
public static final String PROXY_HOST = "fs.s3a.proxy.host";
public static final String PROXY_PORT = "fs.s3a.proxy.port";
@ -50,7 +54,7 @@ public class Constants {
// seconds until we give up trying to establish a connection to s3
public static final String ESTABLISH_TIMEOUT = "fs.s3a.connection.establish.timeout";
public static final int DEFAULT_ESTABLISH_TIMEOUT = 50000;
// seconds until we give up on a connection to s3
public static final String SOCKET_TIMEOUT = "fs.s3a.connection.timeout";
public static final int DEFAULT_SOCKET_TIMEOUT = 200000;
@ -79,7 +83,7 @@ public class Constants {
// size of each of or multipart pieces in bytes
public static final String MULTIPART_SIZE = "fs.s3a.multipart.size";
public static final long DEFAULT_MULTIPART_SIZE = 104857600; // 100 MB
// minimum size in bytes before we start a multipart uploads or copy
public static final String MIN_MULTIPART_THRESHOLD = "fs.s3a.multipart.threshold";
public static final long DEFAULT_MIN_MULTIPART_THRESHOLD = Integer.MAX_VALUE;

View File

@ -41,6 +41,7 @@ import com.amazonaws.auth.AWSCredentialsProviderChain;
import com.amazonaws.auth.InstanceProfileCredentialsProvider;
import com.amazonaws.services.s3.AmazonS3Client;
import com.amazonaws.services.s3.S3ClientOptions;
import com.amazonaws.services.s3.model.CannedAccessControlList;
import com.amazonaws.services.s3.model.DeleteObjectRequest;
import com.amazonaws.services.s3.model.DeleteObjectsRequest;
@ -302,6 +303,15 @@ public class S3AFileSystem extends FileSystem {
throw new IllegalArgumentException(msg, e);
}
}
enablePathStyleAccessIfRequired(conf);
}
private void enablePathStyleAccessIfRequired(Configuration conf) {
final boolean pathStyleAccess = conf.getBoolean(PATH_STYLE_ACCESS, false);
if (pathStyleAccess) {
LOG.debug("Enabling path style access!");
s3.setS3ClientOptions(new S3ClientOptions().withPathStyleAccess(true));
}
}
private void initTransferManager() {

View File

@ -221,6 +221,13 @@ this capability.
</description>
</property>
<property>
<name>fs.s3a.path.style.access</name>
<description>Enable S3 path style access ie disabling the default virtual hosting behaviour.
Useful for S3A-compliant storage providers as it removes the need to set up DNS for virtual hosting.
</description>
</property>
<property>
<name>fs.s3a.proxy.host</name>
<description>Hostname of the (optional) proxy server for S3 connections.</description>

View File

@ -19,10 +19,14 @@
package org.apache.hadoop.fs.s3a;
import com.amazonaws.services.s3.AmazonS3Client;
import com.amazonaws.services.s3.S3ClientOptions;
import com.amazonaws.services.s3.model.AmazonS3Exception;
import org.apache.commons.lang.StringUtils;
import com.amazonaws.AmazonClientException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.contract.ContractTestUtils;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.Timeout;
@ -30,17 +34,19 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.fail;
import java.io.File;
import java.net.URI;
import java.io.IOException;
import java.lang.reflect.Field;
import org.apache.hadoop.security.ProviderUtils;
import org.apache.hadoop.security.alias.CredentialProvider;
import org.apache.hadoop.security.alias.CredentialProviderFactory;
import org.apache.http.HttpStatus;
import org.junit.rules.TemporaryFolder;
public class TestS3AConfiguration {
@ -354,4 +360,39 @@ public class TestS3AConfiguration {
assertEquals("SecretKey incorrect.", "456", creds.getAccessSecret());
}
@Test
public void shouldBeAbleToSwitchOnS3PathStyleAccessViaConfigProperty() throws Exception {
conf = new Configuration();
conf.set(Constants.PATH_STYLE_ACCESS, Boolean.toString(true));
assertTrue(conf.getBoolean(Constants.PATH_STYLE_ACCESS, false));
try {
fs = S3ATestUtils.createTestFileSystem(conf);
final Object object = getClientOptionsField(fs.getAmazonS3Client(), "clientOptions");
assertNotNull(object);
assertTrue("Unexpected type found for clientOptions!", object instanceof S3ClientOptions);
assertTrue("Expected to find path style access to be switched on!", ((S3ClientOptions) object).isPathStyleAccess());
byte[] file = ContractTestUtils.toAsciiByteArray("test file");
ContractTestUtils.writeAndRead(fs, new Path("/path/style/access/testFile"), file, file.length, conf.getInt(Constants.FS_S3A_BLOCK_SIZE, file.length), false, true);
} catch (final AmazonS3Exception e) {
LOG.error("Caught exception: ", e);
// Catch/pass standard path style access behaviour when live bucket
// isn't in the same region as the s3 client default. See
// http://docs.aws.amazon.com/AmazonS3/latest/dev/VirtualHosting.html
assertEquals(e.getStatusCode(), HttpStatus.SC_MOVED_PERMANENTLY);
}
}
private Object getClientOptionsField(AmazonS3Client s3client, String field)
throws NoSuchFieldException, IllegalAccessException {
final Field clientOptionsProps = s3client.getClass().getDeclaredField(field);
assertNotNull(clientOptionsProps);
if (!clientOptionsProps.isAccessible()) {
clientOptionsProps.setAccessible(true);
}
final Object object = clientOptionsProps.get(s3client);
return object;
}
}