diff --git a/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml b/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml index f3dbc5b6d4b..746541a4997 100644 --- a/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml +++ b/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml @@ -760,6 +760,13 @@ + + fs.s3a.path.style.access + Enable S3 path style access, i.e. disable the default virtual hosting behaviour. + Useful for S3-compatible storage providers as it removes the need to set up DNS for virtual hosting. + + + fs.s3a.proxy.host Hostname of the (optional) proxy server for S3 connections. diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java index faa760cac38..a1707479622 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java @@ -28,13 +28,17 @@ public class Constants { // number of simultaneous connections to s3 public static final String MAXIMUM_CONNECTIONS = "fs.s3a.connection.maximum"; public static final int DEFAULT_MAXIMUM_CONNECTIONS = 15; - + // connect to s3 over ssl? public static final String SECURE_CONNECTIONS = "fs.s3a.connection.ssl.enabled"; public static final boolean DEFAULT_SECURE_CONNECTIONS = true; //use a custom endpoint? public static final String ENDPOINT = "fs.s3a.endpoint"; + + //Enable path style access? Overrides default virtual hosting + public static final String PATH_STYLE_ACCESS = "fs.s3a.path.style.access"; + //connect to s3 through a proxy server? 
public static final String PROXY_HOST = "fs.s3a.proxy.host"; public static final String PROXY_PORT = "fs.s3a.proxy.port"; @@ -50,7 +54,7 @@ public class Constants { // seconds until we give up trying to establish a connection to s3 public static final String ESTABLISH_TIMEOUT = "fs.s3a.connection.establish.timeout"; public static final int DEFAULT_ESTABLISH_TIMEOUT = 50000; - + // seconds until we give up on a connection to s3 public static final String SOCKET_TIMEOUT = "fs.s3a.connection.timeout"; public static final int DEFAULT_SOCKET_TIMEOUT = 200000; @@ -79,7 +83,7 @@ public class Constants { // size of each of or multipart pieces in bytes public static final String MULTIPART_SIZE = "fs.s3a.multipart.size"; public static final long DEFAULT_MULTIPART_SIZE = 104857600; // 100 MB - + // minimum size in bytes before we start a multipart uploads or copy public static final String MIN_MULTIPART_THRESHOLD = "fs.s3a.multipart.threshold"; public static final long DEFAULT_MIN_MULTIPART_THRESHOLD = Integer.MAX_VALUE; diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java index 23d17fb0384..83d1d53cf53 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java @@ -41,6 +41,7 @@ import com.amazonaws.auth.AWSCredentialsProviderChain; import com.amazonaws.auth.InstanceProfileCredentialsProvider; import com.amazonaws.services.s3.AmazonS3Client; +import com.amazonaws.services.s3.S3ClientOptions; import com.amazonaws.services.s3.model.CannedAccessControlList; import com.amazonaws.services.s3.model.DeleteObjectRequest; import com.amazonaws.services.s3.model.DeleteObjectsRequest; @@ -302,6 +303,15 @@ public class S3AFileSystem extends FileSystem { throw new IllegalArgumentException(msg, e); } } + enablePathStyleAccessIfRequired(conf); + 
} + + private void enablePathStyleAccessIfRequired(Configuration conf) { + final boolean pathStyleAccess = conf.getBoolean(PATH_STYLE_ACCESS, false); + if (pathStyleAccess) { + LOG.debug("Enabling path style access!"); + s3.setS3ClientOptions(new S3ClientOptions().withPathStyleAccess(true)); + } } private void initTransferManager() { diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md index 6c3f31e84c0..15b9837f72f 100644 --- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md +++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md @@ -221,6 +221,13 @@ this capability. + + fs.s3a.path.style.access + Enable S3 path style access ie disabling the default virtual hosting behaviour. + Useful for S3A-compliant storage providers as it removes the need to set up DNS for virtual hosting. + + + fs.s3a.proxy.host Hostname of the (optional) proxy server for S3 connections. 
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AConfiguration.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AConfiguration.java index ae1539d4c8d..4a0bfbbfc07 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AConfiguration.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AConfiguration.java @@ -19,10 +19,14 @@ package org.apache.hadoop.fs.s3a; import com.amazonaws.services.s3.AmazonS3Client; +import com.amazonaws.services.s3.S3ClientOptions; +import com.amazonaws.services.s3.model.AmazonS3Exception; + import org.apache.commons.lang.StringUtils; import com.amazonaws.AmazonClientException; import org.apache.hadoop.conf.Configuration; - +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.contract.ContractTestUtils; import org.junit.Rule; import org.junit.Test; import org.junit.rules.Timeout; @@ -30,17 +34,19 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertTrue; import static org.junit.Assert.assertEquals; import static org.junit.Assert.fail; import java.io.File; import java.net.URI; -import java.io.IOException; +import java.lang.reflect.Field; import org.apache.hadoop.security.ProviderUtils; import org.apache.hadoop.security.alias.CredentialProvider; import org.apache.hadoop.security.alias.CredentialProviderFactory; - +import org.apache.http.HttpStatus; import org.junit.rules.TemporaryFolder; public class TestS3AConfiguration { @@ -354,4 +360,39 @@ public class TestS3AConfiguration { assertEquals("SecretKey incorrect.", "456", creds.getAccessSecret()); } + + @Test + public void shouldBeAbleToSwitchOnS3PathStyleAccessViaConfigProperty() throws Exception { + + conf = new Configuration(); + conf.set(Constants.PATH_STYLE_ACCESS, Boolean.toString(true)); + 
assertTrue(conf.getBoolean(Constants.PATH_STYLE_ACCESS, false)); + + try { + fs = S3ATestUtils.createTestFileSystem(conf); + final Object object = getClientOptionsField(fs.getAmazonS3Client(), "clientOptions"); + assertNotNull(object); + assertTrue("Unexpected type found for clientOptions!", object instanceof S3ClientOptions); + assertTrue("Expected to find path style access to be switched on!", ((S3ClientOptions) object).isPathStyleAccess()); + byte[] file = ContractTestUtils.toAsciiByteArray("test file"); + ContractTestUtils.writeAndRead(fs, new Path("/path/style/access/testFile"), file, file.length, conf.getInt(Constants.FS_S3A_BLOCK_SIZE, file.length), false, true); + } catch (final AmazonS3Exception e) { + LOG.error("Caught exception: ", e); + // Catch/pass standard path style access behaviour when live bucket + // isn't in the same region as the s3 client default. See + // http://docs.aws.amazon.com/AmazonS3/latest/dev/VirtualHosting.html + assertEquals(e.getStatusCode(), HttpStatus.SC_MOVED_PERMANENTLY); + } + } + + private Object getClientOptionsField(AmazonS3Client s3client, String field) + throws NoSuchFieldException, IllegalAccessException { + final Field clientOptionsProps = s3client.getClass().getDeclaredField(field); + assertNotNull(clientOptionsProps); + if (!clientOptionsProps.isAccessible()) { + clientOptionsProps.setAccessible(true); + } + final Object object = clientOptionsProps.get(s3client); + return object; + } }