HADOOP-16346. Stabilize S3A OpenSSL support.

Introduces `openssl` as an option for `fs.s3a.ssl.channel.mode`.
The new option is documented and marked as experimental.

For details on how to use this, consult the performance document
in the s3a documentation.

This patch is the successor to HADOOP-16050 "S3A SSL connections
should use OpenSSL", which was reverted because of
incompatibilities between the wildfly OpenSSL client and the AWS
HTTPS servers (HADOOP-16347). With the Wildfly release moved up
to 1.0.7.Final (HADOOP-16405) everything should now work.

Related issues:

* HADOOP-15669. ABFS: Improve HTTPS Performance
* HADOOP-16050: S3A SSL connections should use OpenSSL
* HADOOP-16371: Option to disable GCM for SSL connections when running on Java 8
* HADOOP-16405: Upgrade Wildfly Openssl version to 1.0.7.Final

Contributed by Sahil Takiar

Change-Id: I80a4bc5051519f186b7383b2c1cea140be42444e
This commit is contained in:
Sahil Takiar 2020-01-21 16:37:51 +00:00 committed by Steve Loughran
parent d887e49dd4
commit f206b736f0
No known key found for this signature in database
GPG Key ID: D22CF846DBB162A0
9 changed files with 103 additions and 25 deletions

View File

@ -346,6 +346,11 @@
<dependency>
<groupId>org.wildfly.openssl</groupId>
<artifactId>wildfly-openssl</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.wildfly.openssl</groupId>
<artifactId>wildfly-openssl-java</artifactId>
<scope>provided</scope>
</dependency>
</dependencies>

View File

@ -58,6 +58,10 @@ import org.wildfly.openssl.SSL;
* SSL with no modification to the list of enabled ciphers.</li>
* </ul>
* </p>
*
* In order to load OpenSSL, applications must ensure the wildfly-openssl
* artifact is on the classpath. Currently, only ABFS and S3A provide
* wildfly-openssl as a runtime dependency.
*/
public final class DelegatingSSLSocketFactory extends SSLSocketFactory {
@ -170,8 +174,14 @@ public final class DelegatingSSLSocketFactory extends SSLSocketFactory {
OpenSSLProvider.register();
openSSLProviderRegistered = true;
}
java.util.logging.Logger logger = java.util.logging.Logger.getLogger(
SSL.class.getName());
logger.setLevel(Level.WARNING);
ctx = SSLContext.getInstance("openssl.TLS");
ctx.init(null, null, null);
// A strong reference to the logger must be kept until initialization of
// the SSLContext has finished (see HADOOP-16174):
logger.setLevel(Level.INFO);
channelMode = SSLChannelMode.OpenSSL;
break;
case Default_JSSE:

View File

@ -1978,11 +1978,16 @@
<description>
If secure connections to S3 are enabled, configures the SSL
implementation used to encrypt connections to S3. Supported values are:
"default_jsse" and "default_jsse_with_gcm". "default_jsse" uses the Java
Secure Socket Extension package (JSSE). However, when running on Java 8,
the GCM cipher is removed from the list of enabled ciphers. This is due
to performance issues with GCM in Java 8. "default_jsse_with_gcm" uses
the JSSE with the default list of cipher suites.
"default_jsse", "default_jsse_with_gcm", "default", and "openssl".
"default_jsse" uses the Java Secure Socket Extension package (JSSE).
However, when running on Java 8, the GCM cipher is removed from the list
of enabled ciphers. This is due to performance issues with GCM in Java 8.
"default_jsse_with_gcm" uses the JSSE with the default list of cipher
suites. "default_jsse_with_gcm" is equivalent to the behavior prior to
this feature being introduced. "default" attempts to use OpenSSL rather
than the JSSE for SSL encryption; if OpenSSL libraries cannot be loaded,
it falls back to the "default_jsse" behavior. "openssl" attempts to use
OpenSSL as well, but fails if OpenSSL libraries cannot be loaded.
</description>
</property>

View File

@ -196,6 +196,7 @@
<jline.version>3.9.0</jline.version>
<powermock.version>1.5.6</powermock.version>
<solr.version>7.7.0</solr.version>
<openssl-wildfly.version>1.0.7.Final</openssl-wildfly.version>
</properties>
<dependencyManagement>
@ -1370,7 +1371,12 @@
<dependency>
<groupId>org.wildfly.openssl</groupId>
<artifactId>wildfly-openssl</artifactId>
<version>1.0.7.Final</version>
<version>${openssl-wildfly.version}</version>
</dependency>
<dependency>
<groupId>org.wildfly.openssl</groupId>
<artifactId>wildfly-openssl-java</artifactId>
<version>${openssl-wildfly.version}</version>
</dependency>
<dependency>

View File

@ -430,6 +430,11 @@
<artifactId>assertj-core</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.wildfly.openssl</groupId>
<artifactId>wildfly-openssl</artifactId>
<scope>runtime</scope>
</dependency>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>

View File

@ -80,13 +80,6 @@ public class NetworkBinding {
throw new IllegalArgumentException(channelModeString +
" is not a valid value for " + SSL_CHANNEL_MODE);
}
if (channelMode == DelegatingSSLSocketFactory.SSLChannelMode.OpenSSL ||
channelMode == DelegatingSSLSocketFactory.SSLChannelMode.Default) {
throw new UnsupportedOperationException("S3A does not support " +
"setting " + SSL_CHANNEL_MODE + " " +
DelegatingSSLSocketFactory.SSLChannelMode.OpenSSL + " or " +
DelegatingSSLSocketFactory.SSLChannelMode.Default);
}
// Look for AWS_SOCKET_FACTORY_CLASSNAME on the classpath and instantiate
// an instance using the DelegatingSSLSocketFactory as the

View File

@ -535,10 +535,41 @@ GCM cipher is only disabled on Java 8. GCM performance has been improved
in Java 9, so if `default_jsse` is specified and applications run on Java
9, they should see no difference compared to running with the vanilla JSSE.
Other options for `fs.s3a.ssl.channel.mode` include `default_jsse_with_gcm`.
This option includes GCM in the list of cipher suites on Java 8, so it is
equivalent to running with the vanilla JSSE. The naming convention is set up
in order to preserve backwards compatibility with HADOOP-15669.
`fs.s3a.ssl.channel.mode` can be set to `default_jsse_with_gcm`. This option
includes GCM in the list of cipher suites on Java 8, so it is equivalent to
running with the vanilla JSSE.
### OpenSSL Acceleration
**Experimental Feature**
As of HADOOP-16050 and HADOOP-16346, `fs.s3a.ssl.channel.mode` can be set to
either `default` or `openssl` to enable native OpenSSL acceleration of HTTPS
requests. OpenSSL implements the SSL and TLS protocols using native code. For
users reading a large amount of data over HTTPS, OpenSSL can provide a
significant performance benefit over the JSSE.
S3A uses the
[WildFly OpenSSL](https://github.com/wildfly-security/wildfly-openssl) library
to bind OpenSSL to the Java JSSE APIs. This library allows S3A to
transparently read data using OpenSSL. The wildfly-openssl library is a
runtime dependency of S3A and contains native libraries for binding the Java
JSSE to OpenSSL.
WildFly OpenSSL must load OpenSSL itself. This can be done using the system
property `org.wildfly.openssl.path`. For example,
`HADOOP_OPTS="-Dorg.wildfly.openssl.path=<path to OpenSSL libraries>
${HADOOP_OPTS}"`. See WildFly OpenSSL documentation for more details.
When `fs.s3a.ssl.channel.mode` is set to `default`, S3A will attempt to load
the OpenSSL libraries using the WildFly library. If it is unsuccessful, it
will fall back to the `default_jsse` behavior.
When `fs.s3a.ssl.channel.mode` is set to `openssl`, S3A will attempt to load
the OpenSSL libraries using WildFly. If it is unsuccessful, it will throw an
exception and S3A initialization will fail.
### `fs.s3a.ssl.channel.mode` Configuration
`fs.s3a.ssl.channel.mode` can be configured as follows:
@ -549,11 +580,16 @@ in order to preserve backwards compatibility with HADOOP-15669.
<description>
If secure connections to S3 are enabled, configures the SSL
implementation used to encrypt connections to S3. Supported values are:
"default_jsse" and "default_jsse_with_gcm". "default_jsse" uses the Java
Secure Socket Extension package (JSSE). However, when running on Java 8,
the GCM cipher is removed from the list of enabled ciphers. This is due
to performance issues with GCM in Java 8. "default_jsse_with_gcm" uses
the JSSE with the default list of cipher suites.
"default_jsse", "default_jsse_with_gcm", "default", and "openssl".
"default_jsse" uses the Java Secure Socket Extension package (JSSE).
However, when running on Java 8, the GCM cipher is removed from the list
of enabled ciphers. This is due to performance issues with GCM in Java 8.
"default_jsse_with_gcm" uses the JSSE with the default list of cipher
suites. "default_jsse_with_gcm" is equivalent to the behavior prior to
this feature being introduced. "default" attempts to use OpenSSL rather
than the JSSE for SSL encryption; if OpenSSL libraries cannot be loaded,
it falls back to the "default_jsse" behavior. "openssl" attempts to use
OpenSSL as well, but fails if OpenSSL libraries cannot be loaded.
</description>
</property>
```
@ -564,6 +600,11 @@ Supported values for `fs.s3a.ssl.channel.mode`:
|-------------------------------|-------------|
| default_jsse | Uses Java JSSE without GCM on Java 8 |
| default_jsse_with_gcm | Uses Java JSSE |
| default | Uses OpenSSL, falls back to default_jsse if OpenSSL cannot be loaded |
| openssl | Uses OpenSSL, fails if OpenSSL cannot be loaded |
The naming convention is set up in order to preserve backwards compatibility
with HADOOP-15669.
Other options may be added to `fs.s3a.ssl.channel.mode` in the future as
further SSL optimizations are made.

View File

@ -24,6 +24,7 @@ import java.net.URISyntaxException;
import java.util.Arrays;
import java.util.Collection;
import org.junit.Before;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
@ -41,6 +42,7 @@ import org.apache.hadoop.fs.s3a.S3AFileSystem;
import org.apache.hadoop.fs.s3a.S3AInputPolicy;
import org.apache.hadoop.fs.s3a.S3ATestUtils;
import org.apache.hadoop.security.ssl.DelegatingSSLSocketFactory;
import org.apache.hadoop.util.NativeCodeLoader;
import static com.google.common.base.Preconditions.checkNotNull;
import static org.apache.hadoop.fs.s3a.Constants.INPUT_FADVISE;
@ -55,6 +57,9 @@ import static org.apache.hadoop.security.ssl.DelegatingSSLSocketFactory.
SSLChannelMode.Default_JSSE;
import static org.apache.hadoop.security.ssl.DelegatingSSLSocketFactory.
SSLChannelMode.Default_JSSE_with_GCM;
import static org.apache.hadoop.security.ssl.DelegatingSSLSocketFactory.
SSLChannelMode.OpenSSL;
import static org.junit.Assume.assumeTrue;
/**
@ -84,7 +89,7 @@ public class ITestS3AContractSeek extends AbstractContractSeekTest {
public static Collection<Object[]> params() {
return Arrays.asList(new Object[][]{
{INPUT_FADV_SEQUENTIAL, Default_JSSE},
{INPUT_FADV_RANDOM, Default_JSSE_with_GCM},
{INPUT_FADV_RANDOM, OpenSSL},
{INPUT_FADV_NORMAL, Default_JSSE_with_GCM},
});
}
@ -200,6 +205,14 @@ public class ITestS3AContractSeek extends AbstractContractSeekTest {
return (S3AFileSystem) super.getFileSystem();
}
/**
 * Guard run before each test case: when this parameterized run uses the
 * {@code OpenSSL} channel mode, the test only proceeds if the Hadoop native
 * code is loaded and the build reports OpenSSL support. Otherwise the JUnit
 * assumption fails and the test does not run. All other channel modes
 * proceed without any check.
 */
@Before
public void validateSSLChannelMode() {
  if (this.sslChannelMode != OpenSSL) {
    // Nothing to verify for the JSSE-based modes.
    return;
  }
  // Short-circuit on purpose: only query OpenSSL build support once the
  // native library is known to be loaded.
  final boolean opensslAvailable = NativeCodeLoader.isNativeCodeLoaded()
      && NativeCodeLoader.buildSupportsOpenssl();
  assumeTrue(opensslAvailable);
}
@Test
public void testReadPolicyInFS() throws Throwable {
describe("Verify the read policy is being consistently set");

View File

@ -194,7 +194,7 @@
<dependency>
<groupId>org.wildfly.openssl</groupId>
<artifactId>wildfly-openssl</artifactId>
<scope>compile</scope>
<scope>runtime</scope>
</dependency>
<!--com.fasterxml.jackson is used by WASB, not ABFS-->