From a68671eaf750d414e16d8e0d3c1e79a9e90fadd9 Mon Sep 17 00:00:00 2001 From: Steve Loughran Date: Tue, 2 Nov 2021 13:26:16 +0000 Subject: [PATCH] HADOOP-17928. Syncable: S3A to warn and downgrade (#3585) This switches the default behavior of S3A output streams to warning that Syncable.hsync() or hflush() have been called; it's not considered an error unless the defaults are overridden. This avoids breaking applications which call the APIs, at the risk of people trying to use S3 as a safe store of streamed data (HBase WALs, audit logs etc). Contributed by Steve Loughran. Change-Id: I0a02ec1e622343619f147f94158c18928a73a885 --- .../src/main/resources/core-default.xml | 11 ++++- .../org/apache/hadoop/fs/s3a/Constants.java | 2 +- .../tools/hadoop-aws/troubleshooting_s3a.md | 40 ++++++++++++++----- .../fs/s3a/TestS3ABlockOutputStream.java | 4 ++ 4 files changed, 44 insertions(+), 13 deletions(-) diff --git a/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml b/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml index 1e0e0b9932e..38afa607ad0 100644 --- a/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml +++ b/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml @@ -2188,7 +2188,16 @@ - + + fs.s3a.downgrade.syncable.exceptions + true + + Warn but continue when applications use Syncable.hsync when writing + to S3A. + + + + fs.AbstractFileSystem.wasb.impl org.apache.hadoop.fs.azure.Wasb diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java index 56ea7d00069..461fe87d331 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java @@ -387,7 +387,7 @@ public final class Constants { * Value: {@value}. 
*/ public static final boolean DOWNGRADE_SYNCABLE_EXCEPTIONS_DEFAULT = - false; + true; /** * The capacity of executor queues for operations other than block diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/troubleshooting_s3a.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/troubleshooting_s3a.md index 6f55d9effed..d55e5229108 100644 --- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/troubleshooting_s3a.md +++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/troubleshooting_s3a.md @@ -924,30 +924,48 @@ connector isn't saving any data at all. The `Syncable` API, especially the `hsync()` call, are critical for applications such as HBase to safely persist data. -The S3A connector throws an `UnsupportedOperationException` when these API calls -are made, because the guarantees absolutely cannot be met: nothing is being flushed -or saved. +When configured to do so, the S3A connector throws an `UnsupportedOperationException` +when these API calls are made, because the API guarantees absolutely cannot be met: +_nothing is being flushed or saved_. -* Applications which intend to invoke the Syncable APIs call `hasCapability("hsync")` on +* Applications which intend to invoke the Syncable APIs should call `hasCapability("hsync")` on the stream to see if they are supported. * Or catch and downgrade `UnsupportedOperationException`. -These recommendations _apply to all filesystems_. +These recommendations _apply to all filesystems_. -To downgrade the S3A connector to simply warning of the use of +For consistency with other filesystems, S3A output streams +do not by default reject the `Syncable` calls -instead +they print a warning of its use. + + +The count of invocations of the two APIs are collected in the S3A filesystem +Statistics/IOStatistics and so their use can be monitored. 
+ +To switch the S3A connector to rejecting all use of `hsync()` or `hflush()` calls, set the option -`fs.s3a.downgrade.syncable.exceptions` to true. +`fs.s3a.downgrade.syncable.exceptions` to `false`. ```xml fs.s3a.downgrade.syncable.exceptions - true + false ``` -The count of invocations of the two APIs are collected -in the S3A filesystem Statistics/IOStatistics and so -their use can be monitored. +Regardless of the setting, the `Syncable` API calls do not work. +Telling the store to *not* downgrade the calls is a way to: +1. Prevent applications which require Syncable to work from being deployed + against S3. +2. Identify applications which are making the calls even though they don't + need to. These applications can then be fixed - something which may take + time. + +Put differently: it is safest to disable downgrading syncable exceptions. +However, enabling the downgrade stops applications unintentionally using the API +from breaking. + +*Tip*: try disabling the downgrade in staging environments to see what breaks.
### `RemoteFileChangedException` and read-during-overwrite diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3ABlockOutputStream.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3ABlockOutputStream.java index 9ae24c19f3d..21f268dfb2e 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3ABlockOutputStream.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3ABlockOutputStream.java @@ -141,6 +141,10 @@ public class TestS3ABlockOutputStream extends AbstractS3AMockTest { */ @Test public void testSyncableUnsupported() throws Exception { + final S3ABlockOutputStream.BlockOutputStreamBuilder + builder = mockS3ABuilder(); + builder.withDowngradeSyncableExceptions(false); + stream = spy(new S3ABlockOutputStream(builder)); intercept(UnsupportedOperationException.class, () -> stream.hflush()); intercept(UnsupportedOperationException.class, () -> stream.hsync()); }