diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java index a3cd1532e67..3a527f7f0c3 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java @@ -1524,7 +1524,7 @@ private void initializeClient(URI uri, String fileSystemName, private AbfsClientContext populateAbfsClientContext() { return new AbfsClientContextBuilder() .withExponentialRetryPolicy( - new ExponentialRetryPolicy(abfsConfiguration.getMaxIoRetries())) + new ExponentialRetryPolicy(abfsConfiguration)) .withAbfsCounters(abfsCounters) .withAbfsPerfTracker(abfsPerfTracker) .build(); diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/ExponentialRetryPolicy.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/ExponentialRetryPolicy.java index 9a75c78aa06..89d99471a82 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/ExponentialRetryPolicy.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/ExponentialRetryPolicy.java @@ -21,6 +21,7 @@ import java.util.Random; import java.net.HttpURLConnection; +import org.apache.hadoop.fs.azurebfs.AbfsConfiguration; import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; /** @@ -89,6 +90,16 @@ public ExponentialRetryPolicy(final int maxIoRetries) { DEFAULT_CLIENT_BACKOFF); } + /** + * Initializes a new instance of the {@link ExponentialRetryPolicy} class. + * + * @param conf The {@link AbfsConfiguration} from which to retrieve retry configuration. + */ + public ExponentialRetryPolicy(AbfsConfiguration conf) { + this(conf.getMaxIoRetries(), conf.getMinBackoffIntervalMilliseconds(), conf.getMaxBackoffIntervalMilliseconds(), + conf.getBackoffIntervalMilliseconds()); + } + /** * Initializes a new instance of the {@link ExponentialRetryPolicy} class. * diff --git a/hadoop-tools/hadoop-azure/src/site/markdown/abfs.md b/hadoop-tools/hadoop-azure/src/site/markdown/abfs.md index e8eae8c489e..dfb7f3f42a5 100644 --- a/hadoop-tools/hadoop-azure/src/site/markdown/abfs.md +++ b/hadoop-tools/hadoop-azure/src/site/markdown/abfs.md @@ -338,7 +338,7 @@ with the following configurations. retries. Default value is 5. * `fs.azure.oauth.token.fetch.retry.min.backoff.interval`: Minimum back-off interval. Added to the retry interval computed from delta backoff. By - default this si set as 0. Set the interval in milli seconds. + default this is set as 0. Set the interval in milli seconds. * `fs.azure.oauth.token.fetch.retry.max.backoff.interval`: Maximum back-off interval. Default value is 60000 (sixty seconds). Set the interval in milli seconds. @@ -800,9 +800,31 @@ The following configs are related to read and write operations. `fs.azure.io.retry.max.retries`: Sets the number of retries for IO operations. Currently this is used only for the server call retry logic. Used within -AbfsClient class as part of the ExponentialRetryPolicy. The value should be +`AbfsClient` class as part of the ExponentialRetryPolicy. The value should be greater than or equal to 0. +`fs.azure.io.retry.min.backoff.interval`: Sets the minimum backoff interval for +retries of IO operations. Currently this is used only for the server call retry +logic. Used within `AbfsClient` class as part of the ExponentialRetryPolicy. This +value indicates the smallest interval (in milliseconds) to wait before retrying +an IO operation. The default value is 3000 (3 seconds). + +`fs.azure.io.retry.max.backoff.interval`: Sets the maximum backoff interval for +retries of IO operations. Currently this is used only for the server call retry +logic. Used within `AbfsClient` class as part of the ExponentialRetryPolicy. This +value indicates the largest interval (in milliseconds) to wait before retrying +an IO operation. The default value is 30000 (30 seconds). + +`fs.azure.io.retry.backoff.interval`: Sets the default backoff interval for +retries of IO operations. Currently this is used only for the server call retry +logic. Used within `AbfsClient` class as part of the ExponentialRetryPolicy. This +value is used to compute a random delta between 80% and 120% of the specified +value. This random delta is then multiplied by an exponent of the current IO +retry number (i.e., the default is multiplied by `2^(retryNum - 1)`) and then +contstrained within the range of [`fs.azure.io.retry.min.backoff.interval`, +`fs.azure.io.retry.max.backoff.interval`] to determine the amount of time to +wait before the next IO retry attempt. The default value is 3000 (3 seconds). + `fs.azure.write.request.size`: To set the write buffer size. Specify the value in bytes. The value should be between 16384 to 104857600 both inclusive (16 KB to 100 MB). The default value will be 8388608 (8 MB). @@ -859,7 +881,7 @@ when there are too many writes from the same process. ### Security Options `fs.azure.always.use.https`: Enforces to use HTTPS instead of HTTP when the flag -is made true. Irrespective of the flag, AbfsClient will use HTTPS if the secure +is made true. Irrespective of the flag, `AbfsClient` will use HTTPS if the secure scheme (ABFSS) is used or OAuth is used for authentication. By default this will be set to true. diff --git a/hadoop-tools/hadoop-azure/src/site/markdown/testing_azure.md b/hadoop-tools/hadoop-azure/src/site/markdown/testing_azure.md index cf3b2344456..933f86be3e8 100644 --- a/hadoop-tools/hadoop-azure/src/site/markdown/testing_azure.md +++ b/hadoop-tools/hadoop-azure/src/site/markdown/testing_azure.md @@ -448,7 +448,7 @@ use requires the presence of secret credentials, where tests may be slow, and where finding out why something failed from nothing but the test output is critical. -#### Subclasses Existing Shared Base Blasses +#### Subclasses Existing Shared Base Classes There are a set of base classes which should be extended for Azure tests and integration tests. @@ -602,7 +602,7 @@ various test combinations, it will: 2. Run tests for all combinations 3. Summarize results across all the test combination runs. -As a pre-requiste step, fill config values for test accounts and credentials +As a pre-requisite step, fill config values for test accounts and credentials needed for authentication in `src/test/resources/azure-auth-keys.xml.template` and rename as `src/test/resources/azure-auth-keys.xml`. diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestExponentialRetryPolicy.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestExponentialRetryPolicy.java index e10419f148b..0f8dc55aa14 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestExponentialRetryPolicy.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestExponentialRetryPolicy.java @@ -18,6 +18,11 @@ package org.apache.hadoop.fs.azurebfs.services; +import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.AZURE_BACKOFF_INTERVAL; +import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.AZURE_MAX_BACKOFF_INTERVAL; +import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.AZURE_MAX_IO_RETRIES; +import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.AZURE_MIN_BACKOFF_INTERVAL; + import java.util.Random; import org.junit.Assert; @@ -32,7 +37,6 @@ * Unit test TestExponentialRetryPolicy. */ public class TestExponentialRetryPolicy extends AbstractAbfsIntegrationTest { - private final int maxRetryCount = 30; private final int noRetryCount = 0; private final int retryCount = new Random().nextInt(maxRetryCount); @@ -57,12 +61,38 @@ public void testDifferentMaxIORetryCount() throws Exception { @Test public void testDefaultMaxIORetryCount() throws Exception { AbfsConfiguration abfsConfig = getAbfsConfig(); - Assert.assertTrue( + Assert.assertEquals( String.format("default maxIORetry count is %s.", maxRetryCount), - abfsConfig.getMaxIoRetries() == maxRetryCount); + maxRetryCount, abfsConfig.getMaxIoRetries()); testMaxIOConfig(abfsConfig); } + @Test + public void testAbfsConfigConstructor() throws Exception { + // Ensure we choose expected values that are not defaults + ExponentialRetryPolicy template = new ExponentialRetryPolicy( + getAbfsConfig().getMaxIoRetries()); + int testModifier = 1; + int expectedMaxRetries = template.getRetryCount() + testModifier; + int expectedMinBackoff = template.getMinBackoff() + testModifier; + int expectedMaxBackoff = template.getMaxBackoff() + testModifier; + int expectedDeltaBackoff = template.getDeltaBackoff() + testModifier; + + Configuration config = new Configuration(this.getRawConfiguration()); + config.setInt(AZURE_MAX_IO_RETRIES, expectedMaxRetries); + config.setInt(AZURE_MIN_BACKOFF_INTERVAL, expectedMinBackoff); + config.setInt(AZURE_MAX_BACKOFF_INTERVAL, expectedMaxBackoff); + config.setInt(AZURE_BACKOFF_INTERVAL, expectedDeltaBackoff); + + ExponentialRetryPolicy policy = new ExponentialRetryPolicy( + new AbfsConfiguration(config, "dummyAccountName")); + + Assert.assertEquals("Max retry count was not set as expected.", expectedMaxRetries, policy.getRetryCount()); + Assert.assertEquals("Min backoff interval was not set as expected.", expectedMinBackoff, policy.getMinBackoff()); + Assert.assertEquals("Max backoff interval was not set as expected.", expectedMaxBackoff, policy.getMaxBackoff()); + Assert.assertEquals("Delta backoff interval was not set as expected.", expectedDeltaBackoff, policy.getDeltaBackoff()); + } + private AbfsConfiguration getAbfsConfig() throws Exception { Configuration config = new Configuration(this.getRawConfiguration()); @@ -81,8 +111,8 @@ private void testMaxIOConfig(AbfsConfiguration abfsConfig) { localRetryCount++; } - Assert.assertTrue( + Assert.assertEquals( "When all retries are exhausted, the retryCount will be same as max configured", - localRetryCount == abfsConfig.getMaxIoRetries()); + abfsConfig.getMaxIoRetries(), localRetryCount); } }