From 17be03e85ec78c8b90632af122e88a9c1cbdb13b Mon Sep 17 00:00:00 2001
From: David Pilato <david@pilato.fr>
Date: Mon, 27 Feb 2017 17:47:21 +0100
Subject: [PATCH] Add Backoff policy to azure repository

With this commit, Azure repositories are now using an Exponential Backoff policy before failing the backup.
It uses Azure SDK default values for this policy:

* `30s` delta backoff base with
   * `3s` min
   * `90s` max
* `3` retries max

Users can define the number of retries they wish by setting `cloud.azure.storage.xxx.max_retries` where `xxx` is the azure named account.

Closes #22728.
---
 docs/plugins/repository-azure.asciidoc        | 10 ++++++-
 .../storage/AzureStorageServiceImpl.java      |  7 +++++
 .../azure/storage/AzureStorageSettings.java   | 23 ++++++++++++---
 .../storage/AzureStorageServiceTests.java     | 28 +++++++++++++++++++
 4 files changed, 63 insertions(+), 5 deletions(-)

diff --git a/docs/plugins/repository-azure.asciidoc b/docs/plugins/repository-azure.asciidoc
index a6efd674615..ab2f9dd05e9 100644
--- a/docs/plugins/repository-azure.asciidoc
+++ b/docs/plugins/repository-azure.asciidoc
@@ -72,6 +72,12 @@ It's not set by default which means that elasticsearch is using the
 http://azure.github.io/azure-storage-java/com/microsoft/azure/storage/RequestOptions.html#setTimeoutIntervalInMs(java.lang.Integer)[default value]
 set by the azure client (known as 5 minutes).
 
+`max_retries` controls the exponential backoff policy. It sets the number of retries
+attempted after a failure before the snapshot is considered failed. Defaults to `3` retries.
+The initial backoff period is defined by the Azure SDK as `30s`, which means `30s` of wait time
+before retrying after a first timeout or failure. The maximum backoff period is defined by the Azure SDK as
+`90s`.
+
 [source,yaml]
 ----
 cloud:
@@ -82,13 +88,15 @@ cloud:
                 account: your_azure_storage_account1
                 key: your_azure_storage_key1
                 default: true
+                max_retries: 7
             my_account2:
                 account: your_azure_storage_account2
                 key: your_azure_storage_key2
                 timeout: 30s
 ----
 
-In this example, timeout will be 10s for `my_account1` and 30s for `my_account2`.
+In this example, the timeout is `10s` per try for `my_account1`, with `7` retries before failing,
+and `30s` per try for `my_account2`, with the default `3` retries.
 
 [[repository-azure-repository-settings]]
 ===== Repository settings
diff --git a/plugins/repository-azure/src/main/java/org/elasticsearch/cloud/azure/storage/AzureStorageServiceImpl.java b/plugins/repository-azure/src/main/java/org/elasticsearch/cloud/azure/storage/AzureStorageServiceImpl.java
index e1eea1f57f4..594715b845c 100644
--- a/plugins/repository-azure/src/main/java/org/elasticsearch/cloud/azure/storage/AzureStorageServiceImpl.java
+++ b/plugins/repository-azure/src/main/java/org/elasticsearch/cloud/azure/storage/AzureStorageServiceImpl.java
@@ -21,6 +21,8 @@ package org.elasticsearch.cloud.azure.storage;
 
 import com.microsoft.azure.storage.CloudStorageAccount;
 import com.microsoft.azure.storage.LocationMode;
+import com.microsoft.azure.storage.RetryExponentialRetry;
+import com.microsoft.azure.storage.RetryPolicy;
 import com.microsoft.azure.storage.StorageException;
 import com.microsoft.azure.storage.blob.BlobProperties;
 import com.microsoft.azure.storage.blob.CloudBlobClient;
@@ -147,6 +149,11 @@ public class AzureStorageServiceImpl extends AbstractComponent implements AzureS
                     "]. It can not be longer than 2,147,483,647ms.");
             }
         }
+
+        // We define a default exponential retry policy
+        client.getDefaultRequestOptions().setRetryPolicyFactory(
+            new RetryExponentialRetry(RetryPolicy.DEFAULT_CLIENT_BACKOFF, azureStorageSettings.getMaxRetries()));
+
         return client;
     }
 
diff --git a/plugins/repository-azure/src/main/java/org/elasticsearch/cloud/azure/storage/AzureStorageSettings.java b/plugins/repository-azure/src/main/java/org/elasticsearch/cloud/azure/storage/AzureStorageSettings.java
index 705e035895b..600d5fe97f8 100644
--- a/plugins/repository-azure/src/main/java/org/elasticsearch/cloud/azure/storage/AzureStorageSettings.java
+++ b/plugins/repository-azure/src/main/java/org/elasticsearch/cloud/azure/storage/AzureStorageSettings.java
@@ -19,6 +19,7 @@
 
 package org.elasticsearch.cloud.azure.storage;
 
+import com.microsoft.azure.storage.RetryPolicy;
 import org.elasticsearch.cloud.azure.storage.AzureStorageService.Storage;
 import org.elasticsearch.common.collect.Tuple;
 import org.elasticsearch.common.settings.Setting;
@@ -41,20 +42,27 @@ public final class AzureStorageSettings {
         Setting.affixKeySetting(Storage.PREFIX, "key", (key) -> Setting.simpleString(key, Setting.Property.NodeScope));
     private static final Setting<Boolean> DEFAULT_SETTING =
         Setting.affixKeySetting(Storage.PREFIX, "default", (key) -> Setting.boolSetting(key, false, Setting.Property.NodeScope));
-
+    /**
+     * max_retries: Number of retries in case of Azure errors. Defaults to 3 (RetryPolicy.DEFAULT_CLIENT_RETRY_COUNT).
+     */
+    private static final Setting<Integer> MAX_RETRIES_SETTING =
+        Setting.affixKeySetting(Storage.PREFIX, "max_retries",
+            (key) -> Setting.intSetting(key, RetryPolicy.DEFAULT_CLIENT_RETRY_COUNT, Setting.Property.NodeScope));
 
     private final String name;
     private final String account;
     private final String key;
     private final TimeValue timeout;
     private final boolean activeByDefault;
+    private final int maxRetries;
 
-    public AzureStorageSettings(String name, String account, String key, TimeValue timeout, boolean activeByDefault) {
+    public AzureStorageSettings(String name, String account, String key, TimeValue timeout, boolean activeByDefault, int maxRetries) {
         this.name = name;
         this.account = account;
         this.key = key;
         this.timeout = timeout;
         this.activeByDefault = activeByDefault;
+        this.maxRetries = maxRetries;
     }
 
     public String getName() {
@@ -77,6 +85,10 @@ public final class AzureStorageSettings {
         return activeByDefault;
     }
 
+    public int getMaxRetries() {
+        return maxRetries;
+    }
+
     @Override
     public String toString() {
         final StringBuilder sb = new StringBuilder("AzureStorageSettings{");
@@ -85,6 +97,7 @@ public final class AzureStorageSettings {
         sb.append(", key='").append(key).append('\'');
         sb.append(", activeByDefault='").append(activeByDefault).append('\'');
         sb.append(", timeout=").append(timeout);
+        sb.append(", maxRetries=").append(maxRetries);
         sb.append('}');
         return sb.toString();
     }
@@ -110,7 +123,8 @@ public final class AzureStorageSettings {
                     getValue(settings, groupName, ACCOUNT_SETTING),
                     getValue(settings, groupName, KEY_SETTING),
                     getValue(settings, groupName, TIMEOUT_SETTING),
-                    getValue(settings, groupName, DEFAULT_SETTING))
+                    getValue(settings, groupName, DEFAULT_SETTING),
+                    getValue(settings, groupName, MAX_RETRIES_SETTING))
             );
         }
         return storageSettings;
@@ -128,7 +142,8 @@ public final class AzureStorageSettings {
         } else if (settings.size() == 1) {
             // the only storage settings belong (implicitly) to the default primary storage
             AzureStorageSettings storage = settings.get(0);
-            return new AzureStorageSettings(storage.getName(), storage.getAccount(), storage.getKey(), storage.getTimeout(), true);
+            return new AzureStorageSettings(storage.getName(), storage.getAccount(), storage.getKey(), storage.getTimeout(), true,
+                storage.getMaxRetries());
         } else {
             AzureStorageSettings primary = null;
             for (AzureStorageSettings setting : settings) {
diff --git a/plugins/repository-azure/src/test/java/org/elasticsearch/cloud/azure/storage/AzureStorageServiceTests.java b/plugins/repository-azure/src/test/java/org/elasticsearch/cloud/azure/storage/AzureStorageServiceTests.java
index ba377c03c47..0452380ceb9 100644
--- a/plugins/repository-azure/src/test/java/org/elasticsearch/cloud/azure/storage/AzureStorageServiceTests.java
+++ b/plugins/repository-azure/src/test/java/org/elasticsearch/cloud/azure/storage/AzureStorageServiceTests.java
@@ -20,6 +20,7 @@
 package org.elasticsearch.cloud.azure.storage;
 
 import com.microsoft.azure.storage.LocationMode;
+import com.microsoft.azure.storage.RetryExponentialRetry;
 import com.microsoft.azure.storage.blob.CloudBlobClient;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.test.ESTestCase;
@@ -28,7 +29,9 @@ import java.net.URI;
 import java.net.URISyntaxException;
 
 import static org.elasticsearch.cloud.azure.storage.AzureStorageServiceImpl.blobNameFromUri;
+import static org.hamcrest.Matchers.instanceOf;
 import static org.hamcrest.Matchers.is;
+import static org.hamcrest.Matchers.notNullValue;
 import static org.hamcrest.Matchers.nullValue;
 
 public class AzureStorageServiceTests extends ESTestCase {
@@ -143,6 +146,31 @@ public class AzureStorageServiceTests extends ESTestCase {
         assertThat(client1.getDefaultRequestOptions().getTimeoutIntervalInMs(), is(nullValue()));
     }
 
+    public void testGetSelectedClientBackoffPolicy() {
+        Settings timeoutSettings = Settings.builder()
+            .put("cloud.azure.storage.azure.account", "myaccount")
+            .put("cloud.azure.storage.azure.key", "mykey")
+            .build();
+
+        AzureStorageServiceImpl azureStorageService = new AzureStorageServiceMock(timeoutSettings);
+        CloudBlobClient client1 = azureStorageService.getSelectedClient("azure", LocationMode.PRIMARY_ONLY);
+        assertThat(client1.getDefaultRequestOptions().getRetryPolicyFactory(), is(notNullValue()));
+        assertThat(client1.getDefaultRequestOptions().getRetryPolicyFactory(), instanceOf(RetryExponentialRetry.class));
+    }
+
+    public void testGetSelectedClientBackoffPolicyNbRetries() {
+        Settings timeoutSettings = Settings.builder()
+            .put("cloud.azure.storage.azure.account", "myaccount")
+            .put("cloud.azure.storage.azure.key", "mykey")
+            .put("cloud.azure.storage.azure.max_retries", 7)
+            .build();
+
+        AzureStorageServiceImpl azureStorageService = new AzureStorageServiceMock(timeoutSettings);
+        CloudBlobClient client1 = azureStorageService.getSelectedClient("azure", LocationMode.PRIMARY_ONLY);
+        assertThat(client1.getDefaultRequestOptions().getRetryPolicyFactory(), is(notNullValue()));
+        assertThat(client1.getDefaultRequestOptions().getRetryPolicyFactory(), instanceOf(RetryExponentialRetry.class));
+    }
+
     /**
      * This internal class just overload createClient method which is called by AzureStorageServiceImpl.doStart()
      */