mirror of https://github.com/apache/druid.git
Make zk connection retries configurable (#13913)
* This makes the ZooKeeper connection retry count configurable. It is presently hardcoded to 29 tries, which means a Druid node takes a long time to shut down after losing ZK connectivity. A smaller retry count helps k8s deployments fail fast: when the underlying k8s node loses network connectivity or can no longer talk to ZooKeeper, failing fast can trigger a pod restart, which can then reassign the pod to a healthy k8s node. Existing behavior is preserved (the default remains 29 retries), but users can override this property if needed.
This commit is contained in:
parent
143fdcfacf
commit
617c325c70
|
@ -58,6 +58,11 @@ public class CuratorConfig
|
|||
@JsonProperty("authScheme")
|
||||
private String authScheme = "digest";
|
||||
|
||||
// Configures the maximum number of retries for attempting connection to Zookeeper.
|
||||
// Smaller retry counts help nodes fail fast in case of ZK connection loss.
|
||||
@JsonProperty("maxZkRetries")
|
||||
private int maxZkRetries = 29;
|
||||
|
||||
public static CuratorConfig create(String hosts)
|
||||
{
|
||||
CuratorConfig config = new CuratorConfig();
|
||||
|
@ -131,4 +136,9 @@ public class CuratorConfig
|
|||
{
|
||||
return authScheme;
|
||||
}
|
||||
|
||||
public int getMaxZkRetries()
|
||||
{
|
||||
return maxZkRetries;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -49,7 +49,6 @@ public class CuratorModule implements Module
|
|||
|
||||
static final int BASE_SLEEP_TIME_MS = 1000;
|
||||
static final int MAX_SLEEP_TIME_MS = 45000;
|
||||
private static final int MAX_RETRIES = 29;
|
||||
|
||||
private final boolean haltOnFailedStart;
|
||||
|
||||
|
@ -89,7 +88,7 @@ public class CuratorModule implements Module
|
|||
);
|
||||
}
|
||||
|
||||
RetryPolicy retryPolicy = new BoundedExponentialBackoffRetry(BASE_SLEEP_TIME_MS, MAX_SLEEP_TIME_MS, MAX_RETRIES);
|
||||
RetryPolicy retryPolicy = new BoundedExponentialBackoffRetry(BASE_SLEEP_TIME_MS, MAX_SLEEP_TIME_MS, config.getMaxZkRetries());
|
||||
|
||||
return builder
|
||||
.ensembleProvider(new FixedEnsembleProvider(config.getZkHosts()))
|
||||
|
|
|
@ -33,6 +33,7 @@ public class CuratorConfigTest extends JsonConfigTesterBase<CuratorConfig>
|
|||
propertyValues.put(getPropertyKey("user"), "test-zk-user");
|
||||
propertyValues.put(getPropertyKey("pwd"), "test-zk-pwd");
|
||||
propertyValues.put(getPropertyKey("authScheme"), "auth");
|
||||
propertyValues.put(getPropertyKey("maxZkRetries"), "20");
|
||||
testProperties.putAll(propertyValues);
|
||||
configProvider.inject(testProperties, configurator);
|
||||
CuratorConfig config = configProvider.get().get();
|
||||
|
@ -41,6 +42,7 @@ public class CuratorConfigTest extends JsonConfigTesterBase<CuratorConfig>
|
|||
Assert.assertEquals("test-zk-user", config.getZkUser());
|
||||
Assert.assertEquals("test-zk-pwd", config.getZkPwd());
|
||||
Assert.assertEquals("auth", config.getAuthScheme());
|
||||
Assert.assertEquals(20, config.getMaxZkRetries());
|
||||
}
|
||||
|
||||
@Test
|
||||
|
@ -51,5 +53,6 @@ public class CuratorConfigTest extends JsonConfigTesterBase<CuratorConfig>
|
|||
Assert.assertEquals(false, config.getEnableAcl());
|
||||
Assert.assertNull(config.getZkUser());
|
||||
Assert.assertEquals("digest", config.getAuthScheme());
|
||||
Assert.assertEquals(29, config.getMaxZkRetries());
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue