Add config for http client connect timeout (#16831)

Adds a `clientConnectTimeout` setting to our HTTP client config that controls the connection timeout for HTTP client requests.

On busy Kubernetes (K8s) clusters, the default connect timeout of 500 ms is sometimes not enough to complete the TCP SYN/ACK handshake for a request; in these cases, the requests time out with the error:
exceptionType=java.net.SocketTimeoutException, exceptionMessage=Connect Timeout
This behavior was mostly observed on the router while forwarding queries to the broker.
Having a slightly higher connect timeout helped resolve these issues.
This commit is contained in:
Atul Mohan 2024-08-07 07:01:10 -07:00 committed by GitHub
parent 84192b11d7
commit 76ad17fb4c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 14 additions and 4 deletions

View File

@ -840,6 +840,7 @@ All Druid components can communicate with each other over HTTP.
|`druid.global.http.readTimeout`|The timeout for data reads.|`PT15M`|
|`druid.global.http.unusedConnectionTimeout`|The timeout for idle connections in connection pool. The connection in the pool will be closed after this timeout and a new one will be established. This timeout should be less than `druid.global.http.readTimeout`. Set this timeout = ~90% of `druid.global.http.readTimeout`|`PT4M`|
|`druid.global.http.numMaxThreads`|Maximum number of I/O worker threads|`max(10, ((number of cores * 17) / 16 + 2) + 30)`|
|`druid.global.http.clientConnectTimeout`|The timeout (in milliseconds) for establishing client connections.|500|
### Common endpoints configuration
@ -1879,6 +1880,8 @@ client has the following configuration options.
|`druid.broker.http.unusedConnectionTimeout`|The timeout for idle connections in connection pool. The connection in the pool will be closed after this timeout and a new one will be established. This timeout should be less than `druid.broker.http.readTimeout`. Set this timeout = ~90% of `druid.broker.http.readTimeout`|`PT4M`|
|`druid.broker.http.maxQueuedBytes`|Maximum number of bytes queued per query before exerting [backpressure](../operations/basic-cluster-tuning.md#broker-backpressure) on channels to the data servers.<br /><br />Similar to `druid.server.http.maxScatterGatherBytes`, except that `maxQueuedBytes` triggers [backpressure](../operations/basic-cluster-tuning.md#broker-backpressure) instead of query failure. Set to zero to disable. You can override this setting by using the [`maxQueuedBytes` query context parameter](../querying/query-context.md). Druid supports [human-readable](human-readable-byte.md) format. |25 MB or 2% of maximum Broker heap size, whichever is greater.|
|`druid.broker.http.numMaxThreads`|Maximum number of I/O worker threads|`max(10, ((number of cores * 17) / 16 + 2) + 30)`|
|`druid.broker.http.clientConnectTimeout`|The timeout (in milliseconds) for establishing client connections.|500|
##### Retry policy
@ -2239,3 +2242,4 @@ Supported query contexts:
|`druid.router.http.numMaxThreads`|Maximum number of worker threads to handle HTTP requests and responses|`max(10, ((number of cores * 17) / 16 + 2) + 30)`|
|`druid.router.http.numRequestsQueued`|Maximum number of requests that may be queued to a destination|`1024`|
|`druid.router.http.requestBuffersize`|Size of the content buffer for receiving requests. These buffers are only used for active connections that have requests with bodies that will not fit within the header buffer|`8 * 1024`|
|`druid.router.http.clientConnectTimeout`|The timeout (in milliseconds) for establishing client connections.|500|

View File

@ -27,6 +27,7 @@ import org.joda.time.Duration;
import org.joda.time.Period;
import javax.validation.constraints.Min;
import java.util.concurrent.TimeUnit;
/**
*
@ -72,6 +73,9 @@ public class DruidHttpClientConfig
@JsonProperty
private Boolean eagerInitialization = null;
/**
 * Timeout, in milliseconds, for establishing HTTP client connections.
 * Defaults to 500; the {@code TimeUnit.MILLISECONDS.toMillis} call is an
 * identity conversion that only makes the unit explicit.
 */
@JsonProperty
private long clientConnectTimeout = TimeUnit.MILLISECONDS.toMillis(500);
public int getNumConnections()
{
return numConnections;
@ -129,6 +133,11 @@ public class DruidHttpClientConfig
return eagerInitialization;
}
/**
 * Returns the configured timeout for establishing HTTP client connections.
 *
 * @return the connect timeout in milliseconds
 */
public long getClientConnectTimeout()
{
  return this.clientConnectTimeout;
}
private static HumanReadableBytes computeDefaultMaxQueuedBytes()
{
return HumanReadableBytes.valueOf(

View File

@ -33,15 +33,12 @@ import org.eclipse.jetty.util.thread.QueuedThreadPool;
import javax.net.ssl.SSLContext;
import java.lang.annotation.Annotation;
import java.util.concurrent.TimeUnit;
/**
*
*/
public class JettyHttpClientModule implements Module
{
private static final long CLIENT_CONNECT_TIMEOUT_MILLIS = TimeUnit.MILLISECONDS.toMillis(500);
public static JettyHttpClientModule global()
{
return new JettyHttpClientModule("druid.global.http", Global.class);
@ -91,7 +88,7 @@ public class JettyHttpClientModule implements Module
httpClient.setIdleTimeout(config.getReadTimeout().getMillis());
httpClient.setMaxConnectionsPerDestination(config.getNumConnections());
httpClient.setMaxRequestsQueuedPerDestination(config.getNumRequestsQueued());
httpClient.setConnectTimeout(CLIENT_CONNECT_TIMEOUT_MILLIS);
httpClient.setConnectTimeout(config.getClientConnectTimeout());
httpClient.setRequestBufferSize(config.getRequestBuffersize());
final QueuedThreadPool pool = new QueuedThreadPool(config.getNumMaxThreads());
pool.setName(JettyHttpClientModule.class.getSimpleName() + "-threadPool-" + pool.hashCode());