From a530ac3f50d71c608235168acefe2f8eb1753131 Mon Sep 17 00:00:00 2001 From: Shashikant Banerjee Date: Mon, 30 Sep 2019 15:42:04 +0200 Subject: [PATCH] HDDS-2153. Add a config to tune max pending requests in Ratis leader Closes #1474 --- .../java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java | 5 +++++ .../java/org/apache/hadoop/ozone/OzoneConfigKeys.java | 5 +++++ hadoop-hdds/common/src/main/resources/ozone-default.xml | 8 ++++++++ .../common/transport/server/ratis/XceiverServerRatis.java | 6 ++++++ 4 files changed, 24 insertions(+) diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java index f00ecb23c3b..161780668ab 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java @@ -105,6 +105,11 @@ public final class ScmConfigKeys { // TODO: Set to 1024 once RATIS issue around purge is fixed. 
public static final int DFS_CONTAINER_RATIS_LOG_PURGE_GAP_DEFAULT = 1000000; + + public static final String DFS_CONTAINER_RATIS_LEADER_NUM_PENDING_REQUESTS = + "dfs.container.ratis.leader.num.pending.requests"; + public static final int + DFS_CONTAINER_RATIS_LEADER_NUM_PENDING_REQUESTS_DEFAULT = 4096; // expiry interval stateMachineData cache entry inside containerStateMachine public static final String DFS_CONTAINER_RATIS_STATEMACHINEDATA_CACHE_EXPIRY_INTERVAL = diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneConfigKeys.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneConfigKeys.java index 9050ebda2dc..a3d1c4ab288 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneConfigKeys.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneConfigKeys.java @@ -329,6 +329,11 @@ public final class OzoneConfigKeys { ScmConfigKeys.DFS_CONTAINER_RATIS_LOG_PURGE_GAP; public static final int DFS_CONTAINER_RATIS_LOG_PURGE_GAP_DEFAULT = ScmConfigKeys.DFS_CONTAINER_RATIS_LOG_PURGE_GAP_DEFAULT; + public static final String DFS_CONTAINER_RATIS_LEADER_NUM_PENDING_REQUESTS = + ScmConfigKeys.DFS_CONTAINER_RATIS_LEADER_NUM_PENDING_REQUESTS; + public static final int + DFS_CONTAINER_RATIS_LEADER_NUM_PENDING_REQUESTS_DEFAULT = + ScmConfigKeys.DFS_CONTAINER_RATIS_LEADER_NUM_PENDING_REQUESTS_DEFAULT; public static final String DFS_RATIS_SERVER_REQUEST_TIMEOUT_DURATION_KEY = ScmConfigKeys.DFS_RATIS_SERVER_REQUEST_TIMEOUT_DURATION_KEY; public static final TimeDuration diff --git a/hadoop-hdds/common/src/main/resources/ozone-default.xml b/hadoop-hdds/common/src/main/resources/ozone-default.xml index 9e4c5ea2f1f..31bc65240d2 100644 --- a/hadoop-hdds/common/src/main/resources/ozone-default.xml +++ b/hadoop-hdds/common/src/main/resources/ozone-default.xml @@ -203,6 +203,14 @@ will use for writing chunks (60 by default). 
+ + dfs.container.ratis.leader.num.pending.requests + 4096 + OZONE, RATIS, PERFORMANCE + Maximum number of pending requests after which the leader + starts rejecting requests from clients. + + dfs.container.ratis.replication.level MAJORITY diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/XceiverServerRatis.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/XceiverServerRatis.java index 746bfb86f68..abedcd4ae34 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/XceiverServerRatis.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/XceiverServerRatis.java @@ -33,6 +33,7 @@ import org.apache.hadoop.hdds.security.x509.SecurityConfig; import org.apache.hadoop.hdds.security.x509.certificate.client.CertificateClient; import org.apache.hadoop.hdds.tracing.TracingUtil; import org.apache.hadoop.ozone.OzoneConfigKeys; + import org.apache.hadoop.ozone.OzoneConsts; import org.apache.hadoop.ozone.container.common.interfaces.ContainerDispatcher; import org.apache.hadoop.ozone.container.common.statemachine.StateContext; @@ -225,6 +226,11 @@ public final class XceiverServerRatis extends XceiverServer { setAutoTriggerEnabled(properties, true); RaftServerConfigKeys.Snapshot. setAutoTriggerThreshold(properties, snapshotThreshold); + int maxPendingRequets = conf.getInt( + OzoneConfigKeys.DFS_CONTAINER_RATIS_LEADER_NUM_PENDING_REQUESTS, + OzoneConfigKeys.DFS_CONTAINER_RATIS_LEADER_NUM_PENDING_REQUESTS_DEFAULT + ); + RaftServerConfigKeys.Write.setElementLimit(properties, maxPendingRequets); int logQueueNumElements = conf.getInt(OzoneConfigKeys.DFS_CONTAINER_RATIS_LOG_QUEUE_NUM_ELEMENTS, OzoneConfigKeys.DFS_CONTAINER_RATIS_LOG_QUEUE_NUM_ELEMENTS_DEFAULT);