From 31614bcc7cda614c45769aa779a839b25c375db2 Mon Sep 17 00:00:00 2001 From: Shashikant Banerjee Date: Fri, 9 Nov 2018 00:05:45 +0530 Subject: [PATCH] HDDS-806. Update Ratis to latest snapshot version in ozone. Contributed by Tsz Wo Nicholas Sze and Mukul Kumar Singh. --- .../apache/hadoop/hdds/scm/ScmConfigKeys.java | 8 ++++ .../apache/hadoop/ozone/OzoneConfigKeys.java | 10 ++++ .../src/main/resources/ozone-default.xml | 15 ++++++ .../server/ratis/ContainerStateMachine.java | 48 ++++++++----------- .../server/ratis/XceiverServerRatis.java | 11 +++++ hadoop-hdds/pom.xml | 2 +- hadoop-ozone/pom.xml | 2 +- 7 files changed, 67 insertions(+), 29 deletions(-) diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java index 38eec61db85..cedcc43cbb0 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java @@ -85,6 +85,14 @@ public final class ScmConfigKeys { public static final TimeDuration DFS_CONTAINER_RATIS_STATEMACHINEDATA_SYNC_TIMEOUT_DEFAULT = TimeDuration.valueOf(10, TimeUnit.SECONDS); + public static final String + DFS_CONTAINER_RATIS_STATEMACHINEDATA_SYNC_RETRIES = + "dfs.container.ratis.statemachinedata.sync.retries"; + public static final int + DFS_CONTAINER_RATIS_STATEMACHINEDATA_SYNC_RETRIES_DEFAULT = -1; + public static final String DFS_CONTAINER_RATIS_LOG_QUEUE_SIZE = + "dfs.container.ratis.log.queue.size"; + public static final int DFS_CONTAINER_RATIS_LOG_QUEUE_SIZE_DEFAULT = 128; public static final String DFS_RATIS_CLIENT_REQUEST_TIMEOUT_DURATION_KEY = "dfs.ratis.client.request.timeout.duration"; public static final TimeDuration diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneConfigKeys.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneConfigKeys.java index 54b1cf8d446..97768173f93 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneConfigKeys.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneConfigKeys.java @@ -261,6 +261,16 @@ public final class OzoneConfigKeys { public static final TimeDuration DFS_RATIS_SERVER_RETRY_CACHE_TIMEOUT_DURATION_DEFAULT = ScmConfigKeys.DFS_RATIS_SERVER_RETRY_CACHE_TIMEOUT_DURATION_DEFAULT; + public static final String + DFS_CONTAINER_RATIS_STATEMACHINEDATA_SYNC_RETRIES = + ScmConfigKeys.DFS_CONTAINER_RATIS_STATEMACHINEDATA_SYNC_RETRIES; + public static final int + DFS_CONTAINER_RATIS_STATEMACHINEDATA_SYNC_RETRIES_DEFAULT = + ScmConfigKeys.DFS_CONTAINER_RATIS_STATEMACHINEDATA_SYNC_RETRIES_DEFAULT; + public static final String DFS_CONTAINER_RATIS_LOG_QUEUE_SIZE = + ScmConfigKeys.DFS_CONTAINER_RATIS_LOG_QUEUE_SIZE; + public static final int DFS_CONTAINER_RATIS_LOG_QUEUE_SIZE_DEFAULT = + ScmConfigKeys.DFS_CONTAINER_RATIS_LOG_QUEUE_SIZE_DEFAULT; public static final String DFS_RATIS_SERVER_REQUEST_TIMEOUT_DURATION_KEY = ScmConfigKeys.DFS_RATIS_SERVER_REQUEST_TIMEOUT_DURATION_KEY; public static final TimeDuration diff --git a/hadoop-hdds/common/src/main/resources/ozone-default.xml b/hadoop-hdds/common/src/main/resources/ozone-default.xml index 5ff60ebc456..2ffc2abb7cf 100644 --- a/hadoop-hdds/common/src/main/resources/ozone-default.xml +++ b/hadoop-hdds/common/src/main/resources/ozone-default.xml @@ -59,6 +59,21 @@ Timeout for StateMachine data writes by Ratis. + + dfs.container.ratis.statemachinedata.sync.retries + -1 + OZONE, DEBUG, CONTAINER, RATIS + Number of times the WriteStateMachineData op will be tried + before failing, if this value is -1, then this retries indefinitely. + + + + dfs.container.ratis.log.queue.size + 128 + OZONE, DEBUG, CONTAINER, RATIS + Number of operation pending with Raft's Log Worker. + + dfs.container.ratis.datanode.storage.dir diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/ContainerStateMachine.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/ContainerStateMachine.java index 2a4a2278063..38b789e8db8 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/ContainerStateMachine.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/ContainerStateMachine.java @@ -22,7 +22,7 @@ import org.apache.hadoop.hdds.HddsUtils; import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos; import org.apache.hadoop.ozone.container.common.helpers.BlockData; -import org.apache.ratis.proto.RaftProtos.StateMachineEntryProto; +import org.apache.ratis.proto.RaftProtos.RaftPeerRole; import org.apache.ratis.protocol.RaftGroup; import org.apache.ratis.protocol.RaftGroupId; import org.apache.ratis.server.RaftServer; @@ -55,7 +55,6 @@ import org.apache.ratis.statemachine.TransactionContext; import org.apache.ratis.statemachine.impl.BaseStateMachine; import org.apache.ratis.statemachine.impl.SimpleStateMachineStorage; -import org.apache.ratis.statemachine.impl.TransactionContextImpl; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -206,7 +205,6 @@ public TransactionContext startTransaction(RaftClientRequest request) final ContainerCommandRequestProto proto = getRequestProto(request.getMessage().getContent()); Preconditions.checkArgument(request.getRaftGroupId().equals(gid)); - final StateMachineLogEntryProto log; if (proto.getCmdType() == Type.WriteChunk) { final WriteChunkRequestProto write = proto.getWriteChunk(); // create the state machine data proto @@ -236,33 +234,29 @@ public TransactionContext startTransaction(RaftClientRequest request) .setWriteChunk(commitWriteChunkProto) .build(); - log = createSMLogEntryProto(request, - commitContainerCommandProto.toByteString(), - dataContainerCommandProto.toByteString()); + return TransactionContext.newBuilder() + .setClientRequest(request) + .setStateMachine(this) + .setServerRole(RaftPeerRole.LEADER) + .setStateMachineData(dataContainerCommandProto.toByteString()) + .setLogData(commitContainerCommandProto.toByteString()) + .build(); } else if (proto.getCmdType() == Type.CreateContainer) { - log = createSMLogEntryProto(request, - request.getMessage().getContent(), request.getMessage().getContent()); + return TransactionContext.newBuilder() + .setClientRequest(request) + .setStateMachine(this) + .setServerRole(RaftPeerRole.LEADER) + .setStateMachineData(request.getMessage().getContent()) + .setLogData(request.getMessage().getContent()) + .build(); } else { - log = createSMLogEntryProto(request, request.getMessage().getContent(), - null); + return TransactionContext.newBuilder() + .setClientRequest(request) + .setStateMachine(this) + .setServerRole(RaftPeerRole.LEADER) + .setLogData(request.getMessage().getContent()) + .build(); } - return new TransactionContextImpl(this, request, log); - } - - private StateMachineLogEntryProto createSMLogEntryProto(RaftClientRequest r, - ByteString logData, ByteString smData) { - StateMachineLogEntryProto.Builder builder = - StateMachineLogEntryProto.newBuilder(); - - builder.setCallId(r.getCallId()) - .setClientId(r.getClientId().toByteString()) - .setLogData(logData); - - if (smData != null) { - builder.setStateMachineEntry(StateMachineEntryProto.newBuilder() - .setStateMachineData(smData).build()); - } - return builder.build(); } private ByteString getStateMachineData(StateMachineLogEntryProto entryProto) { diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/XceiverServerRatis.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/XceiverServerRatis.java index a679e5eab9a..7bf4da9b699 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/XceiverServerRatis.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/XceiverServerRatis.java @@ -287,6 +287,17 @@ private RaftProperties newRaftProperties(Configuration conf) { setAutoTriggerEnabled(properties, true); RaftServerConfigKeys.Snapshot. setAutoTriggerThreshold(properties, snapshotThreshold); + int logQueueSize = + conf.getInt(OzoneConfigKeys.DFS_CONTAINER_RATIS_LOG_QUEUE_SIZE, + OzoneConfigKeys.DFS_CONTAINER_RATIS_LOG_QUEUE_SIZE_DEFAULT); + RaftServerConfigKeys.Log.setQueueSize(properties, logQueueSize); + + int numSyncRetries = conf.getInt( + OzoneConfigKeys.DFS_CONTAINER_RATIS_STATEMACHINEDATA_SYNC_RETRIES, + OzoneConfigKeys. + DFS_CONTAINER_RATIS_STATEMACHINEDATA_SYNC_RETRIES_DEFAULT); + RaftServerConfigKeys.Log.StateMachineData.setSyncTimeoutRetry(properties, + numSyncRetries); return properties; } diff --git a/hadoop-hdds/pom.xml b/hadoop-hdds/pom.xml index 090a53772b4..7a1704c553b 100644 --- a/hadoop-hdds/pom.xml +++ b/hadoop-hdds/pom.xml @@ -45,7 +45,7 @@ http://maven.apache.org/xsd/maven-4.0.0.xsd"> 0.4.0-SNAPSHOT - 0.3.0-1d2ebee-SNAPSHOT + 0.3.0-1d07b18-SNAPSHOT 1.60 diff --git a/hadoop-ozone/pom.xml b/hadoop-ozone/pom.xml index 33af31b34d7..671421ef82b 100644 --- a/hadoop-ozone/pom.xml +++ b/hadoop-ozone/pom.xml @@ -33,7 +33,7 @@ http://maven.apache.org/xsd/maven-4.0.0.xsd"> 3.2.1-SNAPSHOT 0.4.0-SNAPSHOT 0.4.0-SNAPSHOT - 0.3.0-1d2ebee-SNAPSHOT + 0.3.0-1d07b18-SNAPSHOT 1.60 Badlands ${ozone.version}