From 5ddefdd50751ed316f2eb9046f294bbdcdfb2428 Mon Sep 17 00:00:00 2001 From: Arpit Agarwal Date: Mon, 5 Nov 2018 10:10:10 -0800 Subject: [PATCH] HDDS-794. Add configs to set StateMachineData write timeout in ContainerStateMachine. Contributed by Shashikant Banerjee. --- .../apache/hadoop/hdds/scm/ScmConfigKeys.java | 6 ++++++ .../apache/hadoop/ozone/OzoneConfigKeys.java | 9 +++++++++ .../src/main/resources/ozone-default.xml | 7 +++++++ .../server/ratis/ContainerStateMachine.java | 18 ++++++++++++++++-- .../server/ratis/XceiverServerRatis.java | 14 ++++++++++++++ .../container/keyvalue/helpers/ChunkUtils.java | 2 ++ .../keyvalue/impl/ChunkManagerImpl.java | 3 ++- 7 files changed, 56 insertions(+), 3 deletions(-) diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java index 56692afaa87..38eec61db85 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java @@ -79,6 +79,12 @@ public final class ScmConfigKeys { "dfs.container.ratis.segment.preallocated.size"; public static final int DFS_CONTAINER_RATIS_SEGMENT_PREALLOCATED_SIZE_DEFAULT = 128 * 1024 * 1024; + public static final String + DFS_CONTAINER_RATIS_STATEMACHINEDATA_SYNC_TIMEOUT = + "dfs.container.ratis.statemachinedata.sync.timeout"; + public static final TimeDuration + DFS_CONTAINER_RATIS_STATEMACHINEDATA_SYNC_TIMEOUT_DEFAULT = + TimeDuration.valueOf(10, TimeUnit.SECONDS); public static final String DFS_RATIS_CLIENT_REQUEST_TIMEOUT_DURATION_KEY = "dfs.ratis.client.request.timeout.duration"; public static final TimeDuration diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneConfigKeys.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneConfigKeys.java index 3b4f017c19d..54b1cf8d446 100644 --- 
a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneConfigKeys.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneConfigKeys.java @@ -229,6 +229,15 @@ public final class OzoneConfigKeys { = ScmConfigKeys.DFS_CONTAINER_RATIS_SEGMENT_PREALLOCATED_SIZE_KEY; public static final int DFS_CONTAINER_RATIS_SEGMENT_PREALLOCATED_SIZE_DEFAULT = ScmConfigKeys.DFS_CONTAINER_RATIS_SEGMENT_PREALLOCATED_SIZE_DEFAULT; + + // config key and default value for the stateMachineData sync (write) timeout + public static final String + DFS_CONTAINER_RATIS_STATEMACHINEDATA_SYNC_TIMEOUT = + ScmConfigKeys.DFS_CONTAINER_RATIS_STATEMACHINEDATA_SYNC_TIMEOUT; + public static final TimeDuration + DFS_CONTAINER_RATIS_STATEMACHINEDATA_SYNC_TIMEOUT_DEFAULT = + ScmConfigKeys.DFS_CONTAINER_RATIS_STATEMACHINEDATA_SYNC_TIMEOUT_DEFAULT; + public static final int DFS_CONTAINER_CHUNK_MAX_SIZE = ScmConfigKeys.OZONE_SCM_CHUNK_MAX_SIZE; public static final String DFS_CONTAINER_RATIS_DATANODE_STORAGE_DIR = diff --git a/hadoop-hdds/common/src/main/resources/ozone-default.xml b/hadoop-hdds/common/src/main/resources/ozone-default.xml index eb686626f4c..5ff60ebc456 100644 --- a/hadoop-hdds/common/src/main/resources/ozone-default.xml +++ b/hadoop-hdds/common/src/main/resources/ozone-default.xml @@ -52,6 +52,13 @@ running unit tests. + + dfs.container.ratis.statemachinedata.sync.timeout + 10s + OZONE, DEBUG, CONTAINER, RATIS + Timeout for StateMachine data writes by Ratis. 
+ + dfs.container.ratis.datanode.storage.dir diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/ContainerStateMachine.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/ContainerStateMachine.java index d5762bcbbf7..2a4a2278063 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/ContainerStateMachine.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/ContainerStateMachine.java @@ -307,9 +307,18 @@ public class ContainerStateMachine extends BaseStateMachine { () -> runCommand(requestProto), chunkExecutor); } writeChunkFutureMap.put(entryIndex, writeChunkFuture); + LOG.debug("writeChunk writeStateMachineData : blockId " + write.getBlockID() + + " logIndex " + entryIndex + " chunkName " + write.getChunkData() + .getChunkName()); // Remove the future once it finishes execution from the // writeChunkFutureMap. 
- writeChunkFuture.thenApply(r -> writeChunkFutureMap.remove(entryIndex)); + writeChunkFuture.thenApply(r -> { + writeChunkFutureMap.remove(entryIndex); + LOG.debug("writeChunk writeStateMachineData completed: blockId " + write + .getBlockID() + " logIndex " + entryIndex + " chunkName " + write + .getChunkData().getChunkName()); + return r; + }); return writeChunkFuture; } @@ -531,7 +540,12 @@ public class ContainerStateMachine extends BaseStateMachine { if (cmdType == Type.CreateContainer) { long containerID = requestProto.getContainerID(); future.thenApply( - r -> createContainerFutureMap.remove(containerID).complete(null)); + r -> { + createContainerFutureMap.remove(containerID).complete(null); + LOG.info("create Container Transaction completed for container " + + containerID + " log index " + index); + return r; + }); } future.thenAccept(m -> { diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/XceiverServerRatis.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/XceiverServerRatis.java index 599f821b8ea..a679e5eab9a 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/XceiverServerRatis.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/XceiverServerRatis.java @@ -186,6 +186,20 @@ public final class XceiverServerRatis implements XceiverServerSpi { RaftClientConfigKeys.Rpc .setRequestTimeout(properties, clientRequestTimeout); + // enable stateMachineData sync and set its timeout + RaftServerConfigKeys.Log.StateMachineData.setSync(properties, true); + timeUnit = OzoneConfigKeys. + DFS_CONTAINER_RATIS_STATEMACHINEDATA_SYNC_TIMEOUT_DEFAULT.getUnit(); + duration = conf.getTimeDuration( + OzoneConfigKeys.DFS_CONTAINER_RATIS_STATEMACHINEDATA_SYNC_TIMEOUT, + OzoneConfigKeys. 
+ DFS_CONTAINER_RATIS_STATEMACHINEDATA_SYNC_TIMEOUT_DEFAULT + .getDuration(), timeUnit); + final TimeDuration dataSyncTimeout = + TimeDuration.valueOf(duration, timeUnit); + RaftServerConfigKeys.Log.StateMachineData + .setSyncTimeout(properties, dataSyncTimeout); + // Set the server Request timeout timeUnit = OzoneConfigKeys.DFS_RATIS_SERVER_REQUEST_TIMEOUT_DURATION_DEFAULT .getUnit(); diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/helpers/ChunkUtils.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/helpers/ChunkUtils.java index 718f5ded6ef..8f9d5892176 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/helpers/ChunkUtils.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/helpers/ChunkUtils.java @@ -138,6 +138,8 @@ public final class ChunkUtils { } } } + log.debug("Write Chunk completed for chunkFile: {}, size {}", chunkFile, + data.length); } /** diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/impl/ChunkManagerImpl.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/impl/ChunkManagerImpl.java index c630e1990e1..cdd19dff0b5 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/impl/ChunkManagerImpl.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/impl/ChunkManagerImpl.java @@ -82,7 +82,8 @@ public class ChunkManagerImpl implements ChunkManager { chunkFile, info); File tmpChunkFile = getTmpChunkFile(chunkFile, info); - LOG.debug("writing chunk:{} chunk stage:{} chunk file:{} tmp chunk file", + LOG.debug( + "writing chunk:{} chunk stage:{} chunk file:{} tmp chunk file:{}", info.getChunkName(), stage, chunkFile, tmpChunkFile); switch (stage) {