From dc2740804330f555dd3262b1db33add7c7ab4ff4 Mon Sep 17 00:00:00 2001 From: Arpit Agarwal Date: Thu, 18 Oct 2018 22:17:27 -0700 Subject: [PATCH] HDDS-612. Even after setting hdds.scm.chillmode.enabled to false, SCM allocateblock fails with ChillModePrecheck exception. Contributed by Hanisha Koneru. --- .../hdds/scm/block/BlockManagerImpl.java | 2 +- .../hdds/scm/server/ChillModePrecheck.java | 15 ++++++++++- .../hdds/scm/server/SCMChillModeManager.java | 27 ++++++++++++------- .../scm/server/SCMClientProtocolServer.java | 7 ++--- .../scm/server/StorageContainerManager.java | 21 ++++++++------- .../hadoop/ozone/om/TestScmChillMode.java | 22 ++++++++++++++- 6 files changed, 69 insertions(+), 25 deletions(-) diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/BlockManagerImpl.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/BlockManagerImpl.java index b791aaddc5e..246e4f6e697 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/BlockManagerImpl.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/BlockManagerImpl.java @@ -123,7 +123,7 @@ public BlockManagerImpl(final Configuration conf, blockDeletingService = new SCMBlockDeletingService(deletedBlockLog, containerManager, nodeManager, eventPublisher, svcInterval, serviceTimeout, conf); - chillModePrecheck = new ChillModePrecheck(); + chillModePrecheck = new ChillModePrecheck(conf); } /** diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/ChillModePrecheck.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/ChillModePrecheck.java index b92413e80ca..a3f4c903af8 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/ChillModePrecheck.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/ChillModePrecheck.java @@ -19,6 +19,8 @@ package org.apache.hadoop.hdds.scm.server; import 
java.util.concurrent.atomic.AtomicBoolean; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hdds.HddsConfigKeys; import org.apache.hadoop.hdds.protocol.proto.HddsProtos.ScmOps; import org.apache.hadoop.hdds.scm.exceptions.SCMException; import org.apache.hadoop.hdds.scm.exceptions.SCMException.ResultCodes; @@ -29,9 +31,20 @@ * */ public class ChillModePrecheck implements Precheck { - private AtomicBoolean inChillMode = new AtomicBoolean(true); + private AtomicBoolean inChillMode; public static final String PRECHECK_TYPE = "ChillModePrecheck"; + public ChillModePrecheck(Configuration conf) { + boolean chillModeEnabled = conf.getBoolean( + HddsConfigKeys.HDDS_SCM_CHILLMODE_ENABLED, + HddsConfigKeys.HDDS_SCM_CHILLMODE_ENABLED_DEFAULT); + if (chillModeEnabled) { + inChillMode = new AtomicBoolean(true); + } else { + inChillMode = new AtomicBoolean(false); + } + } + public boolean check(ScmOps op) throws SCMException { if (inChillMode.get() && ChillModeRestrictedOps .isRestrictedInChillMode(op)) { diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMChillModeManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMChillModeManager.java index 7135267cdcc..6a342d40f97 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMChillModeManager.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMChillModeManager.java @@ -58,6 +58,7 @@ public class SCMChillModeManager implements private static final Logger LOG = LoggerFactory.getLogger(SCMChillModeManager.class); + private final boolean isChillModeEnabled; private AtomicBoolean inChillMode = new AtomicBoolean(true); private AtomicLong containerWithMinReplicas = new AtomicLong(0); private Map exitRules = new HashMap(1); @@ -70,14 +71,17 @@ public class SCMChillModeManager implements EventQueue eventQueue) { this.config = conf; this.eventPublisher = eventQueue; - 
exitRules.put(CONT_EXIT_RULE, - new ContainerChillModeRule(config, allContainers)); - exitRules.put(DN_EXIT_RULE, new DataNodeChillModeRule(config)); - if (!conf.getBoolean(HddsConfigKeys.HDDS_SCM_CHILLMODE_ENABLED, - HddsConfigKeys.HDDS_SCM_CHILLMODE_ENABLED_DEFAULT)) { + this.isChillModeEnabled = conf.getBoolean( + HddsConfigKeys.HDDS_SCM_CHILLMODE_ENABLED, + HddsConfigKeys.HDDS_SCM_CHILLMODE_ENABLED_DEFAULT); + if (isChillModeEnabled) { + exitRules.put(CONT_EXIT_RULE, + new ContainerChillModeRule(config, allContainers)); + exitRules.put(DN_EXIT_RULE, new DataNodeChillModeRule(config)); + emitChillModeStatus(); + } else { exitChillMode(eventQueue); } - emitChillModeStatus(); } /** @@ -85,7 +89,7 @@ public class SCMChillModeManager implements */ @VisibleForTesting public void emitChillModeStatus() { - eventPublisher.fireEvent(SCMEvents.CHILL_MODE_STATUS, inChillMode.get()); + eventPublisher.fireEvent(SCMEvents.CHILL_MODE_STATUS, getInChillMode()); } private void validateChillModeExitRules(EventPublisher eventQueue) { @@ -99,7 +103,7 @@ private void validateChillModeExitRules(EventPublisher eventQueue) { /** * Exit chill mode. It does following actions: - * 1. Set chill mode status to fale. + * 1. Set chill mode status to false. * 2. Emits START_REPLICATION for ReplicationManager. * 3. Cleanup resources. * 4. Emit chill mode status. @@ -131,6 +135,9 @@ public void onMessage( } public boolean getInChillMode() { + if (!isChillModeEnabled) { + return false; + } return inChillMode.get(); } @@ -218,7 +225,7 @@ public void process(NodeRegistrationContainerReport reportsProto) { } } }); - if(inChillMode.get()) { + if(getInChillMode()) { LOG.info("SCM in chill mode. 
{} % containers have at least one" + " reported replica.", (containerWithMinReplicas.get() / maxContainer) * 100); @@ -268,7 +275,7 @@ public void process(NodeRegistrationContainerReport reportsProto) { return; } - if(inChillMode.get()) { + if(getInChillMode()) { registeredDnSet.add(reportsProto.getDatanodeDetails().getUuid()); registeredDns = registeredDnSet.size(); LOG.info("SCM in chill mode. {} DataNodes registered, {} required.", diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMClientProtocolServer.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMClientProtocolServer.java index badcec792f8..89a6c81ebfc 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMClientProtocolServer.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMClientProtocolServer.java @@ -86,12 +86,13 @@ public class SCMClientProtocolServer implements private final InetSocketAddress clientRpcAddress; private final StorageContainerManager scm; private final OzoneConfiguration conf; - private ChillModePrecheck chillModePrecheck = new ChillModePrecheck(); + private ChillModePrecheck chillModePrecheck; public SCMClientProtocolServer(OzoneConfiguration conf, StorageContainerManager scm) throws IOException { this.scm = scm; this.conf = conf; + chillModePrecheck = new ChillModePrecheck(conf); final int handlerCount = conf.getInt(OZONE_SCM_HANDLER_COUNT_KEY, OZONE_SCM_HANDLER_COUNT_DEFAULT); @@ -357,8 +358,8 @@ public StorageContainerManager getScm() { * Set chill mode status based on SCMEvents.CHILL_MODE_STATUS event. 
*/ @Override - public void onMessage(Boolean inChillMOde, EventPublisher publisher) { - chillModePrecheck.setInChillMode(inChillMOde); + public void onMessage(Boolean inChillMode, EventPublisher publisher) { + chillModePrecheck.setInChillMode(inChillMode); } /** diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java index 22039e54cd2..242310f588b 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java @@ -230,9 +230,7 @@ private StorageContainerManager(OzoneConfiguration conf) throws IOException { ContainerReportHandler containerReportHandler = new ContainerReportHandler(containerManager, scmNodeManager, replicationStatus); - scmChillModeManager = new SCMChillModeManager(conf, - containerManager.getContainers(), - eventQueue); + PipelineActionEventHandler pipelineActionEventHandler = new PipelineActionEventHandler(); @@ -292,8 +290,6 @@ private StorageContainerManager(OzoneConfiguration conf) throws IOException { eventQueue.addHandler(SCMEvents.CMD_STATUS_REPORT, cmdStatusReportHandler); eventQueue.addHandler(SCMEvents.START_REPLICATION, replicationStatus.getReplicationStatusListener()); - eventQueue.addHandler(SCMEvents.CHILL_MODE_STATUS, - replicationStatus.getChillModeStatusListener()); eventQueue .addHandler(SCMEvents.PENDING_DELETE_STATUS, pendingDeleteHandler); eventQueue.addHandler(SCMEvents.DELETE_BLOCK_STATUS, @@ -301,13 +297,20 @@ private StorageContainerManager(OzoneConfiguration conf) throws IOException { eventQueue.addHandler(SCMEvents.PIPELINE_ACTIONS, pipelineActionEventHandler); eventQueue.addHandler(SCMEvents.PIPELINE_CLOSE, pipelineCloseHandler); - eventQueue.addHandler(SCMEvents.NODE_REGISTRATION_CONT_REPORT, - 
scmChillModeManager); - eventQueue.addHandler(SCMEvents.CHILL_MODE_STATUS, - (BlockManagerImpl) scmBlockManager); + eventQueue.addHandler(SCMEvents.CHILL_MODE_STATUS, clientProtocolServer); eventQueue.addHandler(SCMEvents.PIPELINE_REPORT, pipelineReportHandler); + eventQueue.addHandler(SCMEvents.CHILL_MODE_STATUS, + replicationStatus.getChillModeStatusListener()); + eventQueue.addHandler(SCMEvents.CHILL_MODE_STATUS, + (BlockManagerImpl) scmBlockManager); + scmChillModeManager = new SCMChillModeManager(conf, + containerManager.getContainers(), + eventQueue); + + eventQueue.addHandler(SCMEvents.NODE_REGISTRATION_CONT_REPORT, + scmChillModeManager); registerMXBean(); } diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestScmChillMode.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestScmChillMode.java index ed50a9f649a..dfcda5f2698 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestScmChillMode.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestScmChillMode.java @@ -229,7 +229,7 @@ public void testIsScmInChillModeAndForceExit() throws Exception { } - @Test + @Test(timeout=300_000) public void testSCMChillMode() throws Exception { MiniOzoneCluster.Builder clusterBuilder = MiniOzoneCluster.newBuilder(conf) .setHbInterval(1000) @@ -360,4 +360,24 @@ public void testSCMChillModeRestrictedOp() throws Exception { .getContainerWithPipeline(containers.get(0).getContainerID())); } + @Test(timeout = 300_000) + public void testSCMChillModeDisabled() throws Exception { + cluster.stop(); + + // If chill mode is disabled, cluster should not be in chill mode even if + // min number of datanodes are not started. 
+ conf.setBoolean(HddsConfigKeys.HDDS_SCM_CHILLMODE_ENABLED, false); + conf.setInt(HddsConfigKeys.HDDS_SCM_CHILLMODE_MIN_DATANODE, 3); + builder = MiniOzoneCluster.newBuilder(conf) + .setHbInterval(1000) + .setHbProcessorInterval(500) + .setNumDatanodes(1); + cluster = builder.build(); + StorageContainerManager scm = cluster.getStorageContainerManager(); + assertFalse(scm.isInChillMode()); + + // Even on SCM restart, cluster should be out of chill mode immediately. + cluster.restartStorageContainerManager(); + assertFalse(cluster.getStorageContainerManager().isInChillMode()); + } }