HDDS-612. Even after setting hdds.scm.chillmode.enabled to false, SCM allocateblock fails with ChillModePrecheck exception. Contributed by Hanisha Koneru.

This commit is contained in:
Arpit Agarwal 2018-10-18 22:17:27 -07:00
parent 47ad98b2e1
commit dc27408043
6 changed files with 69 additions and 25 deletions

View File

@ -123,7 +123,7 @@ public class BlockManagerImpl implements EventHandler<Boolean>,
blockDeletingService = blockDeletingService =
new SCMBlockDeletingService(deletedBlockLog, containerManager, new SCMBlockDeletingService(deletedBlockLog, containerManager,
nodeManager, eventPublisher, svcInterval, serviceTimeout, conf); nodeManager, eventPublisher, svcInterval, serviceTimeout, conf);
chillModePrecheck = new ChillModePrecheck(); chillModePrecheck = new ChillModePrecheck(conf);
} }
/** /**

View File

@ -19,6 +19,8 @@
package org.apache.hadoop.hdds.scm.server; package org.apache.hadoop.hdds.scm.server;
import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicBoolean;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdds.HddsConfigKeys;
import org.apache.hadoop.hdds.protocol.proto.HddsProtos.ScmOps; import org.apache.hadoop.hdds.protocol.proto.HddsProtos.ScmOps;
import org.apache.hadoop.hdds.scm.exceptions.SCMException; import org.apache.hadoop.hdds.scm.exceptions.SCMException;
import org.apache.hadoop.hdds.scm.exceptions.SCMException.ResultCodes; import org.apache.hadoop.hdds.scm.exceptions.SCMException.ResultCodes;
@ -29,9 +31,20 @@ import org.apache.hadoop.hdds.scm.server.SCMChillModeManager.ChillModeRestricted
* */ * */
public class ChillModePrecheck implements Precheck<ScmOps> { public class ChillModePrecheck implements Precheck<ScmOps> {
private AtomicBoolean inChillMode = new AtomicBoolean(true); private AtomicBoolean inChillMode;
public static final String PRECHECK_TYPE = "ChillModePrecheck"; public static final String PRECHECK_TYPE = "ChillModePrecheck";
public ChillModePrecheck(Configuration conf) {
boolean chillModeEnabled = conf.getBoolean(
HddsConfigKeys.HDDS_SCM_CHILLMODE_ENABLED,
HddsConfigKeys.HDDS_SCM_CHILLMODE_ENABLED_DEFAULT);
if (chillModeEnabled) {
inChillMode = new AtomicBoolean(true);
} else {
inChillMode = new AtomicBoolean(false);
}
}
public boolean check(ScmOps op) throws SCMException { public boolean check(ScmOps op) throws SCMException {
if (inChillMode.get() && ChillModeRestrictedOps if (inChillMode.get() && ChillModeRestrictedOps
.isRestrictedInChillMode(op)) { .isRestrictedInChillMode(op)) {

View File

@ -58,6 +58,7 @@ public class SCMChillModeManager implements
private static final Logger LOG = private static final Logger LOG =
LoggerFactory.getLogger(SCMChillModeManager.class); LoggerFactory.getLogger(SCMChillModeManager.class);
private final boolean isChillModeEnabled;
private AtomicBoolean inChillMode = new AtomicBoolean(true); private AtomicBoolean inChillMode = new AtomicBoolean(true);
private AtomicLong containerWithMinReplicas = new AtomicLong(0); private AtomicLong containerWithMinReplicas = new AtomicLong(0);
private Map<String, ChillModeExitRule> exitRules = new HashMap(1); private Map<String, ChillModeExitRule> exitRules = new HashMap(1);
@ -70,14 +71,17 @@ public class SCMChillModeManager implements
EventQueue eventQueue) { EventQueue eventQueue) {
this.config = conf; this.config = conf;
this.eventPublisher = eventQueue; this.eventPublisher = eventQueue;
exitRules.put(CONT_EXIT_RULE, this.isChillModeEnabled = conf.getBoolean(
new ContainerChillModeRule(config, allContainers)); HddsConfigKeys.HDDS_SCM_CHILLMODE_ENABLED,
exitRules.put(DN_EXIT_RULE, new DataNodeChillModeRule(config)); HddsConfigKeys.HDDS_SCM_CHILLMODE_ENABLED_DEFAULT);
if (!conf.getBoolean(HddsConfigKeys.HDDS_SCM_CHILLMODE_ENABLED, if (isChillModeEnabled) {
HddsConfigKeys.HDDS_SCM_CHILLMODE_ENABLED_DEFAULT)) { exitRules.put(CONT_EXIT_RULE,
new ContainerChillModeRule(config, allContainers));
exitRules.put(DN_EXIT_RULE, new DataNodeChillModeRule(config));
emitChillModeStatus();
} else {
exitChillMode(eventQueue); exitChillMode(eventQueue);
} }
emitChillModeStatus();
} }
/** /**
@ -85,7 +89,7 @@ public class SCMChillModeManager implements
*/ */
@VisibleForTesting @VisibleForTesting
public void emitChillModeStatus() { public void emitChillModeStatus() {
eventPublisher.fireEvent(SCMEvents.CHILL_MODE_STATUS, inChillMode.get()); eventPublisher.fireEvent(SCMEvents.CHILL_MODE_STATUS, getInChillMode());
} }
private void validateChillModeExitRules(EventPublisher eventQueue) { private void validateChillModeExitRules(EventPublisher eventQueue) {
@ -99,7 +103,7 @@ public class SCMChillModeManager implements
/** /**
* Exit chill mode. It does following actions: * Exit chill mode. It does following actions:
* 1. Set chill mode status to fale. * 1. Set chill mode status to false.
* 2. Emits START_REPLICATION for ReplicationManager. * 2. Emits START_REPLICATION for ReplicationManager.
* 3. Cleanup resources. * 3. Cleanup resources.
* 4. Emit chill mode status. * 4. Emit chill mode status.
@ -131,6 +135,9 @@ public class SCMChillModeManager implements
} }
public boolean getInChillMode() { public boolean getInChillMode() {
if (!isChillModeEnabled) {
return false;
}
return inChillMode.get(); return inChillMode.get();
} }
@ -218,7 +225,7 @@ public class SCMChillModeManager implements
} }
} }
}); });
if(inChillMode.get()) { if(getInChillMode()) {
LOG.info("SCM in chill mode. {} % containers have at least one" LOG.info("SCM in chill mode. {} % containers have at least one"
+ " reported replica.", + " reported replica.",
(containerWithMinReplicas.get() / maxContainer) * 100); (containerWithMinReplicas.get() / maxContainer) * 100);
@ -268,7 +275,7 @@ public class SCMChillModeManager implements
return; return;
} }
if(inChillMode.get()) { if(getInChillMode()) {
registeredDnSet.add(reportsProto.getDatanodeDetails().getUuid()); registeredDnSet.add(reportsProto.getDatanodeDetails().getUuid());
registeredDns = registeredDnSet.size(); registeredDns = registeredDnSet.size();
LOG.info("SCM in chill mode. {} DataNodes registered, {} required.", LOG.info("SCM in chill mode. {} DataNodes registered, {} required.",

View File

@ -86,12 +86,13 @@ public class SCMClientProtocolServer implements
private final InetSocketAddress clientRpcAddress; private final InetSocketAddress clientRpcAddress;
private final StorageContainerManager scm; private final StorageContainerManager scm;
private final OzoneConfiguration conf; private final OzoneConfiguration conf;
private ChillModePrecheck chillModePrecheck = new ChillModePrecheck(); private ChillModePrecheck chillModePrecheck;
public SCMClientProtocolServer(OzoneConfiguration conf, public SCMClientProtocolServer(OzoneConfiguration conf,
StorageContainerManager scm) throws IOException { StorageContainerManager scm) throws IOException {
this.scm = scm; this.scm = scm;
this.conf = conf; this.conf = conf;
chillModePrecheck = new ChillModePrecheck(conf);
final int handlerCount = final int handlerCount =
conf.getInt(OZONE_SCM_HANDLER_COUNT_KEY, conf.getInt(OZONE_SCM_HANDLER_COUNT_KEY,
OZONE_SCM_HANDLER_COUNT_DEFAULT); OZONE_SCM_HANDLER_COUNT_DEFAULT);
@ -357,8 +358,8 @@ public class SCMClientProtocolServer implements
* Set chill mode status based on SCMEvents.CHILL_MODE_STATUS event. * Set chill mode status based on SCMEvents.CHILL_MODE_STATUS event.
*/ */
@Override @Override
public void onMessage(Boolean inChillMOde, EventPublisher publisher) { public void onMessage(Boolean inChillMode, EventPublisher publisher) {
chillModePrecheck.setInChillMode(inChillMOde); chillModePrecheck.setInChillMode(inChillMode);
} }
/** /**

View File

@ -230,9 +230,7 @@ public final class StorageContainerManager extends ServiceRuntimeInfoImpl
ContainerReportHandler containerReportHandler = ContainerReportHandler containerReportHandler =
new ContainerReportHandler(containerManager, scmNodeManager, new ContainerReportHandler(containerManager, scmNodeManager,
replicationStatus); replicationStatus);
scmChillModeManager = new SCMChillModeManager(conf,
containerManager.getContainers(),
eventQueue);
PipelineActionEventHandler pipelineActionEventHandler = PipelineActionEventHandler pipelineActionEventHandler =
new PipelineActionEventHandler(); new PipelineActionEventHandler();
@ -292,8 +290,6 @@ public final class StorageContainerManager extends ServiceRuntimeInfoImpl
eventQueue.addHandler(SCMEvents.CMD_STATUS_REPORT, cmdStatusReportHandler); eventQueue.addHandler(SCMEvents.CMD_STATUS_REPORT, cmdStatusReportHandler);
eventQueue.addHandler(SCMEvents.START_REPLICATION, eventQueue.addHandler(SCMEvents.START_REPLICATION,
replicationStatus.getReplicationStatusListener()); replicationStatus.getReplicationStatusListener());
eventQueue.addHandler(SCMEvents.CHILL_MODE_STATUS,
replicationStatus.getChillModeStatusListener());
eventQueue eventQueue
.addHandler(SCMEvents.PENDING_DELETE_STATUS, pendingDeleteHandler); .addHandler(SCMEvents.PENDING_DELETE_STATUS, pendingDeleteHandler);
eventQueue.addHandler(SCMEvents.DELETE_BLOCK_STATUS, eventQueue.addHandler(SCMEvents.DELETE_BLOCK_STATUS,
@ -301,13 +297,20 @@ public final class StorageContainerManager extends ServiceRuntimeInfoImpl
eventQueue.addHandler(SCMEvents.PIPELINE_ACTIONS, eventQueue.addHandler(SCMEvents.PIPELINE_ACTIONS,
pipelineActionEventHandler); pipelineActionEventHandler);
eventQueue.addHandler(SCMEvents.PIPELINE_CLOSE, pipelineCloseHandler); eventQueue.addHandler(SCMEvents.PIPELINE_CLOSE, pipelineCloseHandler);
eventQueue.addHandler(SCMEvents.NODE_REGISTRATION_CONT_REPORT,
scmChillModeManager);
eventQueue.addHandler(SCMEvents.CHILL_MODE_STATUS,
(BlockManagerImpl) scmBlockManager);
eventQueue.addHandler(SCMEvents.CHILL_MODE_STATUS, clientProtocolServer); eventQueue.addHandler(SCMEvents.CHILL_MODE_STATUS, clientProtocolServer);
eventQueue.addHandler(SCMEvents.PIPELINE_REPORT, pipelineReportHandler); eventQueue.addHandler(SCMEvents.PIPELINE_REPORT, pipelineReportHandler);
eventQueue.addHandler(SCMEvents.CHILL_MODE_STATUS,
replicationStatus.getChillModeStatusListener());
eventQueue.addHandler(SCMEvents.CHILL_MODE_STATUS,
(BlockManagerImpl) scmBlockManager);
scmChillModeManager = new SCMChillModeManager(conf,
containerManager.getContainers(),
eventQueue);
eventQueue.addHandler(SCMEvents.NODE_REGISTRATION_CONT_REPORT,
scmChillModeManager);
registerMXBean(); registerMXBean();
} }

View File

@ -229,7 +229,7 @@ public class TestScmChillMode {
} }
@Test @Test(timeout=300_000)
public void testSCMChillMode() throws Exception { public void testSCMChillMode() throws Exception {
MiniOzoneCluster.Builder clusterBuilder = MiniOzoneCluster.newBuilder(conf) MiniOzoneCluster.Builder clusterBuilder = MiniOzoneCluster.newBuilder(conf)
.setHbInterval(1000) .setHbInterval(1000)
@ -360,4 +360,24 @@ public class TestScmChillMode {
.getContainerWithPipeline(containers.get(0).getContainerID())); .getContainerWithPipeline(containers.get(0).getContainerID()));
} }
/**
 * Checks that a cluster built with chill mode disabled reports that it is
 * not in chill mode, both right after startup and right after an SCM
 * restart, even though fewer datanodes are started (1) than the configured
 * chill mode minimum (3).
 *
 * @throws Exception if stopping, building or restarting the cluster fails.
 */
@Test(timeout = 300_000)
public void testSCMChillModeDisabled() throws Exception {
  // Replace the cluster held by the test fixture with one that uses the
  // modified configuration below.
  cluster.stop();

  // If chill mode is disabled, cluster should not be in chill mode even if
  // min number of datanodes are not started.
  conf.setBoolean(HddsConfigKeys.HDDS_SCM_CHILLMODE_ENABLED, false);
  conf.setInt(HddsConfigKeys.HDDS_SCM_CHILLMODE_MIN_DATANODE, 3);
  builder = MiniOzoneCluster.newBuilder(conf)
      .setHbInterval(1000)
      .setHbProcessorInterval(500)
      .setNumDatanodes(1);
  cluster = builder.build();
  assertFalse(cluster.getStorageContainerManager().isInChillMode());

  // Even on SCM restart, cluster should be out of chill mode immediately.
  cluster.restartStorageContainerManager();
  // Ask the cluster for its SCM again instead of reusing a reference taken
  // before the restart, so the assertion targets the SCM the cluster holds
  // now. NOTE(review): restartStorageContainerManager() presumably swaps in
  // a new SCM instance - confirm; re-fetching is correct either way.
  assertFalse(cluster.getStorageContainerManager().isInChillMode());
}
} }