HDDS-612. Even after setting hdds.scm.chillmode.enabled to false, SCM allocateblock fails with ChillModePrecheck exception. Contributed by Hanisha Koneru.

This commit is contained in:
Arpit Agarwal 2018-10-18 22:17:27 -07:00
parent 47ad98b2e1
commit dc27408043
6 changed files with 69 additions and 25 deletions

View File

@ -123,7 +123,7 @@ public class BlockManagerImpl implements EventHandler<Boolean>,
blockDeletingService =
new SCMBlockDeletingService(deletedBlockLog, containerManager,
nodeManager, eventPublisher, svcInterval, serviceTimeout, conf);
chillModePrecheck = new ChillModePrecheck();
chillModePrecheck = new ChillModePrecheck(conf);
}
/**

View File

@ -19,6 +19,8 @@
package org.apache.hadoop.hdds.scm.server;
import java.util.concurrent.atomic.AtomicBoolean;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdds.HddsConfigKeys;
import org.apache.hadoop.hdds.protocol.proto.HddsProtos.ScmOps;
import org.apache.hadoop.hdds.scm.exceptions.SCMException;
import org.apache.hadoop.hdds.scm.exceptions.SCMException.ResultCodes;
@ -29,9 +31,20 @@ import org.apache.hadoop.hdds.scm.server.SCMChillModeManager.ChillModeRestricted
* */
public class ChillModePrecheck implements Precheck<ScmOps> {
private AtomicBoolean inChillMode = new AtomicBoolean(true);
private AtomicBoolean inChillMode;
public static final String PRECHECK_TYPE = "ChillModePrecheck";
public ChillModePrecheck(Configuration conf) {
boolean chillModeEnabled = conf.getBoolean(
HddsConfigKeys.HDDS_SCM_CHILLMODE_ENABLED,
HddsConfigKeys.HDDS_SCM_CHILLMODE_ENABLED_DEFAULT);
if (chillModeEnabled) {
inChillMode = new AtomicBoolean(true);
} else {
inChillMode = new AtomicBoolean(false);
}
}
public boolean check(ScmOps op) throws SCMException {
if (inChillMode.get() && ChillModeRestrictedOps
.isRestrictedInChillMode(op)) {

View File

@ -58,6 +58,7 @@ public class SCMChillModeManager implements
private static final Logger LOG =
LoggerFactory.getLogger(SCMChillModeManager.class);
private final boolean isChillModeEnabled;
private AtomicBoolean inChillMode = new AtomicBoolean(true);
private AtomicLong containerWithMinReplicas = new AtomicLong(0);
private Map<String, ChillModeExitRule> exitRules = new HashMap(1);
@ -70,14 +71,17 @@ public class SCMChillModeManager implements
EventQueue eventQueue) {
this.config = conf;
this.eventPublisher = eventQueue;
exitRules.put(CONT_EXIT_RULE,
new ContainerChillModeRule(config, allContainers));
exitRules.put(DN_EXIT_RULE, new DataNodeChillModeRule(config));
if (!conf.getBoolean(HddsConfigKeys.HDDS_SCM_CHILLMODE_ENABLED,
HddsConfigKeys.HDDS_SCM_CHILLMODE_ENABLED_DEFAULT)) {
this.isChillModeEnabled = conf.getBoolean(
HddsConfigKeys.HDDS_SCM_CHILLMODE_ENABLED,
HddsConfigKeys.HDDS_SCM_CHILLMODE_ENABLED_DEFAULT);
if (isChillModeEnabled) {
exitRules.put(CONT_EXIT_RULE,
new ContainerChillModeRule(config, allContainers));
exitRules.put(DN_EXIT_RULE, new DataNodeChillModeRule(config));
emitChillModeStatus();
} else {
exitChillMode(eventQueue);
}
emitChillModeStatus();
}
/**
@ -85,7 +89,7 @@ public class SCMChillModeManager implements
*/
@VisibleForTesting
public void emitChillModeStatus() {
eventPublisher.fireEvent(SCMEvents.CHILL_MODE_STATUS, inChillMode.get());
eventPublisher.fireEvent(SCMEvents.CHILL_MODE_STATUS, getInChillMode());
}
private void validateChillModeExitRules(EventPublisher eventQueue) {
@ -99,7 +103,7 @@ public class SCMChillModeManager implements
/**
* Exit chill mode. It does following actions:
* 1. Set chill mode status to fale.
* 1. Set chill mode status to false.
* 2. Emits START_REPLICATION for ReplicationManager.
* 3. Cleanup resources.
* 4. Emit chill mode status.
@ -131,6 +135,9 @@ public class SCMChillModeManager implements
}
/**
 * Reports whether SCM is currently in chill mode.
 *
 * @return false whenever chill mode is disabled (the stored flag is not
 *         consulted in that case); otherwise the current value of the
 *         chill mode flag.
 */
public boolean getInChillMode() {
  // Short-circuit: the atomic flag is only read when chill mode is enabled.
  return isChillModeEnabled && inChillMode.get();
}
@ -218,7 +225,7 @@ public class SCMChillModeManager implements
}
}
});
if(inChillMode.get()) {
if(getInChillMode()) {
LOG.info("SCM in chill mode. {} % containers have at least one"
+ " reported replica.",
(containerWithMinReplicas.get() / maxContainer) * 100);
@ -268,7 +275,7 @@ public class SCMChillModeManager implements
return;
}
if(inChillMode.get()) {
if(getInChillMode()) {
registeredDnSet.add(reportsProto.getDatanodeDetails().getUuid());
registeredDns = registeredDnSet.size();
LOG.info("SCM in chill mode. {} DataNodes registered, {} required.",

View File

@ -86,12 +86,13 @@ public class SCMClientProtocolServer implements
private final InetSocketAddress clientRpcAddress;
private final StorageContainerManager scm;
private final OzoneConfiguration conf;
private ChillModePrecheck chillModePrecheck = new ChillModePrecheck();
private ChillModePrecheck chillModePrecheck;
public SCMClientProtocolServer(OzoneConfiguration conf,
StorageContainerManager scm) throws IOException {
this.scm = scm;
this.conf = conf;
chillModePrecheck = new ChillModePrecheck(conf);
final int handlerCount =
conf.getInt(OZONE_SCM_HANDLER_COUNT_KEY,
OZONE_SCM_HANDLER_COUNT_DEFAULT);
@ -357,8 +358,8 @@ public class SCMClientProtocolServer implements
* Set chill mode status based on SCMEvents.CHILL_MODE_STATUS event.
*/
@Override
public void onMessage(Boolean inChillMOde, EventPublisher publisher) {
chillModePrecheck.setInChillMode(inChillMOde);
public void onMessage(Boolean inChillMode, EventPublisher publisher) {
chillModePrecheck.setInChillMode(inChillMode);
}
/**

View File

@ -230,9 +230,7 @@ public final class StorageContainerManager extends ServiceRuntimeInfoImpl
ContainerReportHandler containerReportHandler =
new ContainerReportHandler(containerManager, scmNodeManager,
replicationStatus);
scmChillModeManager = new SCMChillModeManager(conf,
containerManager.getContainers(),
eventQueue);
PipelineActionEventHandler pipelineActionEventHandler =
new PipelineActionEventHandler();
@ -292,8 +290,6 @@ public final class StorageContainerManager extends ServiceRuntimeInfoImpl
eventQueue.addHandler(SCMEvents.CMD_STATUS_REPORT, cmdStatusReportHandler);
eventQueue.addHandler(SCMEvents.START_REPLICATION,
replicationStatus.getReplicationStatusListener());
eventQueue.addHandler(SCMEvents.CHILL_MODE_STATUS,
replicationStatus.getChillModeStatusListener());
eventQueue
.addHandler(SCMEvents.PENDING_DELETE_STATUS, pendingDeleteHandler);
eventQueue.addHandler(SCMEvents.DELETE_BLOCK_STATUS,
@ -301,13 +297,20 @@ public final class StorageContainerManager extends ServiceRuntimeInfoImpl
eventQueue.addHandler(SCMEvents.PIPELINE_ACTIONS,
pipelineActionEventHandler);
eventQueue.addHandler(SCMEvents.PIPELINE_CLOSE, pipelineCloseHandler);
eventQueue.addHandler(SCMEvents.NODE_REGISTRATION_CONT_REPORT,
scmChillModeManager);
eventQueue.addHandler(SCMEvents.CHILL_MODE_STATUS,
(BlockManagerImpl) scmBlockManager);
eventQueue.addHandler(SCMEvents.CHILL_MODE_STATUS, clientProtocolServer);
eventQueue.addHandler(SCMEvents.PIPELINE_REPORT, pipelineReportHandler);
eventQueue.addHandler(SCMEvents.CHILL_MODE_STATUS,
replicationStatus.getChillModeStatusListener());
eventQueue.addHandler(SCMEvents.CHILL_MODE_STATUS,
(BlockManagerImpl) scmBlockManager);
scmChillModeManager = new SCMChillModeManager(conf,
containerManager.getContainers(),
eventQueue);
eventQueue.addHandler(SCMEvents.NODE_REGISTRATION_CONT_REPORT,
scmChillModeManager);
registerMXBean();
}

View File

@ -229,7 +229,7 @@ public class TestScmChillMode {
}
@Test
@Test(timeout=300_000)
public void testSCMChillMode() throws Exception {
MiniOzoneCluster.Builder clusterBuilder = MiniOzoneCluster.newBuilder(conf)
.setHbInterval(1000)
@ -360,4 +360,24 @@ public class TestScmChillMode {
.getContainerWithPipeline(containers.get(0).getContainerID()));
}
/**
 * Verifies that SCM never reports chill mode when chill mode is disabled in
 * configuration, both on first start and after an SCM restart.
 *
 * @throws Exception if the cluster cannot be stopped, built or restarted.
 */
@Test(timeout = 300_000)
public void testSCMChillModeDisabled() throws Exception {
  cluster.stop();

  // With chill mode disabled, the cluster must not be in chill mode even
  // though fewer datanodes (1) are started than the configured minimum (3).
  conf.setBoolean(HddsConfigKeys.HDDS_SCM_CHILLMODE_ENABLED, false);
  conf.setInt(HddsConfigKeys.HDDS_SCM_CHILLMODE_MIN_DATANODE, 3);
  builder = MiniOzoneCluster.newBuilder(conf)
      .setHbInterval(1000)
      .setHbProcessorInterval(500)
      .setNumDatanodes(1);
  cluster = builder.build();
  StorageContainerManager scm = cluster.getStorageContainerManager();
  assertFalse(scm.isInChillMode());

  // Even on SCM restart, cluster should be out of chill mode immediately.
  cluster.restartStorageContainerManager();
  // NOTE(review): the restart may replace the SCM instance held by the
  // cluster, so re-fetch the handle instead of asserting on the reference
  // captured before the restart.
  scm = cluster.getStorageContainerManager();
  assertFalse(scm.isInChillMode());
}
}