HDDS-612. Even after setting hdds.scm.chillmode.enabled to false, SCM allocateblock fails with ChillModePrecheck exception. Contributed by Hanisha Koneru.
This commit is contained in:
parent
47ad98b2e1
commit
dc27408043
|
@ -123,7 +123,7 @@ public class BlockManagerImpl implements EventHandler<Boolean>,
|
|||
blockDeletingService =
|
||||
new SCMBlockDeletingService(deletedBlockLog, containerManager,
|
||||
nodeManager, eventPublisher, svcInterval, serviceTimeout, conf);
|
||||
chillModePrecheck = new ChillModePrecheck();
|
||||
chillModePrecheck = new ChillModePrecheck(conf);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -19,6 +19,8 @@
|
|||
package org.apache.hadoop.hdds.scm.server;
|
||||
|
||||
import java.util.concurrent.atomic.AtomicBoolean;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.hdds.HddsConfigKeys;
|
||||
import org.apache.hadoop.hdds.protocol.proto.HddsProtos.ScmOps;
|
||||
import org.apache.hadoop.hdds.scm.exceptions.SCMException;
|
||||
import org.apache.hadoop.hdds.scm.exceptions.SCMException.ResultCodes;
|
||||
|
@ -29,9 +31,20 @@ import org.apache.hadoop.hdds.scm.server.SCMChillModeManager.ChillModeRestricted
|
|||
* */
|
||||
public class ChillModePrecheck implements Precheck<ScmOps> {
|
||||
|
||||
private AtomicBoolean inChillMode = new AtomicBoolean(true);
|
||||
private AtomicBoolean inChillMode;
|
||||
public static final String PRECHECK_TYPE = "ChillModePrecheck";
|
||||
|
||||
public ChillModePrecheck(Configuration conf) {
|
||||
boolean chillModeEnabled = conf.getBoolean(
|
||||
HddsConfigKeys.HDDS_SCM_CHILLMODE_ENABLED,
|
||||
HddsConfigKeys.HDDS_SCM_CHILLMODE_ENABLED_DEFAULT);
|
||||
if (chillModeEnabled) {
|
||||
inChillMode = new AtomicBoolean(true);
|
||||
} else {
|
||||
inChillMode = new AtomicBoolean(false);
|
||||
}
|
||||
}
|
||||
|
||||
public boolean check(ScmOps op) throws SCMException {
|
||||
if (inChillMode.get() && ChillModeRestrictedOps
|
||||
.isRestrictedInChillMode(op)) {
|
||||
|
|
|
@ -58,6 +58,7 @@ public class SCMChillModeManager implements
|
|||
|
||||
private static final Logger LOG =
|
||||
LoggerFactory.getLogger(SCMChillModeManager.class);
|
||||
private final boolean isChillModeEnabled;
|
||||
private AtomicBoolean inChillMode = new AtomicBoolean(true);
|
||||
private AtomicLong containerWithMinReplicas = new AtomicLong(0);
|
||||
private Map<String, ChillModeExitRule> exitRules = new HashMap(1);
|
||||
|
@ -70,14 +71,17 @@ public class SCMChillModeManager implements
|
|||
EventQueue eventQueue) {
|
||||
this.config = conf;
|
||||
this.eventPublisher = eventQueue;
|
||||
exitRules.put(CONT_EXIT_RULE,
|
||||
new ContainerChillModeRule(config, allContainers));
|
||||
exitRules.put(DN_EXIT_RULE, new DataNodeChillModeRule(config));
|
||||
if (!conf.getBoolean(HddsConfigKeys.HDDS_SCM_CHILLMODE_ENABLED,
|
||||
HddsConfigKeys.HDDS_SCM_CHILLMODE_ENABLED_DEFAULT)) {
|
||||
this.isChillModeEnabled = conf.getBoolean(
|
||||
HddsConfigKeys.HDDS_SCM_CHILLMODE_ENABLED,
|
||||
HddsConfigKeys.HDDS_SCM_CHILLMODE_ENABLED_DEFAULT);
|
||||
if (isChillModeEnabled) {
|
||||
exitRules.put(CONT_EXIT_RULE,
|
||||
new ContainerChillModeRule(config, allContainers));
|
||||
exitRules.put(DN_EXIT_RULE, new DataNodeChillModeRule(config));
|
||||
emitChillModeStatus();
|
||||
} else {
|
||||
exitChillMode(eventQueue);
|
||||
}
|
||||
emitChillModeStatus();
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -85,7 +89,7 @@ public class SCMChillModeManager implements
|
|||
*/
|
||||
@VisibleForTesting
|
||||
public void emitChillModeStatus() {
|
||||
eventPublisher.fireEvent(SCMEvents.CHILL_MODE_STATUS, inChillMode.get());
|
||||
eventPublisher.fireEvent(SCMEvents.CHILL_MODE_STATUS, getInChillMode());
|
||||
}
|
||||
|
||||
private void validateChillModeExitRules(EventPublisher eventQueue) {
|
||||
|
@ -99,7 +103,7 @@ public class SCMChillModeManager implements
|
|||
|
||||
/**
|
||||
* Exit chill mode. It does following actions:
|
||||
* 1. Set chill mode status to fale.
|
||||
* 1. Set chill mode status to false.
|
||||
* 2. Emits START_REPLICATION for ReplicationManager.
|
||||
* 3. Cleanup resources.
|
||||
* 4. Emit chill mode status.
|
||||
|
@ -131,6 +135,9 @@ public class SCMChillModeManager implements
|
|||
}
|
||||
|
||||
public boolean getInChillMode() {
|
||||
if (!isChillModeEnabled) {
|
||||
return false;
|
||||
}
|
||||
return inChillMode.get();
|
||||
}
|
||||
|
||||
|
@ -218,7 +225,7 @@ public class SCMChillModeManager implements
|
|||
}
|
||||
}
|
||||
});
|
||||
if(inChillMode.get()) {
|
||||
if(getInChillMode()) {
|
||||
LOG.info("SCM in chill mode. {} % containers have at least one"
|
||||
+ " reported replica.",
|
||||
(containerWithMinReplicas.get() / maxContainer) * 100);
|
||||
|
@ -268,7 +275,7 @@ public class SCMChillModeManager implements
|
|||
return;
|
||||
}
|
||||
|
||||
if(inChillMode.get()) {
|
||||
if(getInChillMode()) {
|
||||
registeredDnSet.add(reportsProto.getDatanodeDetails().getUuid());
|
||||
registeredDns = registeredDnSet.size();
|
||||
LOG.info("SCM in chill mode. {} DataNodes registered, {} required.",
|
||||
|
|
|
@ -86,12 +86,13 @@ public class SCMClientProtocolServer implements
|
|||
private final InetSocketAddress clientRpcAddress;
|
||||
private final StorageContainerManager scm;
|
||||
private final OzoneConfiguration conf;
|
||||
private ChillModePrecheck chillModePrecheck = new ChillModePrecheck();
|
||||
private ChillModePrecheck chillModePrecheck;
|
||||
|
||||
public SCMClientProtocolServer(OzoneConfiguration conf,
|
||||
StorageContainerManager scm) throws IOException {
|
||||
this.scm = scm;
|
||||
this.conf = conf;
|
||||
chillModePrecheck = new ChillModePrecheck(conf);
|
||||
final int handlerCount =
|
||||
conf.getInt(OZONE_SCM_HANDLER_COUNT_KEY,
|
||||
OZONE_SCM_HANDLER_COUNT_DEFAULT);
|
||||
|
@ -357,8 +358,8 @@ public class SCMClientProtocolServer implements
|
|||
* Set chill mode status based on SCMEvents.CHILL_MODE_STATUS event.
|
||||
*/
|
||||
@Override
|
||||
public void onMessage(Boolean inChillMOde, EventPublisher publisher) {
|
||||
chillModePrecheck.setInChillMode(inChillMOde);
|
||||
public void onMessage(Boolean inChillMode, EventPublisher publisher) {
|
||||
chillModePrecheck.setInChillMode(inChillMode);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -230,9 +230,7 @@ public final class StorageContainerManager extends ServiceRuntimeInfoImpl
|
|||
ContainerReportHandler containerReportHandler =
|
||||
new ContainerReportHandler(containerManager, scmNodeManager,
|
||||
replicationStatus);
|
||||
scmChillModeManager = new SCMChillModeManager(conf,
|
||||
containerManager.getContainers(),
|
||||
eventQueue);
|
||||
|
||||
PipelineActionEventHandler pipelineActionEventHandler =
|
||||
new PipelineActionEventHandler();
|
||||
|
||||
|
@ -292,8 +290,6 @@ public final class StorageContainerManager extends ServiceRuntimeInfoImpl
|
|||
eventQueue.addHandler(SCMEvents.CMD_STATUS_REPORT, cmdStatusReportHandler);
|
||||
eventQueue.addHandler(SCMEvents.START_REPLICATION,
|
||||
replicationStatus.getReplicationStatusListener());
|
||||
eventQueue.addHandler(SCMEvents.CHILL_MODE_STATUS,
|
||||
replicationStatus.getChillModeStatusListener());
|
||||
eventQueue
|
||||
.addHandler(SCMEvents.PENDING_DELETE_STATUS, pendingDeleteHandler);
|
||||
eventQueue.addHandler(SCMEvents.DELETE_BLOCK_STATUS,
|
||||
|
@ -301,13 +297,20 @@ public final class StorageContainerManager extends ServiceRuntimeInfoImpl
|
|||
eventQueue.addHandler(SCMEvents.PIPELINE_ACTIONS,
|
||||
pipelineActionEventHandler);
|
||||
eventQueue.addHandler(SCMEvents.PIPELINE_CLOSE, pipelineCloseHandler);
|
||||
eventQueue.addHandler(SCMEvents.NODE_REGISTRATION_CONT_REPORT,
|
||||
scmChillModeManager);
|
||||
eventQueue.addHandler(SCMEvents.CHILL_MODE_STATUS,
|
||||
(BlockManagerImpl) scmBlockManager);
|
||||
|
||||
eventQueue.addHandler(SCMEvents.CHILL_MODE_STATUS, clientProtocolServer);
|
||||
eventQueue.addHandler(SCMEvents.PIPELINE_REPORT, pipelineReportHandler);
|
||||
|
||||
eventQueue.addHandler(SCMEvents.CHILL_MODE_STATUS,
|
||||
replicationStatus.getChillModeStatusListener());
|
||||
eventQueue.addHandler(SCMEvents.CHILL_MODE_STATUS,
|
||||
(BlockManagerImpl) scmBlockManager);
|
||||
scmChillModeManager = new SCMChillModeManager(conf,
|
||||
containerManager.getContainers(),
|
||||
eventQueue);
|
||||
|
||||
eventQueue.addHandler(SCMEvents.NODE_REGISTRATION_CONT_REPORT,
|
||||
scmChillModeManager);
|
||||
registerMXBean();
|
||||
}
|
||||
|
||||
|
|
|
@ -229,7 +229,7 @@ public class TestScmChillMode {
|
|||
|
||||
}
|
||||
|
||||
@Test
|
||||
@Test(timeout=300_000)
|
||||
public void testSCMChillMode() throws Exception {
|
||||
MiniOzoneCluster.Builder clusterBuilder = MiniOzoneCluster.newBuilder(conf)
|
||||
.setHbInterval(1000)
|
||||
|
@ -360,4 +360,24 @@ public class TestScmChillMode {
|
|||
.getContainerWithPipeline(containers.get(0).getContainerID()));
|
||||
}
|
||||
|
||||
@Test(timeout = 300_000)
|
||||
public void testSCMChillModeDisabled() throws Exception {
|
||||
cluster.stop();
|
||||
|
||||
// If chill mode is disabled, cluster should not be in chill mode even if
|
||||
// min number of datanodes are not started.
|
||||
conf.setBoolean(HddsConfigKeys.HDDS_SCM_CHILLMODE_ENABLED, false);
|
||||
conf.setInt(HddsConfigKeys.HDDS_SCM_CHILLMODE_MIN_DATANODE, 3);
|
||||
builder = MiniOzoneCluster.newBuilder(conf)
|
||||
.setHbInterval(1000)
|
||||
.setHbProcessorInterval(500)
|
||||
.setNumDatanodes(1);
|
||||
cluster = builder.build();
|
||||
StorageContainerManager scm = cluster.getStorageContainerManager();
|
||||
assertFalse(scm.isInChillMode());
|
||||
|
||||
// Even on SCM restart, cluster should be out of chill mode immediately.
|
||||
cluster.restartStorageContainerManager();
|
||||
assertFalse(scm.isInChillMode());
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue