HDDS-612. Even after setting hdds.scm.chillmode.enabled to false, SCM allocateblock fails with ChillModePrecheck exception. Contributed by Hanisha Koneru.
This commit is contained in:
parent
47ad98b2e1
commit
dc27408043
|
@ -123,7 +123,7 @@ public class BlockManagerImpl implements EventHandler<Boolean>,
|
||||||
blockDeletingService =
|
blockDeletingService =
|
||||||
new SCMBlockDeletingService(deletedBlockLog, containerManager,
|
new SCMBlockDeletingService(deletedBlockLog, containerManager,
|
||||||
nodeManager, eventPublisher, svcInterval, serviceTimeout, conf);
|
nodeManager, eventPublisher, svcInterval, serviceTimeout, conf);
|
||||||
chillModePrecheck = new ChillModePrecheck();
|
chillModePrecheck = new ChillModePrecheck(conf);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
@ -19,6 +19,8 @@
|
||||||
package org.apache.hadoop.hdds.scm.server;
|
package org.apache.hadoop.hdds.scm.server;
|
||||||
|
|
||||||
import java.util.concurrent.atomic.AtomicBoolean;
|
import java.util.concurrent.atomic.AtomicBoolean;
|
||||||
|
import org.apache.hadoop.conf.Configuration;
|
||||||
|
import org.apache.hadoop.hdds.HddsConfigKeys;
|
||||||
import org.apache.hadoop.hdds.protocol.proto.HddsProtos.ScmOps;
|
import org.apache.hadoop.hdds.protocol.proto.HddsProtos.ScmOps;
|
||||||
import org.apache.hadoop.hdds.scm.exceptions.SCMException;
|
import org.apache.hadoop.hdds.scm.exceptions.SCMException;
|
||||||
import org.apache.hadoop.hdds.scm.exceptions.SCMException.ResultCodes;
|
import org.apache.hadoop.hdds.scm.exceptions.SCMException.ResultCodes;
|
||||||
|
@ -29,9 +31,20 @@ import org.apache.hadoop.hdds.scm.server.SCMChillModeManager.ChillModeRestricted
|
||||||
* */
|
* */
|
||||||
public class ChillModePrecheck implements Precheck<ScmOps> {
|
public class ChillModePrecheck implements Precheck<ScmOps> {
|
||||||
|
|
||||||
private AtomicBoolean inChillMode = new AtomicBoolean(true);
|
private AtomicBoolean inChillMode;
|
||||||
public static final String PRECHECK_TYPE = "ChillModePrecheck";
|
public static final String PRECHECK_TYPE = "ChillModePrecheck";
|
||||||
|
|
||||||
|
/**
 * Creates the precheck with its initial chill-mode state taken from
 * configuration.
 *
 * @param conf configuration from which
 *     {@link HddsConfigKeys#HDDS_SCM_CHILLMODE_ENABLED} is read, falling
 *     back to {@link HddsConfigKeys#HDDS_SCM_CHILLMODE_ENABLED_DEFAULT}
 *     when the key is not set.
 */
public ChillModePrecheck(Configuration conf) {
  boolean chillModeEnabled = conf.getBoolean(
      HddsConfigKeys.HDDS_SCM_CHILLMODE_ENABLED,
      HddsConfigKeys.HDDS_SCM_CHILLMODE_ENABLED_DEFAULT);
  // The precheck starts out in chill mode exactly when the feature is
  // enabled, so the flag seeds the AtomicBoolean directly.
  inChillMode = new AtomicBoolean(chillModeEnabled);
}
|
||||||
|
|
||||||
public boolean check(ScmOps op) throws SCMException {
|
public boolean check(ScmOps op) throws SCMException {
|
||||||
if (inChillMode.get() && ChillModeRestrictedOps
|
if (inChillMode.get() && ChillModeRestrictedOps
|
||||||
.isRestrictedInChillMode(op)) {
|
.isRestrictedInChillMode(op)) {
|
||||||
|
|
|
@ -58,6 +58,7 @@ public class SCMChillModeManager implements
|
||||||
|
|
||||||
private static final Logger LOG =
|
private static final Logger LOG =
|
||||||
LoggerFactory.getLogger(SCMChillModeManager.class);
|
LoggerFactory.getLogger(SCMChillModeManager.class);
|
||||||
|
private final boolean isChillModeEnabled;
|
||||||
private AtomicBoolean inChillMode = new AtomicBoolean(true);
|
private AtomicBoolean inChillMode = new AtomicBoolean(true);
|
||||||
private AtomicLong containerWithMinReplicas = new AtomicLong(0);
|
private AtomicLong containerWithMinReplicas = new AtomicLong(0);
|
||||||
private Map<String, ChillModeExitRule> exitRules = new HashMap(1);
|
private Map<String, ChillModeExitRule> exitRules = new HashMap(1);
|
||||||
|
@ -70,14 +71,17 @@ public class SCMChillModeManager implements
|
||||||
EventQueue eventQueue) {
|
EventQueue eventQueue) {
|
||||||
this.config = conf;
|
this.config = conf;
|
||||||
this.eventPublisher = eventQueue;
|
this.eventPublisher = eventQueue;
|
||||||
|
this.isChillModeEnabled = conf.getBoolean(
|
||||||
|
HddsConfigKeys.HDDS_SCM_CHILLMODE_ENABLED,
|
||||||
|
HddsConfigKeys.HDDS_SCM_CHILLMODE_ENABLED_DEFAULT);
|
||||||
|
if (isChillModeEnabled) {
|
||||||
exitRules.put(CONT_EXIT_RULE,
|
exitRules.put(CONT_EXIT_RULE,
|
||||||
new ContainerChillModeRule(config, allContainers));
|
new ContainerChillModeRule(config, allContainers));
|
||||||
exitRules.put(DN_EXIT_RULE, new DataNodeChillModeRule(config));
|
exitRules.put(DN_EXIT_RULE, new DataNodeChillModeRule(config));
|
||||||
if (!conf.getBoolean(HddsConfigKeys.HDDS_SCM_CHILLMODE_ENABLED,
|
emitChillModeStatus();
|
||||||
HddsConfigKeys.HDDS_SCM_CHILLMODE_ENABLED_DEFAULT)) {
|
} else {
|
||||||
exitChillMode(eventQueue);
|
exitChillMode(eventQueue);
|
||||||
}
|
}
|
||||||
emitChillModeStatus();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -85,7 +89,7 @@ public class SCMChillModeManager implements
|
||||||
*/
|
*/
|
||||||
@VisibleForTesting
|
@VisibleForTesting
|
||||||
public void emitChillModeStatus() {
|
public void emitChillModeStatus() {
|
||||||
eventPublisher.fireEvent(SCMEvents.CHILL_MODE_STATUS, inChillMode.get());
|
eventPublisher.fireEvent(SCMEvents.CHILL_MODE_STATUS, getInChillMode());
|
||||||
}
|
}
|
||||||
|
|
||||||
private void validateChillModeExitRules(EventPublisher eventQueue) {
|
private void validateChillModeExitRules(EventPublisher eventQueue) {
|
||||||
|
@ -99,7 +103,7 @@ public class SCMChillModeManager implements
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Exit chill mode. It does following actions:
|
* Exit chill mode. It does following actions:
|
||||||
* 1. Set chill mode status to fale.
|
* 1. Set chill mode status to false.
|
||||||
* 2. Emits START_REPLICATION for ReplicationManager.
|
* 2. Emits START_REPLICATION for ReplicationManager.
|
||||||
* 3. Cleanup resources.
|
* 3. Cleanup resources.
|
||||||
* 4. Emit chill mode status.
|
* 4. Emit chill mode status.
|
||||||
|
@ -131,6 +135,9 @@ public class SCMChillModeManager implements
|
||||||
}
|
}
|
||||||
|
|
||||||
public boolean getInChillMode() {
|
public boolean getInChillMode() {
|
||||||
|
if (!isChillModeEnabled) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
return inChillMode.get();
|
return inChillMode.get();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -218,7 +225,7 @@ public class SCMChillModeManager implements
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
if(inChillMode.get()) {
|
if(getInChillMode()) {
|
||||||
LOG.info("SCM in chill mode. {} % containers have at least one"
|
LOG.info("SCM in chill mode. {} % containers have at least one"
|
||||||
+ " reported replica.",
|
+ " reported replica.",
|
||||||
(containerWithMinReplicas.get() / maxContainer) * 100);
|
(containerWithMinReplicas.get() / maxContainer) * 100);
|
||||||
|
@ -268,7 +275,7 @@ public class SCMChillModeManager implements
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
if(inChillMode.get()) {
|
if(getInChillMode()) {
|
||||||
registeredDnSet.add(reportsProto.getDatanodeDetails().getUuid());
|
registeredDnSet.add(reportsProto.getDatanodeDetails().getUuid());
|
||||||
registeredDns = registeredDnSet.size();
|
registeredDns = registeredDnSet.size();
|
||||||
LOG.info("SCM in chill mode. {} DataNodes registered, {} required.",
|
LOG.info("SCM in chill mode. {} DataNodes registered, {} required.",
|
||||||
|
|
|
@ -86,12 +86,13 @@ public class SCMClientProtocolServer implements
|
||||||
private final InetSocketAddress clientRpcAddress;
|
private final InetSocketAddress clientRpcAddress;
|
||||||
private final StorageContainerManager scm;
|
private final StorageContainerManager scm;
|
||||||
private final OzoneConfiguration conf;
|
private final OzoneConfiguration conf;
|
||||||
private ChillModePrecheck chillModePrecheck = new ChillModePrecheck();
|
private ChillModePrecheck chillModePrecheck;
|
||||||
|
|
||||||
public SCMClientProtocolServer(OzoneConfiguration conf,
|
public SCMClientProtocolServer(OzoneConfiguration conf,
|
||||||
StorageContainerManager scm) throws IOException {
|
StorageContainerManager scm) throws IOException {
|
||||||
this.scm = scm;
|
this.scm = scm;
|
||||||
this.conf = conf;
|
this.conf = conf;
|
||||||
|
chillModePrecheck = new ChillModePrecheck(conf);
|
||||||
final int handlerCount =
|
final int handlerCount =
|
||||||
conf.getInt(OZONE_SCM_HANDLER_COUNT_KEY,
|
conf.getInt(OZONE_SCM_HANDLER_COUNT_KEY,
|
||||||
OZONE_SCM_HANDLER_COUNT_DEFAULT);
|
OZONE_SCM_HANDLER_COUNT_DEFAULT);
|
||||||
|
@ -357,8 +358,8 @@ public class SCMClientProtocolServer implements
|
||||||
* Set chill mode status based on SCMEvents.CHILL_MODE_STATUS event.
|
* Set chill mode status based on SCMEvents.CHILL_MODE_STATUS event.
|
||||||
*/
|
*/
|
||||||
@Override
|
@Override
|
||||||
public void onMessage(Boolean inChillMOde, EventPublisher publisher) {
|
public void onMessage(Boolean inChillMode, EventPublisher publisher) {
|
||||||
chillModePrecheck.setInChillMode(inChillMOde);
|
chillModePrecheck.setInChillMode(inChillMode);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
@ -230,9 +230,7 @@ public final class StorageContainerManager extends ServiceRuntimeInfoImpl
|
||||||
ContainerReportHandler containerReportHandler =
|
ContainerReportHandler containerReportHandler =
|
||||||
new ContainerReportHandler(containerManager, scmNodeManager,
|
new ContainerReportHandler(containerManager, scmNodeManager,
|
||||||
replicationStatus);
|
replicationStatus);
|
||||||
scmChillModeManager = new SCMChillModeManager(conf,
|
|
||||||
containerManager.getContainers(),
|
|
||||||
eventQueue);
|
|
||||||
PipelineActionEventHandler pipelineActionEventHandler =
|
PipelineActionEventHandler pipelineActionEventHandler =
|
||||||
new PipelineActionEventHandler();
|
new PipelineActionEventHandler();
|
||||||
|
|
||||||
|
@ -292,8 +290,6 @@ public final class StorageContainerManager extends ServiceRuntimeInfoImpl
|
||||||
eventQueue.addHandler(SCMEvents.CMD_STATUS_REPORT, cmdStatusReportHandler);
|
eventQueue.addHandler(SCMEvents.CMD_STATUS_REPORT, cmdStatusReportHandler);
|
||||||
eventQueue.addHandler(SCMEvents.START_REPLICATION,
|
eventQueue.addHandler(SCMEvents.START_REPLICATION,
|
||||||
replicationStatus.getReplicationStatusListener());
|
replicationStatus.getReplicationStatusListener());
|
||||||
eventQueue.addHandler(SCMEvents.CHILL_MODE_STATUS,
|
|
||||||
replicationStatus.getChillModeStatusListener());
|
|
||||||
eventQueue
|
eventQueue
|
||||||
.addHandler(SCMEvents.PENDING_DELETE_STATUS, pendingDeleteHandler);
|
.addHandler(SCMEvents.PENDING_DELETE_STATUS, pendingDeleteHandler);
|
||||||
eventQueue.addHandler(SCMEvents.DELETE_BLOCK_STATUS,
|
eventQueue.addHandler(SCMEvents.DELETE_BLOCK_STATUS,
|
||||||
|
@ -301,13 +297,20 @@ public final class StorageContainerManager extends ServiceRuntimeInfoImpl
|
||||||
eventQueue.addHandler(SCMEvents.PIPELINE_ACTIONS,
|
eventQueue.addHandler(SCMEvents.PIPELINE_ACTIONS,
|
||||||
pipelineActionEventHandler);
|
pipelineActionEventHandler);
|
||||||
eventQueue.addHandler(SCMEvents.PIPELINE_CLOSE, pipelineCloseHandler);
|
eventQueue.addHandler(SCMEvents.PIPELINE_CLOSE, pipelineCloseHandler);
|
||||||
eventQueue.addHandler(SCMEvents.NODE_REGISTRATION_CONT_REPORT,
|
|
||||||
scmChillModeManager);
|
|
||||||
eventQueue.addHandler(SCMEvents.CHILL_MODE_STATUS,
|
|
||||||
(BlockManagerImpl) scmBlockManager);
|
|
||||||
eventQueue.addHandler(SCMEvents.CHILL_MODE_STATUS, clientProtocolServer);
|
eventQueue.addHandler(SCMEvents.CHILL_MODE_STATUS, clientProtocolServer);
|
||||||
eventQueue.addHandler(SCMEvents.PIPELINE_REPORT, pipelineReportHandler);
|
eventQueue.addHandler(SCMEvents.PIPELINE_REPORT, pipelineReportHandler);
|
||||||
|
|
||||||
|
eventQueue.addHandler(SCMEvents.CHILL_MODE_STATUS,
|
||||||
|
replicationStatus.getChillModeStatusListener());
|
||||||
|
eventQueue.addHandler(SCMEvents.CHILL_MODE_STATUS,
|
||||||
|
(BlockManagerImpl) scmBlockManager);
|
||||||
|
scmChillModeManager = new SCMChillModeManager(conf,
|
||||||
|
containerManager.getContainers(),
|
||||||
|
eventQueue);
|
||||||
|
|
||||||
|
eventQueue.addHandler(SCMEvents.NODE_REGISTRATION_CONT_REPORT,
|
||||||
|
scmChillModeManager);
|
||||||
registerMXBean();
|
registerMXBean();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -229,7 +229,7 @@ public class TestScmChillMode {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test(timeout=300_000)
|
||||||
public void testSCMChillMode() throws Exception {
|
public void testSCMChillMode() throws Exception {
|
||||||
MiniOzoneCluster.Builder clusterBuilder = MiniOzoneCluster.newBuilder(conf)
|
MiniOzoneCluster.Builder clusterBuilder = MiniOzoneCluster.newBuilder(conf)
|
||||||
.setHbInterval(1000)
|
.setHbInterval(1000)
|
||||||
|
@ -360,4 +360,24 @@ public class TestScmChillMode {
|
||||||
.getContainerWithPipeline(containers.get(0).getContainerID()));
|
.getContainerWithPipeline(containers.get(0).getContainerID()));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test(timeout = 300_000)
|
||||||
|
public void testSCMChillModeDisabled() throws Exception {
|
||||||
|
cluster.stop();
|
||||||
|
|
||||||
|
// If chill mode is disabled, cluster should not be in chill mode even if
|
||||||
|
// min number of datanodes are not started.
|
||||||
|
conf.setBoolean(HddsConfigKeys.HDDS_SCM_CHILLMODE_ENABLED, false);
|
||||||
|
conf.setInt(HddsConfigKeys.HDDS_SCM_CHILLMODE_MIN_DATANODE, 3);
|
||||||
|
builder = MiniOzoneCluster.newBuilder(conf)
|
||||||
|
.setHbInterval(1000)
|
||||||
|
.setHbProcessorInterval(500)
|
||||||
|
.setNumDatanodes(1);
|
||||||
|
cluster = builder.build();
|
||||||
|
StorageContainerManager scm = cluster.getStorageContainerManager();
|
||||||
|
assertFalse(scm.isInChillMode());
|
||||||
|
|
||||||
|
// Even on SCM restart, cluster should be out of chill mode immediately.
|
||||||
|
cluster.restartStorageContainerManager();
|
||||||
|
assertFalse(scm.isInChillMode());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue