HDDS-1193. Refactor ContainerChillModeRule and DataNodeChillModeRule. (#534)
* HDDS-1193. Refactor ContainerChillModeRule and DataNodeChillModeRule.
This commit is contained in:
parent
7fd890116a
commit
313e8b9f13
|
@ -29,12 +29,15 @@ import org.apache.hadoop.hdds.scm.container.ContainerInfo;
|
||||||
import org.apache.hadoop.hdds.scm.server.SCMDatanodeProtocolServer.NodeRegistrationContainerReport;
|
import org.apache.hadoop.hdds.scm.server.SCMDatanodeProtocolServer.NodeRegistrationContainerReport;
|
||||||
|
|
||||||
import com.google.common.annotations.VisibleForTesting;
|
import com.google.common.annotations.VisibleForTesting;
|
||||||
|
import org.apache.hadoop.hdds.server.events.EventHandler;
|
||||||
|
import org.apache.hadoop.hdds.server.events.EventPublisher;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Class defining Chill mode exit criteria for Containers.
|
* Class defining Chill mode exit criteria for Containers.
|
||||||
*/
|
*/
|
||||||
public class ContainerChillModeRule implements
|
public class ContainerChillModeRule implements
|
||||||
ChillModeExitRule<NodeRegistrationContainerReport> {
|
ChillModeExitRule<NodeRegistrationContainerReport>,
|
||||||
|
EventHandler<NodeRegistrationContainerReport> {
|
||||||
|
|
||||||
// Required cutoff % for containers with at least 1 reported replica.
|
// Required cutoff % for containers with at least 1 reported replica.
|
||||||
private double chillModeCutoff;
|
private double chillModeCutoff;
|
||||||
|
@ -68,9 +71,6 @@ public class ContainerChillModeRule implements
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public boolean validate() {
|
public boolean validate() {
|
||||||
if (maxContainer == 0) {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
return getCurrentContainerThreshold() >= chillModeCutoff;
|
return getCurrentContainerThreshold() >= chillModeCutoff;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -84,10 +84,6 @@ public class ContainerChillModeRule implements
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void process(NodeRegistrationContainerReport reportsProto) {
|
public void process(NodeRegistrationContainerReport reportsProto) {
|
||||||
if (maxContainer == 0) {
|
|
||||||
// No container to check.
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
reportsProto.getReport().getReportsList().forEach(c -> {
|
reportsProto.getReport().getReportsList().forEach(c -> {
|
||||||
if (containerMap.containsKey(c.getContainerID())) {
|
if (containerMap.containsKey(c.getContainerID())) {
|
||||||
|
@ -96,12 +92,33 @@ public class ContainerChillModeRule implements
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void onMessage(NodeRegistrationContainerReport
|
||||||
|
nodeRegistrationContainerReport, EventPublisher publisher) {
|
||||||
|
|
||||||
|
// TODO: when we have remove handlers, we can remove getInChillmode check
|
||||||
|
|
||||||
|
if (chillModeManager.getInChillMode()) {
|
||||||
|
if (validate()) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
process(nodeRegistrationContainerReport);
|
||||||
if (chillModeManager.getInChillMode()) {
|
if (chillModeManager.getInChillMode()) {
|
||||||
SCMChillModeManager.getLogger().info(
|
SCMChillModeManager.getLogger().info(
|
||||||
"SCM in chill mode. {} % containers have at least one"
|
"SCM in chill mode. {} % containers have at least one"
|
||||||
+ " reported replica.",
|
+ " reported replica.",
|
||||||
(containerWithMinReplicas.get() / maxContainer) * 100);
|
(containerWithMinReplicas.get() / maxContainer) * 100);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (validate()) {
|
||||||
|
chillModeManager.validateChillModeExitRules(publisher);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -25,13 +25,16 @@ import org.apache.hadoop.hdds.HddsConfigKeys;
|
||||||
import org.apache.hadoop.hdds.scm.server.SCMDatanodeProtocolServer.NodeRegistrationContainerReport;
|
import org.apache.hadoop.hdds.scm.server.SCMDatanodeProtocolServer.NodeRegistrationContainerReport;
|
||||||
|
|
||||||
import com.google.common.annotations.VisibleForTesting;
|
import com.google.common.annotations.VisibleForTesting;
|
||||||
|
import org.apache.hadoop.hdds.server.events.EventHandler;
|
||||||
|
import org.apache.hadoop.hdds.server.events.EventPublisher;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Class defining Chill mode exit criteria according to number of DataNodes
|
* Class defining Chill mode exit criteria according to number of DataNodes
|
||||||
* registered with SCM.
|
* registered with SCM.
|
||||||
*/
|
*/
|
||||||
public class DataNodeChillModeRule implements
|
public class DataNodeChillModeRule implements
|
||||||
ChillModeExitRule<NodeRegistrationContainerReport> {
|
ChillModeExitRule<NodeRegistrationContainerReport>,
|
||||||
|
EventHandler<NodeRegistrationContainerReport> {
|
||||||
|
|
||||||
// Min DataNodes required to exit chill mode.
|
// Min DataNodes required to exit chill mode.
|
||||||
private int requiredDns;
|
private int requiredDns;
|
||||||
|
@ -62,18 +65,34 @@ public class DataNodeChillModeRule implements
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void process(NodeRegistrationContainerReport reportsProto) {
|
public void process(NodeRegistrationContainerReport reportsProto) {
|
||||||
if (requiredDns == 0) {
|
|
||||||
// No dn check required.
|
registeredDnSet.add(reportsProto.getDatanodeDetails().getUuid());
|
||||||
|
registeredDns = registeredDnSet.size();
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void onMessage(NodeRegistrationContainerReport
|
||||||
|
nodeRegistrationContainerReport, EventPublisher publisher) {
|
||||||
|
// TODO: when we have remove handlers, we can remove getInChillmode check
|
||||||
|
|
||||||
|
if (chillModeManager.getInChillMode()) {
|
||||||
|
if (validate()) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
process(nodeRegistrationContainerReport);
|
||||||
|
|
||||||
if (chillModeManager.getInChillMode()) {
|
if (chillModeManager.getInChillMode()) {
|
||||||
registeredDnSet.add(reportsProto.getDatanodeDetails().getUuid());
|
|
||||||
registeredDns = registeredDnSet.size();
|
|
||||||
SCMChillModeManager.getLogger().info(
|
SCMChillModeManager.getLogger().info(
|
||||||
"SCM in chill mode. {} DataNodes registered, {} required.",
|
"SCM in chill mode. {} DataNodes registered, {} required.",
|
||||||
registeredDns, requiredDns);
|
registeredDns, requiredDns);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (validate()) {
|
||||||
|
chillModeManager.validateChillModeExitRules(publisher);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -28,9 +28,6 @@ import org.apache.hadoop.hdds.scm.container.ContainerInfo;
|
||||||
import org.apache.hadoop.hdds.scm.events.SCMEvents;
|
import org.apache.hadoop.hdds.scm.events.SCMEvents;
|
||||||
import org.apache.hadoop.hdds.scm.pipeline.PipelineManager;
|
import org.apache.hadoop.hdds.scm.pipeline.PipelineManager;
|
||||||
import org.apache.hadoop.hdds.scm.pipeline.RatisPipelineUtils;
|
import org.apache.hadoop.hdds.scm.pipeline.RatisPipelineUtils;
|
||||||
import org.apache.hadoop.hdds.scm.server.SCMDatanodeProtocolServer
|
|
||||||
.NodeRegistrationContainerReport;
|
|
||||||
import org.apache.hadoop.hdds.server.events.EventHandler;
|
|
||||||
import org.apache.hadoop.hdds.server.events.EventPublisher;
|
import org.apache.hadoop.hdds.server.events.EventPublisher;
|
||||||
import org.apache.hadoop.hdds.server.events.EventQueue;
|
import org.apache.hadoop.hdds.server.events.EventQueue;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
|
@ -48,8 +45,7 @@ import org.slf4j.LoggerFactory;
|
||||||
* for reported containers and validates if cutoff threshold for
|
* for reported containers and validates if cutoff threshold for
|
||||||
* containers is met.
|
* containers is met.
|
||||||
*/
|
*/
|
||||||
public class SCMChillModeManager implements
|
public class SCMChillModeManager {
|
||||||
EventHandler<NodeRegistrationContainerReport> {
|
|
||||||
|
|
||||||
private static final Logger LOG =
|
private static final Logger LOG =
|
||||||
LoggerFactory.getLogger(SCMChillModeManager.class);
|
LoggerFactory.getLogger(SCMChillModeManager.class);
|
||||||
|
@ -78,9 +74,16 @@ public class SCMChillModeManager implements
|
||||||
HddsConfigKeys.HDDS_SCM_CHILLMODE_ENABLED,
|
HddsConfigKeys.HDDS_SCM_CHILLMODE_ENABLED,
|
||||||
HddsConfigKeys.HDDS_SCM_CHILLMODE_ENABLED_DEFAULT);
|
HddsConfigKeys.HDDS_SCM_CHILLMODE_ENABLED_DEFAULT);
|
||||||
if (isChillModeEnabled) {
|
if (isChillModeEnabled) {
|
||||||
exitRules.put(CONT_EXIT_RULE,
|
ContainerChillModeRule containerChillModeRule =
|
||||||
new ContainerChillModeRule(config, allContainers, this));
|
new ContainerChillModeRule(config, allContainers, this);
|
||||||
exitRules.put(DN_EXIT_RULE, new DataNodeChillModeRule(config, this));
|
DataNodeChillModeRule dataNodeChillModeRule =
|
||||||
|
new DataNodeChillModeRule(config, this);
|
||||||
|
exitRules.put(CONT_EXIT_RULE, containerChillModeRule);
|
||||||
|
exitRules.put(DN_EXIT_RULE, dataNodeChillModeRule);
|
||||||
|
eventPublisher.addHandler(SCMEvents.NODE_REGISTRATION_CONT_REPORT,
|
||||||
|
containerChillModeRule);
|
||||||
|
eventPublisher.addHandler(SCMEvents.NODE_REGISTRATION_CONT_REPORT,
|
||||||
|
dataNodeChillModeRule);
|
||||||
|
|
||||||
if (conf.getBoolean(
|
if (conf.getBoolean(
|
||||||
HddsConfigKeys.HDDS_SCM_CHILLMODE_PIPELINE_AVAILABILITY_CHECK,
|
HddsConfigKeys.HDDS_SCM_CHILLMODE_PIPELINE_AVAILABILITY_CHECK,
|
||||||
|
@ -146,17 +149,6 @@ public class SCMChillModeManager implements
|
||||||
.scheduleFixedIntervalPipelineCreator(pipelineManager, config);
|
.scheduleFixedIntervalPipelineCreator(pipelineManager, config);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
|
||||||
public void onMessage(
|
|
||||||
NodeRegistrationContainerReport nodeRegistrationContainerReport,
|
|
||||||
EventPublisher publisher) {
|
|
||||||
if (getInChillMode()) {
|
|
||||||
exitRules.get(CONT_EXIT_RULE).process(nodeRegistrationContainerReport);
|
|
||||||
exitRules.get(DN_EXIT_RULE).process(nodeRegistrationContainerReport);
|
|
||||||
validateChillModeExitRules(publisher);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public boolean getInChillMode() {
|
public boolean getInChillMode() {
|
||||||
if (!isChillModeEnabled) {
|
if (!isChillModeEnabled) {
|
||||||
return false;
|
return false;
|
||||||
|
|
|
@ -364,8 +364,6 @@ public final class StorageContainerManager extends ServiceRuntimeInfoImpl
|
||||||
replicationStatus.getChillModeStatusListener());
|
replicationStatus.getChillModeStatusListener());
|
||||||
eventQueue.addHandler(SCMEvents.CHILL_MODE_STATUS,
|
eventQueue.addHandler(SCMEvents.CHILL_MODE_STATUS,
|
||||||
(BlockManagerImpl) scmBlockManager);
|
(BlockManagerImpl) scmBlockManager);
|
||||||
eventQueue.addHandler(SCMEvents.NODE_REGISTRATION_CONT_REPORT,
|
|
||||||
scmChillModeManager);
|
|
||||||
registerMXBean();
|
registerMXBean();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -90,8 +90,7 @@ public class TestSCMChillModeManager {
|
||||||
}
|
}
|
||||||
scmChillModeManager = new SCMChillModeManager(
|
scmChillModeManager = new SCMChillModeManager(
|
||||||
config, containers, null, queue);
|
config, containers, null, queue);
|
||||||
queue.addHandler(SCMEvents.NODE_REGISTRATION_CONT_REPORT,
|
|
||||||
scmChillModeManager);
|
|
||||||
assertTrue(scmChillModeManager.getInChillMode());
|
assertTrue(scmChillModeManager.getInChillMode());
|
||||||
queue.fireEvent(SCMEvents.NODE_REGISTRATION_CONT_REPORT,
|
queue.fireEvent(SCMEvents.NODE_REGISTRATION_CONT_REPORT,
|
||||||
HddsTestUtils.createNodeRegistrationContainerReport(containers));
|
HddsTestUtils.createNodeRegistrationContainerReport(containers));
|
||||||
|
@ -111,8 +110,7 @@ public class TestSCMChillModeManager {
|
||||||
}
|
}
|
||||||
scmChillModeManager = new SCMChillModeManager(
|
scmChillModeManager = new SCMChillModeManager(
|
||||||
config, containers, null, queue);
|
config, containers, null, queue);
|
||||||
queue.addHandler(SCMEvents.NODE_REGISTRATION_CONT_REPORT,
|
|
||||||
scmChillModeManager);
|
|
||||||
assertTrue(scmChillModeManager.getInChillMode());
|
assertTrue(scmChillModeManager.getInChillMode());
|
||||||
|
|
||||||
testContainerThreshold(containers.subList(0, 25), 0.25);
|
testContainerThreshold(containers.subList(0, 25), 0.25);
|
||||||
|
@ -167,8 +165,7 @@ public class TestSCMChillModeManager {
|
||||||
|
|
||||||
scmChillModeManager = new SCMChillModeManager(
|
scmChillModeManager = new SCMChillModeManager(
|
||||||
config, containers, null, queue);
|
config, containers, null, queue);
|
||||||
queue.addHandler(SCMEvents.NODE_REGISTRATION_CONT_REPORT,
|
|
||||||
scmChillModeManager);
|
|
||||||
assertTrue(scmChillModeManager.getInChillMode());
|
assertTrue(scmChillModeManager.getInChillMode());
|
||||||
|
|
||||||
// When 10 CLOSED containers are reported by DNs, the computed container
|
// When 10 CLOSED containers are reported by DNs, the computed container
|
||||||
|
@ -192,8 +189,7 @@ public class TestSCMChillModeManager {
|
||||||
conf.setInt(HddsConfigKeys.HDDS_SCM_CHILLMODE_MIN_DATANODE, numOfDns);
|
conf.setInt(HddsConfigKeys.HDDS_SCM_CHILLMODE_MIN_DATANODE, numOfDns);
|
||||||
scmChillModeManager = new SCMChillModeManager(
|
scmChillModeManager = new SCMChillModeManager(
|
||||||
conf, containers, null, queue);
|
conf, containers, null, queue);
|
||||||
queue.addHandler(SCMEvents.NODE_REGISTRATION_CONT_REPORT,
|
|
||||||
scmChillModeManager);
|
|
||||||
// Assert SCM is in Chill mode.
|
// Assert SCM is in Chill mode.
|
||||||
assertTrue(scmChillModeManager.getInChillMode());
|
assertTrue(scmChillModeManager.getInChillMode());
|
||||||
|
|
||||||
|
@ -256,8 +252,6 @@ public class TestSCMChillModeManager {
|
||||||
|
|
||||||
scmChillModeManager = new SCMChillModeManager(
|
scmChillModeManager = new SCMChillModeManager(
|
||||||
config, containers, pipelineManager, queue);
|
config, containers, pipelineManager, queue);
|
||||||
queue.addHandler(SCMEvents.NODE_REGISTRATION_CONT_REPORT,
|
|
||||||
scmChillModeManager);
|
|
||||||
|
|
||||||
queue.fireEvent(SCMEvents.NODE_REGISTRATION_CONT_REPORT,
|
queue.fireEvent(SCMEvents.NODE_REGISTRATION_CONT_REPORT,
|
||||||
HddsTestUtils.createNodeRegistrationContainerReport(containers));
|
HddsTestUtils.createNodeRegistrationContainerReport(containers));
|
||||||
|
|
Loading…
Reference in New Issue