HDDS-571. Update SCM chill mode exit criteria to optionally wait for n datanodes. Contributed by Ajay Kumar.
This commit is contained in:
parent
9bb2801e8c
commit
cdf5d58364
@ -83,6 +83,9 @@ private HddsConfigKeys() {
|
|||||||
public static final String HDDS_SCM_CHILLMODE_ENABLED =
|
public static final String HDDS_SCM_CHILLMODE_ENABLED =
|
||||||
"hdds.scm.chillmode.enabled";
|
"hdds.scm.chillmode.enabled";
|
||||||
public static final boolean HDDS_SCM_CHILLMODE_ENABLED_DEFAULT = true;
|
public static final boolean HDDS_SCM_CHILLMODE_ENABLED_DEFAULT = true;
|
||||||
|
public static final String HDDS_SCM_CHILLMODE_MIN_DATANODE =
|
||||||
|
"hdds.scm.chillmode.min.datanode";
|
||||||
|
public static final int HDDS_SCM_CHILLMODE_MIN_DATANODE_DEFAULT = 1;
|
||||||
|
|
||||||
// % of containers which should have at least one reported replica
|
// % of containers which should have at least one reported replica
|
||||||
// before SCM comes out of chill mode.
|
// before SCM comes out of chill mode.
|
||||||
|
@ -1164,6 +1164,15 @@
|
|||||||
</description>
|
</description>
|
||||||
</property>
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>hdds.scm.chillmode.min.datanode</name>
|
||||||
|
<value>1</value>
|
||||||
|
<tag>HDDS,SCM,OPERATION</tag>
|
||||||
|
<description>Minimum number of DataNodes that must register before SCM can leave
|
||||||
|
chill mode. A value of 0 disables this check.
|
||||||
|
</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
<property>
|
<property>
|
||||||
<name>hdds.container.action.max.limit</name>
|
<name>hdds.container.action.max.limit</name>
|
||||||
<value>20</value>
|
<value>20</value>
|
||||||
|
@ -20,8 +20,10 @@
|
|||||||
import com.google.common.annotations.VisibleForTesting;
|
import com.google.common.annotations.VisibleForTesting;
|
||||||
import java.util.EnumSet;
|
import java.util.EnumSet;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
|
import java.util.HashSet;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
import java.util.UUID;
|
||||||
import java.util.concurrent.ConcurrentHashMap;
|
import java.util.concurrent.ConcurrentHashMap;
|
||||||
import java.util.concurrent.atomic.AtomicBoolean;
|
import java.util.concurrent.atomic.AtomicBoolean;
|
||||||
import java.util.concurrent.atomic.AtomicLong;
|
import java.util.concurrent.atomic.AtomicLong;
|
||||||
@ -60,14 +62,16 @@ public class SCMChillModeManager implements
|
|||||||
private Map<String, ChillModeExitRule> exitRules = new HashMap(1);
|
private Map<String, ChillModeExitRule> exitRules = new HashMap(1);
|
||||||
private Configuration config;
|
private Configuration config;
|
||||||
private static final String CONT_EXIT_RULE = "ContainerChillModeRule";
|
private static final String CONT_EXIT_RULE = "ContainerChillModeRule";
|
||||||
|
private static final String DN_EXIT_RULE = "DataNodeChillModeRule";
|
||||||
private final EventQueue eventPublisher;
|
private final EventQueue eventPublisher;
|
||||||
|
|
||||||
SCMChillModeManager(Configuration conf, List<ContainerInfo> allContainers,
|
SCMChillModeManager(Configuration conf, List<ContainerInfo> allContainers,
|
||||||
EventQueue eventQueue) {
|
EventQueue eventQueue) {
|
||||||
this.config = conf;
|
this.config = conf;
|
||||||
this.eventPublisher = eventQueue;
|
this.eventPublisher = eventQueue;
|
||||||
exitRules
|
exitRules.put(CONT_EXIT_RULE,
|
||||||
.put(CONT_EXIT_RULE, new ContainerChillModeRule(config, allContainers));
|
new ContainerChillModeRule(config, allContainers));
|
||||||
|
exitRules.put(DN_EXIT_RULE, new DataNodeChillModeRule(config));
|
||||||
if (!conf.getBoolean(HddsConfigKeys.HDDS_SCM_CHILLMODE_ENABLED,
|
if (!conf.getBoolean(HddsConfigKeys.HDDS_SCM_CHILLMODE_ENABLED,
|
||||||
HddsConfigKeys.HDDS_SCM_CHILLMODE_ENABLED_DEFAULT)) {
|
HddsConfigKeys.HDDS_SCM_CHILLMODE_ENABLED_DEFAULT)) {
|
||||||
exitChillMode(eventQueue);
|
exitChillMode(eventQueue);
|
||||||
@ -120,6 +124,7 @@ public void onMessage(
|
|||||||
EventPublisher publisher) {
|
EventPublisher publisher) {
|
||||||
if (getInChillMode()) {
|
if (getInChillMode()) {
|
||||||
exitRules.get(CONT_EXIT_RULE).process(nodeRegistrationContainerReport);
|
exitRules.get(CONT_EXIT_RULE).process(nodeRegistrationContainerReport);
|
||||||
|
exitRules.get(DN_EXIT_RULE).process(nodeRegistrationContainerReport);
|
||||||
validateChillModeExitRules(publisher);
|
validateChillModeExitRules(publisher);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -187,6 +192,9 @@ public boolean validate() {
|
|||||||
|
|
||||||
@VisibleForTesting
|
@VisibleForTesting
|
||||||
public double getCurrentContainerThreshold() {
|
public double getCurrentContainerThreshold() {
|
||||||
|
if (maxContainer == 0) {
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
return (containerWithMinReplicas.doubleValue() / maxContainer);
|
return (containerWithMinReplicas.doubleValue() / maxContainer);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -217,6 +225,57 @@ public void cleanup() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Class defining Chill mode exit criteria according to number of DataNodes
|
||||||
|
* registered with SCM.
|
||||||
|
*/
|
||||||
|
public class DataNodeChillModeRule implements
|
||||||
|
ChillModeExitRule<NodeRegistrationContainerReport> {
|
||||||
|
|
||||||
|
// Min DataNodes required to exit chill mode.
|
||||||
|
private int requiredDns;
|
||||||
|
private int registeredDns = 0;
|
||||||
|
// Set to track registered DataNodes.
|
||||||
|
private HashSet<UUID> registeredDnSet;
|
||||||
|
|
||||||
|
public DataNodeChillModeRule(Configuration conf) {
|
||||||
|
requiredDns = conf
|
||||||
|
.getInt(HddsConfigKeys.HDDS_SCM_CHILLMODE_MIN_DATANODE,
|
||||||
|
HddsConfigKeys.HDDS_SCM_CHILLMODE_MIN_DATANODE_DEFAULT);
|
||||||
|
registeredDnSet = new HashSet<>(requiredDns * 2);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean validate() {
|
||||||
|
return registeredDns >= requiredDns;
|
||||||
|
}
|
||||||
|
|
||||||
|
@VisibleForTesting
|
||||||
|
public double getRegisteredDataNodes() {
|
||||||
|
return registeredDns;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void process(NodeRegistrationContainerReport reportsProto) {
|
||||||
|
if (requiredDns == 0) {
|
||||||
|
// No dn check required.
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if(inChillMode.get()) {
|
||||||
|
registeredDnSet.add(reportsProto.getDatanodeDetails().getUuid());
|
||||||
|
registeredDns = registeredDnSet.size();
|
||||||
|
LOG.info("SCM in chill mode. {} DataNodes registered, {} required.",
|
||||||
|
registeredDns, requiredDns);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void cleanup() {
|
||||||
|
registeredDnSet.clear();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
@VisibleForTesting
|
@VisibleForTesting
|
||||||
public static Logger getLogger() {
|
public static Logger getLogger() {
|
||||||
return LOG;
|
return LOG;
|
||||||
|
@ -45,7 +45,7 @@ public class TestSCMChillModeManager {
|
|||||||
private List<ContainerInfo> containers;
|
private List<ContainerInfo> containers;
|
||||||
|
|
||||||
@Rule
|
@Rule
|
||||||
public Timeout timeout = new Timeout(1000 * 20);
|
public Timeout timeout = new Timeout(1000 * 35);
|
||||||
|
|
||||||
@BeforeClass
|
@BeforeClass
|
||||||
public static void setUp() {
|
public static void setUp() {
|
||||||
@ -111,6 +111,45 @@ public void testDisableChillMode() {
|
|||||||
assertFalse(scmChillModeManager.getInChillMode());
|
assertFalse(scmChillModeManager.getInChillMode());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testChillModeDataNodeExitRule() throws Exception {
|
||||||
|
containers = new ArrayList<>();
|
||||||
|
testChillModeDataNodes(0);
|
||||||
|
testChillModeDataNodes(3);
|
||||||
|
testChillModeDataNodes(5);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void testChillModeDataNodes(int numOfDns) throws Exception {
|
||||||
|
OzoneConfiguration conf = new OzoneConfiguration(config);
|
||||||
|
conf.setInt(HddsConfigKeys.HDDS_SCM_CHILLMODE_MIN_DATANODE, numOfDns);
|
||||||
|
scmChillModeManager = new SCMChillModeManager(conf, containers, queue);
|
||||||
|
queue.addHandler(SCMEvents.NODE_REGISTRATION_CONT_REPORT,
|
||||||
|
scmChillModeManager);
|
||||||
|
// Assert SCM is in Chill mode.
|
||||||
|
assertTrue(scmChillModeManager.getInChillMode());
|
||||||
|
|
||||||
|
// Register all DataNodes except last one and assert SCM is in chill mode.
|
||||||
|
for (int i = 0; i < numOfDns-1; i++) {
|
||||||
|
queue.fireEvent(SCMEvents.NODE_REGISTRATION_CONT_REPORT,
|
||||||
|
HddsTestUtils.createNodeRegistrationContainerReport(containers));
|
||||||
|
assertTrue(scmChillModeManager.getInChillMode());
|
||||||
|
assertTrue(scmChillModeManager.getCurrentContainerThreshold() == 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
if(numOfDns == 0){
|
||||||
|
GenericTestUtils.waitFor(() -> {
|
||||||
|
return scmChillModeManager.getInChillMode();
|
||||||
|
}, 10, 1000 * 10);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
// Register last DataNode and check that SCM is out of Chill mode.
|
||||||
|
queue.fireEvent(SCMEvents.NODE_REGISTRATION_CONT_REPORT,
|
||||||
|
HddsTestUtils.createNodeRegistrationContainerReport(containers));
|
||||||
|
GenericTestUtils.waitFor(() -> {
|
||||||
|
return scmChillModeManager.getInChillMode();
|
||||||
|
}, 10, 1000 * 10);
|
||||||
|
}
|
||||||
|
|
||||||
private void testContainerThreshold(List<ContainerInfo> dnContainers,
|
private void testContainerThreshold(List<ContainerInfo> dnContainers,
|
||||||
double expectedThreshold)
|
double expectedThreshold)
|
||||||
throws Exception {
|
throws Exception {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user