HDFS-12609. Ozone: SCM: Refactoring of chill mode logic in NodeManager. Contributed by Nandakumar.

This commit is contained in:
yuanbo 2017-10-09 20:43:17 +08:00 committed by Owen O'Malley
parent e3b51d9074
commit e76e10fd7a
11 changed files with 83 additions and 145 deletions

View File

@ -226,7 +226,7 @@ public class BlockManagerImpl implements BlockManager, BlockmanagerMXBean {
INVALID_BLOCK_SIZE); INVALID_BLOCK_SIZE);
} }
if (!nodeManager.isOutOfNodeChillMode()) { if (!nodeManager.isOutOfChillMode()) {
LOG.warn("Not out of Chill mode."); LOG.warn("Not out of Chill mode.");
throw new SCMException("Unable to create block while in chill mode", throw new SCMException("Unable to create block while in chill mode",
CHILL_MODE_EXCEPTION); CHILL_MODE_EXCEPTION);
@ -403,7 +403,7 @@ public class BlockManagerImpl implements BlockManager, BlockmanagerMXBean {
*/ */
@Override @Override
public void deleteBlocks(List<String> blockIDs) throws IOException { public void deleteBlocks(List<String> blockIDs) throws IOException {
if (!nodeManager.isOutOfNodeChillMode()) { if (!nodeManager.isOutOfChillMode()) {
throw new SCMException("Unable to delete block while in chill mode", throw new SCMException("Unable to delete block while in chill mode",
CHILL_MODE_EXCEPTION); CHILL_MODE_EXCEPTION);
} }

View File

@ -183,7 +183,7 @@ public class ContainerMapping implements Mapping {
Preconditions.checkNotNull(containerName); Preconditions.checkNotNull(containerName);
Preconditions.checkState(!containerName.isEmpty()); Preconditions.checkState(!containerName.isEmpty());
ContainerInfo containerInfo = null; ContainerInfo containerInfo = null;
if (!nodeManager.isOutOfNodeChillMode()) { if (!nodeManager.isOutOfChillMode()) {
throw new SCMException( throw new SCMException(
"Unable to create container while in chill mode", "Unable to create container while in chill mode",
SCMException.ResultCodes.CHILL_MODE_EXCEPTION); SCMException.ResultCodes.CHILL_MODE_EXCEPTION);

View File

@ -94,14 +94,14 @@ public interface NodeManager extends StorageContainerNodeProtocol,
void forceExitChillMode(); void forceExitChillMode();
/** /**
* Forcefully enters chill mode, even if all minimum node conditions are met. * Puts the node manager into manual chill mode.
*/ */
void forceEnterChillMode(); void enterChillMode();
/** /**
* Clears the manual chill mode flag. * Brings node manager out of manual chill mode.
*/ */
void clearChillModeFlag(); void exitChillMode();
/** /**
* Returns the aggregated node stats. * Returns the aggregated node stats.

View File

@ -35,13 +35,6 @@ public interface NodeManagerMXBean {
*/ */
int getMinimumChillModeNodes(); int getMinimumChillModeNodes();
/**
* Reports if we have exited out of chill mode by discovering enough nodes.
*
* @return True if we are out of Node layer chill mode, false otherwise.
*/
boolean isOutOfNodeChillMode();
/** /**
* Returns a chill mode status string. * Returns a chill mode status string.
* @return String * @return String
@ -50,11 +43,10 @@ public interface NodeManagerMXBean {
/** /**
* Returns the status of manual chill mode flag. * Returns true if node manager is out of chill mode, else false.
* @return true if forceEnterChillMode has been called, * @return true if out of chill mode, else false
* false if forceExitChillMode or status is not set. eg. clearChillModeFlag.
*/ */
boolean isInManualChillMode(); boolean isOutOfChillMode();
/** /**
* Get the number of data nodes in all states. * Get the number of data nodes in all states.

View File

@ -292,25 +292,6 @@ public class SCMNodeManager
chillModeNodeCount = count; chillModeNodeCount = count;
} }
/**
* Reports if we have exited out of chill mode.
*
* @return true if we are out of chill mode.
*/
@Override
public boolean isOutOfNodeChillMode() {
return !inStartupChillMode.get() && !inManualChillMode.get();
}
/**
* Clears the manual chill mode.
*/
@Override
public void clearChillModeFlag() {
LOG.info("Clearing manual chill mode flag.");
this.inManualChillMode.getAndSet(false);
}
/** /**
* Returns chill mode Status string. * Returns chill mode Status string.
* @return String * @return String
@ -318,36 +299,16 @@ public class SCMNodeManager
@Override @Override
public String getChillModeStatus() { public String getChillModeStatus() {
if (inStartupChillMode.get()) { if (inStartupChillMode.get()) {
return "Still in chill mode, waiting on nodes to report in." return "Still in chill mode, waiting on nodes to report in." +
+ getNodeStatus(); String.format(" %d nodes reported, minimal %d nodes required.",
totalNodes.get(), getMinimumChillModeNodes());
} }
if (inManualChillMode.get()) { if (inManualChillMode.get()) {
return "Out of startup chill mode, but in manual chill mode." + return "Out of startup chill mode, but in manual chill mode." +
getNodeStatus(); String.format(" %d nodes have reported in.", totalNodes.get());
} }
return "Out of chill mode." + getNodeStatus(); return "Out of chill mode." +
} String.format(" %d nodes have reported in.", totalNodes.get());
/**
* Returns a node status string.
* @return - String
*/
private String getNodeStatus() {
return isOutOfNodeChillMode() ?
String.format(" %d nodes have reported in.", totalNodes.get()) :
String.format(" %d nodes reported, minimal %d nodes required.",
totalNodes.get(), getMinimumChillModeNodes());
}
/**
* Returns the status of Manual chill Mode flag.
*
* @return true if forceEnterChillMode has been called, false if
* forceExitChillMode or status is not set. eg. clearChillModeFlag.
*/
@Override
public boolean isInManualChillMode() {
return inManualChillMode.get();
} }
/** /**
@ -359,21 +320,39 @@ public class SCMNodeManager
public void forceExitChillMode() { public void forceExitChillMode() {
if(inStartupChillMode.get()) { if(inStartupChillMode.get()) {
LOG.info("Leaving startup chill mode."); LOG.info("Leaving startup chill mode.");
inStartupChillMode.getAndSet(false); inStartupChillMode.set(false);
} }
if(inManualChillMode.get()) { if(inManualChillMode.get()) {
LOG.info("Leaving manual chill mode."); LOG.info("Leaving manual chill mode.");
inManualChillMode.getAndSet(false); inManualChillMode.set(false);
} }
} }
/** /**
* Forcefully enters chill mode, even if all chill mode conditions are met. * Puts the node manager into manual chill mode.
*/ */
@Override @Override
public void forceEnterChillMode() { public void enterChillMode() {
LOG.info("Entering manual chill mode."); LOG.info("Entering manual chill mode.");
inManualChillMode.getAndSet(true); inManualChillMode.set(true);
}
/**
* Brings node manager out of manual chill mode.
*/
@Override
public void exitChillMode() {
LOG.info("Leaving manual chill mode.");
inManualChillMode.set(false);
}
/**
* Returns true if node manager is out of chill mode, else false.
* @return true if out of chill mode, else false
*/
@Override
public boolean isOutOfChillMode() {
return !(inStartupChillMode.get() || inManualChillMode.get());
} }
/** /**

View File

@ -291,7 +291,7 @@ public final class MiniOzoneCluster extends MiniDFSCluster
public void waitTobeOutOfChillMode() throws TimeoutException, public void waitTobeOutOfChillMode() throws TimeoutException,
InterruptedException { InterruptedException {
GenericTestUtils.waitFor(() -> { GenericTestUtils.waitFor(() -> {
if (scm.getScmNodeManager().isOutOfNodeChillMode()) { if (scm.getScmNodeManager().isOutOfChillMode()) {
return true; return true;
} }
LOG.info("Waiting for cluster to be ready. No datanodes found"); LOG.info("Waiting for cluster to be ready. No datanodes found");

View File

@ -59,16 +59,6 @@ public class ReplicationNodeManagerMock implements NodeManager {
return 0; return 0;
} }
/**
* Reports if we have exited out of chill mode by discovering enough nodes.
*
* @return True if we are out of Node layer chill mode, false otherwise.
*/
@Override
public boolean isOutOfNodeChillMode() {
return !nodeStateMap.isEmpty();
}
/** /**
* Returns a chill mode status string. * Returns a chill mode status string.
* *
@ -79,17 +69,6 @@ public class ReplicationNodeManagerMock implements NodeManager {
return null; return null;
} }
/**
* Returns the status of manual chill mode flag.
*
* @return true if forceEnterChillMode has been called, false if
* forceExitChillMode or status is not set. eg. clearChillModeFlag.
*/
@Override
public boolean isInManualChillMode() {
return false;
}
/** /**
* Get the number of data nodes in all states. * Get the number of data nodes in all states.
* *
@ -158,21 +137,30 @@ public class ReplicationNodeManagerMock implements NodeManager {
} }
/** /**
* Forcefully enters chill mode, even if all minimum node conditions are met. * Puts the node manager into manual chill mode.
*/ */
@Override @Override
public void forceEnterChillMode() { public void enterChillMode() {
} }
/** /**
* Clears the manual chill mode flag. * Brings node manager out of manual chill mode.
*/ */
@Override @Override
public void clearChillModeFlag() { public void exitChillMode() {
} }
/**
* Returns true if node manager is out of chill mode, else false.
* @return true if out of chill mode, else false
*/
@Override
public boolean isOutOfChillMode() {
return !nodeStateMap.isEmpty();
}
/** /**
* Returns the aggregated node stats. * Returns the aggregated node stats.
* *

View File

@ -108,17 +108,13 @@ public class TestSCMMXBean {
minChillNodes.intValue()); minChillNodes.intValue());
boolean isOutOfChillMode = (boolean)mbs.getAttribute(bean, boolean isOutOfChillMode = (boolean)mbs.getAttribute(bean,
"OutOfNodeChillMode"); "OutOfChillMode");
assertEquals(scmNm.isOutOfNodeChillMode(), isOutOfChillMode); assertEquals(scmNm.isOutOfChillMode(), isOutOfChillMode);
String chillStatus = (String)mbs.getAttribute(bean, String chillStatus = (String)mbs.getAttribute(bean,
"ChillModeStatus"); "ChillModeStatus");
assertEquals(scmNm.getChillModeStatus(), chillStatus); assertEquals(scmNm.getChillModeStatus(), chillStatus);
boolean inManualChillMode = (boolean)mbs.getAttribute(bean,
"InManualChillMode");
assertEquals(scmNm.isInManualChillMode(), inManualChillMode);
TabularData nodeCountObj = (TabularData)mbs.getAttribute(bean, TabularData nodeCountObj = (TabularData)mbs.getAttribute(bean,
"NodeCount"); "NodeCount");
verifyEquals(nodeCountObj, scm.getScmNodeManager().getNodeCount()); verifyEquals(nodeCountObj, scm.getScmNodeManager().getNodeCount());

View File

@ -188,16 +188,6 @@ public class MockNodeManager implements NodeManager {
return 0; return 0;
} }
/**
* Reports if we have exited out of chill mode by discovering enough nodes.
*
* @return True if we are out of Node layer chill mode, false otherwise.
*/
@Override
public boolean isOutOfNodeChillMode() {
return !chillmode;
}
/** /**
* Chill mode is the period when node manager waits for a minimum configured * Chill mode is the period when node manager waits for a minimum configured
* number of datanodes to report in. This is called chill mode to indicate the * number of datanodes to report in. This is called chill mode to indicate the
@ -212,21 +202,30 @@ public class MockNodeManager implements NodeManager {
} }
/** /**
* Forcefully enters chill mode, even if all minimum node conditions are met. * Puts the node manager into manual chill mode.
*/ */
@Override @Override
public void forceEnterChillMode() { public void enterChillMode() {
} }
/** /**
* Clears the manual chill mode flag. * Brings node manager out of manual chill mode.
*/ */
@Override @Override
public void clearChillModeFlag() { public void exitChillMode() {
} }
/**
* Returns true if node manager is out of chill mode, else false.
* @return true if out of chill mode, else false
*/
@Override
public boolean isOutOfChillMode() {
return !chillmode;
}
/** /**
* Returns a chill mode status string. * Returns a chill mode status string.
* *
@ -237,17 +236,6 @@ public class MockNodeManager implements NodeManager {
return null; return null;
} }
/**
* Returns the status of manual chill mode flag.
*
* @return true if forceEnterChillMode has been called, false if
* forceExitChillMode or status is not set. eg. clearChillModeFlag.
*/
@Override
public boolean isInManualChillMode() {
return false;
}
/** /**
* Returns the aggregated node stats. * Returns the aggregated node stats.
* @return the aggregated node stats. * @return the aggregated node stats.

View File

@ -86,7 +86,7 @@ public class TestContainerPlacement {
SCMNodeManager nodeManager = new SCMNodeManager(config, SCMNodeManager nodeManager = new SCMNodeManager(config,
UUID.randomUUID().toString()); UUID.randomUUID().toString());
assertFalse("Node manager should be in chill mode", assertFalse("Node manager should be in chill mode",
nodeManager.isOutOfNodeChillMode()); nodeManager.isOutOfChillMode());
return nodeManager; return nodeManager;
} }
@ -150,7 +150,7 @@ public class TestContainerPlacement {
assertEquals(remaining * nodeCount, assertEquals(remaining * nodeCount,
(long) nodeManager.getStats().getRemaining().get()); (long) nodeManager.getStats().getRemaining().get());
assertTrue(nodeManager.isOutOfNodeChillMode()); assertTrue(nodeManager.isOutOfChillMode());
String container1 = UUID.randomUUID().toString(); String container1 = UUID.randomUUID().toString();
Pipeline pipeline1 = containerManager.allocateContainer( Pipeline pipeline1 = containerManager.allocateContainer(

View File

@ -121,7 +121,7 @@ public class TestNodeManager {
SCMNodeManager nodeManager = new SCMNodeManager(config, SCMNodeManager nodeManager = new SCMNodeManager(config,
UUID.randomUUID().toString()); UUID.randomUUID().toString());
assertFalse("Node manager should be in chill mode", assertFalse("Node manager should be in chill mode",
nodeManager.isOutOfNodeChillMode()); nodeManager.isOutOfChillMode());
return nodeManager; return nodeManager;
} }
@ -150,7 +150,7 @@ public class TestNodeManager {
assertTrue("Heartbeat thread should have picked up the" + assertTrue("Heartbeat thread should have picked up the" +
"scheduled heartbeats and transitioned out of chill mode.", "scheduled heartbeats and transitioned out of chill mode.",
nodeManager.isOutOfNodeChillMode()); nodeManager.isOutOfChillMode());
} }
} }
@ -169,7 +169,7 @@ public class TestNodeManager {
GenericTestUtils.waitFor(() -> nodeManager.waitForHeartbeatProcessed(), GenericTestUtils.waitFor(() -> nodeManager.waitForHeartbeatProcessed(),
100, 4 * 1000); 100, 4 * 1000);
assertFalse("No heartbeats, Node manager should have been in" + assertFalse("No heartbeats, Node manager should have been in" +
" chill mode.", nodeManager.isOutOfNodeChillMode()); " chill mode.", nodeManager.isOutOfChillMode());
} }
} }
@ -191,7 +191,7 @@ public class TestNodeManager {
GenericTestUtils.waitFor(() -> nodeManager.waitForHeartbeatProcessed(), GenericTestUtils.waitFor(() -> nodeManager.waitForHeartbeatProcessed(),
100, 4 * 1000); 100, 4 * 1000);
assertFalse("Not enough heartbeat, Node manager should have" + assertFalse("Not enough heartbeat, Node manager should have" +
"been in chillmode.", nodeManager.isOutOfNodeChillMode()); "been in chillmode.", nodeManager.isOutOfChillMode());
} }
} }
@ -219,7 +219,7 @@ public class TestNodeManager {
GenericTestUtils.waitFor(() -> nodeManager.waitForHeartbeatProcessed(), GenericTestUtils.waitFor(() -> nodeManager.waitForHeartbeatProcessed(),
100, 4 * 1000); 100, 4 * 1000);
assertFalse("Not enough nodes have send heartbeat to node" + assertFalse("Not enough nodes have send heartbeat to node" +
"manager.", nodeManager.isOutOfNodeChillMode()); "manager.", nodeManager.isOutOfChillMode());
} }
} }
@ -899,27 +899,23 @@ public class TestNodeManager {
"mode, waiting on nodes to report in.")); "mode, waiting on nodes to report in."));
// Should not exit chill mode since 10 nodes have not heartbeat yet. // Should not exit chill mode since 10 nodes have not heartbeat yet.
assertFalse(nodeManager.isOutOfNodeChillMode()); assertFalse(nodeManager.isOutOfChillMode());
assertFalse((nodeManager.isInManualChillMode()));
// Force exit chill mode. // Force exit chill mode.
nodeManager.forceExitChillMode(); nodeManager.forceExitChillMode();
assertTrue(nodeManager.isOutOfNodeChillMode()); assertTrue(nodeManager.isOutOfChillMode());
status = nodeManager.getChillModeStatus(); status = nodeManager.getChillModeStatus();
Assert.assertThat(status, Assert.assertThat(status,
CoreMatchers.containsString("Out of chill mode.")); CoreMatchers.containsString("Out of chill mode."));
assertFalse((nodeManager.isInManualChillMode()));
// Enter back into chill mode. // Enter back into chill mode.
nodeManager.forceEnterChillMode(); nodeManager.enterChillMode();
assertFalse(nodeManager.isOutOfNodeChillMode()); assertFalse(nodeManager.isOutOfChillMode());
status = nodeManager.getChillModeStatus(); status = nodeManager.getChillModeStatus();
Assert.assertThat(status, Assert.assertThat(status,
CoreMatchers.containsString("Out of startup chill mode," + CoreMatchers.containsString("Out of startup chill mode," +
" but in manual chill mode.")); " but in manual chill mode."));
assertTrue((nodeManager.isInManualChillMode()));
// Assert that node manager force enter cannot be overridden by nodes HBs. // Assert that node manager force enter cannot be overridden by nodes HBs.
for (int x = 0; x < 20; x++) { for (int x = 0; x < 20; x++) {
@ -928,16 +924,15 @@ public class TestNodeManager {
} }
Thread.sleep(500); Thread.sleep(500);
assertFalse(nodeManager.isOutOfNodeChillMode()); assertFalse(nodeManager.isOutOfChillMode());
// Make sure that once we clear the manual chill mode flag, we fall back // Make sure that once we exit out of manual chill mode, we fall back
// to the number of nodes to get out of chill mode. // to the number of nodes to get out of chill mode.
nodeManager.clearChillModeFlag(); nodeManager.exitChillMode();
assertTrue(nodeManager.isOutOfNodeChillMode()); assertTrue(nodeManager.isOutOfChillMode());
status = nodeManager.getChillModeStatus(); status = nodeManager.getChillModeStatus();
Assert.assertThat(status, Assert.assertThat(status,
CoreMatchers.containsString("Out of chill mode.")); CoreMatchers.containsString("Out of chill mode."));
assertFalse(nodeManager.isInManualChillMode());
} }
} }