HDDS-526. Clean previous chill mode code from NodeManager. Contributed by Ajay Kumar.

This commit is contained in:
Nanda kumar 2018-10-03 15:15:43 +05:30
parent 2626f46691
commit 7b374482d2
13 changed files with 47 additions and 423 deletions

View File

@ -49,8 +49,6 @@ import java.util.Map;
import java.util.Random;
import java.util.concurrent.TimeUnit;
import static org.apache.hadoop.hdds.scm.exceptions.SCMException.ResultCodes
.CHILL_MODE_EXCEPTION;
import static org.apache.hadoop.hdds.scm.exceptions.SCMException.ResultCodes
.INVALID_BLOCK_SIZE;
import static org.apache.hadoop.ozone.OzoneConfigKeys
@ -345,10 +343,7 @@ public class BlockManagerImpl implements EventHandler<Boolean>,
*/
@Override
public void deleteBlocks(List<BlockID> blockIDs) throws IOException {
if (!nodeManager.isOutOfChillMode()) {
throw new SCMException("Unable to delete block while in chill mode",
CHILL_MODE_EXCEPTION);
}
ScmUtils.preCheck(ScmOps.deleteBlock, chillModePrecheck);
LOG.info("Deleting blocks {}", StringUtils.join(",", blockIDs));
Map<Long, List<Long>> containerBlocks = new HashMap<>();

View File

@ -92,26 +92,6 @@ public interface NodeManager extends StorageContainerNodeProtocol,
*/
List<DatanodeDetails> getAllNodes();
/**
* Chill mode is the period when node manager waits for a minimum
* configured number of datanodes to report in. This is called chill mode
* to indicate the period before node manager gets into action.
*
* Forcefully exits the chill mode, even if we have not met the minimum
* criteria of the nodes reporting in.
*/
void forceExitChillMode();
/**
* Puts the node manager into manual chill mode.
*/
void enterChillMode();
/**
* Brings node manager out of manual chill mode.
*/
void exitChillMode();
/**
* Returns the aggregated node stats.
* @return the aggregated node stats.

View File

@ -28,25 +28,6 @@ import java.util.Map;
*/
@InterfaceAudience.Private
public interface NodeManagerMXBean {
/**
* Get the minimum number of nodes to get out of chill mode.
*
* @return int
*/
int getMinimumChillModeNodes();
/**
* Returns a chill mode status string.
* @return String
*/
String getChillModeStatus();
/**
* Returns true if node manager is out of chill mode, else false.
* @return true if out of chill mode, else false
*/
boolean isOutOfChillMode();
/**
* Get the number of data nodes in all states.

View File

@ -66,7 +66,6 @@ import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.UUID;
import java.util.concurrent.atomic.AtomicBoolean;
/**
* Maintains information about the Datanodes on SCM side.
@ -94,22 +93,8 @@ public class SCMNodeManager
// can always calculate it from nodeStats whenever required.
// Aggregated node stats
private SCMNodeStat scmStat;
// Should we create ChillModeManager and extract all the chill mode logic
// to a new class?
private int chillModeNodeCount;
private final String clusterID;
private final VersionInfo version;
/**
* During start up of SCM, it will enter into chill mode and will be there
* until number of Datanodes registered reaches {@code chillModeNodeCount}.
* This flag is for tracking startup chill mode.
*/
private AtomicBoolean inStartupChillMode;
/**
* Administrator can put SCM into chill mode manually.
* This flag is for tracking manual chill mode.
*/
private AtomicBoolean inManualChillMode;
private final CommandQueue commandQueue;
// Node manager MXBean
private ObjectName nmInfoBean;
@ -128,10 +113,6 @@ public class SCMNodeManager
this.clusterID = clusterID;
this.version = VersionInfo.getLatestVersion();
this.commandQueue = new CommandQueue();
// TODO: Support this value as a Percentage of known machines.
this.chillModeNodeCount = 1;
this.inStartupChillMode = new AtomicBoolean(true);
this.inManualChillMode = new AtomicBoolean(false);
this.scmManager = scmManager;
LOG.info("Entering startup chill mode.");
registerMXBean();
@ -183,91 +164,6 @@ public class SCMNodeManager
return nodeStateManager.getAllNodes();
}
/**
* Get the minimum number of nodes to get out of Chill mode.
*
* @return int
*/
@Override
public int getMinimumChillModeNodes() {
return chillModeNodeCount;
}
/**
* Sets the Minimum chill mode nodes count, used only in testing.
*
* @param count - Number of nodes.
*/
@VisibleForTesting
public void setMinimumChillModeNodes(int count) {
chillModeNodeCount = count;
}
/**
* Builds a human-readable chill mode status message.
* <p>
* The startup flag is checked before the manual flag, so the startup message
* is returned when both are set. Every message ends with the total node count
* read from {@code nodeStateManager}.
*
* @return the status message for the current chill mode state
*/
@Override
public String getChillModeStatus() {
// Startup chill mode: also report the minimum node count needed to leave it.
if (inStartupChillMode.get()) {
return "Still in chill mode, waiting on nodes to report in." +
String.format(" %d nodes reported, minimal %d nodes required.",
nodeStateManager.getTotalNodeCount(), getMinimumChillModeNodes());
}
// Startup chill mode is over, but the manual flag is still set.
if (inManualChillMode.get()) {
return "Out of startup chill mode, but in manual chill mode." +
String.format(" %d nodes have reported in.",
nodeStateManager.getTotalNodeCount());
}
// Neither flag is set.
return "Out of chill mode." +
String.format(" %d nodes have reported in.",
nodeStateManager.getTotalNodeCount());
}
/**
* Forcefully exits the chill mode even if we have not met the minimum
* criteria of exiting the chill mode. This will exit from both startup
* and manual chill mode.
* <p>
* Each flag is cleared, and the corresponding message logged, only when that
* flag is currently set; calling this while already out of chill mode does
* nothing.
*/
@Override
public void forceExitChillMode() {
// NOTE(review): get() followed by set() is two separate calls, not a single
// atomic compare-and-set.
if(inStartupChillMode.get()) {
LOG.info("Leaving startup chill mode.");
inStartupChillMode.set(false);
}
if(inManualChillMode.get()) {
LOG.info("Leaving manual chill mode.");
inManualChillMode.set(false);
}
}
/**
* Puts the node manager into manual chill mode.
*/
@Override
public void enterChillMode() {
LOG.info("Entering manual chill mode.");
inManualChillMode.set(true);
}
/**
* Brings node manager out of manual chill mode.
*/
@Override
public void exitChillMode() {
LOG.info("Leaving manual chill mode.");
inManualChillMode.set(false);
}
/**
* Reports whether the node manager is out of chill mode.
* <p>
* The node manager counts as out of chill mode only when neither the startup
* flag nor the manual flag is set.
*
* @return true if both chill mode flags are clear, else false
*/
@Override
public boolean isOutOfChillMode() {
return !(inStartupChillMode.get() || inManualChillMode.get());
}
/**
* Returns the Number of Datanodes by State they are in.
*
@ -379,11 +275,6 @@ public class SCMNodeManager
try {
nodeStateManager.addNode(datanodeDetails);
nodeStateManager.setNodeStat(dnId, new SCMNodeStat());
if(inStartupChillMode.get() &&
nodeStateManager.getTotalNodeCount() >= getMinimumChillModeNodes()) {
inStartupChillMode.getAndSet(false);
LOG.info("Leaving startup chill mode.");
}
// Updating Node Report, as registration is successful
updateNodeStat(datanodeDetails.getUuid(), nodeReport);
LOG.info("Data node with ID: {} Registered.", datanodeDetails.getUuid());

View File

@ -47,4 +47,16 @@ public interface SCMMXBean extends ServiceRuntimeInfo {
* @return The datanodeUUid to report json string mapping
*/
Map<String, String> getContainerReport();
/**
* Returns chill mode status.
*
* @return the chill mode status as a boolean; presumably {@code true} while
* chill mode is active (confirm against the implementing class)
*/
boolean isInChillMode();
/**
* Returns live chill mode container threshold.
*
* @return the current container threshold as a {@code double}
*/
double getChillModeCurrentContainerThreshold();
}

View File

@ -884,6 +884,21 @@ public final class StorageContainerManager extends ServiceRuntimeInfoImpl
return id2StatMap;
}
/**
* Returns live chill mode container threshold.
* <p>
* Delegates to {@code getCurrentContainerThreshold()}.
*
* @return the current container threshold as a {@code double}
*/
@Override
public double getChillModeCurrentContainerThreshold() {
return getCurrentContainerThreshold();
}
/**
* Returns chill mode status.
* <p>
* Delegates to {@code scmChillModeManager.getInChillMode()}.
*
* @return the value reported by the chill mode manager; presumably
* {@code true} while chill mode is active (confirm in that class)
*/
@Override
public boolean isInChillMode() {
return scmChillModeManager.getInChillMode();
}

View File

@ -198,64 +198,6 @@ public class MockNodeManager implements NodeManager {
return null;
}
/**
* Get the minimum number of nodes to get out of chill mode.
*
* @return int
*/
@Override
public int getMinimumChillModeNodes() {
return 0;
}
/**
* Chill mode is the period when node manager waits for a minimum configured
* number of datanodes to report in. This is called chill mode to indicate the
* period before node manager gets into action.
* <p>
* Forcefully exits the chill mode, even if we have not met the minimum
* criteria of the nodes reporting in.
*/
@Override
public void forceExitChillMode() {
}
/**
* Puts the node manager into manual chill mode.
*/
@Override
public void enterChillMode() {
}
/**
* Brings node manager out of manual chill mode.
*/
@Override
public void exitChillMode() {
}
/**
* Returns true if node manager is out of chill mode, else false.
* <p>
* This mock simply negates its {@code chillmode} field.
*
* @return true if {@code chillmode} is false, else false
*/
@Override
public boolean isOutOfChillMode() {
return !chillmode;
}
/**
* Returns a chill mode status string.
*
* @return String
*/
@Override
public String getChillModeStatus() {
return null;
}
/**
* Returns the aggregated node stats.
* @return the aggregated node stats.

View File

@ -56,8 +56,6 @@ import static org.apache.hadoop.hdds.scm.ScmConfigKeys
import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeState
.HEALTHY;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
/**
* Test for different container placement policy.
@ -96,8 +94,6 @@ public class TestContainerPlacement {
Mockito.mock(DeadNodeHandler.class));
SCMNodeManager nodeManager = new SCMNodeManager(config,
UUID.randomUUID().toString(), null, eventQueue);
assertFalse("Node manager should be in chill mode",
nodeManager.isOutOfChillMode());
return nodeManager;
}
@ -154,8 +150,6 @@ public class TestContainerPlacement {
assertEquals(remaining * nodeCount,
(long) nodeManager.getStats().getRemaining().get());
assertTrue(nodeManager.isOutOfChillMode());
ContainerWithPipeline containerWithPipeline = containerManager
.allocateContainer(
xceiverClientManager.getType(),

View File

@ -71,10 +71,8 @@ import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeState
.HEALTHY;
import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeState.STALE;
import static org.apache.hadoop.hdds.scm.events.SCMEvents.DATANODE_COMMAND;
import static org.hamcrest.CoreMatchers.containsString;
import static org.hamcrest.core.StringStartsWith.startsWith;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
/**
@ -135,8 +133,6 @@ public class TestNodeManager {
Mockito.mock(DeadNodeHandler.class));
SCMNodeManager nodeManager = new SCMNodeManager(config,
UUID.randomUUID().toString(), null, eventQueue);
assertFalse("Node manager should be in chill mode",
nodeManager.isOutOfChillMode());
return nodeManager;
}
@ -153,8 +149,9 @@ public class TestNodeManager {
InterruptedException, TimeoutException {
try (SCMNodeManager nodeManager = createNodeManager(getConf())) {
int registeredNodes = 5;
// Send some heartbeats from different nodes.
for (int x = 0; x < nodeManager.getMinimumChillModeNodes(); x++) {
for (int x = 0; x < registeredNodes; x++) {
DatanodeDetails datanodeDetails = TestUtils
.createRandomDatanodeAndRegister(nodeManager);
nodeManager.processHeartbeat(datanodeDetails);
@ -163,8 +160,8 @@ public class TestNodeManager {
//TODO: wait for heartbeat to be processed
Thread.sleep(4 * 1000);
assertTrue("Heartbeat thread should have picked up the" +
"scheduled heartbeats and transitioned out of chill mode.",
nodeManager.isOutOfChillMode());
"scheduled heartbeats.",
nodeManager.getAllNodes().size() == registeredNodes);
}
}
@ -182,60 +179,8 @@ public class TestNodeManager {
try (SCMNodeManager nodeManager = createNodeManager(getConf())) {
//TODO: wait for heartbeat to be processed
Thread.sleep(4 * 1000);
assertFalse("No heartbeats, Node manager should have been in" +
" chill mode.", nodeManager.isOutOfChillMode());
}
}
/**
* Asserts that the node manager stays in chill mode when fewer unique nodes
* have registered than the configured minimum: the minimum is set to 100 and
* only one datanode registers and sends a heartbeat.
*
* @throws IOException if thrown by the code under test (fails the test)
* @throws InterruptedException if the sleep below is interrupted
* @throws TimeoutException declared in the signature; no call visible in this
* method is known to throw it
*/
@Test
public void testScmNotEnoughHeartbeats() throws IOException,
InterruptedException, TimeoutException {
try (SCMNodeManager nodeManager = createNodeManager(getConf())) {
// Need 100 nodes to come out of chill mode, only one node is sending HB.
nodeManager.setMinimumChillModeNodes(100);
nodeManager.processHeartbeat(TestUtils
.createRandomDatanodeAndRegister(nodeManager));
//TODO: wait for heartbeat to be processed
Thread.sleep(4 * 1000);
// NOTE(review): the two message fragments below join without a space
// ("havebeen").
assertFalse("Not enough heartbeat, Node manager should have" +
"been in chillmode.", nodeManager.isOutOfChillMode());
}
}
/**
* Asserts that many heartbeat from the same node is counted as a single
* node.
*
* @throws IOException
* @throws InterruptedException
* @throws TimeoutException
*/
@Test
public void testScmSameNodeHeartbeats() throws IOException,
InterruptedException, TimeoutException {
try (SCMNodeManager nodeManager = createNodeManager(getConf())) {
nodeManager.setMinimumChillModeNodes(3);
DatanodeDetails datanodeDetails = TestUtils
.createRandomDatanodeAndRegister(nodeManager);
// Send 10 heartbeat from same node, and assert we never leave chill mode.
for (int x = 0; x < 10; x++) {
nodeManager.processHeartbeat(datanodeDetails);
}
//TODO: wait for heartbeat to be processed
Thread.sleep(4 * 1000);
assertFalse("Not enough nodes have send heartbeat to node" +
"manager.", nodeManager.isOutOfChillMode());
assertTrue("No heartbeats, 0 nodes should be registered",
nodeManager.getAllNodes().size() == 0);
}
}
@ -872,62 +817,6 @@ public class TestNodeManager {
}
}
/**
* Walks the node manager through startup chill mode, a forced exit, manual
* chill mode and a manual exit, checking both {@code isOutOfChillMode()} and
* the text returned by {@code getChillModeStatus()} at each step.
*
* @throws IOException if thrown by the code under test (fails the test)
* @throws InterruptedException if the sleep below is interrupted
*/
@Test
public void testScmEnterAndExitChillMode() throws IOException,
InterruptedException {
OzoneConfiguration conf = getConf();
conf.setTimeDuration(OZONE_SCM_HEARTBEAT_PROCESS_INTERVAL, 100,
MILLISECONDS);
try (SCMNodeManager nodeManager = createNodeManager(conf)) {
nodeManager.setMinimumChillModeNodes(10);
DatanodeDetails datanodeDetails = TestUtils
.createRandomDatanodeAndRegister(nodeManager);
nodeManager.processHeartbeat(datanodeDetails);
String status = nodeManager.getChillModeStatus();
Assert.assertThat(status, containsString("Still in chill " +
"mode, waiting on nodes to report in."));
// Only one of the 10 required nodes has registered, so startup chill mode
// must still be active.
assertFalse(nodeManager.isOutOfChillMode());
// Force exit chill mode.
nodeManager.forceExitChillMode();
assertTrue(nodeManager.isOutOfChillMode());
status = nodeManager.getChillModeStatus();
Assert.assertThat(status,
containsString("Out of chill mode."));
// Re-enter chill mode manually.
nodeManager.enterChillMode();
assertFalse(nodeManager.isOutOfChillMode());
status = nodeManager.getChillModeStatus();
Assert.assertThat(status,
containsString("Out of startup chill mode," +
" but in manual chill mode."));
// Manual chill mode must not be overridden by node registrations and
// heartbeats, even with more nodes than the configured minimum.
for (int x = 0; x < 20; x++) {
DatanodeDetails datanode = TestUtils
.createRandomDatanodeAndRegister(nodeManager);
nodeManager.processHeartbeat(datanode);
}
Thread.sleep(500);
assertFalse(nodeManager.isOutOfChillMode());
// Leaving manual chill mode should put the node manager fully out of chill
// mode, since startup chill mode was already left above.
nodeManager.exitChillMode();
assertTrue(nodeManager.isOutOfChillMode());
status = nodeManager.getChillModeStatus();
Assert.assertThat(status,
containsString("Out of chill mode."));
}
}
/**
* Test multiple nodes sending initial heartbeat with their node report.
*

View File

@ -65,26 +65,6 @@ public class ReplicationNodeManagerMock implements NodeManager {
this.commandQueue = commandQueue;
}
/**
* Get the minimum number of nodes to get out of chill mode.
*
* @return int
*/
@Override
public int getMinimumChillModeNodes() {
return 0;
}
/**
* Returns a chill mode status string.
*
* @return String
*/
@Override
public String getChillModeStatus() {
return null;
}
/**
* Get the number of data nodes in all states.
*
@ -140,44 +120,6 @@ public class ReplicationNodeManagerMock implements NodeManager {
return null;
}
/**
* Chill mode is the period when node manager waits for a minimum
* configured number of datanodes to report in. This is called chill mode
* to indicate the period before node manager gets into action.
* <p>
* Forcefully exits the chill mode, even if we have not met the minimum
* criteria of the nodes reporting in.
*/
@Override
public void forceExitChillMode() {
}
/**
* Puts the node manager into manual chill mode.
*/
@Override
public void enterChillMode() {
}
/**
* Brings node manager out of manual chill mode.
*/
@Override
public void exitChillMode() {
}
/**
* Returns true if node manager is out of chill mode, else false.
* <p>
* This mock reports being out of chill mode whenever {@code nodeStateMap}
* is non-empty.
*
* @return true if {@code nodeStateMap} has at least one entry, else false
*/
@Override
public boolean isOutOfChillMode() {
return !nodeStateMap.isEmpty();
}
/**
* Returns the aggregated node stats.
*

View File

@ -66,6 +66,7 @@ public class TestContainerStateManagerIntegration {
conf = new OzoneConfiguration();
cluster = MiniOzoneCluster.newBuilder(conf).setNumDatanodes(1).build();
cluster.waitForClusterToBeReady();
cluster.waitTobeOutOfChillMode();
xceiverClientManager = new XceiverClientManager(conf);
scm = cluster.getStorageContainerManager();
scmContainerMapping = (ContainerMapping) scm.getScmContainerManager();

View File

@ -135,12 +135,12 @@ public final class MiniOzoneClusterImpl implements MiniOzoneCluster {
public void waitTobeOutOfChillMode()
throws TimeoutException, InterruptedException {
GenericTestUtils.waitFor(() -> {
if (scm.getScmNodeManager().isOutOfChillMode()) {
if (!scm.isInChillMode()) {
return true;
}
LOG.info("Waiting for cluster to be ready. No datanodes found");
return false;
}, 100, 45000);
}, 100, 1000 * 45);
}
@Override

View File

@ -24,15 +24,9 @@ import org.apache.hadoop.hdds.scm.server.StorageContainerManager;
import org.apache.hadoop.ozone.MiniOzoneCluster;
import org.apache.hadoop.hdds.conf.OzoneConfiguration;
import org.apache.hadoop.hdds.scm.container.placement.metrics.ContainerStat;
import org.apache.hadoop.hdds.scm.node.NodeManager;
import org.junit.BeforeClass;
import org.junit.AfterClass;
import org.junit.Test;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
import javax.management.MBeanServer;
import javax.management.ObjectName;
import java.io.IOException;
@ -45,6 +39,10 @@ import java.util.concurrent.TimeoutException;
import javax.management.openmbean.CompositeData;
import javax.management.openmbean.TabularData;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
/**
*
* This class is to test JMX management interface for scm information.
@ -110,30 +108,14 @@ public class TestSCMMXBean {
assertEquals("nodeID", key);
assertEquals(stat.toJsonString(), value);
}
}
@Test
public void testSCMNodeManagerMXBean() throws Exception {
final NodeManager scmNm = scm.getScmNodeManager();
ObjectName bean = new ObjectName(
"Hadoop:service=SCMNodeManager,name=SCMNodeManagerInfo");
boolean inChillMode = (boolean) mbs.getAttribute(bean,
"InChillMode");
assertEquals(scm.isInChillMode(), inChillMode);
Integer minChillNodes = (Integer)mbs.getAttribute(bean,
"MinimumChillModeNodes");
assertEquals(scmNm.getMinimumChillModeNodes(),
minChillNodes.intValue());
boolean isOutOfChillMode = (boolean)mbs.getAttribute(bean,
"OutOfChillMode");
assertEquals(scmNm.isOutOfChillMode(), isOutOfChillMode);
String chillStatus = (String)mbs.getAttribute(bean,
"ChillModeStatus");
assertEquals(scmNm.getChillModeStatus(), chillStatus);
TabularData nodeCountObj = (TabularData)mbs.getAttribute(bean,
"NodeCount");
verifyEquals(nodeCountObj, scm.getScmNodeManager().getNodeCount());
double containerThreshold = (double) mbs.getAttribute(bean,
"ChillModeCurrentContainerThreshold");
assertEquals(scm.getCurrentContainerThreshold(), containerThreshold, 0);
}
/**