HDDS-526. Clean previous chill mode code from NodeManager. Contributed by Ajay Kumar.
This commit is contained in:
parent
2626f46691
commit
7b374482d2
@ -49,8 +49,6 @@
|
|||||||
import java.util.Random;
|
import java.util.Random;
|
||||||
import java.util.concurrent.TimeUnit;
|
import java.util.concurrent.TimeUnit;
|
||||||
|
|
||||||
import static org.apache.hadoop.hdds.scm.exceptions.SCMException.ResultCodes
|
|
||||||
.CHILL_MODE_EXCEPTION;
|
|
||||||
import static org.apache.hadoop.hdds.scm.exceptions.SCMException.ResultCodes
|
import static org.apache.hadoop.hdds.scm.exceptions.SCMException.ResultCodes
|
||||||
.INVALID_BLOCK_SIZE;
|
.INVALID_BLOCK_SIZE;
|
||||||
import static org.apache.hadoop.ozone.OzoneConfigKeys
|
import static org.apache.hadoop.ozone.OzoneConfigKeys
|
||||||
@ -345,10 +343,7 @@ private AllocatedBlock newBlock(ContainerWithPipeline containerWithPipeline,
|
|||||||
*/
|
*/
|
||||||
@Override
|
@Override
|
||||||
public void deleteBlocks(List<BlockID> blockIDs) throws IOException {
|
public void deleteBlocks(List<BlockID> blockIDs) throws IOException {
|
||||||
if (!nodeManager.isOutOfChillMode()) {
|
ScmUtils.preCheck(ScmOps.deleteBlock, chillModePrecheck);
|
||||||
throw new SCMException("Unable to delete block while in chill mode",
|
|
||||||
CHILL_MODE_EXCEPTION);
|
|
||||||
}
|
|
||||||
|
|
||||||
LOG.info("Deleting blocks {}", StringUtils.join(",", blockIDs));
|
LOG.info("Deleting blocks {}", StringUtils.join(",", blockIDs));
|
||||||
Map<Long, List<Long>> containerBlocks = new HashMap<>();
|
Map<Long, List<Long>> containerBlocks = new HashMap<>();
|
||||||
|
@ -92,26 +92,6 @@ public interface NodeManager extends StorageContainerNodeProtocol,
|
|||||||
*/
|
*/
|
||||||
List<DatanodeDetails> getAllNodes();
|
List<DatanodeDetails> getAllNodes();
|
||||||
|
|
||||||
/**
|
|
||||||
* Chill mode is the period when node manager waits for a minimum
|
|
||||||
* configured number of datanodes to report in. This is called chill mode
|
|
||||||
* to indicate the period before node manager gets into action.
|
|
||||||
*
|
|
||||||
* Forcefully exits the chill mode, even if we have not met the minimum
|
|
||||||
* criteria of the nodes reporting in.
|
|
||||||
*/
|
|
||||||
void forceExitChillMode();
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Puts the node manager into manual chill mode.
|
|
||||||
*/
|
|
||||||
void enterChillMode();
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Brings node manager out of manual chill mode.
|
|
||||||
*/
|
|
||||||
void exitChillMode();
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns the aggregated node stats.
|
* Returns the aggregated node stats.
|
||||||
* @return the aggregated node stats.
|
* @return the aggregated node stats.
|
||||||
|
@ -28,25 +28,6 @@
|
|||||||
*/
|
*/
|
||||||
@InterfaceAudience.Private
|
@InterfaceAudience.Private
|
||||||
public interface NodeManagerMXBean {
|
public interface NodeManagerMXBean {
|
||||||
/**
|
|
||||||
* Get the minimum number of nodes to get out of chill mode.
|
|
||||||
*
|
|
||||||
* @return int
|
|
||||||
*/
|
|
||||||
int getMinimumChillModeNodes();
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Returns a chill mode status string.
|
|
||||||
* @return String
|
|
||||||
*/
|
|
||||||
String getChillModeStatus();
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Returns true if node manager is out of chill mode, else false.
|
|
||||||
* @return true if out of chill mode, else false
|
|
||||||
*/
|
|
||||||
boolean isOutOfChillMode();
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Get the number of data nodes in all states.
|
* Get the number of data nodes in all states.
|
||||||
|
@ -66,7 +66,6 @@
|
|||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
import java.util.UUID;
|
import java.util.UUID;
|
||||||
import java.util.concurrent.atomic.AtomicBoolean;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Maintains information about the Datanodes on SCM side.
|
* Maintains information about the Datanodes on SCM side.
|
||||||
@ -94,22 +93,8 @@ public class SCMNodeManager
|
|||||||
// can always calculate it from nodeStats whenever required.
|
// can always calculate it from nodeStats whenever required.
|
||||||
// Aggregated node stats
|
// Aggregated node stats
|
||||||
private SCMNodeStat scmStat;
|
private SCMNodeStat scmStat;
|
||||||
// Should we create ChillModeManager and extract all the chill mode logic
|
|
||||||
// to a new class?
|
|
||||||
private int chillModeNodeCount;
|
|
||||||
private final String clusterID;
|
private final String clusterID;
|
||||||
private final VersionInfo version;
|
private final VersionInfo version;
|
||||||
/**
|
|
||||||
* During start up of SCM, it will enter into chill mode and will be there
|
|
||||||
* until number of Datanodes registered reaches {@code chillModeNodeCount}.
|
|
||||||
* This flag is for tracking startup chill mode.
|
|
||||||
*/
|
|
||||||
private AtomicBoolean inStartupChillMode;
|
|
||||||
/**
|
|
||||||
* Administrator can put SCM into chill mode manually.
|
|
||||||
* This flag is for tracking manual chill mode.
|
|
||||||
*/
|
|
||||||
private AtomicBoolean inManualChillMode;
|
|
||||||
private final CommandQueue commandQueue;
|
private final CommandQueue commandQueue;
|
||||||
// Node manager MXBean
|
// Node manager MXBean
|
||||||
private ObjectName nmInfoBean;
|
private ObjectName nmInfoBean;
|
||||||
@ -128,10 +113,6 @@ public SCMNodeManager(OzoneConfiguration conf, String clusterID,
|
|||||||
this.clusterID = clusterID;
|
this.clusterID = clusterID;
|
||||||
this.version = VersionInfo.getLatestVersion();
|
this.version = VersionInfo.getLatestVersion();
|
||||||
this.commandQueue = new CommandQueue();
|
this.commandQueue = new CommandQueue();
|
||||||
// TODO: Support this value as a Percentage of known machines.
|
|
||||||
this.chillModeNodeCount = 1;
|
|
||||||
this.inStartupChillMode = new AtomicBoolean(true);
|
|
||||||
this.inManualChillMode = new AtomicBoolean(false);
|
|
||||||
this.scmManager = scmManager;
|
this.scmManager = scmManager;
|
||||||
LOG.info("Entering startup chill mode.");
|
LOG.info("Entering startup chill mode.");
|
||||||
registerMXBean();
|
registerMXBean();
|
||||||
@ -183,91 +164,6 @@ public List<DatanodeDetails> getAllNodes() {
|
|||||||
return nodeStateManager.getAllNodes();
|
return nodeStateManager.getAllNodes();
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Get the minimum number of nodes to get out of Chill mode.
|
|
||||||
*
|
|
||||||
* @return int
|
|
||||||
*/
|
|
||||||
@Override
|
|
||||||
public int getMinimumChillModeNodes() {
|
|
||||||
return chillModeNodeCount;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Sets the Minimum chill mode nodes count, used only in testing.
|
|
||||||
*
|
|
||||||
* @param count - Number of nodes.
|
|
||||||
*/
|
|
||||||
@VisibleForTesting
|
|
||||||
public void setMinimumChillModeNodes(int count) {
|
|
||||||
chillModeNodeCount = count;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Returns chill mode Status string.
|
|
||||||
* @return String
|
|
||||||
*/
|
|
||||||
@Override
|
|
||||||
public String getChillModeStatus() {
|
|
||||||
if (inStartupChillMode.get()) {
|
|
||||||
return "Still in chill mode, waiting on nodes to report in." +
|
|
||||||
String.format(" %d nodes reported, minimal %d nodes required.",
|
|
||||||
nodeStateManager.getTotalNodeCount(), getMinimumChillModeNodes());
|
|
||||||
}
|
|
||||||
if (inManualChillMode.get()) {
|
|
||||||
return "Out of startup chill mode, but in manual chill mode." +
|
|
||||||
String.format(" %d nodes have reported in.",
|
|
||||||
nodeStateManager.getTotalNodeCount());
|
|
||||||
}
|
|
||||||
return "Out of chill mode." +
|
|
||||||
String.format(" %d nodes have reported in.",
|
|
||||||
nodeStateManager.getTotalNodeCount());
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Forcefully exits the chill mode even if we have not met the minimum
|
|
||||||
* criteria of exiting the chill mode. This will exit from both startup
|
|
||||||
* and manual chill mode.
|
|
||||||
*/
|
|
||||||
@Override
|
|
||||||
public void forceExitChillMode() {
|
|
||||||
if(inStartupChillMode.get()) {
|
|
||||||
LOG.info("Leaving startup chill mode.");
|
|
||||||
inStartupChillMode.set(false);
|
|
||||||
}
|
|
||||||
if(inManualChillMode.get()) {
|
|
||||||
LOG.info("Leaving manual chill mode.");
|
|
||||||
inManualChillMode.set(false);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Puts the node manager into manual chill mode.
|
|
||||||
*/
|
|
||||||
@Override
|
|
||||||
public void enterChillMode() {
|
|
||||||
LOG.info("Entering manual chill mode.");
|
|
||||||
inManualChillMode.set(true);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Brings node manager out of manual chill mode.
|
|
||||||
*/
|
|
||||||
@Override
|
|
||||||
public void exitChillMode() {
|
|
||||||
LOG.info("Leaving manual chill mode.");
|
|
||||||
inManualChillMode.set(false);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Returns true if node manager is out of chill mode, else false.
|
|
||||||
* @return true if out of chill mode, else false
|
|
||||||
*/
|
|
||||||
@Override
|
|
||||||
public boolean isOutOfChillMode() {
|
|
||||||
return !(inStartupChillMode.get() || inManualChillMode.get());
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns the Number of Datanodes by State they are in.
|
* Returns the Number of Datanodes by State they are in.
|
||||||
*
|
*
|
||||||
@ -379,11 +275,6 @@ public RegisteredCommand register(
|
|||||||
try {
|
try {
|
||||||
nodeStateManager.addNode(datanodeDetails);
|
nodeStateManager.addNode(datanodeDetails);
|
||||||
nodeStateManager.setNodeStat(dnId, new SCMNodeStat());
|
nodeStateManager.setNodeStat(dnId, new SCMNodeStat());
|
||||||
if(inStartupChillMode.get() &&
|
|
||||||
nodeStateManager.getTotalNodeCount() >= getMinimumChillModeNodes()) {
|
|
||||||
inStartupChillMode.getAndSet(false);
|
|
||||||
LOG.info("Leaving startup chill mode.");
|
|
||||||
}
|
|
||||||
// Updating Node Report, as registration is successful
|
// Updating Node Report, as registration is successful
|
||||||
updateNodeStat(datanodeDetails.getUuid(), nodeReport);
|
updateNodeStat(datanodeDetails.getUuid(), nodeReport);
|
||||||
LOG.info("Data node with ID: {} Registered.", datanodeDetails.getUuid());
|
LOG.info("Data node with ID: {} Registered.", datanodeDetails.getUuid());
|
||||||
|
@ -47,4 +47,16 @@ public interface SCMMXBean extends ServiceRuntimeInfo {
|
|||||||
* @return The datanodeUUid to report json string mapping
|
* @return The datanodeUUid to report json string mapping
|
||||||
*/
|
*/
|
||||||
Map<String, String> getContainerReport();
|
Map<String, String> getContainerReport();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns chill mode status.
|
||||||
|
* @return boolean
|
||||||
|
*/
|
||||||
|
boolean isInChillMode();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns live chill mode container threshold.
|
||||||
|
* @return double
|
||||||
|
*/
|
||||||
|
double getChillModeCurrentContainerThreshold();
|
||||||
}
|
}
|
||||||
|
@ -884,6 +884,21 @@ public Map<String, String> getContainerReport() {
|
|||||||
return id2StatMap;
|
return id2StatMap;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns live chill mode container threshold.
|
||||||
|
*
|
||||||
|
* @return double
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public double getChillModeCurrentContainerThreshold() {
|
||||||
|
return getCurrentContainerThreshold();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns chill mode status.
|
||||||
|
* @return boolean
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
public boolean isInChillMode() {
|
public boolean isInChillMode() {
|
||||||
return scmChillModeManager.getInChillMode();
|
return scmChillModeManager.getInChillMode();
|
||||||
}
|
}
|
||||||
|
@ -198,64 +198,6 @@ public List<DatanodeDetails> getAllNodes() {
|
|||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Get the minimum number of nodes to get out of chill mode.
|
|
||||||
*
|
|
||||||
* @return int
|
|
||||||
*/
|
|
||||||
@Override
|
|
||||||
public int getMinimumChillModeNodes() {
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Chill mode is the period when node manager waits for a minimum configured
|
|
||||||
* number of datanodes to report in. This is called chill mode to indicate the
|
|
||||||
* period before node manager gets into action.
|
|
||||||
* <p>
|
|
||||||
* Forcefully exits the chill mode, even if we have not met the minimum
|
|
||||||
* criteria of the nodes reporting in.
|
|
||||||
*/
|
|
||||||
@Override
|
|
||||||
public void forceExitChillMode() {
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Puts the node manager into manual chill mode.
|
|
||||||
*/
|
|
||||||
@Override
|
|
||||||
public void enterChillMode() {
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Brings node manager out of manual chill mode.
|
|
||||||
*/
|
|
||||||
@Override
|
|
||||||
public void exitChillMode() {
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Returns true if node manager is out of chill mode, else false.
|
|
||||||
* @return true if out of chill mode, else false
|
|
||||||
*/
|
|
||||||
@Override
|
|
||||||
public boolean isOutOfChillMode() {
|
|
||||||
return !chillmode;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Returns a chill mode status string.
|
|
||||||
*
|
|
||||||
* @return String
|
|
||||||
*/
|
|
||||||
@Override
|
|
||||||
public String getChillModeStatus() {
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns the aggregated node stats.
|
* Returns the aggregated node stats.
|
||||||
* @return the aggregated node stats.
|
* @return the aggregated node stats.
|
||||||
|
@ -56,8 +56,6 @@
|
|||||||
import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeState
|
import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeState
|
||||||
.HEALTHY;
|
.HEALTHY;
|
||||||
import static org.junit.Assert.assertEquals;
|
import static org.junit.Assert.assertEquals;
|
||||||
import static org.junit.Assert.assertFalse;
|
|
||||||
import static org.junit.Assert.assertTrue;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Test for different container placement policy.
|
* Test for different container placement policy.
|
||||||
@ -96,8 +94,6 @@ SCMNodeManager createNodeManager(OzoneConfiguration config)
|
|||||||
Mockito.mock(DeadNodeHandler.class));
|
Mockito.mock(DeadNodeHandler.class));
|
||||||
SCMNodeManager nodeManager = new SCMNodeManager(config,
|
SCMNodeManager nodeManager = new SCMNodeManager(config,
|
||||||
UUID.randomUUID().toString(), null, eventQueue);
|
UUID.randomUUID().toString(), null, eventQueue);
|
||||||
assertFalse("Node manager should be in chill mode",
|
|
||||||
nodeManager.isOutOfChillMode());
|
|
||||||
return nodeManager;
|
return nodeManager;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -154,8 +150,6 @@ public void testContainerPlacementCapacity() throws IOException,
|
|||||||
assertEquals(remaining * nodeCount,
|
assertEquals(remaining * nodeCount,
|
||||||
(long) nodeManager.getStats().getRemaining().get());
|
(long) nodeManager.getStats().getRemaining().get());
|
||||||
|
|
||||||
assertTrue(nodeManager.isOutOfChillMode());
|
|
||||||
|
|
||||||
ContainerWithPipeline containerWithPipeline = containerManager
|
ContainerWithPipeline containerWithPipeline = containerManager
|
||||||
.allocateContainer(
|
.allocateContainer(
|
||||||
xceiverClientManager.getType(),
|
xceiverClientManager.getType(),
|
||||||
|
@ -71,10 +71,8 @@
|
|||||||
.HEALTHY;
|
.HEALTHY;
|
||||||
import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeState.STALE;
|
import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeState.STALE;
|
||||||
import static org.apache.hadoop.hdds.scm.events.SCMEvents.DATANODE_COMMAND;
|
import static org.apache.hadoop.hdds.scm.events.SCMEvents.DATANODE_COMMAND;
|
||||||
import static org.hamcrest.CoreMatchers.containsString;
|
|
||||||
import static org.hamcrest.core.StringStartsWith.startsWith;
|
import static org.hamcrest.core.StringStartsWith.startsWith;
|
||||||
import static org.junit.Assert.assertEquals;
|
import static org.junit.Assert.assertEquals;
|
||||||
import static org.junit.Assert.assertFalse;
|
|
||||||
import static org.junit.Assert.assertTrue;
|
import static org.junit.Assert.assertTrue;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -135,8 +133,6 @@ SCMNodeManager createNodeManager(OzoneConfiguration config)
|
|||||||
Mockito.mock(DeadNodeHandler.class));
|
Mockito.mock(DeadNodeHandler.class));
|
||||||
SCMNodeManager nodeManager = new SCMNodeManager(config,
|
SCMNodeManager nodeManager = new SCMNodeManager(config,
|
||||||
UUID.randomUUID().toString(), null, eventQueue);
|
UUID.randomUUID().toString(), null, eventQueue);
|
||||||
assertFalse("Node manager should be in chill mode",
|
|
||||||
nodeManager.isOutOfChillMode());
|
|
||||||
return nodeManager;
|
return nodeManager;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -153,8 +149,9 @@ public void testScmHeartbeat() throws IOException,
|
|||||||
InterruptedException, TimeoutException {
|
InterruptedException, TimeoutException {
|
||||||
|
|
||||||
try (SCMNodeManager nodeManager = createNodeManager(getConf())) {
|
try (SCMNodeManager nodeManager = createNodeManager(getConf())) {
|
||||||
|
int registeredNodes = 5;
|
||||||
// Send some heartbeats from different nodes.
|
// Send some heartbeats from different nodes.
|
||||||
for (int x = 0; x < nodeManager.getMinimumChillModeNodes(); x++) {
|
for (int x = 0; x < registeredNodes; x++) {
|
||||||
DatanodeDetails datanodeDetails = TestUtils
|
DatanodeDetails datanodeDetails = TestUtils
|
||||||
.createRandomDatanodeAndRegister(nodeManager);
|
.createRandomDatanodeAndRegister(nodeManager);
|
||||||
nodeManager.processHeartbeat(datanodeDetails);
|
nodeManager.processHeartbeat(datanodeDetails);
|
||||||
@ -163,8 +160,8 @@ public void testScmHeartbeat() throws IOException,
|
|||||||
//TODO: wait for heartbeat to be processed
|
//TODO: wait for heartbeat to be processed
|
||||||
Thread.sleep(4 * 1000);
|
Thread.sleep(4 * 1000);
|
||||||
assertTrue("Heartbeat thread should have picked up the" +
|
assertTrue("Heartbeat thread should have picked up the" +
|
||||||
"scheduled heartbeats and transitioned out of chill mode.",
|
"scheduled heartbeats.",
|
||||||
nodeManager.isOutOfChillMode());
|
nodeManager.getAllNodes().size() == registeredNodes);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -182,60 +179,8 @@ public void testScmNoHeartbeats() throws IOException,
|
|||||||
try (SCMNodeManager nodeManager = createNodeManager(getConf())) {
|
try (SCMNodeManager nodeManager = createNodeManager(getConf())) {
|
||||||
//TODO: wait for heartbeat to be processed
|
//TODO: wait for heartbeat to be processed
|
||||||
Thread.sleep(4 * 1000);
|
Thread.sleep(4 * 1000);
|
||||||
assertFalse("No heartbeats, Node manager should have been in" +
|
assertTrue("No heartbeats, 0 nodes should be registered",
|
||||||
" chill mode.", nodeManager.isOutOfChillMode());
|
nodeManager.getAllNodes().size() == 0);
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Asserts that if we don't get enough unique nodes we stay in chillmode.
|
|
||||||
*
|
|
||||||
* @throws IOException
|
|
||||||
* @throws InterruptedException
|
|
||||||
* @throws TimeoutException
|
|
||||||
*/
|
|
||||||
@Test
|
|
||||||
public void testScmNotEnoughHeartbeats() throws IOException,
|
|
||||||
InterruptedException, TimeoutException {
|
|
||||||
try (SCMNodeManager nodeManager = createNodeManager(getConf())) {
|
|
||||||
|
|
||||||
// Need 100 nodes to come out of chill mode, only one node is sending HB.
|
|
||||||
nodeManager.setMinimumChillModeNodes(100);
|
|
||||||
nodeManager.processHeartbeat(TestUtils
|
|
||||||
.createRandomDatanodeAndRegister(nodeManager));
|
|
||||||
//TODO: wait for heartbeat to be processed
|
|
||||||
Thread.sleep(4 * 1000);
|
|
||||||
assertFalse("Not enough heartbeat, Node manager should have" +
|
|
||||||
"been in chillmode.", nodeManager.isOutOfChillMode());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Asserts that many heartbeat from the same node is counted as a single
|
|
||||||
* node.
|
|
||||||
*
|
|
||||||
* @throws IOException
|
|
||||||
* @throws InterruptedException
|
|
||||||
* @throws TimeoutException
|
|
||||||
*/
|
|
||||||
@Test
|
|
||||||
public void testScmSameNodeHeartbeats() throws IOException,
|
|
||||||
InterruptedException, TimeoutException {
|
|
||||||
|
|
||||||
try (SCMNodeManager nodeManager = createNodeManager(getConf())) {
|
|
||||||
nodeManager.setMinimumChillModeNodes(3);
|
|
||||||
DatanodeDetails datanodeDetails = TestUtils
|
|
||||||
.createRandomDatanodeAndRegister(nodeManager);
|
|
||||||
|
|
||||||
// Send 10 heartbeat from same node, and assert we never leave chill mode.
|
|
||||||
for (int x = 0; x < 10; x++) {
|
|
||||||
nodeManager.processHeartbeat(datanodeDetails);
|
|
||||||
}
|
|
||||||
|
|
||||||
//TODO: wait for heartbeat to be processed
|
|
||||||
Thread.sleep(4 * 1000);
|
|
||||||
assertFalse("Not enough nodes have send heartbeat to node" +
|
|
||||||
"manager.", nodeManager.isOutOfChillMode());
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -872,62 +817,6 @@ public void testScmCanHandleScale() throws IOException,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@Test
|
|
||||||
public void testScmEnterAndExitChillMode() throws IOException,
|
|
||||||
InterruptedException {
|
|
||||||
OzoneConfiguration conf = getConf();
|
|
||||||
conf.setTimeDuration(OZONE_SCM_HEARTBEAT_PROCESS_INTERVAL, 100,
|
|
||||||
MILLISECONDS);
|
|
||||||
|
|
||||||
try (SCMNodeManager nodeManager = createNodeManager(conf)) {
|
|
||||||
nodeManager.setMinimumChillModeNodes(10);
|
|
||||||
DatanodeDetails datanodeDetails = TestUtils
|
|
||||||
.createRandomDatanodeAndRegister(nodeManager);
|
|
||||||
nodeManager.processHeartbeat(datanodeDetails);
|
|
||||||
String status = nodeManager.getChillModeStatus();
|
|
||||||
Assert.assertThat(status, containsString("Still in chill " +
|
|
||||||
"mode, waiting on nodes to report in."));
|
|
||||||
|
|
||||||
// Should not exit chill mode since 10 nodes have not heartbeat yet.
|
|
||||||
assertFalse(nodeManager.isOutOfChillMode());
|
|
||||||
|
|
||||||
// Force exit chill mode.
|
|
||||||
nodeManager.forceExitChillMode();
|
|
||||||
assertTrue(nodeManager.isOutOfChillMode());
|
|
||||||
status = nodeManager.getChillModeStatus();
|
|
||||||
Assert.assertThat(status,
|
|
||||||
containsString("Out of chill mode."));
|
|
||||||
|
|
||||||
|
|
||||||
// Enter back to into chill mode.
|
|
||||||
nodeManager.enterChillMode();
|
|
||||||
assertFalse(nodeManager.isOutOfChillMode());
|
|
||||||
status = nodeManager.getChillModeStatus();
|
|
||||||
Assert.assertThat(status,
|
|
||||||
containsString("Out of startup chill mode," +
|
|
||||||
" but in manual chill mode."));
|
|
||||||
|
|
||||||
// Assert that node manager force enter cannot be overridden by nodes HBs.
|
|
||||||
for (int x = 0; x < 20; x++) {
|
|
||||||
DatanodeDetails datanode = TestUtils
|
|
||||||
.createRandomDatanodeAndRegister(nodeManager);
|
|
||||||
nodeManager.processHeartbeat(datanode);
|
|
||||||
}
|
|
||||||
|
|
||||||
Thread.sleep(500);
|
|
||||||
assertFalse(nodeManager.isOutOfChillMode());
|
|
||||||
|
|
||||||
// Make sure that once we exit out of manual chill mode, we fall back
|
|
||||||
// to the number of nodes to get out chill mode.
|
|
||||||
nodeManager.exitChillMode();
|
|
||||||
assertTrue(nodeManager.isOutOfChillMode());
|
|
||||||
status = nodeManager.getChillModeStatus();
|
|
||||||
Assert.assertThat(status,
|
|
||||||
containsString("Out of chill mode."));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Test multiple nodes sending initial heartbeat with their node report.
|
* Test multiple nodes sending initial heartbeat with their node report.
|
||||||
*
|
*
|
||||||
|
@ -65,26 +65,6 @@ public ReplicationNodeManagerMock(Map<DatanodeDetails, NodeState> nodeState,
|
|||||||
this.commandQueue = commandQueue;
|
this.commandQueue = commandQueue;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Get the minimum number of nodes to get out of chill mode.
|
|
||||||
*
|
|
||||||
* @return int
|
|
||||||
*/
|
|
||||||
@Override
|
|
||||||
public int getMinimumChillModeNodes() {
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Returns a chill mode status string.
|
|
||||||
*
|
|
||||||
* @return String
|
|
||||||
*/
|
|
||||||
@Override
|
|
||||||
public String getChillModeStatus() {
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Get the number of data nodes in all states.
|
* Get the number of data nodes in all states.
|
||||||
*
|
*
|
||||||
@ -140,44 +120,6 @@ public List<DatanodeDetails> getAllNodes() {
|
|||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Chill mode is the period when node manager waits for a minimum
|
|
||||||
* configured number of datanodes to report in. This is called chill mode
|
|
||||||
* to indicate the period before node manager gets into action.
|
|
||||||
* <p>
|
|
||||||
* Forcefully exits the chill mode, even if we have not met the minimum
|
|
||||||
* criteria of the nodes reporting in.
|
|
||||||
*/
|
|
||||||
@Override
|
|
||||||
public void forceExitChillMode() {
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Puts the node manager into manual chill mode.
|
|
||||||
*/
|
|
||||||
@Override
|
|
||||||
public void enterChillMode() {
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Brings node manager out of manual chill mode.
|
|
||||||
*/
|
|
||||||
@Override
|
|
||||||
public void exitChillMode() {
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Returns true if node manager is out of chill mode, else false.
|
|
||||||
* @return true if out of chill mode, else false
|
|
||||||
*/
|
|
||||||
@Override
|
|
||||||
public boolean isOutOfChillMode() {
|
|
||||||
return !nodeStateMap.isEmpty();
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns the aggregated node stats.
|
* Returns the aggregated node stats.
|
||||||
*
|
*
|
||||||
|
@ -66,6 +66,7 @@ public void setup() throws Exception {
|
|||||||
conf = new OzoneConfiguration();
|
conf = new OzoneConfiguration();
|
||||||
cluster = MiniOzoneCluster.newBuilder(conf).setNumDatanodes(1).build();
|
cluster = MiniOzoneCluster.newBuilder(conf).setNumDatanodes(1).build();
|
||||||
cluster.waitForClusterToBeReady();
|
cluster.waitForClusterToBeReady();
|
||||||
|
cluster.waitTobeOutOfChillMode();
|
||||||
xceiverClientManager = new XceiverClientManager(conf);
|
xceiverClientManager = new XceiverClientManager(conf);
|
||||||
scm = cluster.getStorageContainerManager();
|
scm = cluster.getStorageContainerManager();
|
||||||
scmContainerMapping = (ContainerMapping) scm.getScmContainerManager();
|
scmContainerMapping = (ContainerMapping) scm.getScmContainerManager();
|
||||||
|
@ -135,12 +135,12 @@ public void waitForClusterToBeReady()
|
|||||||
public void waitTobeOutOfChillMode()
|
public void waitTobeOutOfChillMode()
|
||||||
throws TimeoutException, InterruptedException {
|
throws TimeoutException, InterruptedException {
|
||||||
GenericTestUtils.waitFor(() -> {
|
GenericTestUtils.waitFor(() -> {
|
||||||
if (scm.getScmNodeManager().isOutOfChillMode()) {
|
if (!scm.isInChillMode()) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
LOG.info("Waiting for cluster to be ready. No datanodes found");
|
LOG.info("Waiting for cluster to be ready. No datanodes found");
|
||||||
return false;
|
return false;
|
||||||
}, 100, 45000);
|
}, 100, 1000 * 45);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -24,15 +24,9 @@
|
|||||||
import org.apache.hadoop.ozone.MiniOzoneCluster;
|
import org.apache.hadoop.ozone.MiniOzoneCluster;
|
||||||
import org.apache.hadoop.hdds.conf.OzoneConfiguration;
|
import org.apache.hadoop.hdds.conf.OzoneConfiguration;
|
||||||
import org.apache.hadoop.hdds.scm.container.placement.metrics.ContainerStat;
|
import org.apache.hadoop.hdds.scm.container.placement.metrics.ContainerStat;
|
||||||
import org.apache.hadoop.hdds.scm.node.NodeManager;
|
|
||||||
import org.junit.BeforeClass;
|
import org.junit.BeforeClass;
|
||||||
import org.junit.AfterClass;
|
import org.junit.AfterClass;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
|
|
||||||
import static org.junit.Assert.assertEquals;
|
|
||||||
import static org.junit.Assert.assertTrue;
|
|
||||||
import static org.junit.Assert.fail;
|
|
||||||
|
|
||||||
import javax.management.MBeanServer;
|
import javax.management.MBeanServer;
|
||||||
import javax.management.ObjectName;
|
import javax.management.ObjectName;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
@ -45,6 +39,10 @@
|
|||||||
import javax.management.openmbean.CompositeData;
|
import javax.management.openmbean.CompositeData;
|
||||||
import javax.management.openmbean.TabularData;
|
import javax.management.openmbean.TabularData;
|
||||||
|
|
||||||
|
import static org.junit.Assert.assertEquals;
|
||||||
|
import static org.junit.Assert.assertTrue;
|
||||||
|
import static org.junit.Assert.fail;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
*
|
*
|
||||||
* This class is to test JMX management interface for scm information.
|
* This class is to test JMX management interface for scm information.
|
||||||
@ -110,30 +108,14 @@ public void testSCMMXBean() throws Exception {
|
|||||||
assertEquals("nodeID", key);
|
assertEquals("nodeID", key);
|
||||||
assertEquals(stat.toJsonString(), value);
|
assertEquals(stat.toJsonString(), value);
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
boolean inChillMode = (boolean) mbs.getAttribute(bean,
|
||||||
public void testSCMNodeManagerMXBean() throws Exception {
|
"InChillMode");
|
||||||
final NodeManager scmNm = scm.getScmNodeManager();
|
assertEquals(scm.isInChillMode(), inChillMode);
|
||||||
ObjectName bean = new ObjectName(
|
|
||||||
"Hadoop:service=SCMNodeManager,name=SCMNodeManagerInfo");
|
|
||||||
|
|
||||||
Integer minChillNodes = (Integer)mbs.getAttribute(bean,
|
double containerThreshold = (double) mbs.getAttribute(bean,
|
||||||
"MinimumChillModeNodes");
|
"ChillModeCurrentContainerThreshold");
|
||||||
assertEquals(scmNm.getMinimumChillModeNodes(),
|
assertEquals(scm.getCurrentContainerThreshold(), containerThreshold, 0);
|
||||||
minChillNodes.intValue());
|
|
||||||
|
|
||||||
boolean isOutOfChillMode = (boolean)mbs.getAttribute(bean,
|
|
||||||
"OutOfChillMode");
|
|
||||||
assertEquals(scmNm.isOutOfChillMode(), isOutOfChillMode);
|
|
||||||
|
|
||||||
String chillStatus = (String)mbs.getAttribute(bean,
|
|
||||||
"ChillModeStatus");
|
|
||||||
assertEquals(scmNm.getChillModeStatus(), chillStatus);
|
|
||||||
|
|
||||||
TabularData nodeCountObj = (TabularData)mbs.getAttribute(bean,
|
|
||||||
"NodeCount");
|
|
||||||
verifyEquals(nodeCountObj, scm.getScmNodeManager().getNodeCount());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
Loading…
x
Reference in New Issue
Block a user