YARN-5566. Client-side NM graceful decom is not triggered when jobs finish. Addendum to fix test flakiness. (Robert Kanter via kasha)

This commit is contained in:
Karthik Kambatla 2016-09-08 21:11:20 -07:00
parent 912631a226
commit 7a27b2a82f
2 changed files with 7 additions and 6 deletions

View File

@@ -636,6 +636,9 @@ public class MockRM extends ResourceManager {
public void waitForState(NodeId nodeId, NodeState finalState)
throws InterruptedException {
RMNode node = getRMContext().getRMNodes().get(nodeId);
if (node == null) {
node = getRMContext().getInactiveRMNodes().get(nodeId);
}
Assert.assertNotNull("node shouldn't be null", node);
int timeWaiting = 0;
while (!finalState.equals(node.getState())) {

View File

@@ -253,19 +253,17 @@ public class TestResourceTrackerService extends NodeLabelTestBase {
rm.waitForState(nm3.getNodeId(), NodeState.DECOMMISSIONING);
nodeHeartbeat1 = nm1.nodeHeartbeat(true);
rm.waitForState(nm1.getNodeId(), NodeState.RUNNING);
nodeHeartbeat2 = nm2.nodeHeartbeat(true);
rm.waitForState(nm2.getNodeId(), NodeState.DECOMMISSIONED);
nodeHeartbeat3 = nm3.nodeHeartbeat(true);
rm.waitForState(nm3.getNodeId(), NodeState.DECOMMISSIONED);
checkDecommissionedNMCount(rm, metricCount + 2);
rm.waitForState(nm2.getNodeId(), NodeState.DECOMMISSIONED);
rm.waitForState(nm3.getNodeId(), NodeState.DECOMMISSIONED);
nodeHeartbeat1 = nm1.nodeHeartbeat(true);
Assert.assertTrue(NodeAction.NORMAL.equals(nodeHeartbeat1.getNodeAction()));
nodeHeartbeat2 = nm2.nodeHeartbeat(true);
Assert.assertEquals(NodeAction.SHUTDOWN, nodeHeartbeat2.getNodeAction());
nodeHeartbeat3 = nm3.nodeHeartbeat(true);
Assert.assertEquals(NodeAction.SHUTDOWN, nodeHeartbeat2.getNodeAction());
Assert.assertEquals(NodeAction.SHUTDOWN, nodeHeartbeat3.getNodeAction());
}
@@ -306,8 +304,8 @@ public class TestResourceTrackerService extends NodeLabelTestBase {
// host1 should be DECOMMISSIONING due to running containers.
// host3 should become DECOMMISSIONED.
nm1.nodeHeartbeat(true);
rm.waitForState(id1, NodeState.DECOMMISSIONING);
nm3.nodeHeartbeat(true);
rm.waitForState(id1, NodeState.DECOMMISSIONING);
rm.waitForState(id3, NodeState.DECOMMISSIONED);
nm1.nodeHeartbeat(aaid, 2, ContainerState.RUNNING);