YARN-5566. Client-side NM graceful decommission is not triggered when jobs finish. Addendum to fix test flakiness. (Robert Kanter via kasha)
This commit is contained in:
parent
912631a226
commit
7a27b2a82f
|
@ -636,6 +636,9 @@ public class MockRM extends ResourceManager {
|
||||||
public void waitForState(NodeId nodeId, NodeState finalState)
|
public void waitForState(NodeId nodeId, NodeState finalState)
|
||||||
throws InterruptedException {
|
throws InterruptedException {
|
||||||
RMNode node = getRMContext().getRMNodes().get(nodeId);
|
RMNode node = getRMContext().getRMNodes().get(nodeId);
|
||||||
|
if (node == null) {
|
||||||
|
node = getRMContext().getInactiveRMNodes().get(nodeId);
|
||||||
|
}
|
||||||
Assert.assertNotNull("node shouldn't be null", node);
|
Assert.assertNotNull("node shouldn't be null", node);
|
||||||
int timeWaiting = 0;
|
int timeWaiting = 0;
|
||||||
while (!finalState.equals(node.getState())) {
|
while (!finalState.equals(node.getState())) {
|
||||||
|
|
|
@ -253,19 +253,17 @@ public class TestResourceTrackerService extends NodeLabelTestBase {
|
||||||
rm.waitForState(nm3.getNodeId(), NodeState.DECOMMISSIONING);
|
rm.waitForState(nm3.getNodeId(), NodeState.DECOMMISSIONING);
|
||||||
|
|
||||||
nodeHeartbeat1 = nm1.nodeHeartbeat(true);
|
nodeHeartbeat1 = nm1.nodeHeartbeat(true);
|
||||||
rm.waitForState(nm1.getNodeId(), NodeState.RUNNING);
|
|
||||||
nodeHeartbeat2 = nm2.nodeHeartbeat(true);
|
nodeHeartbeat2 = nm2.nodeHeartbeat(true);
|
||||||
rm.waitForState(nm2.getNodeId(), NodeState.DECOMMISSIONED);
|
|
||||||
nodeHeartbeat3 = nm3.nodeHeartbeat(true);
|
nodeHeartbeat3 = nm3.nodeHeartbeat(true);
|
||||||
rm.waitForState(nm3.getNodeId(), NodeState.DECOMMISSIONED);
|
|
||||||
|
|
||||||
checkDecommissionedNMCount(rm, metricCount + 2);
|
checkDecommissionedNMCount(rm, metricCount + 2);
|
||||||
|
rm.waitForState(nm2.getNodeId(), NodeState.DECOMMISSIONED);
|
||||||
|
rm.waitForState(nm3.getNodeId(), NodeState.DECOMMISSIONED);
|
||||||
|
|
||||||
nodeHeartbeat1 = nm1.nodeHeartbeat(true);
|
|
||||||
Assert.assertTrue(NodeAction.NORMAL.equals(nodeHeartbeat1.getNodeAction()));
|
Assert.assertTrue(NodeAction.NORMAL.equals(nodeHeartbeat1.getNodeAction()));
|
||||||
nodeHeartbeat2 = nm2.nodeHeartbeat(true);
|
nodeHeartbeat2 = nm2.nodeHeartbeat(true);
|
||||||
Assert.assertEquals(NodeAction.SHUTDOWN, nodeHeartbeat2.getNodeAction());
|
|
||||||
nodeHeartbeat3 = nm3.nodeHeartbeat(true);
|
nodeHeartbeat3 = nm3.nodeHeartbeat(true);
|
||||||
|
Assert.assertEquals(NodeAction.SHUTDOWN, nodeHeartbeat2.getNodeAction());
|
||||||
Assert.assertEquals(NodeAction.SHUTDOWN, nodeHeartbeat3.getNodeAction());
|
Assert.assertEquals(NodeAction.SHUTDOWN, nodeHeartbeat3.getNodeAction());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -306,8 +304,8 @@ public class TestResourceTrackerService extends NodeLabelTestBase {
|
||||||
// host1 should be DECOMMISSIONING due to running containers.
|
// host1 should be DECOMMISSIONING due to running containers.
|
||||||
// host3 should become DECOMMISSIONED.
|
// host3 should become DECOMMISSIONED.
|
||||||
nm1.nodeHeartbeat(true);
|
nm1.nodeHeartbeat(true);
|
||||||
rm.waitForState(id1, NodeState.DECOMMISSIONING);
|
|
||||||
nm3.nodeHeartbeat(true);
|
nm3.nodeHeartbeat(true);
|
||||||
|
rm.waitForState(id1, NodeState.DECOMMISSIONING);
|
||||||
rm.waitForState(id3, NodeState.DECOMMISSIONED);
|
rm.waitForState(id3, NodeState.DECOMMISSIONED);
|
||||||
nm1.nodeHeartbeat(aaid, 2, ContainerState.RUNNING);
|
nm1.nodeHeartbeat(aaid, 2, ContainerState.RUNNING);
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue