YARN-5876. TestResourceTrackerService#testGracefulDecommissionWithApp fails intermittently on trunk. (Robert Kanter via Yufei Gu)

(cherry picked from commit 0b77262890)
This commit is contained in:
Yufei Gu 2017-06-23 13:26:03 -07:00
parent 0719345909
commit 4c59b446e2
1 changed file with 20 additions and 7 deletions

View File

@@ -120,7 +120,7 @@ public class MockRM extends ResourceManager {
private static final int SECOND = 1000;
private static final int TIMEOUT_MS_FOR_ATTEMPT = 40 * SECOND;
private static final int TIMEOUT_MS_FOR_APP_REMOVED = 40 * SECOND;
private static final int TIMEOUT_MS_FOR_CONTAINER_AND_NODE = 10 * SECOND;
private static final int TIMEOUT_MS_FOR_CONTAINER_AND_NODE = 20 * SECOND;
private static final int WAIT_MS_PER_LOOP = 10;
private final boolean useNullRMNodeLabelsManager;
@@ -853,9 +853,17 @@ public class MockRM extends ResourceManager {
drainEventsImplicitly();
}
/**
 * Look up a node by id, checking the RM context's active node map first and
 * falling back to its inactive node map.
 * @param nodeId the id of a node
 * @return the matching node from the active map if present, otherwise
 *         whatever the inactive map returns for this id (may be null)
 */
private RMNode getRMNode(NodeId nodeId) {
  RMNode activeNode = getRMContext().getRMNodes().get(nodeId);
  if (activeNode != null) {
    return activeNode;
  }
  // Not currently active: consult the inactive nodes instead.
  return getRMContext().getInactiveRMNodes().get(nodeId);
}
/**
* Wait until a node has reached a specified state.
* The timeout is 10 seconds.
* The timeout is 20 seconds.
* @param nodeId the id of a node
* @param finalState the node state waited
* @throws InterruptedException
@@ -864,12 +872,17 @@ public class MockRM extends ResourceManager {
public void waitForState(NodeId nodeId, NodeState finalState)
throws InterruptedException {
drainEventsImplicitly();
RMNode node = getRMContext().getRMNodes().get(nodeId);
if (node == null) {
node = getRMContext().getInactiveRMNodes().get(nodeId);
}
Assert.assertNotNull("node shouldn't be null", node);
int timeWaiting = 0;
RMNode node = getRMNode(nodeId);
while (node == null) {
if (timeWaiting >= TIMEOUT_MS_FOR_CONTAINER_AND_NODE) {
break;
}
node = getRMNode(nodeId);
Thread.sleep(WAIT_MS_PER_LOOP);
timeWaiting += WAIT_MS_PER_LOOP;
}
Assert.assertNotNull("node shouldn't be null (timedout)", node);
while (!finalState.equals(node.getState())) {
if (timeWaiting >= TIMEOUT_MS_FOR_CONTAINER_AND_NODE) {
break;