YARN-4615. Fix random test failure in TestAbstractYarnScheduler#testResourceRequestRecoveryToTheRightAppAttempt. (Sunil G via rohithsharmaks)

This commit is contained in:
Rohith Sharma K S 2016-02-01 10:43:56 +05:30
parent 772ea7b41b
commit 2673cbaf55
3 changed files with 30 additions and 10 deletions

View File

@@ -178,6 +178,9 @@ Release 2.9.0 - UNRELEASED
YARN-4543. Fix random test failure in TestNodeStatusUpdater.testStopReentrant YARN-4543. Fix random test failure in TestNodeStatusUpdater.testStopReentrant
(Akihiro Suda via rohithsharmaks) (Akihiro Suda via rohithsharmaks)
YARN-4615. Fix random test failure in TestAbstractYarnScheduler#testResource
RequestRecoveryToTheRightAppAttempt. (Sunil G via rohithsharmaks)
Release 2.8.0 - UNRELEASED Release 2.8.0 - UNRELEASED
INCOMPATIBLE CHANGES INCOMPATIBLE CHANGES

View File

@@ -202,15 +202,33 @@ public void waitForState(ApplicationAttemptId attemptId,
public void waitForContainerState(ContainerId containerId, public void waitForContainerState(ContainerId containerId,
RMContainerState state) throws Exception { RMContainerState state) throws Exception {
int timeoutSecs = 0; // This method will assert if state is not expected after timeout.
RMContainer container = getResourceScheduler().getRMContainer(containerId); Assert.assertTrue(waitForContainerState(containerId, state, 8 * 1000));
while ((container == null || container.getState() != state)
&& timeoutSecs++ < 40) {
System.out.println(
"Waiting for" + containerId + " state to be:" + state.name());
Thread.sleep(200);
} }
Assert.assertTrue(container.getState() == state);
/**
 * Polls the resource scheduler until the given container reports the
 * expected state, or until the timeout budget is used up.
 *
 * The container is checked every 100 ms, and a progress line is printed to
 * standard output on each poll.
 *
 * @param containerId id of the container to look up in the scheduler
 * @param containerState state the container is expected to reach
 * @param timeoutMillisecs maximum time to wait, in milliseconds; it is
 *          consumed in 100 ms polling steps
 * @return true if the loop ended and the final state assertion passed;
 *         false if the timeout check inside the loop fired first
 * @throws Exception declared broadly; Thread.sleep may throw
 *           InterruptedException, and the scheduler lookup presumably can
 *           fail as well (TODO confirm)
 */
public boolean waitForContainerState(ContainerId containerId,
RMContainerState containerState, int timeoutMillisecs) throws Exception {
RMContainer container = getResourceScheduler().getRMContainer(containerId);
// Despite its name, this counts 100 ms polling iterations, not seconds.
int timeoutSecs = 0;
// Keep polling while the container is unknown or not yet in the expected
// state, for at most timeoutMillisecs / 100 iterations.
while (((container == null) || !containerState.equals(container.getState()))
&& timeoutSecs++ < timeoutMillisecs / 100) {
// Only repeat the lookup while it returned null; once a container object
// has been obtained, the same reference is re-checked for its state.
if(container == null){
container = getResourceScheduler().getRMContainer(containerId);
}
System.out.println("Container : " + containerId +
" Waiting for state : " + containerState);
Thread.sleep(100);
// Timeout budget exhausted: report failure to the caller without asserting.
if (timeoutMillisecs <= timeoutSecs * 100) {
return false;
}
}
// NOTE(review): when timeoutMillisecs is below 100 or not a multiple of 100,
// the loop can end through the while condition with container still null,
// and the next line would then throw a NullPointerException - confirm
// whether callers can pass such values.
System.out.println("Container State is : " + container.getState());
// A state mismatch at this point fails the test via the assertion instead
// of returning false.
Assert.assertEquals("Container state is not correct (timedout)",
containerState, container.getState());
return true;
} }
public void waitForContainerAllocated(MockNM nm, ContainerId containerId) public void waitForContainerAllocated(MockNM nm, ContainerId containerId)

View File

@@ -52,10 +52,8 @@
import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerState; import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerState;
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode; import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.ContainerPreemptEvent;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeAddedSchedulerEvent; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeAddedSchedulerEvent;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeRemovedSchedulerEvent; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeRemovedSchedulerEvent;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.SchedulerEventType;
import org.apache.hadoop.yarn.util.resource.Resources; import org.apache.hadoop.yarn.util.resource.Resources;
import org.junit.Assert; import org.junit.Assert;
import org.junit.Test; import org.junit.Test;
@@ -568,7 +566,8 @@ public void testResourceRequestRecoveryToTheRightAppAttempt()
// AM crashes, and a new app-attempt gets created // AM crashes, and a new app-attempt gets created
node.nodeHeartbeat(applicationAttemptOneID, 1, ContainerState.COMPLETE); node.nodeHeartbeat(applicationAttemptOneID, 1, ContainerState.COMPLETE);
rm.waitForState(node, am1ContainerID, RMContainerState.COMPLETED); rm.waitForContainerState(am1ContainerID, RMContainerState.COMPLETED,
30 * 1000);
RMAppAttempt rmAppAttempt2 = MockRM.waitForAttemptScheduled(rmApp, rm); RMAppAttempt rmAppAttempt2 = MockRM.waitForAttemptScheduled(rmApp, rm);
ApplicationAttemptId applicationAttemptTwoID = ApplicationAttemptId applicationAttemptTwoID =
rmAppAttempt2.getAppAttemptId(); rmAppAttempt2.getAppAttemptId();