diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-tests/src/test/java/org/apache/hadoop/yarn/server/TestContainerManagerSecurity.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-tests/src/test/java/org/apache/hadoop/yarn/server/TestContainerManagerSecurity.java index 1cbad707808..ad2f68ab8fd 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-tests/src/test/java/org/apache/hadoop/yarn/server/TestContainerManagerSecurity.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-tests/src/test/java/org/apache/hadoop/yarn/server/TestContainerManagerSecurity.java @@ -28,7 +28,9 @@ import java.util.Collection; import java.util.LinkedList; import java.util.List; +import java.util.concurrent.TimeoutException; +import com.google.common.base.Supplier; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.CommonConfigurationKeysPublic; import org.apache.hadoop.io.DataInputBuffer; @@ -36,6 +38,7 @@ import org.apache.hadoop.net.NetUtils; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.token.SecretManager.InvalidToken; +import org.apache.hadoop.test.GenericTestUtils; import org.apache.hadoop.yarn.api.ContainerManagementProtocol; import org.apache.hadoop.yarn.api.protocolrecords.GetContainerStatusesRequest; import org.apache.hadoop.yarn.api.protocolrecords.GetContainerStatusesResponse; @@ -49,6 +52,7 @@ import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.ContainerLaunchContext; import org.apache.hadoop.yarn.api.records.ContainerState; +import org.apache.hadoop.yarn.api.records.ContainerStatus; import org.apache.hadoop.yarn.api.records.NodeId; import org.apache.hadoop.yarn.api.records.Priority; import org.apache.hadoop.yarn.api.records.Resource; @@ -404,27 +408,33 @@ private void testNMTokens(Configuration testConf) throws Exception { newContainerToken, attempt1NMToken, false).isEmpty()); } - private void waitForContainerToFinishOnNM(ContainerId containerId) { + private void waitForContainerToFinishOnNM(ContainerId containerId) + throws TimeoutException, InterruptedException { Context nmContext = yarnCluster.getNodeManager(0).getNMContext(); int interval = 4 * 60; // Max time for container token to expire. - Assert.assertNotNull(nmContext.getContainers().containsKey(containerId)); - - // Get the container first, as it may be removed from the Context - // by asynchronous calls. - // This was leading to a flakey test as otherwise the container could - // be removed and end up null. + // If the container is null, then it has already completed and been removed + // from the Context by asynchronous calls. Container waitContainer = nmContext.getContainers().get(containerId); - - while ((interval-- > 0) - && !waitContainer.cloneAndGetContainerStatus() - .getState().equals(ContainerState.COMPLETE)) { + if (waitContainer != null) { try { - LOG.info("Waiting for " + containerId + " to complete."); - Thread.sleep(1000); - } catch (InterruptedException e) { + LOG.info("Waiting for " + containerId + " to get to state " + + ContainerState.COMPLETE); + GenericTestUtils.waitFor(new Supplier() { + @Override + public Boolean get() { + return ContainerState.COMPLETE.equals( + waitContainer.cloneAndGetContainerStatus().getState()); + } + }, 10, interval); + } catch (TimeoutException te) { + fail("Was waiting for " + containerId + " to get to state " + + ContainerState.COMPLETE + " but was in state " + + waitContainer.cloneAndGetContainerStatus().getState() + + " after the timeout"); } } + // Normally, Containers will be removed from NM context after they are // explicitly acked by RM. Now, manually remove it for testing. yarnCluster.getNodeManager(0).getNodeStatusUpdater()