YARN-733. Fixed TestNMClient from failing occasionally. Contributed by Zhijie Shen.
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1488618 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
a2c4233004
commit
3dce234ed9
|
@ -397,6 +397,9 @@ Release 2.1.0-beta - UNRELEASED
|
|||
YARN-578. Fixed NM to use SecureIOUtils for reading and aggregating logs.
|
||||
(Omkar Vinit Joshi via vinodkv)
|
||||
|
||||
YARN-733. Fixed TestNMClient from failing occasionally. (Zhijie Shen via
|
||||
vinodkv)
|
||||
|
||||
BREAKDOWN OF HADOOP-8562 SUBTASKS AND RELATED JIRAS
|
||||
|
||||
YARN-158. Yarn creating package-info.java must not depend on sh.
|
||||
|
|
|
@ -64,6 +64,17 @@ import org.apache.hadoop.yarn.util.Records;
|
|||
* continue to run even after this client is stopped and till the application
|
||||
* runs at which point ResourceManager will forcefully kill them.
|
||||
* </p>
|
||||
*
|
||||
* <p>
|
||||
* Note that the blocking APIs ensure the RPC calls to <code>NodeManager</code>
|
||||
* are executed immediately, and the responses are received before these APIs
|
||||
* return. However, when {@link #startContainer} or {@link #stopContainer}
|
||||
* returns, <code>NodeManager</code> may still need some time to either start
|
||||
* or stop the container because of its asynchronous implementation. Therefore,
|
||||
* {@link #getContainerStatus} is likely to return a transient container status
|
||||
* if it is executed immediately after {@link #startContainer} or
|
||||
* {@link #stopContainer}.
|
||||
* </p>
|
||||
*/
|
||||
public class NMClientImpl extends AbstractService implements NMClient {
|
||||
|
||||
|
|
|
@ -20,8 +20,8 @@ package org.apache.hadoop.yarn.client;
|
|||
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import static org.junit.Assert.assertFalse;
|
||||
import static org.junit.Assert.assertTrue;
|
||||
import static org.junit.Assert.assertNotNull;
|
||||
import static org.junit.Assert.assertTrue;
|
||||
import static org.junit.Assert.fail;
|
||||
|
||||
import java.io.IOException;
|
||||
|
@ -228,7 +228,7 @@ public class TestNMClient {
|
|||
}
|
||||
|
||||
private void testContainerManagement(NMClientImpl nmClient,
|
||||
Set<Container> containers) throws IOException {
|
||||
Set<Container> containers) throws YarnRemoteException, IOException {
|
||||
int size = containers.size();
|
||||
int i = 0;
|
||||
for (Container container : containers) {
|
||||
|
@ -271,17 +271,9 @@ public class TestNMClient {
|
|||
|
||||
// leave one container unclosed
|
||||
if (++i < size) {
|
||||
try {
|
||||
ContainerStatus status = nmClient.getContainerStatus(container.getId(),
|
||||
container.getNodeId(), container.getContainerToken());
|
||||
// verify the container is started and in good shape
|
||||
assertEquals(container.getId(), status.getContainerId());
|
||||
assertEquals(ContainerState.RUNNING, status.getState());
|
||||
assertEquals("", status.getDiagnostics());
|
||||
assertEquals(-1000, status.getExitStatus());
|
||||
} catch (YarnRemoteException e) {
|
||||
fail("Exception is not expected");
|
||||
}
|
||||
// NodeManager may still need some time to start the container
|
||||
testGetContainerStatus(container, i, ContainerState.RUNNING, "",
|
||||
-1000);
|
||||
|
||||
try {
|
||||
nmClient.stopContainer(container.getId(), container.getNodeId(),
|
||||
|
@ -291,18 +283,8 @@ public class TestNMClient {
|
|||
}
|
||||
|
||||
// getContainerStatus can be called after stopContainer
|
||||
try {
|
||||
ContainerStatus status = nmClient.getContainerStatus(
|
||||
container.getId(), container.getNodeId(),
|
||||
container.getContainerToken());
|
||||
assertEquals(container.getId(), status.getContainerId());
|
||||
assertEquals(ContainerState.RUNNING, status.getState());
|
||||
assertTrue("" + i, status.getDiagnostics().contains(
|
||||
"Container killed by the ApplicationMaster."));
|
||||
assertEquals(-1000, status.getExitStatus());
|
||||
} catch (YarnRemoteException e) {
|
||||
fail("Exception is not expected");
|
||||
}
|
||||
testGetContainerStatus(container, i, ContainerState.COMPLETE,
|
||||
"Container killed by the ApplicationMaster.", 143);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -315,4 +297,28 @@ public class TestNMClient {
|
|||
}
|
||||
}
|
||||
|
||||
private void testGetContainerStatus(Container container, int index,
|
||||
ContainerState state, String diagnostics, int exitStatus)
|
||||
throws YarnRemoteException, IOException {
|
||||
while (true) {
|
||||
try {
|
||||
ContainerStatus status = nmClient.getContainerStatus(
|
||||
container.getId(), container.getNodeId(),
|
||||
container.getContainerToken());
|
||||
// NodeManager may still need some time to get the stable
|
||||
// container status
|
||||
if (status.getState() == state) {
|
||||
assertEquals(container.getId(), status.getContainerId());
|
||||
assertTrue("" + index + ": " + status.getDiagnostics(),
|
||||
status.getDiagnostics().contains(diagnostics));
|
||||
assertEquals(exitStatus, status.getExitStatus());
|
||||
break;
|
||||
}
|
||||
Thread.sleep(100);
|
||||
} catch (InterruptedException e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue