YARN-1903. Set exit code and diagnostics when container is killed at NEW/LOCALIZING state. Contributed by Zhijie Shen

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1586522 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Jian He 2014-04-11 01:26:36 +00:00
parent 3b008baf35
commit ed78328d50
4 changed files with 63 additions and 3 deletions

View File

@ -104,6 +104,9 @@ Release 2.4.1 - UNRELEASED
verification of public cache files in Windows+local file-system environment. verification of public cache files in Windows+local file-system environment.
(Varun Vasudev via vinodkv) (Varun Vasudev via vinodkv)
YARN-1903. Set exit code and diagnostics when container is killed at
NEW/LOCALIZING state. (Zhijie Shen via jianhe)
Release 2.4.0 - 2014-04-07 Release 2.4.0 - 2014-04-07
INCOMPATIBLE CHANGES INCOMPATIBLE CHANGES

View File

@ -343,9 +343,10 @@ public class TestNMClient {
// getContainerStatus can be called after stopContainer // getContainerStatus can be called after stopContainer
try { try {
// 0 is possible if CLEANUP_CONTAINER is executed too late // 0 is possible if CLEANUP_CONTAINER is executed too late
// 137 is possible if the container is not terminated but killed
testGetContainerStatus(container, i, ContainerState.COMPLETE, testGetContainerStatus(container, i, ContainerState.COMPLETE,
"Container killed by the ApplicationMaster.", Arrays.asList( "Container killed by the ApplicationMaster.", Arrays.asList(
new Integer[] {143, 0})); new Integer[] {137, 143, 0}));
} catch (YarnException e) { } catch (YarnException e) {
// The exception is possible because, after the container is stopped, // The exception is possible because, after the container is stopped,
// it may be removed from NM's context. // it may be removed from NM's context.

View File

@ -47,6 +47,7 @@ import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.event.Dispatcher; import org.apache.hadoop.yarn.event.Dispatcher;
import org.apache.hadoop.yarn.event.EventHandler; import org.apache.hadoop.yarn.event.EventHandler;
import org.apache.hadoop.yarn.security.ContainerTokenIdentifier; import org.apache.hadoop.yarn.security.ContainerTokenIdentifier;
import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor.ExitCode;
import org.apache.hadoop.yarn.server.nodemanager.NMAuditLogger; import org.apache.hadoop.yarn.server.nodemanager.NMAuditLogger;
import org.apache.hadoop.yarn.server.nodemanager.NMAuditLogger.AuditConstants; import org.apache.hadoop.yarn.server.nodemanager.NMAuditLogger.AuditConstants;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.AuxServicesEvent; import org.apache.hadoop.yarn.server.nodemanager.containermanager.AuxServicesEvent;
@ -141,7 +142,7 @@ public class ContainerImpl implements Container {
ContainerEventType.UPDATE_DIAGNOSTICS_MSG, ContainerEventType.UPDATE_DIAGNOSTICS_MSG,
UPDATE_DIAGNOSTICS_TRANSITION) UPDATE_DIAGNOSTICS_TRANSITION)
.addTransition(ContainerState.NEW, ContainerState.DONE, .addTransition(ContainerState.NEW, ContainerState.DONE,
ContainerEventType.KILL_CONTAINER, CONTAINER_DONE_TRANSITION) ContainerEventType.KILL_CONTAINER, new KillOnNewTransition())
// From LOCALIZING State // From LOCALIZING State
.addTransition(ContainerState.LOCALIZING, .addTransition(ContainerState.LOCALIZING,
@ -760,7 +761,9 @@ public class ContainerImpl implements Container {
container.cleanup(); container.cleanup();
container.metrics.endInitingContainer(); container.metrics.endInitingContainer();
ContainerKillEvent killEvent = (ContainerKillEvent) event; ContainerKillEvent killEvent = (ContainerKillEvent) event;
container.exitCode = ExitCode.TERMINATED.getExitCode();
container.diagnostics.append(killEvent.getDiagnostic()).append("\n"); container.diagnostics.append(killEvent.getDiagnostic()).append("\n");
container.diagnostics.append("Container is killed before being launched.\n");
} }
} }
@ -828,7 +831,6 @@ public class ContainerImpl implements Container {
/** /**
* Handle the following transitions: * Handle the following transitions:
* - NEW -> DONE upon KILL_CONTAINER
* - {LOCALIZATION_FAILED, EXITED_WITH_SUCCESS, EXITED_WITH_FAILURE, * - {LOCALIZATION_FAILED, EXITED_WITH_SUCCESS, EXITED_WITH_FAILURE,
* KILLING, CONTAINER_CLEANEDUP_AFTER_KILL} * KILLING, CONTAINER_CLEANEDUP_AFTER_KILL}
* -> DONE upon CONTAINER_RESOURCES_CLEANEDUP * -> DONE upon CONTAINER_RESOURCES_CLEANEDUP
@ -849,6 +851,21 @@ public class ContainerImpl implements Container {
} }
} }
/**
 * Transition for a container that is killed before it ever left NEW:
 * - NEW -> DONE upon KILL_CONTAINER
 *
 * Stamps the container with the TERMINATED exit code and the diagnostic
 * carried by the kill event, then defers to ContainerDoneTransition.
 */
static class KillOnNewTransition extends ContainerDoneTransition {
  @Override
  public void transition(ContainerImpl container, ContainerEvent event) {
    // Cast first so a non-kill event fails before any container state is touched.
    final ContainerKillEvent kill = (ContainerKillEvent) event;

    container.exitCode = ExitCode.TERMINATED.getExitCode();
    container.diagnostics
        .append(kill.getDiagnostic())
        .append("\n")
        .append("Container is killed before being launched.\n");

    // Hand over to the parent transition once exit code and diagnostics are set.
    super.transition(container, event);
  }
}
/** /**
* Update diagnostics, staying in the same state. * Update diagnostics, staying in the same state.
*/ */

View File

@ -310,6 +310,45 @@ public class TestContainer {
} }
} }
} }
/**
 * Killing a container that is still NEW must move it to DONE and leave the
 * TERMINATED exit code plus the kill request text in its status.
 */
@Test
public void testKillOnNew() throws Exception {
  final WrappedContainer wrapped =
      new WrappedContainer(13, 314159265358979L, 4344, "yak");
  try {
    // No init has been requested yet, so the container is expected to be NEW.
    assertEquals(ContainerState.NEW, wrapped.c.getContainerState());

    wrapped.killContainer();

    // The kill is expected to take the container directly to DONE.
    assertEquals(ContainerState.DONE, wrapped.c.getContainerState());
    assertEquals(
        ExitCode.TERMINATED.getExitCode(),
        wrapped.c.cloneAndGetContainerStatus().getExitStatus());
    assertTrue(
        wrapped.c.cloneAndGetContainerStatus().getDiagnostics()
            .contains("KillRequest"));
  } finally {
    wrapped.finished();
  }
}
/**
 * Killing a container that is LOCALIZING must move it to KILLING and leave
 * the TERMINATED exit code plus the kill request text in its status.
 */
@Test
public void testKillOnLocalizing() throws Exception {
  final WrappedContainer wrapped =
      new WrappedContainer(14, 314159265358979L, 4344, "yak");
  try {
    // Requesting init is expected to put the container into LOCALIZING.
    wrapped.initContainer();
    assertEquals(ContainerState.LOCALIZING, wrapped.c.getContainerState());

    wrapped.killContainer();

    // Unlike a kill from NEW, the expected state here is KILLING, not DONE.
    assertEquals(ContainerState.KILLING, wrapped.c.getContainerState());
    assertEquals(
        ExitCode.TERMINATED.getExitCode(),
        wrapped.c.cloneAndGetContainerStatus().getExitStatus());
    assertTrue(
        wrapped.c.cloneAndGetContainerStatus().getDiagnostics()
            .contains("KillRequest"));
  } finally {
    wrapped.finished();
  }
}
@Test @Test
public void testKillOnLocalizationFailed() throws Exception { public void testKillOnLocalizationFailed() throws Exception {