diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index 1a930e8d60f..1e66611399b 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -89,6 +89,9 @@ Release 2.4.1 - UNRELEASED verification of public cache files in Windows+local file-system environment. (Varun Vasudev via vinodkv) + YARN-1903. Set exit code and diagnostics when container is killed at + NEW/LOCALIZING state. (Zhijie Shen via jianhe) + Release 2.4.0 - 2014-04-07 INCOMPATIBLE CHANGES diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestNMClient.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestNMClient.java index 126dfcb4349..3077a2a97f6 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestNMClient.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestNMClient.java @@ -343,9 +343,10 @@ private void testContainerManagement(NMClientImpl nmClient, // getContainerStatus can be called after stopContainer try { // O is possible if CLEANUP_CONTAINER is executed too late + // 137 is possible if the container is not terminated but killed testGetContainerStatus(container, i, ContainerState.COMPLETE, "Container killed by the ApplicationMaster.", Arrays.asList( - new Integer[] {143, 0})); + new Integer[] {137, 143, 0})); } catch (YarnException e) { // The exception is possible because, after the container is stopped, // it may be removed from NM's context. diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java index 862e3fa9bcd..50653f5175c 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java @@ -47,6 +47,7 @@ import org.apache.hadoop.yarn.event.Dispatcher; import org.apache.hadoop.yarn.event.EventHandler; import org.apache.hadoop.yarn.security.ContainerTokenIdentifier; +import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor.ExitCode; import org.apache.hadoop.yarn.server.nodemanager.NMAuditLogger; import org.apache.hadoop.yarn.server.nodemanager.NMAuditLogger.AuditConstants; import org.apache.hadoop.yarn.server.nodemanager.containermanager.AuxServicesEvent; @@ -141,7 +142,7 @@ ContainerEventType.INIT_CONTAINER, new RequestResourcesTransition()) ContainerEventType.UPDATE_DIAGNOSTICS_MSG, UPDATE_DIAGNOSTICS_TRANSITION) .addTransition(ContainerState.NEW, ContainerState.DONE, - ContainerEventType.KILL_CONTAINER, CONTAINER_DONE_TRANSITION) + ContainerEventType.KILL_CONTAINER, new KillOnNewTransition()) // From LOCALIZING State .addTransition(ContainerState.LOCALIZING, @@ -760,7 +761,9 @@ public void transition(ContainerImpl container, ContainerEvent event) { container.cleanup(); container.metrics.endInitingContainer(); ContainerKillEvent killEvent = (ContainerKillEvent) event; + container.exitCode = ExitCode.TERMINATED.getExitCode(); container.diagnostics.append(killEvent.getDiagnostic()).append("\n"); + container.diagnostics.append("Container is killed before being launched.\n"); } } @@ -828,7 +831,6 @@ public void transition(ContainerImpl container, ContainerEvent event) { /** * Handle the following transitions: - * - NEW -> DONE upon KILL_CONTAINER * - {LOCALIZATION_FAILED, EXITED_WITH_SUCCESS, EXITED_WITH_FAILURE, * KILLING, CONTAINER_CLEANEDUP_AFTER_KILL} * -> DONE upon CONTAINER_RESOURCES_CLEANEDUP @@ -849,6 +851,21 @@ public void transition(ContainerImpl container, ContainerEvent event) { } } + /** + * Handle the following transition: + * - NEW -> DONE upon KILL_CONTAINER + */ + static class KillOnNewTransition extends ContainerDoneTransition { + @Override + public void transition(ContainerImpl container, ContainerEvent event) { + ContainerKillEvent killEvent = (ContainerKillEvent) event; + container.exitCode = ExitCode.TERMINATED.getExitCode(); + container.diagnostics.append(killEvent.getDiagnostic()).append("\n"); + container.diagnostics.append("Container is killed before being launched.\n"); + super.transition(container, event); + } + } + /** * Update diagnostics, staying in the same state. */ diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/TestContainer.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/TestContainer.java index 4dbdcada6b5..8af951873d4 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/TestContainer.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/TestContainer.java @@ -310,6 +310,45 @@ public void testCleanupOnKillRequest() throws Exception { } } } + + @Test + public void testKillOnNew() throws Exception { + WrappedContainer wc = null; + try { + wc = new WrappedContainer(13, 314159265358979L, 4344, "yak"); + assertEquals(ContainerState.NEW, wc.c.getContainerState()); + wc.killContainer(); + assertEquals(ContainerState.DONE, wc.c.getContainerState()); + assertEquals(ExitCode.TERMINATED.getExitCode(), + wc.c.cloneAndGetContainerStatus().getExitStatus()); + assertTrue(wc.c.cloneAndGetContainerStatus().getDiagnostics() + .contains("KillRequest")); + } finally { + if (wc != null) { + wc.finished(); + } + } + } + + @Test + public void testKillOnLocalizing() throws Exception { + WrappedContainer wc = null; + try { + wc = new WrappedContainer(14, 314159265358979L, 4344, "yak"); + wc.initContainer(); + assertEquals(ContainerState.LOCALIZING, wc.c.getContainerState()); + wc.killContainer(); + assertEquals(ContainerState.KILLING, wc.c.getContainerState()); + assertEquals(ExitCode.TERMINATED.getExitCode(), + wc.c.cloneAndGetContainerStatus().getExitStatus()); + assertTrue(wc.c.cloneAndGetContainerStatus().getDiagnostics() + .contains("KillRequest")); + } finally { + if (wc != null) { + wc.finished(); + } + } + } @Test public void testKillOnLocalizationFailed() throws Exception {