diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index ccb7304be4a..874d3f0e664 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -199,6 +199,9 @@ Release 0.23.5 - UNRELEASED YARN-206. TestApplicationCleanup.testContainerCleanup occasionally fails. (jlowe via jeagles) + YARN-212. NM state machine ignores an APPLICATION_CONTAINER_FINISHED event + when it shouldn't (Nathan Roberts via jlowe) + Release 0.23.4 - UNRELEASED INCOMPATIBLE CHANGES diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/application/ApplicationImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/application/ApplicationImpl.java index 491cbe48de8..85f7da1d08c 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/application/ApplicationImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/application/ApplicationImpl.java @@ -143,6 +143,9 @@ public class ApplicationImpl implements Application { ApplicationState.APPLICATION_RESOURCES_CLEANINGUP), ApplicationEventType.FINISH_APPLICATION, new AppFinishTriggeredTransition()) + .addTransition(ApplicationState.INITING, ApplicationState.INITING, + ApplicationEventType.APPLICATION_CONTAINER_FINISHED, + CONTAINER_DONE_TRANSITION) .addTransition(ApplicationState.INITING, ApplicationState.INITING, ApplicationEventType.APPLICATION_LOG_HANDLING_INITED, new AppLogInitDoneTransition()) diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java index c9802080854..b4752ff8f5d 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java @@ -277,6 +277,8 @@ public class ContainerImpl implements Container { // From DONE .addTransition(ContainerState.DONE, ContainerState.DONE, ContainerEventType.KILL_CONTAINER) + .addTransition(ContainerState.DONE, ContainerState.DONE, + ContainerEventType.INIT_CONTAINER) .addTransition(ContainerState.DONE, ContainerState.DONE, ContainerEventType.UPDATE_DIAGNOSTICS_MSG, UPDATE_DIAGNOSTICS_TRANSITION) diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/application/TestApplication.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/application/TestApplication.java index 3d742cb696f..3dd8d2765a6 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/application/TestApplication.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/application/TestApplication.java @@ -155,6 +155,60 @@ public class TestApplication { } } + /** + * Finished containers properly tracked when only container finishes in APP_INITING + */ + @Test + public void testContainersCompleteDuringAppInit1() { + WrappedApplication wa = null; + try { + wa = new WrappedApplication(3, 314159265358979L, "yak", 1); + wa.initApplication(); + wa.initContainer(-1); + assertEquals(ApplicationState.INITING, wa.app.getApplicationState()); + + wa.containerFinished(0); + assertEquals(ApplicationState.INITING, wa.app.getApplicationState()); + + wa.applicationInited(); + assertEquals(ApplicationState.RUNNING, wa.app.getApplicationState()); + assertEquals(0, wa.app.getContainers().size()); + } finally { + if (wa != null) + wa.finished(); + } + } + + /** + * Finished containers properly tracked when 1 of several containers finishes in APP_INITING + */ + @Test + public void testContainersCompleteDuringAppInit2() { + WrappedApplication wa = null; + try { + wa = new WrappedApplication(3, 314159265358979L, "yak", 3); + wa.initApplication(); + wa.initContainer(-1); + assertEquals(ApplicationState.INITING, wa.app.getApplicationState()); + + wa.containerFinished(0); + + assertEquals(ApplicationState.INITING, wa.app.getApplicationState()); + + wa.applicationInited(); + assertEquals(ApplicationState.RUNNING, wa.app.getApplicationState()); + assertEquals(2, wa.app.getContainers().size()); + + wa.containerFinished(1); + wa.containerFinished(2); + assertEquals(ApplicationState.RUNNING, wa.app.getApplicationState()); + assertEquals(0, wa.app.getContainers().size()); + } finally { + if (wa != null) + wa.finished(); + } + } + @Test @SuppressWarnings("unchecked") public void testAppFinishedOnRunningContainers() { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/TestContainer.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/TestContainer.java index cb7c19dc2fa..f4c48bad306 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/TestContainer.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/TestContainer.java @@ -56,6 +56,8 @@ import org.apache.hadoop.yarn.event.Dispatcher; import org.apache.hadoop.yarn.event.DrainDispatcher; import org.apache.hadoop.yarn.event.EventHandler; import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor.ExitCode; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.ApplicationEvent; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.ApplicationEventType; import org.apache.hadoop.yarn.server.nodemanager.containermanager.AuxServicesEvent; import org.apache.hadoop.yarn.server.nodemanager.containermanager.AuxServicesEventType; import org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.ContainersLauncherEvent; @@ -65,6 +67,8 @@ import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.even import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.event.ContainerLocalizationRequestEvent; import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.event.LocalizationEvent; import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.event.LocalizationEventType; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.loghandler.event.LogHandlerEvent; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.loghandler.event.LogHandlerEventType; import org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor.ContainersMonitorEvent; import org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor.ContainersMonitorEventType; import org.apache.hadoop.yarn.server.nodemanager.metrics.NodeManagerMetrics; @@ -208,6 +212,32 @@ public class TestContainer { } } } + + @Test + @SuppressWarnings("unchecked") // mocked generic + public void testInitWhileDone() throws Exception { + WrappedContainer wc = null; + try { + wc = new WrappedContainer(6, 314159265358979L, 4344, "yak"); + wc.initContainer(); + wc.localizeResources(); + wc.launchContainer(); + reset(wc.localizerBus); + wc.containerSuccessful(); + wc.containerResourcesCleanup(); + assertEquals(ContainerState.DONE, wc.c.getContainerState()); + // Now in DONE, issue INIT + wc.initContainer(); + // Verify still in DONE + assertEquals(ContainerState.DONE, wc.c.getContainerState()); + verifyCleanupCall(wc); + } + finally { + if (wc != null) { + wc.finished(); + } + } + } @Test @SuppressWarnings("unchecked") // mocked generic @@ -506,6 +536,8 @@ public class TestContainer { final EventHandler launcherBus; final EventHandler monitorBus; final EventHandler auxBus; + final EventHandler appBus; + final EventHandler LogBus; final ContainerLaunchContext ctxt; final ContainerId cId; @@ -527,10 +559,14 @@ public class TestContainer { launcherBus = mock(EventHandler.class); monitorBus = mock(EventHandler.class); auxBus = mock(EventHandler.class); + appBus = mock(EventHandler.class); + LogBus = mock(EventHandler.class); dispatcher.register(LocalizationEventType.class, localizerBus); dispatcher.register(ContainersLauncherEventType.class, launcherBus); dispatcher.register(ContainersMonitorEventType.class, monitorBus); dispatcher.register(AuxServicesEventType.class, auxBus); + dispatcher.register(ApplicationEventType.class, appBus); + dispatcher.register(LogHandlerEventType.class, LogBus); this.user = user; ctxt = mock(ContainerLaunchContext.class); @@ -654,6 +690,11 @@ public class TestContainer { ContainerEventType.CONTAINER_EXITED_WITH_SUCCESS)); drainDispatcherEvents(); } + public void containerResourcesCleanup() { + c.handle(new ContainerEvent(cId, + ContainerEventType.CONTAINER_RESOURCES_CLEANEDUP)); + drainDispatcherEvents(); + } public void containerFailed(int exitCode) { c.handle(new ContainerExitEvent(cId, diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/logaggregation/TestLogAggregationService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/logaggregation/TestLogAggregationService.java index 74a3858a5d0..aad7b845c92 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/logaggregation/TestLogAggregationService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/logaggregation/TestLogAggregationService.java @@ -319,6 +319,7 @@ public class TestLogAggregationService extends BaseContainerManagerTest { this.user, null, ContainerLogsRetentionPolicy.AM_AND_FAILED_CONTAINERS_ONLY, this.acls)); + dispatcher.await(); ApplicationEvent expectedInitEvents[] = new ApplicationEvent[]{ new ApplicationEvent( application1,