YARN-212. NM state machine ignores an APPLICATION_CONTAINER_FINISHED event when it shouldn't. Contributed by Nathan Roberts
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1408812 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
4741064250
commit
6db6e00649
|
@ -216,6 +216,9 @@ Release 0.23.5 - UNRELEASED
|
|||
YARN-206. TestApplicationCleanup.testContainerCleanup occasionally fails.
|
||||
(jlowe via jeagles)
|
||||
|
||||
YARN-212. NM state machine ignores an APPLICATION_CONTAINER_FINISHED event
|
||||
when it shouldn't (Nathan Roberts via jlowe)
|
||||
|
||||
Release 0.23.4 - UNRELEASED
|
||||
|
||||
INCOMPATIBLE CHANGES
|
||||
|
|
|
@ -143,6 +143,9 @@ public class ApplicationImpl implements Application {
|
|||
ApplicationState.APPLICATION_RESOURCES_CLEANINGUP),
|
||||
ApplicationEventType.FINISH_APPLICATION,
|
||||
new AppFinishTriggeredTransition())
|
||||
.addTransition(ApplicationState.INITING, ApplicationState.INITING,
|
||||
ApplicationEventType.APPLICATION_CONTAINER_FINISHED,
|
||||
CONTAINER_DONE_TRANSITION)
|
||||
.addTransition(ApplicationState.INITING, ApplicationState.INITING,
|
||||
ApplicationEventType.APPLICATION_LOG_HANDLING_INITED,
|
||||
new AppLogInitDoneTransition())
|
||||
|
|
|
@ -277,6 +277,8 @@ public class ContainerImpl implements Container {
|
|||
// From DONE
|
||||
.addTransition(ContainerState.DONE, ContainerState.DONE,
|
||||
ContainerEventType.KILL_CONTAINER)
|
||||
.addTransition(ContainerState.DONE, ContainerState.DONE,
|
||||
ContainerEventType.INIT_CONTAINER)
|
||||
.addTransition(ContainerState.DONE, ContainerState.DONE,
|
||||
ContainerEventType.UPDATE_DIAGNOSTICS_MSG,
|
||||
UPDATE_DIAGNOSTICS_TRANSITION)
|
||||
|
|
|
@ -155,6 +155,60 @@ public class TestApplication {
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Finished containers properly tracked when only container finishes in APP_INITING
|
||||
*/
|
||||
@Test
|
||||
public void testContainersCompleteDuringAppInit1() {
|
||||
WrappedApplication wa = null;
|
||||
try {
|
||||
wa = new WrappedApplication(3, 314159265358979L, "yak", 1);
|
||||
wa.initApplication();
|
||||
wa.initContainer(-1);
|
||||
assertEquals(ApplicationState.INITING, wa.app.getApplicationState());
|
||||
|
||||
wa.containerFinished(0);
|
||||
assertEquals(ApplicationState.INITING, wa.app.getApplicationState());
|
||||
|
||||
wa.applicationInited();
|
||||
assertEquals(ApplicationState.RUNNING, wa.app.getApplicationState());
|
||||
assertEquals(0, wa.app.getContainers().size());
|
||||
} finally {
|
||||
if (wa != null)
|
||||
wa.finished();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Finished containers properly tracked when 1 of several containers finishes in APP_INITING
|
||||
*/
|
||||
@Test
|
||||
public void testContainersCompleteDuringAppInit2() {
|
||||
WrappedApplication wa = null;
|
||||
try {
|
||||
wa = new WrappedApplication(3, 314159265358979L, "yak", 3);
|
||||
wa.initApplication();
|
||||
wa.initContainer(-1);
|
||||
assertEquals(ApplicationState.INITING, wa.app.getApplicationState());
|
||||
|
||||
wa.containerFinished(0);
|
||||
|
||||
assertEquals(ApplicationState.INITING, wa.app.getApplicationState());
|
||||
|
||||
wa.applicationInited();
|
||||
assertEquals(ApplicationState.RUNNING, wa.app.getApplicationState());
|
||||
assertEquals(2, wa.app.getContainers().size());
|
||||
|
||||
wa.containerFinished(1);
|
||||
wa.containerFinished(2);
|
||||
assertEquals(ApplicationState.RUNNING, wa.app.getApplicationState());
|
||||
assertEquals(0, wa.app.getContainers().size());
|
||||
} finally {
|
||||
if (wa != null)
|
||||
wa.finished();
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
@SuppressWarnings("unchecked")
|
||||
public void testAppFinishedOnRunningContainers() {
|
||||
|
|
|
@ -56,6 +56,8 @@ import org.apache.hadoop.yarn.event.Dispatcher;
|
|||
import org.apache.hadoop.yarn.event.DrainDispatcher;
|
||||
import org.apache.hadoop.yarn.event.EventHandler;
|
||||
import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor.ExitCode;
|
||||
import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.ApplicationEvent;
|
||||
import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.ApplicationEventType;
|
||||
import org.apache.hadoop.yarn.server.nodemanager.containermanager.AuxServicesEvent;
|
||||
import org.apache.hadoop.yarn.server.nodemanager.containermanager.AuxServicesEventType;
|
||||
import org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.ContainersLauncherEvent;
|
||||
|
@ -65,6 +67,8 @@ import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.even
|
|||
import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.event.ContainerLocalizationRequestEvent;
|
||||
import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.event.LocalizationEvent;
|
||||
import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.event.LocalizationEventType;
|
||||
import org.apache.hadoop.yarn.server.nodemanager.containermanager.loghandler.event.LogHandlerEvent;
|
||||
import org.apache.hadoop.yarn.server.nodemanager.containermanager.loghandler.event.LogHandlerEventType;
|
||||
import org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor.ContainersMonitorEvent;
|
||||
import org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor.ContainersMonitorEventType;
|
||||
import org.apache.hadoop.yarn.server.nodemanager.metrics.NodeManagerMetrics;
|
||||
|
@ -209,6 +213,32 @@ public class TestContainer {
|
|||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
@SuppressWarnings("unchecked") // mocked generic
|
||||
public void testInitWhileDone() throws Exception {
|
||||
WrappedContainer wc = null;
|
||||
try {
|
||||
wc = new WrappedContainer(6, 314159265358979L, 4344, "yak");
|
||||
wc.initContainer();
|
||||
wc.localizeResources();
|
||||
wc.launchContainer();
|
||||
reset(wc.localizerBus);
|
||||
wc.containerSuccessful();
|
||||
wc.containerResourcesCleanup();
|
||||
assertEquals(ContainerState.DONE, wc.c.getContainerState());
|
||||
// Now in DONE, issue INIT
|
||||
wc.initContainer();
|
||||
// Verify still in DONE
|
||||
assertEquals(ContainerState.DONE, wc.c.getContainerState());
|
||||
verifyCleanupCall(wc);
|
||||
}
|
||||
finally {
|
||||
if (wc != null) {
|
||||
wc.finished();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
@SuppressWarnings("unchecked") // mocked generic
|
||||
public void testCleanupOnKillRequest() throws Exception {
|
||||
|
@ -506,6 +536,8 @@ public class TestContainer {
|
|||
final EventHandler<ContainersLauncherEvent> launcherBus;
|
||||
final EventHandler<ContainersMonitorEvent> monitorBus;
|
||||
final EventHandler<AuxServicesEvent> auxBus;
|
||||
final EventHandler<ApplicationEvent> appBus;
|
||||
final EventHandler<LogHandlerEvent> LogBus;
|
||||
|
||||
final ContainerLaunchContext ctxt;
|
||||
final ContainerId cId;
|
||||
|
@ -527,10 +559,14 @@ public class TestContainer {
|
|||
launcherBus = mock(EventHandler.class);
|
||||
monitorBus = mock(EventHandler.class);
|
||||
auxBus = mock(EventHandler.class);
|
||||
appBus = mock(EventHandler.class);
|
||||
LogBus = mock(EventHandler.class);
|
||||
dispatcher.register(LocalizationEventType.class, localizerBus);
|
||||
dispatcher.register(ContainersLauncherEventType.class, launcherBus);
|
||||
dispatcher.register(ContainersMonitorEventType.class, monitorBus);
|
||||
dispatcher.register(AuxServicesEventType.class, auxBus);
|
||||
dispatcher.register(ApplicationEventType.class, appBus);
|
||||
dispatcher.register(LogHandlerEventType.class, LogBus);
|
||||
this.user = user;
|
||||
|
||||
ctxt = mock(ContainerLaunchContext.class);
|
||||
|
@ -654,6 +690,11 @@ public class TestContainer {
|
|||
ContainerEventType.CONTAINER_EXITED_WITH_SUCCESS));
|
||||
drainDispatcherEvents();
|
||||
}
|
||||
public void containerResourcesCleanup() {
|
||||
c.handle(new ContainerEvent(cId,
|
||||
ContainerEventType.CONTAINER_RESOURCES_CLEANEDUP));
|
||||
drainDispatcherEvents();
|
||||
}
|
||||
|
||||
public void containerFailed(int exitCode) {
|
||||
c.handle(new ContainerExitEvent(cId,
|
||||
|
|
|
@ -319,6 +319,7 @@ public class TestLogAggregationService extends BaseContainerManagerTest {
|
|||
this.user, null,
|
||||
ContainerLogsRetentionPolicy.AM_AND_FAILED_CONTAINERS_ONLY, this.acls));
|
||||
|
||||
dispatcher.await();
|
||||
ApplicationEvent expectedInitEvents[] = new ApplicationEvent[]{
|
||||
new ApplicationEvent(
|
||||
application1,
|
||||
|
|
Loading…
Reference in New Issue