MAPREDUCE-7407. Avoid stopContainer() on dead node (#4779)
This commit is contained in:
parent
6422eaf301
commit
59d3c20118
|
@ -379,27 +379,38 @@ public class ContainerLauncherImpl extends AbstractService implements
|
|||
|
||||
@Override
|
||||
public void run() {
|
||||
LOG.info("Processing the event " + event.toString());
|
||||
LOG.info("Processing the event {}", event);
|
||||
|
||||
// Load ContainerManager tokens before creating a connection.
|
||||
// TODO: Do it only once per NodeManager.
|
||||
ContainerId containerID = event.getContainerID();
|
||||
|
||||
Container c = getContainer(event);
|
||||
switch(event.getType()) {
|
||||
|
||||
case CONTAINER_REMOTE_LAUNCH:
|
||||
ContainerRemoteLaunchEvent launchEvent
|
||||
= (ContainerRemoteLaunchEvent) event;
|
||||
c.launch(launchEvent);
|
||||
getContainer(event).launch(launchEvent);
|
||||
break;
|
||||
|
||||
case CONTAINER_REMOTE_CLEANUP:
|
||||
c.kill(event.getDumpContainerThreads());
|
||||
// If the container failed to launch earlier (due to a dead node, for example),
|
||||
// it has been marked as FAILED and removed from containers during
|
||||
// CONTAINER_REMOTE_LAUNCH event handling.
|
||||
// Skip kill() on such a container during CONTAINER_REMOTE_CLEANUP, as
|
||||
// it is not necessary and could cost a 15-minute delay if the node is dead.
|
||||
if (!containers.containsKey(containerID)) {
|
||||
LOG.info("Skip cleanup of already-removed container {}", containerID);
|
||||
// Send the TA_CONTAINER_CLEANED event to the task attempt regardless, as kill() does.
|
||||
context.getEventHandler().handle(new TaskAttemptEvent(event.getTaskAttemptID(),
|
||||
TaskAttemptEventType.TA_CONTAINER_CLEANED));
|
||||
return;
|
||||
}
|
||||
getContainer(event).kill(event.getDumpContainerThreads());
|
||||
break;
|
||||
|
||||
case CONTAINER_COMPLETED:
|
||||
c.done();
|
||||
getContainer(event).done();
|
||||
break;
|
||||
|
||||
}
|
||||
|
|
|
@ -211,12 +211,9 @@ public class TestContainerLauncherImpl {
|
|||
verify(mockCM).startContainers(any(StartContainersRequest.class));
|
||||
|
||||
LOG.info("inserting cleanup event");
|
||||
ContainerLauncherEvent mockCleanupEvent =
|
||||
mock(ContainerLauncherEvent.class);
|
||||
when(mockCleanupEvent.getType())
|
||||
.thenReturn(EventType.CONTAINER_REMOTE_CLEANUP);
|
||||
when(mockCleanupEvent.getContainerID())
|
||||
.thenReturn(contId);
|
||||
ContainerLauncherEvent mockCleanupEvent = mock(ContainerLauncherEvent.class);
|
||||
when(mockCleanupEvent.getType()).thenReturn(EventType.CONTAINER_REMOTE_CLEANUP);
|
||||
when(mockCleanupEvent.getContainerID()).thenReturn(contId);
|
||||
when(mockCleanupEvent.getTaskAttemptID()).thenReturn(taskAttemptId);
|
||||
when(mockCleanupEvent.getContainerMgrAddress()).thenReturn(cmAddress);
|
||||
ut.handle(mockCleanupEvent);
|
||||
|
@ -284,7 +281,20 @@ public class TestContainerLauncherImpl {
|
|||
|
||||
ut.waitForPoolToIdle();
|
||||
|
||||
verify(mockCM, never()).startContainers(any(StartContainersRequest.class));
|
||||
verify(mockCM).startContainers(any(StartContainersRequest.class));
|
||||
|
||||
LOG.info("inserting cleanup event");
|
||||
ContainerLauncherEvent mockCleanupEvent2 = mock(ContainerLauncherEvent.class);
|
||||
when(mockCleanupEvent2.getType()).thenReturn(EventType.CONTAINER_REMOTE_CLEANUP);
|
||||
when(mockCleanupEvent2.getContainerID()).thenReturn(contId);
|
||||
when(mockCleanupEvent2.getTaskAttemptID()).thenReturn(taskAttemptId);
|
||||
when(mockCleanupEvent2.getContainerMgrAddress()).thenReturn(cmAddress);
|
||||
ut.handle(mockCleanupEvent2);
|
||||
|
||||
ut.waitForPoolToIdle();
|
||||
|
||||
// Verifies stopContainers is called on existing container
|
||||
verify(mockCM).stopContainers(any(StopContainersRequest.class));
|
||||
} finally {
|
||||
ut.stop();
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue