YARN-9486. Docker container exited with failure does not get cleaned up correctly. Contributed by Eric Yang
This commit is contained in:
parent
b5dcf64f97
commit
79d3d35398
|
@ -95,8 +95,10 @@ public class ContainerCleanup implements Runnable {
|
||||||
+ " killed in store", e);
|
+ " killed in store", e);
|
||||||
}
|
}
|
||||||
|
|
||||||
// launch flag will be set to true if process already launched
|
// launch flag will be set to true if process already launched,
|
||||||
boolean alreadyLaunched = !launch.markLaunched();
|
// in process of launching, or failed to launch.
|
||||||
|
boolean alreadyLaunched = !launch.markLaunched() ||
|
||||||
|
launch.isLaunchCompleted();
|
||||||
if (!alreadyLaunched) {
|
if (!alreadyLaunched) {
|
||||||
LOG.info("Container " + containerIdStr + " not launched."
|
LOG.info("Container " + containerIdStr + " not launched."
|
||||||
+ " No cleanup needed to be done");
|
+ " No cleanup needed to be done");
|
||||||
|
|
|
@ -87,7 +87,14 @@ public class ContainerRelaunch extends ContainerLaunch {
|
||||||
Path nmPrivateTruststorePath = (container.getCredentials().getSecretKey(
|
Path nmPrivateTruststorePath = (container.getCredentials().getSecretKey(
|
||||||
AMSecretKeys.YARN_APPLICATION_AM_TRUSTSTORE) == null) ? null :
|
AMSecretKeys.YARN_APPLICATION_AM_TRUSTSTORE) == null) ? null :
|
||||||
getNmPrivateTruststorePath(appIdStr, containerIdStr);
|
getNmPrivateTruststorePath(appIdStr, containerIdStr);
|
||||||
pidFilePath = getPidFilePath(appIdStr, containerIdStr);
|
try {
|
||||||
|
// try to locate existing pid file.
|
||||||
|
pidFilePath = getPidFilePath(appIdStr, containerIdStr);
|
||||||
|
} catch (IOException e) {
|
||||||
|
// reset pid file path if it did not exist.
|
||||||
|
String pidFileSubpath = getPidFileSubpath(appIdStr, containerIdStr);
|
||||||
|
pidFilePath = dirsHandler.getLocalPathForWrite(pidFileSubpath);
|
||||||
|
}
|
||||||
|
|
||||||
LOG.info("Relaunch container with "
|
LOG.info("Relaunch container with "
|
||||||
+ "workDir = " + containerWorkDir.toString()
|
+ "workDir = " + containerWorkDir.toString()
|
||||||
|
|
|
@ -79,6 +79,7 @@ public class TestContainerCleanup {
|
||||||
|
|
||||||
launch = mock(ContainerLaunch.class);
|
launch = mock(ContainerLaunch.class);
|
||||||
launch.containerAlreadyLaunched = new AtomicBoolean(false);
|
launch.containerAlreadyLaunched = new AtomicBoolean(false);
|
||||||
|
launch.completed = new AtomicBoolean(false);
|
||||||
|
|
||||||
launch.pidFilePath = new Path("target/" + containerId.toString() + ".pid");
|
launch.pidFilePath = new Path("target/" + containerId.toString() + ".pid");
|
||||||
when(launch.getContainerPid()).thenReturn(containerId.toString());
|
when(launch.getContainerPid()).thenReturn(containerId.toString());
|
||||||
|
@ -105,4 +106,16 @@ public class TestContainerCleanup {
|
||||||
Assert.assertEquals("signal", ContainerExecutor.Signal.TERM,
|
Assert.assertEquals("signal", ContainerExecutor.Signal.TERM,
|
||||||
captor.getValue().getSignal());
|
captor.getValue().getSignal());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testFailedExitCleanup() throws Exception {
|
||||||
|
launch.completed.set(true);
|
||||||
|
cleanup.run();
|
||||||
|
ArgumentCaptor<ContainerSignalContext> captor =
|
||||||
|
ArgumentCaptor.forClass(ContainerSignalContext.class);
|
||||||
|
|
||||||
|
verify(executor, Mockito.times(1)).signalContainer(captor.capture());
|
||||||
|
Assert.assertEquals("signal", ContainerExecutor.Signal.TERM,
|
||||||
|
captor.getValue().getSignal());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue