YARN-9126. Fix container clean up for reinitialization.
Contributed by Chandni Singh
(cherry picked from commit e815fd9c49)
parent 4e1cef3625
commit 29c9c8a893
ContainerLaunch.java

@@ -111,6 +111,7 @@ public class ContainerLaunch implements Callable<Integer> {
       Shell.appendScriptExtension("launch_container");

   public static final String FINAL_CONTAINER_TOKENS_FILE = "container_tokens";
+  public static final String SYSFS_DIR = "sysfs";

   private static final String PID_FILE_NAME_FMT = "%s.pid";
   static final String EXIT_CODE_FILE_SUFFIX = ".exitcode";
@@ -1722,6 +1723,8 @@ public class ContainerLaunch implements Callable<Integer> {
     deleteAsUser(new Path(containerWorkDir, CONTAINER_SCRIPT));
     // delete TokensPath
     deleteAsUser(new Path(containerWorkDir, FINAL_CONTAINER_TOKENS_FILE));
+    // delete sysfs dir
+    deleteAsUser(new Path(containerWorkDir, SYSFS_DIR));

     // delete symlinks because launch script will create symlinks again
     try {
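The hunk above extends the per-container work-dir cleanup so the sysfs directory is removed along with the launch script and the container_tokens file. A minimal, self-contained sketch of the same cleanup shape, illustrative only (local deletion here stands in for the executor-backed deleteAsUser helper in ContainerLaunch):

import java.io.File;

import org.apache.hadoop.fs.FileUtil;

// Illustrative sketch, not the NodeManager code path: delete the same three
// per-container work-dir artifacts that the patched cleanup removes.
final class WorkDirCleanupSketch {
  static void cleanup(File containerWorkDir) {
    String[] artifacts = {
        "launch_container.sh",  // ContainerLaunch.CONTAINER_SCRIPT on Unix
        "container_tokens",     // ContainerLaunch.FINAL_CONTAINER_TOKENS_FILE
        "sysfs"                 // ContainerLaunch.SYSFS_DIR, new in this patch
    };
    for (String name : artifacts) {
      // fullyDelete handles both files and non-empty directories and does not
      // throw when the path is already gone.
      FileUtil.fullyDelete(new File(containerWorkDir, name));
    }
  }
}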
ContainersLauncher.java

@@ -46,7 +46,6 @@ import org.apache.hadoop.yarn.server.nodemanager.LocalDirsHandlerService;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManagerImpl;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.Application;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container;
-import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.ResourceLocalizationService;

 import com.google.common.annotations.VisibleForTesting;
@@ -152,25 +151,10 @@ public class ContainersLauncher extends AbstractService
         containerLauncher.submit(launch);
         break;
       case CLEANUP_CONTAINER:
+        cleanup(event, containerId, true);
+        break;
       case CLEANUP_CONTAINER_FOR_REINIT:
-        ContainerLaunch existingLaunch = running.remove(containerId);
-        if (existingLaunch == null) {
-          // Container not launched.
-          // triggering KILLING to CONTAINER_CLEANEDUP_AFTER_KILL transition.
-          dispatcher.getEventHandler().handle(
-              new ContainerExitEvent(containerId,
-                  ContainerEventType.CONTAINER_KILLED_ON_REQUEST,
-                  Shell.WINDOWS ? ContainerExecutor.ExitCode.FORCE_KILLED.getExitCode() :
-                      ContainerExecutor.ExitCode.TERMINATED.getExitCode(),
-                  "Container terminated before launch."));
-          return;
-        }
-
-        // Cleanup a container whether it is running/killed/completed, so that
-        // no sub-processes are alive.
-        ContainerCleanup cleanup = new ContainerCleanup(context, getConfig(),
-            dispatcher, exec, event.getContainer(), existingLaunch);
-        containerLauncher.submit(cleanup);
+        cleanup(event, containerId, false);
         break;
       case SIGNAL_CONTAINER:
         SignalContainersLauncherEvent signalEvent =
@@ -221,4 +205,32 @@ public class ContainersLauncher extends AbstractService
         break;
     }
   }
+
+  @VisibleForTesting
+  void cleanup(ContainersLauncherEvent event, ContainerId containerId,
+      boolean async) {
+    ContainerLaunch existingLaunch = running.remove(containerId);
+    if (existingLaunch == null) {
+      // Container not launched.
+      // triggering KILLING to CONTAINER_CLEANEDUP_AFTER_KILL transition.
+      dispatcher.getEventHandler().handle(
+          new ContainerExitEvent(containerId,
+              ContainerEventType.CONTAINER_KILLED_ON_REQUEST,
+              Shell.WINDOWS ?
+                  ContainerExecutor.ExitCode.FORCE_KILLED.getExitCode() :
+                  ContainerExecutor.ExitCode.TERMINATED.getExitCode(),
+              "Container terminated before launch."));
+      return;
+    }
+
+    // Cleanup a container whether it is running/killed/completed, so that
+    // no sub-processes are alive.
+    ContainerCleanup cleanup = new ContainerCleanup(context, getConfig(),
+        dispatcher, exec, event.getContainer(), existingLaunch);
+    if (async) {
+      containerLauncher.submit(cleanup);
+    } else {
+      cleanup.run();
+    }
+  }
 }
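The behavioral change above is the new async flag: CLEANUP_CONTAINER still submits the ContainerCleanup task to the launcher's thread pool, while CLEANUP_CONTAINER_FOR_REINIT now runs it on the calling thread, so cleanup has finished before the reinitialized container is relaunched. A minimal sketch of that submit-or-run-inline pattern (the class and method names below are illustrative, not the YARN API):

import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;

// Sketch of the pattern behind cleanup(event, containerId, async): the same
// task either goes to a worker pool or runs synchronously on the caller.
final class SyncOrAsyncCleanup {
  private final ExecutorService pool = Executors.newSingleThreadExecutor();

  void run(Runnable cleanupTask, boolean async) {
    if (async) {
      pool.submit(cleanupTask);  // like CLEANUP_CONTAINER: fire and forget
    } else {
      cleanupTask.run();         // like CLEANUP_CONTAINER_FOR_REINIT: block until done
    }
  }
}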
TestContainersLauncher.java

@@ -31,6 +31,7 @@ import org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManagerImpl;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.Application;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.ApplicationImpl;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerImpl;
+import org.junit.Assert;
 import org.junit.Before;
 import org.junit.Test;
 import org.mockito.InjectMocks;

@@ -39,8 +40,10 @@ import org.mockito.Mockito;
 import org.mockito.MockitoAnnotations;

 import java.io.IOException;
+import java.util.ArrayList;
 import java.util.Collections;
 import java.util.HashMap;
+import java.util.List;
 import java.util.Map;
 import java.util.concurrent.ConcurrentHashMap;
 import java.util.concurrent.ConcurrentMap;
@@ -212,11 +215,14 @@ public class TestContainersLauncher {

     when(event.getType())
         .thenReturn(ContainersLauncherEventType.CLEANUP_CONTAINER_FOR_REINIT);
-    assertEquals(1, dummyMap.size());
+    final List<ContainerId> cleanedContainers = new ArrayList<>();
+    doAnswer(invocation -> {
+      cleanedContainers.add((ContainerId)invocation.getArguments()[1]);
+      return null;
+    }).when(spy).cleanup(any(), any(), anyBoolean());
     spy.handle(event);
-    assertEquals(0, dummyMap.size());
-    Mockito.verify(containerLauncher, Mockito.times(1))
-        .submit(Mockito.any(ContainerCleanup.class));
+    Assert.assertEquals("container not cleaned", containerId,
+        cleanedContainers.get(0));
   }

   @Test
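With cleanup() now a package-private method annotated @VisibleForTesting, the test stubs it on the spy and records the ContainerId it receives instead of verifying a ContainerCleanup submission. A self-contained sketch of that doAnswer capture pattern follows; FakeLauncher is a made-up stand-in for illustration, not TestContainersLauncher:

import static org.junit.Assert.assertEquals;
import static org.mockito.ArgumentMatchers.any;
import static org.mockito.ArgumentMatchers.anyBoolean;
import static org.mockito.Mockito.doAnswer;
import static org.mockito.Mockito.spy;

import java.util.ArrayList;
import java.util.List;
import org.junit.Test;

public class DoAnswerCaptureSketchTest {

  /** Hypothetical stand-in for ContainersLauncher, just for this sketch. */
  static class FakeLauncher {
    void cleanup(Object event, String containerId, boolean async) {
      throw new IllegalStateException("real cleanup should never run here");
    }

    void handle(Object event, String containerId) {
      cleanup(event, containerId, false);
    }
  }

  @Test
  public void recordsTheCleanedContainerId() {
    List<String> cleanedContainers = new ArrayList<>();
    FakeLauncher spied = spy(new FakeLauncher());

    // Same shape as the patched test: stub cleanup() on the spy and record
    // its second argument instead of letting the real method execute.
    doAnswer(invocation -> {
      cleanedContainers.add((String) invocation.getArguments()[1]);
      return null;
    }).when(spied).cleanup(any(), any(), anyBoolean());

    spied.handle("someEvent", "container_1");

    assertEquals("container not cleaned", "container_1",
        cleanedContainers.get(0));
  }
}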