diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/DockerLinuxContainerRuntime.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/DockerLinuxContainerRuntime.java index e19379f3bfe..f13ba59b00b 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/DockerLinuxContainerRuntime.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/DockerLinuxContainerRuntime.java @@ -191,6 +191,7 @@ public class DockerLinuxContainerRuntime implements LinuxContainerRuntime { private static final Pattern USER_MOUNT_PATTERN = Pattern.compile( "(?<=^|,)([^:\\x00]+):([^:\\x00]+):([a-z]+)"); private static final int HOST_NAME_LENGTH = 64; + private static final String DEFAULT_PROCFS = "/proc"; @InterfaceAudience.Private public static final String ENV_DOCKER_CONTAINER_IMAGE = @@ -1192,24 +1193,15 @@ public class DockerLinuxContainerRuntime implements LinuxContainerRuntime { private void executeLivelinessCheck(ContainerRuntimeContext ctx) throws ContainerExecutionException { - PrivilegedOperation signalOp = new PrivilegedOperation( - PrivilegedOperation.OperationType.SIGNAL_CONTAINER); - signalOp.appendArgs(ctx.getExecutionAttribute(RUN_AS_USER), - ctx.getExecutionAttribute(USER), Integer.toString( - PrivilegedOperation.RunAsUserCommand.SIGNAL_CONTAINER.getValue()), - ctx.getExecutionAttribute(PID), - Integer.toString(ctx.getExecutionAttribute(SIGNAL).getValue())); - signalOp.disableFailureLogging(); - try { - privilegedOperationExecutor.executePrivilegedOperation(null, signalOp, - null, 
ctx.getContainer().getLaunchContext().getEnvironment(), false, - false); - } catch (PrivilegedOperationException e) { - String msg = "Liveliness check failed for PID: " - + ctx.getExecutionAttribute(PID) + String procFs = ctx.getExecutionAttribute(PROCFS); + if (procFs == null || procFs.isEmpty()) { + procFs = DEFAULT_PROCFS; + } + String pid = ctx.getExecutionAttribute(PID); + if (!new File(procFs + File.separator + pid).exists()) { + String msg = "Liveliness check failed for PID: " + pid + ". Container may have already completed."; - throw new ContainerExecutionException(msg, e.getExitCode(), e.getOutput(), - e.getErrorOutput()); + throw new ContainerExecutionException(msg); } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/LinuxContainerRuntimeConstants.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/LinuxContainerRuntimeConstants.java index 579e03bb5b2..2f4aad49a04 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/LinuxContainerRuntimeConstants.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/LinuxContainerRuntimeConstants.java @@ -89,4 +89,6 @@ public final class LinuxContainerRuntimeConstants { String.class, "pid"); public static final Attribute SIGNAL = Attribute .attribute(ContainerExecutor.Signal.class, "signal"); + public static final Attribute PROCFS = Attribute.attribute( + String.class, "procfs"); } \ No newline at end of file diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/TestDockerContainerRuntime.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/TestDockerContainerRuntime.java index b6de366d2e7..d85c403f1fd 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/TestDockerContainerRuntime.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/TestDockerContainerRuntime.java @@ -54,7 +54,9 @@ import org.apache.hadoop.yarn.server.nodemanager.containermanager.runtime.Contai import org.apache.hadoop.yarn.server.nodemanager.containermanager.runtime.ContainerRuntimeContext; import org.junit.Assert; import org.junit.Before; +import org.junit.Rule; import org.junit.Test; +import org.junit.rules.TemporaryFolder; import org.mockito.ArgumentCaptor; import org.mockito.Mockito; import org.slf4j.Logger; @@ -98,6 +100,7 @@ import static org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.r import static org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime.LinuxContainerRuntimeConstants.NM_PRIVATE_TOKENS_PATH; import static org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime.LinuxContainerRuntimeConstants.PID; import static org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime.LinuxContainerRuntimeConstants.PID_FILE_PATH; +import static org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime.LinuxContainerRuntimeConstants.PROCFS; import static 
org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime.LinuxContainerRuntimeConstants.RESOURCES_OPTIONS; import static org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime.LinuxContainerRuntimeConstants.RUN_AS_USER; import static org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime.LinuxContainerRuntimeConstants.SIGNAL; @@ -153,6 +156,9 @@ public class TestDockerContainerRuntime { private final String signalPid = "1234"; private int dockerStopGracePeriod; + @Rule + public TemporaryFolder tempDir = new TemporaryFolder(); + @Before public void setup() { String tmpPath = new StringBuffer(System.getProperty("test.build.data")) @@ -1470,9 +1476,24 @@ public class TestDockerContainerRuntime { } @Test - public void testContainerLivelinessCheck() - throws ContainerExecutionException, PrivilegedOperationException { + public void testContainerLivelinessFileExistsNoException() throws Exception { + File testTempDir = tempDir.newFolder(); + File procPidPath = new File(testTempDir + File.separator + signalPid); + procPidPath.createNewFile(); + procPidPath.deleteOnExit(); + DockerLinuxContainerRuntime runtime = new DockerLinuxContainerRuntime( + mockExecutor, mockCGroupsHandler); + builder.setExecutionAttribute(RUN_AS_USER, runAsUser) + .setExecutionAttribute(USER, user) + .setExecutionAttribute(PID, signalPid) + .setExecutionAttribute(SIGNAL, ContainerExecutor.Signal.NULL) + .setExecutionAttribute(PROCFS, testTempDir.getAbsolutePath()); + runtime.initialize(enableMockContainerExecutor(conf), null); + runtime.signalContainer(builder.build()); + } + @Test(expected = ContainerExecutionException.class) + public void testContainerLivelinessNoFileException() throws Exception { DockerLinuxContainerRuntime runtime = new DockerLinuxContainerRuntime( mockExecutor, mockCGroupsHandler); builder.setExecutionAttribute(RUN_AS_USER, runAsUser) @@ -1481,15 +1502,6 @@ public class TestDockerContainerRuntime { .setExecutionAttribute(SIGNAL, 
ContainerExecutor.Signal.NULL); runtime.initialize(enableMockContainerExecutor(conf), null); runtime.signalContainer(builder.build()); - - PrivilegedOperation op = capturePrivilegedOperation(); - Assert.assertEquals(op.getOperationType(), - PrivilegedOperation.OperationType.SIGNAL_CONTAINER); - Assert.assertEquals(runAsUser, op.getArguments().get(0)); - Assert.assertEquals(submittingUser, op.getArguments().get(1)); - Assert.assertEquals("2", op.getArguments().get(2)); - Assert.assertEquals("1234", op.getArguments().get(3)); - Assert.assertEquals("0", op.getArguments().get(4)); } @Test diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/DockerContainers.md b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/DockerContainers.md index c6f965a4106..a2ef6fe9df3 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/DockerContainers.md +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/DockerContainers.md @@ -378,6 +378,21 @@ Trusted images are allowed to mount external devices such as HDFS via NFS gatewa For [YARN Service HTTPD example](./yarn-service/Examples.html), container-executor.cfg must define centos docker registry to be trusted for the example to run. +Container Reacquisition Requirements +------------------------------------ +On restart, the NodeManager, as part of the NodeManager's recovery process, will +validate that a container is still running by checking for the existence of the +container's PID directory in the /proc filesystem. For security purposes, +an operating system administrator may enable the _hidepid_ mount option for the +/proc filesystem. If the _hidepid_ option is enabled, the _yarn_ user's primary +group must be whitelisted by setting the gid mount flag, as shown below. +Without the _yarn_ user's primary group whitelisted, container reacquisition +will fail and the container will be killed on NodeManager restart. 
+ +``` +proc /proc proc nosuid,nodev,noexec,hidepid=2,gid=yarn 0 0 +``` + Connecting to a Secure Docker Repository ----------------------------------------