YARN-7729. Add support for setting Docker PID namespace mode. (Contributed by Billie Rinaldi)

This commit is contained in:
Eric Yang 2018-01-22 16:33:38 -05:00
parent 22ee6f77e5
commit 97fe3cc187
9 changed files with 348 additions and 4 deletions

View File

@ -1831,6 +1831,14 @@ public class YarnConfiguration extends Configuration {
public static final String DEFAULT_NM_DOCKER_DEFAULT_CONTAINER_NETWORK =
"host";
/**
 * Configuration key that determines whether Docker containers are allowed
 * to use the host's PID namespace. Use with care.
 */
public static final String NM_DOCKER_ALLOW_HOST_PID_NAMESPACE =
DOCKER_CONTAINER_RUNTIME_PREFIX + "host-pid-namespace.allowed";
/**
 * Default for {@link #NM_DOCKER_ALLOW_HOST_PID_NAMESPACE}: the host pid
 * namespace is not allowed for containers unless explicitly enabled.
 */
public static final boolean DEFAULT_NM_DOCKER_ALLOW_HOST_PID_NAMESPACE =
false;
/**
* Whether or not users are allowed to request that Docker containers honor
* the debug deletion delay. This is useful for troubleshooting Docker

View File

@ -1677,6 +1677,14 @@
<value>host</value>
</property>
<property>
<description>This configuration setting determines whether the host's PID
namespace is allowed for docker containers on this cluster.
Use with care.</description>
<name>yarn.nodemanager.runtime.linux.docker.host-pid-namespace.allowed</name>
<value>false</value>
</property>
<property>
<description>Property to enable docker user remapping</description>
<name>yarn.nodemanager.runtime.linux.docker.enable-userremapping.allowed</name>

View File

@ -113,6 +113,17 @@ import static org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.r
* property.
* </li>
* <li>
* {@code YARN_CONTAINER_RUNTIME_DOCKER_CONTAINER_PID_NAMESPACE}
* controls which PID namespace will be used by the Docker container. By
* default, each Docker container has its own PID namespace. To share the
* namespace of the host, the
* {@code yarn.nodemanager.runtime.linux.docker.host-pid-namespace.allowed}
* property must be set to {@code true}. If the host PID namespace is
* allowed and this environment variable is set to {@code host}, the
* Docker container will share the host's PID namespace. No other value is
* allowed.
* </li>
* <li>
* {@code YARN_CONTAINER_RUNTIME_DOCKER_CONTAINER_HOSTNAME} sets the
* hostname to be used by the Docker container. If not specified, a
* hostname will be derived from the container ID.
@ -192,6 +203,9 @@ public class DockerLinuxContainerRuntime implements LinuxContainerRuntime {
public static final String ENV_DOCKER_CONTAINER_NETWORK =
"YARN_CONTAINER_RUNTIME_DOCKER_CONTAINER_NETWORK";
/**
 * Name of the container launch environment variable used to request a PID
 * namespace for the Docker container. The runtime only acts on the value
 * {@code host}, and only when the host pid namespace is allowed by the
 * cluster configuration.
 */
@InterfaceAudience.Private
public static final String ENV_DOCKER_CONTAINER_PID_NAMESPACE =
"YARN_CONTAINER_RUNTIME_DOCKER_CONTAINER_PID_NAMESPACE";
@InterfaceAudience.Private
public static final String ENV_DOCKER_CONTAINER_HOSTNAME =
"YARN_CONTAINER_RUNTIME_DOCKER_CONTAINER_HOSTNAME";
@InterfaceAudience.Private
@ -480,6 +494,47 @@ public class DockerLinuxContainerRuntime implements LinuxContainerRuntime {
throw new ContainerExecutionException(msg);
}
/**
 * Return whether the YARN container is allowed to run using the host's PID
 * namespace for the Docker container. For this to be allowed, the submitting
 * user must request the feature and the feature must be enabled on the
 * cluster.
 *
 * <p>The request is read from the
 * {@code ENV_DOCKER_CONTAINER_PID_NAMESPACE} entry of the container launch
 * environment. The outcomes are:
 * <ul>
 *   <li>variable not set: returns {@code false};</li>
 *   <li>variable set to anything other than {@code host} (compared ignoring
 *   case): a warning is logged and {@code false} is returned;</li>
 *   <li>variable set to {@code host} while the cluster setting is disabled:
 *   a {@link ContainerExecutionException} is thrown;</li>
 *   <li>variable set to {@code host} and the cluster setting enabled:
 *   returns {@code true}.</li>
 * </ul>
 *
 * @param container the target YARN container
 * @return whether host pid namespace is requested and allowed
 * @throws ContainerExecutionException if host pid namespace is requested
 * but is not allowed
 */
private boolean allowHostPidNamespace(Container container)
    throws ContainerExecutionException {
  Map<String, String> environment = container.getLaunchContext()
      .getEnvironment();
  String pidNamespace = environment.get(ENV_DOCKER_CONTAINER_PID_NAMESPACE);

  if (pidNamespace == null) {
    return false;
  }

  if (!pidNamespace.equalsIgnoreCase("host")) {
    // The separating space before "is invalid" was missing, which glued the
    // variable name and the rest of the message together in the log.
    LOG.warn("NOT requesting PID namespace. Value of " +
        ENV_DOCKER_CONTAINER_PID_NAMESPACE + " is invalid: " + pidNamespace);
    return false;
  }

  boolean hostPidNamespaceEnabled = conf.getBoolean(
      YarnConfiguration.NM_DOCKER_ALLOW_HOST_PID_NAMESPACE,
      YarnConfiguration.DEFAULT_NM_DOCKER_ALLOW_HOST_PID_NAMESPACE);

  if (!hostPidNamespaceEnabled) {
    String message = "Host pid namespace being requested but this is not "
        + "enabled on this cluster";
    LOG.warn(message);
    throw new ContainerExecutionException(message);
  }

  return true;
}
public static void validateHostname(String hostname) throws
ContainerExecutionException {
if (hostname != null && !hostname.isEmpty()) {
@ -798,6 +853,10 @@ public class DockerLinuxContainerRuntime implements LinuxContainerRuntime {
}
}
if (allowHostPidNamespace(container)) {
runCommand.setPidNamespace("host");
}
if (allowPrivilegedContainerExecution(container)) {
runCommand.setPrivileged();
}

View File

@ -56,6 +56,11 @@ public class DockerRunCommand extends DockerCommand {
return this;
}
/**
 * Record the requested PID namespace for the docker run command by adding
 * it under the {@code pid} command argument key.
 *
 * @param type the PID namespace value to pass along
 * @return this command, so that calls can be chained
 */
public DockerRunCommand setPidNamespace(String type) {
super.addCommandArguments("pid", type);
return this;
}
public DockerRunCommand addMountLocation(String sourcePath, String
destinationPath, boolean createSource) {
boolean sourceExists = new File(sourcePath).exists();

View File

@ -218,6 +218,10 @@ const char *get_docker_error_message(const int error_code) {
return "Invalid docker volume name";
case INVALID_DOCKER_VOLUME_COMMAND:
return "Invalid docker volume command";
case PID_HOST_DISABLED:
return "Host pid namespace is disabled";
case INVALID_PID_NAMESPACE:
return "Invalid pid namespace";
default:
return "Unknown error";
}
@ -780,6 +784,52 @@ static int set_network(const struct configuration *command_config,
return ret;
}
/**
 * Append the docker "--pid" option to the output buffer when the command file
 * requests it and container-executor.cfg permits it.
 *
 * The "pid" key is read from the docker command file section of
 * command_config. When it is absent nothing is appended and 0 is returned.
 * The only accepted value is "host", and it is honoured only when
 * "docker.host-pid-namespace.enabled" in the docker section of conf is "1"
 * or "true" (compared ignoring case).
 *
 * @param command_config parsed docker command file
 * @param conf parsed container-executor configuration
 * @param out buffer receiving the docker arguments built so far
 * @param outlen size of out in bytes
 * @return 0 on success (including when no pid namespace was requested),
 *         INVALID_PID_NAMESPACE when a value other than "host" is requested,
 *         PID_HOST_DISABLED when "host" is requested but not enabled,
 *         BUFFER_TOO_SMALL when the option cannot be appended to out.
 *         On any non-zero return the whole of out is zeroed.
 */
static int set_pid_namespace(const struct configuration *command_config,
                             const struct configuration *conf, char *out,
                             const size_t outlen) {
  char *value = get_configuration_value("pid", DOCKER_COMMAND_FILE_SECTION,
                                        command_config);
  char *pid_host_enabled = get_configuration_value("docker.host-pid-namespace.enabled",
                                                   CONTAINER_EXECUTOR_CFG_DOCKER_SECTION, conf);
  int ret = 0;

  /* No pid namespace requested: nothing to add. */
  if (value == NULL) {
    goto free_and_exit;
  }

  if (strcmp(value, "host") != 0) {
    fprintf(ERRORFILE, "Invalid pid namespace\n");
    ret = INVALID_PID_NAMESPACE;
    goto free_and_exit;
  }

  /* A missing setting and an explicit non-true setting are both "disabled". */
  if (pid_host_enabled == NULL ||
      (strcmp(pid_host_enabled, "1") != 0 &&
       strcasecmp(pid_host_enabled, "True") != 0)) {
    fprintf(ERRORFILE, "Host pid namespace is disabled\n");
    ret = PID_HOST_DISABLED;
    goto free_and_exit;
  }

  ret = add_to_buffer(out, outlen, "--pid='host' ");
  if (ret != 0) {
    ret = BUFFER_TOO_SMALL;
  }

free_and_exit:
  free(value);
  free(pid_host_enabled);
  if (ret != 0) {
    /* Do not leave a partially built command behind on failure. */
    memset(out, 0, outlen);
  }
  return ret;
}
static int set_capabilities(const struct configuration *command_config,
const struct configuration *conf, char *out,
const size_t outlen) {
@ -1148,6 +1198,11 @@ int get_docker_run_command(const char *command_file, const struct configuration
return ret;
}
ret = set_pid_namespace(&command_config, conf, out, outlen);
if (ret != 0) {
return ret;
}
ret = add_ro_mounts(&command_config, conf, out, outlen);
if (ret != 0) {
return ret;

View File

@ -55,7 +55,9 @@ enum docker_error_codes {
INVALID_DOCKER_KILL_COMMAND,
INVALID_DOCKER_VOLUME_DRIVER,
INVALID_DOCKER_VOLUME_NAME,
INVALID_DOCKER_VOLUME_COMMAND
INVALID_DOCKER_VOLUME_COMMAND,
PID_HOST_DISABLED,
INVALID_PID_NAMESPACE
};
/**

View File

@ -454,6 +454,102 @@ namespace ContainerExecutor {
ASSERT_EQ(0, strlen(buff));
}
// Verifies set_pid_namespace() against every supported spelling of the
// docker.host-pid-namespace.enabled setting, plus the disabled and unset cases.
TEST_F(TestDockerUtil, test_set_pid_namespace) {
  struct configuration container_cfg, cmd_cfg;
  const int buff_len = 1024;
  char buff[buff_len];
  int ret = 0;
  // Entries 0-2 enable the host pid namespace; entries 3-5 disable it
  // (explicitly, or by leaving the setting out).
  std::string container_executor_cfg_contents[] = {"[docker]\n docker.host-pid-namespace.enabled=1",
                                                   "[docker]\n docker.host-pid-namespace.enabled=true",
                                                   "[docker]\n docker.host-pid-namespace.enabled=True",
                                                   "[docker]\n docker.host-pid-namespace.enabled=0",
                                                   "[docker]\n docker.host-pid-namespace.enabled=false",
                                                   "[docker]\n"};
  std::vector<std::pair<std::string, std::string> > file_cmd_vec;
  std::vector<std::pair<std::string, int> > bad_file_cmd_vec;
  std::vector<std::pair<std::string, std::string> >::const_iterator itr;
  std::vector<std::pair<std::string, int> >::const_iterator itr2;

  file_cmd_vec.push_back(std::make_pair<std::string, std::string>(
      "[docker-command-execution]\n docker-command=run\n pid=host", "--pid='host' "));
  file_cmd_vec.push_back(std::make_pair<std::string, std::string>(
      "[docker-command-execution]\n docker-command=run", ""));
  bad_file_cmd_vec.push_back(std::make_pair<std::string, int>(
      "[docker-command-execution]\n docker-command=run\n pid=other",
      static_cast<int>(INVALID_PID_NAMESPACE)));

  // check every enabled spelling; the loop previously ran for i = 1..2 while
  // always writing entry 0, so "true" and "True" were never exercised
  for (int i = 0; i < 3; ++i) {
    write_container_executor_cfg(container_executor_cfg_contents[i]);
    ret = read_config(container_executor_cfg_file.c_str(), &container_cfg);
    if (ret != 0) {
      FAIL();
    }
    for (itr = file_cmd_vec.begin(); itr != file_cmd_vec.end(); ++itr) {
      memset(buff, 0, buff_len);
      write_command_file(itr->first);
      ret = read_config(docker_command_file.c_str(), &cmd_cfg);
      if (ret != 0) {
        FAIL();
      }
      ret = set_pid_namespace(&cmd_cfg, &container_cfg, buff, buff_len);
      ASSERT_EQ(0, ret);
      ASSERT_STREQ(itr->second.c_str(), buff);
    }
    for (itr2 = bad_file_cmd_vec.begin(); itr2 != bad_file_cmd_vec.end(); ++itr2) {
      memset(buff, 0, buff_len);
      write_command_file(itr2->first);
      ret = read_config(docker_command_file.c_str(), &cmd_cfg);
      if (ret != 0) {
        FAIL();
      }
      ret = set_pid_namespace(&cmd_cfg, &container_cfg, buff, buff_len);
      ASSERT_EQ(itr2->second, ret);
      ASSERT_EQ(0, strlen(buff));
    }
  }

  // check default case and when it's turned off
  for (int i = 3; i < 6; ++i) {
    write_container_executor_cfg(container_executor_cfg_contents[i]);
    ret = read_config(container_executor_cfg_file.c_str(), &container_cfg);
    if (ret != 0) {
      FAIL();
    }
    // Without a pid request the call still succeeds and adds nothing.
    file_cmd_vec.clear();
    file_cmd_vec.push_back(std::make_pair<std::string, std::string>(
        "[docker-command-execution]\n docker-command=run", ""));
    for (itr = file_cmd_vec.begin(); itr != file_cmd_vec.end(); ++itr) {
      memset(buff, 0, buff_len);
      write_command_file(itr->first);
      ret = read_config(docker_command_file.c_str(), &cmd_cfg);
      if (ret != 0) {
        FAIL();
      }
      ret = set_pid_namespace(&cmd_cfg, &container_cfg, buff, buff_len);
      ASSERT_EQ(0, ret);
      ASSERT_STREQ(itr->second.c_str(), buff);
    }
    // Both an unknown namespace and a "host" request must be rejected here.
    bad_file_cmd_vec.clear();
    bad_file_cmd_vec.push_back(std::make_pair<std::string, int>(
        "[docker-command-execution]\n docker-command=run\n pid=other",
        static_cast<int>(INVALID_PID_NAMESPACE)));
    bad_file_cmd_vec.push_back(std::make_pair<std::string, int>(
        "[docker-command-execution]\n docker-command=run\n pid=host",
        static_cast<int>(PID_HOST_DISABLED)));
    for (itr2 = bad_file_cmd_vec.begin(); itr2 != bad_file_cmd_vec.end(); ++itr2) {
      memset(buff, 0, buff_len);
      write_command_file(itr2->first);
      ret = read_config(docker_command_file.c_str(), &cmd_cfg);
      if (ret != 0) {
        FAIL();
      }
      ret = set_pid_namespace(&cmd_cfg, &container_cfg, buff, buff_len);
      ASSERT_EQ(itr2->second, ret);
      ASSERT_EQ(0, strlen(buff));
    }
  }
}
TEST_F(TestDockerUtil, test_check_mount_permitted) {
const char *permitted_mounts[] = {"/etc", "/usr/bin/cut", "/tmp/", NULL};
std::vector<std::pair<std::string, int> > test_data;
@ -530,7 +626,7 @@ namespace ContainerExecutor {
// check default case and when it's turned off
for (int i = 3; i < 5; ++i) {
for (int i = 3; i < 6; ++i) {
write_container_executor_cfg(container_executor_cfg_contents[i]);
ret = read_config(container_executor_cfg_file.c_str(), &container_cfg);
if (ret != 0) {

View File

@ -693,6 +693,107 @@ public class TestDockerContainerRuntime {
}
}
/**
 * An unrecognised value for the PID namespace environment variable must be
 * ignored: the container still launches and the generated docker command
 * file carries no pid namespace entry.
 */
@Test
public void testLaunchPidNamespaceContainersInvalidEnvVar()
    throws ContainerExecutionException, PrivilegedOperationException,
    IOException{
  DockerLinuxContainerRuntime runtime = new DockerLinuxContainerRuntime(
      mockExecutor, mockCGroupsHandler);
  runtime.initialize(conf, null);

  env.put(DockerLinuxContainerRuntime
      .ENV_DOCKER_CONTAINER_PID_NAMESPACE, "invalid-value");

  runtime.launchContainer(builder.build());
  PrivilegedOperation op = capturePrivilegedOperationAndVerifyArgs();
  List<String> args = op.getArguments();
  String dockerCommandFile = args.get(11);

  List<String> dockerCommands = Files.readAllLines(Paths.get
      (dockerCommandFile), Charset.forName("UTF-8"));

  int expected = 13;
  Assert.assertEquals(expected, dockerCommands.size());

  // Inspect every line rather than only the section header, and look for the
  // "pid=" key that the command file actually uses as well as "--pid".
  for (String command : dockerCommands) {
    Assert.assertFalse("Unexpected --pid in docker run args : " + command,
        command.contains("--pid"));
    Assert.assertFalse("Unexpected pid entry in docker run args : " + command,
        command.trim().startsWith("pid="));
  }
}
/**
 * Requesting the host PID namespace while the cluster setting is left at its
 * disabled value must make the launch fail with a
 * ContainerExecutionException.
 */
@Test
public void testLaunchPidNamespaceContainersWithDisabledSetting()
    throws ContainerExecutionException {
  DockerLinuxContainerRuntime dockerRuntime =
      new DockerLinuxContainerRuntime(mockExecutor, mockCGroupsHandler);
  dockerRuntime.initialize(conf, null);

  env.put(DockerLinuxContainerRuntime.ENV_DOCKER_CONTAINER_PID_NAMESPACE,
      "host");

  boolean launchRejected = false;
  try {
    dockerRuntime.launchContainer(builder.build());
  } catch (ContainerExecutionException expectedFailure) {
    LOG.info("Caught expected exception : " + expectedFailure);
    launchRejected = true;
  }

  if (!launchRejected) {
    Assert.fail("Expected a pid host disabled container failure.");
  }
}
/**
 * With the host PID namespace allowed on the cluster and requested through
 * the environment, the generated docker command file must contain the pid
 * entry in addition to the usual launch settings.
 */
@Test
public void testLaunchPidNamespaceContainersEnabled()
    throws ContainerExecutionException, PrivilegedOperationException,
    IOException{
  //Enable host pid namespace containers.
  conf.setBoolean(YarnConfiguration.NM_DOCKER_ALLOW_HOST_PID_NAMESPACE,
      true);

  DockerLinuxContainerRuntime dockerRuntime =
      new DockerLinuxContainerRuntime(mockExecutor, mockCGroupsHandler);
  dockerRuntime.initialize(conf, null);

  env.put(DockerLinuxContainerRuntime.ENV_DOCKER_CONTAINER_PID_NAMESPACE,
      "host");

  dockerRuntime.launchContainer(builder.build());
  PrivilegedOperation launchOp = capturePrivilegedOperationAndVerifyArgs();
  String commandFilePath = launchOp.getArguments().get(11);
  List<String> actualLines = Files.readAllLines(
      Paths.get(commandFilePath), Charset.forName("UTF-8"));

  // Expected command file content, in order.
  String[] expectedLines = {
      "[docker-command-execution]",
      " cap-add=SYS_CHROOT,NET_BIND_SERVICE",
      " cap-drop=ALL",
      " detach=true",
      " docker-command=run",
      " hostname=ctr-id",
      " image=busybox:latest",
      " launch-command=bash,/test_container_work_dir/launch_container.sh",
      " name=container_id",
      " net=host",
      " pid=host",
      " rw-mounts=/test_container_local_dir:/test_container_local_dir,"
          + "/test_filecache_dir:/test_filecache_dir,"
          + "/test_container_work_dir:/test_container_work_dir,"
          + "/test_container_log_dir:/test_container_log_dir,"
          + "/test_user_local_dir:/test_user_local_dir",
      " user=run_as_user",
      " workdir=/test_container_work_dir",
  };

  Assert.assertEquals(expectedLines.length, actualLines.size());
  for (int line = 0; line < expectedLines.length; line++) {
    Assert.assertEquals(expectedLines[line], actualLines.get(line));
  }
}
@Test
public void testLaunchPrivilegedContainersInvalidEnvVar()
throws ContainerExecutionException, PrivilegedOperationException,
@ -709,8 +810,8 @@ public class TestDockerContainerRuntime {
List<String> args = op.getArguments();
String dockerCommandFile = args.get(11);
List<String> dockerCommands = Files.readAllLines(Paths.get
(dockerCommandFile), Charset.forName("UTF-8"));
List<String> dockerCommands = Files.readAllLines(
Paths.get(dockerCommandFile), Charset.forName("UTF-8"));
int expected = 13;
Assert.assertEquals(expected, dockerCommands.size());

View File

@ -140,6 +140,14 @@ The following properties should be set in yarn-site.xml:
</description>
</property>
<property>
<name>yarn.nodemanager.runtime.linux.docker.host-pid-namespace.allowed</name>
<value>false</value>
<description>
Optional. Whether containers are allowed to use the host PID namespace.
</description>
</property>
<property>
<name>yarn.nodemanager.runtime.linux.docker.privileged-containers.allowed</name>
<value>false</value>
@ -196,6 +204,7 @@ are allowed. It contains the following properties:
| `docker.allowed.networks` | Comma separated networks that containers are allowed to use. If no network is specified when launching the container, the default Docker network will be used. |
| `docker.allowed.ro-mounts` | Comma separated directories that containers are allowed to mount in read-only mode. By default, no directories are allowed to be mounted. |
| `docker.allowed.rw-mounts` | Comma separated directories that containers are allowed to mount in read-write mode. By default, no directories are allowed to be mounted. |
| `docker.host-pid-namespace.enabled` | Set to "true" or "false" to enable or disable using the host's PID namespace. Default value is "false". |
| `docker.privileged-containers.enabled` | Set to "true" or "false" to enable or disable launching privileged containers. Default value is "false". |
Please note that if you wish to run Docker containers that require access to the YARN local directories, you must add them to the docker.allowed.rw-mounts list.
@ -288,6 +297,7 @@ environment variables in the application's environment:
| `YARN_CONTAINER_RUNTIME_DOCKER_IMAGE` | Names which image will be used to launch the Docker container. Any image name that could be passed to the Docker client's run command may be used. The image name may include a repo prefix. |
| `YARN_CONTAINER_RUNTIME_DOCKER_RUN_OVERRIDE_DISABLE` | Controls whether the Docker container's default command is overridden. When set to true, the Docker container's command will be "bash _path\_to\_launch\_script_". When unset or set to false, the Docker container's default command is used. |
| `YARN_CONTAINER_RUNTIME_DOCKER_CONTAINER_NETWORK` | Sets the network type to be used by the Docker container. It must be a valid value as determined by the yarn.nodemanager.runtime.linux.docker.allowed-container-networks property. |
| `YARN_CONTAINER_RUNTIME_DOCKER_CONTAINER_PID_NAMESPACE` | Controls which PID namespace will be used by the Docker container. By default, each Docker container has its own PID namespace. To share the namespace of the host, the yarn.nodemanager.runtime.linux.docker.host-pid-namespace.allowed property must be set to true. If the host PID namespace is allowed and this environment variable is set to host, the Docker container will share the host's PID namespace. No other value is allowed. |
| `YARN_CONTAINER_RUNTIME_DOCKER_RUN_PRIVILEGED_CONTAINER` | Controls whether the Docker container is a privileged container. In order to use privileged containers, the yarn.nodemanager.runtime.linux.docker.privileged-containers.allowed property must be set to true, and the application owner must appear in the value of the yarn.nodemanager.runtime.linux.docker.privileged-containers.acl property. If this environment variable is set to true, a privileged Docker container will be used if allowed. No other value is allowed, so the environment variable should be left unset rather than setting it to false. |
| `YARN_CONTAINER_RUNTIME_DOCKER_LOCAL_RESOURCE_MOUNTS` | Adds additional volume mounts to the Docker container. The value of the environment variable should be a comma-separated list of mounts. All such mounts must be given as "source:dest", where the source is an absolute path that is not a symlink and that points to a localized resource. Note that as of YARN-5298, localized directories are automatically mounted into the container as volumes. |
| `YARN_CONTAINER_RUNTIME_DOCKER_MOUNTS` | Adds additional volume mounts to the Docker container. The value of the environment variable should be a comma-separated list of mounts. All such mounts must be given as "source:dest:mode" and the mode must be "ro" (read-only) or "rw" (read-write) to specify the type of access being requested. The requested mounts will be validated by container-executor based on the values set in container-executor.cfg for docker.allowed.ro-mounts and docker.allowed.rw-mounts. |