YARN-8822. Nvidia-docker v2 support for YARN GPU feature. (Charo Zhang via wangda)

Change-Id: Id8af27134d3286a7a10d85eda9be25df9689d0e7
This commit is contained in:
Wangda Tan 2019-01-07 12:07:26 -08:00
parent 06279ecc55
commit 0a01d49917
12 changed files with 351 additions and 3 deletions

View File

@ -16,6 +16,7 @@ feature.tc.enabled=false
# docker.privileged-containers.enabled=false # docker.privileged-containers.enabled=false
# docker.allowed.volume-drivers=## comma seperated list of allowed volume-drivers # docker.allowed.volume-drivers=## comma seperated list of allowed volume-drivers
# docker.no-new-privileges.enabled=## enable/disable the no-new-privileges flag for docker run. Set to "true" to enable, disabled by default # docker.no-new-privileges.enabled=## enable/disable the no-new-privileges flag for docker run. Set to "true" to enable, disabled by default
# docker.allowed.runtimes=## comma separated runtimes that can be used.
# The configs below deal with settings for FPGA resource # The configs below deal with settings for FPGA resource
#[fpga] #[fpga]

View File

@ -1664,6 +1664,9 @@ public class YarnConfiguration extends Configuration {
@Private @Private
public static final String NVIDIA_DOCKER_V1 = "nvidia-docker-v1"; public static final String NVIDIA_DOCKER_V1 = "nvidia-docker-v1";
@Private
public static final String NVIDIA_DOCKER_V2 = "nvidia-docker-v2";
@Private @Private
public static final String DEFAULT_NM_GPU_DOCKER_PLUGIN_IMPL = public static final String DEFAULT_NM_GPU_DOCKER_PLUGIN_IMPL =
NVIDIA_DOCKER_V1; NVIDIA_DOCKER_V1;

View File

@ -165,6 +165,11 @@ public class DockerRunCommand extends DockerCommand {
return this; return this;
} }
/**
 * Request a specific docker runtime for this run command by recording it
 * under the "runtime" command argument.
 *
 * @param runtime name of the docker runtime to request
 * @return this command, so that calls can be chained
 */
public DockerRunCommand addRuntime(String runtime) {
super.addCommandArguments("runtime", runtime);
return this;
}
public DockerRunCommand groupAdd(String[] groups) { public DockerRunCommand groupAdd(String[] groups) {
super.addCommandArguments("group-add", String.join(",", groups)); super.addCommandArguments("group-add", String.join(",", groups));
return this; return this;

View File

@ -34,6 +34,10 @@ public class GpuDockerCommandPluginFactory {
if (impl.equals(YarnConfiguration.NVIDIA_DOCKER_V1)) { if (impl.equals(YarnConfiguration.NVIDIA_DOCKER_V1)) {
return new NvidiaDockerV1CommandPlugin(conf); return new NvidiaDockerV1CommandPlugin(conf);
} }
// nvidia-docker2
if (impl.equals(YarnConfiguration.NVIDIA_DOCKER_V2)) {
return new NvidiaDockerV2CommandPlugin();
}
throw new YarnException( throw new YarnException(
"Unkown implementation name for Gpu docker plugin, impl=" + impl); "Unkown implementation name for Gpu docker plugin, impl=" + impl);

View File

@ -0,0 +1,111 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
* <p>
* http://www.apache.org/licenses/LICENSE-2.0
* <p>
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.gpu;
import com.google.common.annotations.VisibleForTesting;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.yarn.api.records.ResourceInformation;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ResourceMappings;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.gpu.GpuResourceAllocator;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime.docker.DockerRunCommand;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime.docker.DockerVolumeCommand;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.DockerCommandPlugin;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.runtime.ContainerExecutionException;
import java.io.Serializable;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
/**
 * GPU docker command plugin backed by nvidia-docker v2.
 *
 * <p>For a container that requests GPUs and has GPU devices assigned, the
 * plugin selects the {@code nvidia} docker runtime and passes the indexes of
 * the assigned devices through the {@code NVIDIA_VISIBLE_DEVICES} environment
 * variable. No docker volumes are created or cleaned up.
 */
public class NvidiaDockerV2CommandPlugin implements DockerCommandPlugin {
  static final Log LOG = LogFactory.getLog(NvidiaDockerV2CommandPlugin.class);

  /** Docker runtime name added to the run command. */
  private static final String NVIDIA_RUNTIME = "nvidia";

  /** Environment variable carrying the comma separated GPU indexes. */
  private static final String NVIDIA_VISIBLE_DEVICES_ENV =
      "NVIDIA_VISIBLE_DEVICES";

  public NvidiaDockerV2CommandPlugin() {}

  /**
   * Collect the GPU devices recorded in the container's resource mappings.
   *
   * @param container container whose resource mappings are inspected
   * @return a copy of the assigned GPU devices, or an empty set when the
   *         container has no resource mappings or no GPU assigned; never
   *         {@code null}
   */
  private Set<GpuDevice> getAssignedGpus(Container container) {
    ResourceMappings mappings = container.getResourceMappings();
    if (mappings == null) {
      return Collections.emptySet();
    }

    // Work on a copy so the container's own bookkeeping is left untouched.
    Set<GpuDevice> gpus = new HashSet<>();
    for (Serializable assigned : mappings.getAssignedResources(
        ResourceInformation.GPU_URI)) {
      gpus.add((GpuDevice) assigned);
    }
    return gpus.isEmpty() ? Collections.<GpuDevice>emptySet() : gpus;
  }

  /**
   * Tell whether the container's resource request includes any GPU.
   *
   * @param container container to check
   * @return {@code true} when more than zero GPUs are requested
   */
  @VisibleForTesting
  protected boolean requestsGpu(Container container) {
    return GpuResourceAllocator.getRequestedGpus(container.getResource()) > 0;
  }

  /**
   * Add the nvidia runtime and the visible-devices environment variable to
   * the docker run command. The command is left untouched when the container
   * does not request GPUs or has none assigned.
   *
   * @param dockerRunCommand command to update
   * @param container container being launched
   * @throws ContainerExecutionException declared by the plugin interface
   */
  @Override
  public synchronized void updateDockerRunCommand(
      DockerRunCommand dockerRunCommand, Container container)
      throws ContainerExecutionException {
    if (!requestsGpu(container)) {
      return;
    }

    Set<GpuDevice> gpus = getAssignedGpus(container);
    if (gpus.isEmpty()) {
      // When no GPU resource assigned, don't need to update docker command.
      return;
    }

    // Build "i,j,k" by emitting the separator before every index but the
    // first, so no trailing comma has to be trimmed afterwards.
    StringBuilder indexes = new StringBuilder();
    for (GpuDevice gpu : gpus) {
      if (indexes.length() > 0) {
        indexes.append(',');
      }
      indexes.append(gpu.getIndex());
      LOG.info("nvidia docker2 assigned gpu index: " + gpu.getIndex());
    }

    dockerRunCommand.addRuntime(NVIDIA_RUNTIME);

    Map<String, String> env = new HashMap<>();
    env.put(NVIDIA_VISIBLE_DEVICES_ENV, indexes.toString());
    dockerRunCommand.addEnv(env);
  }

  /**
   * nvidia-docker v2 needs no docker volume.
   *
   * @param container container being launched
   * @return always {@code null}
   */
  @Override
  public DockerVolumeCommand getCreateDockerVolumeCommand(Container container)
      throws ContainerExecutionException {
    return null;
  }

  /**
   * No volume is created, so there is nothing to clean up.
   *
   * @param container container being cleaned up
   * @return always {@code null}
   */
  @Override
  public DockerVolumeCommand getCleanupDockerVolumesCommand(Container container)
      throws ContainerExecutionException {
    return null;
  }
}

View File

@ -351,6 +351,8 @@ const char *get_docker_error_message(const int error_code) {
return "Docker image is not trusted"; return "Docker image is not trusted";
case INVALID_DOCKER_TMPFS_MOUNT: case INVALID_DOCKER_TMPFS_MOUNT:
return "Invalid docker tmpfs mount"; return "Invalid docker tmpfs mount";
case INVALID_DOCKER_RUNTIME:
return "Invalid docker runtime";
default: default:
return "Unknown error"; return "Unknown error";
} }
@ -947,6 +949,19 @@ static int set_network(const struct configuration *command_config,
return ret; return ret;
} }
/**
 * Append "--runtime=<value>" to args when the command file carries a
 * "runtime" key whose value is listed in docker.allowed.runtimes.
 *
 * Returns 0 on success, or INVALID_DOCKER_RUNTIME when the helper reports
 * a failure.
 */
static int set_runtime(const struct configuration *command_config,
                       const struct configuration *conf, args *args) {
  if (add_param_to_command_if_allowed(command_config, conf, "runtime",
                                      "docker.allowed.runtimes", "--runtime=",
                                      0, 0, args) != 0) {
    fprintf(ERRORFILE, "Could not find requested runtime in allowed runtimes\n");
    return INVALID_DOCKER_RUNTIME;
  }
  return 0;
}
static int add_ports_mapping_to_command(const struct configuration *command_config, args *args) { static int add_ports_mapping_to_command(const struct configuration *command_config, args *args) {
int i = 0, ret = 0; int i = 0, ret = 0;
char *network_type = (char*) malloc(128); char *network_type = (char*) malloc(128);
@ -1654,6 +1669,11 @@ int get_docker_run_command(const char *command_file, const struct configuration
goto free_and_exit; goto free_and_exit;
} }
ret = set_runtime(&command_config, conf, args);
if (ret != 0) {
goto free_and_exit;
}
ret = set_hostname(&command_config, args); ret = set_hostname(&command_config, args);
if (ret != 0) { if (ret != 0) {
goto free_and_exit; goto free_and_exit;

View File

@ -69,7 +69,8 @@ enum docker_error_codes {
PID_HOST_DISABLED, PID_HOST_DISABLED,
INVALID_PID_NAMESPACE, INVALID_PID_NAMESPACE,
INVALID_DOCKER_IMAGE_TRUST, INVALID_DOCKER_IMAGE_TRUST,
INVALID_DOCKER_TMPFS_MOUNT INVALID_DOCKER_TMPFS_MOUNT,
INVALID_DOCKER_RUNTIME
}; };
/** /**

View File

@ -444,6 +444,68 @@ namespace ContainerExecutor {
run_docker_run_helper_function(file_cmd_vec, set_hostname); run_docker_run_helper_function(file_cmd_vec, set_hostname);
} }
// Verifies set_runtime(): a requested runtime is turned into a "--runtime="
// argument only when it is listed in docker.allowed.runtimes, and is rejected
// with INVALID_DOCKER_RUNTIME otherwise.
TEST_F(TestDockerUtil, test_set_runtime) {
struct configuration container_cfg;
struct args buff = ARGS_INITIAL_VALUE;
int ret = 0;
// container-executor.cfg that allows the "lxc" and "nvidia" runtimes.
std::string container_executor_cfg_contents = "[docker]\n"
" docker.trusted.registries=hadoop\n"
" docker.allowed.runtimes=lxc,nvidia";
// Each pair is (command file contents, expected flattened arguments).
std::vector<std::pair<std::string, std::string> > file_cmd_vec;
file_cmd_vec.push_back(std::make_pair<std::string, std::string>(
"[docker-command-execution]\n docker-command=run\n image=hadoop/image\n runtime=lxc", "--runtime=lxc"));
file_cmd_vec.push_back(std::make_pair<std::string, std::string>(
"[docker-command-execution]\n docker-command=run\n image=hadoop/image\n runtime=nvidia", "--runtime=nvidia"));
// No runtime key in the command file: success with no argument added.
file_cmd_vec.push_back(std::make_pair<std::string, std::string>(
"[docker-command-execution]\n docker-command=run", ""));
write_container_executor_cfg(container_executor_cfg_contents);
ret = read_config(container_executor_cfg_file.c_str(), &container_cfg);
std::vector<std::pair<std::string, std::string> >::const_iterator itr;
if (ret != 0) {
FAIL();
}
// Positive cases: every entry must succeed and produce the expected args.
for (itr = file_cmd_vec.begin(); itr != file_cmd_vec.end(); ++itr) {
struct configuration cmd_cfg;
write_command_file(itr->first);
ret = read_config(docker_command_file.c_str(), &cmd_cfg);
if (ret != 0) {
FAIL();
}
ret = set_runtime(&cmd_cfg, &container_cfg, &buff);
char *actual = flatten(&buff);
ASSERT_EQ(0, ret) << "error message: " << get_docker_error_message(ret) << " for input " << itr->first;
ASSERT_STREQ(itr->second.c_str(), actual);
// buff is shared across iterations, so clear it before the next case.
reset_args(&buff);
free(actual);
free_configuration(&cmd_cfg);
}
// Negative case 1: "nvidia1" is not in the allowed list (lxc,nvidia).
struct configuration cmd_cfg_1;
write_command_file("[docker-command-execution]\n docker-command=run\n runtime=nvidia1");
ret = read_config(docker_command_file.c_str(), &cmd_cfg_1);
if (ret != 0) {
FAIL();
}
ret = set_runtime(&cmd_cfg_1, &container_cfg, &buff);
ASSERT_EQ(INVALID_DOCKER_RUNTIME, ret);
ASSERT_EQ(0, buff.length);
reset_args(&buff);
free_configuration(&container_cfg);
// Negative case 2: the [docker] section has no docker.allowed.runtimes
// entry at all; the same command file (cmd_cfg_1) is reused and must still
// be rejected.
container_executor_cfg_contents = "[docker]\n";
write_container_executor_cfg(container_executor_cfg_contents);
ret = read_config(container_executor_cfg_file.c_str(), &container_cfg);
if (ret != 0) {
FAIL();
}
ret = set_runtime(&cmd_cfg_1, &container_cfg, &buff);
ASSERT_EQ(INVALID_DOCKER_RUNTIME, ret);
ASSERT_EQ(0, buff.length);
reset_args(&buff);
free_configuration(&cmd_cfg_1);
free_configuration(&container_cfg);
}
TEST_F(TestDockerUtil, test_set_group_add) { TEST_F(TestDockerUtil, test_set_group_add) {
std::vector<std::pair<std::string, std::string> > file_cmd_vec; std::vector<std::pair<std::string, std::string> > file_cmd_vec;
file_cmd_vec.push_back(std::make_pair<std::string, std::string>( file_cmd_vec.push_back(std::make_pair<std::string, std::string>(

View File

@ -63,6 +63,7 @@ public class TestDockerRunCommand {
for (String mapping:portsMapping.split(",")) { for (String mapping:portsMapping.split(",")) {
dockerRunCommand.addPortsMapping(mapping); dockerRunCommand.addPortsMapping(mapping);
} }
dockerRunCommand.addRuntime("nvidia");
assertEquals("run", StringUtils.join(",", assertEquals("run", StringUtils.join(",",
dockerRunCommand.getDockerCommandWithArguments() dockerRunCommand.getDockerCommandWithArguments()
@ -86,7 +87,9 @@ public class TestDockerRunCommand {
assertEquals("127.0.0.1:8080:80,1234:1234,:2222", StringUtils.join(",", assertEquals("127.0.0.1:8080:80,1234:1234,:2222", StringUtils.join(",",
dockerRunCommand.getDockerCommandWithArguments() dockerRunCommand.getDockerCommandWithArguments()
.get("ports-mapping"))); .get("ports-mapping")));
assertEquals(9, dockerRunCommand.getDockerCommandWithArguments().size()); assertEquals("nvidia", StringUtils.join(",",
dockerRunCommand.getDockerCommandWithArguments().get("runtime")));
assertEquals(10, dockerRunCommand.getDockerCommandWithArguments().size());
} }
@Test @Test

View File

@ -0,0 +1,130 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
* <p>
* http://www.apache.org/licenses/LICENSE-2.0
* <p>
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.gpu;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Sets;
import org.apache.hadoop.yarn.api.records.ResourceInformation;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ResourceMappings;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime.docker.DockerRunCommand;
import org.junit.Assert;
import org.junit.Test;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;
/**
 * Tests for {@link NvidiaDockerV2CommandPlugin}.
 */
public class TestNvidiaDockerV2CommandPlugin {

  /**
   * Take a snapshot of a docker command line so that later mutations of the
   * live map or of its value lists do not affect the snapshot.
   *
   * @param map command line to copy
   * @return a new map holding a fresh list copy for every key
   */
  private Map<String, List<String>> copyCommandLine(
      Map<String, List<String>> map) {
    Map<String, List<String>> ret = new HashMap<>();
    for (Map.Entry<String, List<String>> entry : map.entrySet()) {
      ret.put(entry.getKey(), new ArrayList<>(entry.getValue()));
    }
    return ret;
  }

  /**
   * Compare two docker command lines.
   *
   * @param cli1 first command line
   * @param cli2 second command line
   * @return {@code true} when both have the same keys and, for every key,
   *         equal argument lists
   */
  private boolean commandlinesEquals(Map<String, List<String>> cli1,
      Map<String, List<String>> cli2) {
    if (!Sets.symmetricDifference(cli1.keySet(), cli2.keySet()).isEmpty()) {
      return false;
    }

    for (String key : cli1.keySet()) {
      List<String> value1 = cli1.get(key);
      List<String> value2 = cli2.get(key);
      if (!value1.equals(value2)) {
        return false;
      }
    }

    return true;
  }

  /**
   * Plugin variant whose "does this container request GPUs" answer is set
   * directly by the test instead of being derived from the container.
   */
  static class MyNvidiaDockerV2CommandPlugin
      extends NvidiaDockerV2CommandPlugin {
    private boolean requestsGpu = false;

    MyNvidiaDockerV2CommandPlugin() {}

    public void setRequestsGpu(boolean r) {
      requestsGpu = r;
    }

    @Override
    protected boolean requestsGpu(Container container) {
      return requestsGpu;
    }
  }

  @Test
  public void testPlugin() throws Exception {
    DockerRunCommand runCommand = new DockerRunCommand("container_1", "user",
        "fakeimage");

    Map<String, List<String>> originalCommandline = copyCommandLine(
        runCommand.getDockerCommandWithArguments());

    MyNvidiaDockerV2CommandPlugin
        commandPlugin = new MyNvidiaDockerV2CommandPlugin();

    Container nmContainer = mock(Container.class);

    // requestsGpu is still false and the mocked container has no resource
    // mappings, so the commandline won't be updated.
    commandPlugin.updateDockerRunCommand(runCommand, nmContainer);
    Assert.assertTrue(commandlinesEquals(originalCommandline,
        runCommand.getDockerCommandWithArguments()));

    // Resource mappings exist but hold no GPU, and requestsGpu is still
    // false, so the commandline won't be updated.
    ResourceMappings resourceMappings = new ResourceMappings();
    when(nmContainer.getResourceMappings()).thenReturn(resourceMappings);
    commandPlugin.updateDockerRunCommand(runCommand, nmContainer);
    Assert.assertTrue(commandlinesEquals(originalCommandline,
        runCommand.getDockerCommandWithArguments()));

    // Assign GPU resource and make the plugin see a GPU request.
    ResourceMappings.AssignedResources assigned =
        new ResourceMappings.AssignedResources();
    assigned.updateAssignedResources(
        ImmutableList.of(new GpuDevice(0, 0), new GpuDevice(1, 1)));
    resourceMappings.addAssignedResources(ResourceInformation.GPU_URI,
        assigned);

    commandPlugin.setRequestsGpu(true);
    commandPlugin.updateDockerRunCommand(runCommand, nmContainer);
    Map<String, List<String>> newCommandLine =
        runCommand.getDockerCommandWithArguments();

    // Command line will be updated
    Assert.assertFalse(commandlinesEquals(originalCommandline, newCommandLine));

    // NVIDIA_VISIBLE_DEVICES will be set; assertEquals reports the actual
    // value when the expectation is not met.
    Assert.assertEquals("0,1",
        runCommand.getEnv().get("NVIDIA_VISIBLE_DEVICES"));

    // runtime should exist and be the nvidia runtime
    Assert.assertTrue(newCommandLine.containsKey("runtime"));
    Assert.assertEquals(ImmutableList.of("nvidia"),
        newCommandLine.get("runtime"));
  }
}

View File

@ -274,6 +274,7 @@ are allowed. It contains the following properties:
| `docker.trusted.registries` | Comma separated list of trusted docker registries for running trusted privileged docker containers. By default, no registries are defined. | | `docker.trusted.registries` | Comma separated list of trusted docker registries for running trusted privileged docker containers. By default, no registries are defined. |
| `docker.inspect.max.retries` | Integer value to check docker container readiness. Each inspection is set with 3 seconds delay. Default value of 10 will wait 30 seconds for docker container to become ready before marked as container failed. | | `docker.inspect.max.retries` | Integer value to check docker container readiness. Each inspection is set with 3 seconds delay. Default value of 10 will wait 30 seconds for docker container to become ready before marked as container failed. |
| `docker.no-new-privileges.enabled` | Enable/disable the no-new-privileges flag for docker run. Set to "true" to enable, disabled by default. | | `docker.no-new-privileges.enabled` | Enable/disable the no-new-privileges flag for docker run. Set to "true" to enable, disabled by default. |
| `docker.allowed.runtimes` | Comma separated list of runtimes that containers are allowed to use. By default, no runtimes are allowed. |
Please note that if you wish to run Docker containers that require access to the YARN local directories, you must add them to the docker.allowed.rw-mounts list. Please note that if you wish to run Docker containers that require access to the YARN local directories, you must add them to the docker.allowed.rw-mounts list.

View File

@ -107,7 +107,7 @@ Following configs can be customized when user needs to run GPU applications insi
| --- | --- | | --- | --- |
| yarn.nodemanager.resource-plugins.gpu.docker-plugin | nvidia-docker-v1 | | yarn.nodemanager.resource-plugins.gpu.docker-plugin | nvidia-docker-v1 |
Specify docker command plugin for GPU. By default uses Nvidia docker V1.0. Specify docker command plugin for GPU. By default uses Nvidia docker V1.0, `nvidia-docker-v2` is available for V2.x.
| Property | Default value | | Property | Default value |
| --- | --- | | --- | --- |
@ -169,6 +169,13 @@ docker.allowed.volume-drivers
... ...
docker.allowed.ro-mounts=nvidia_driver_375.66 docker.allowed.ro-mounts=nvidia_driver_375.66
``` ```
**4) If using `nvidia-docker-v2` as the GPU docker plugin, add `nvidia` to the allowed runtimes whitelist.**
```
[docker]
...
docker.allowed.runtimes=nvidia
```
# Use it # Use it