YARN-8648. Container cgroups are leaked when using docker. Contributed by Jim Brennan

(cherry picked from commit 2df0a8dcb3)
This commit is contained in:
Jason Lowe 2018-09-18 15:28:04 -05:00
parent af2390cf49
commit 3d77094cf2
10 changed files with 393 additions and 17 deletions

View File

@ -937,7 +937,8 @@ public class LinuxContainerExecutor extends ContainerExecutor {
DockerCommandExecutor.getContainerStatus(containerId, privOpExecutor,
nmContext))) {
LOG.info("Removing Docker container : " + containerId);
DockerRmCommand dockerRmCommand = new DockerRmCommand(containerId);
DockerRmCommand dockerRmCommand = new DockerRmCommand(containerId,
ResourceHandlerModule.getCgroupsRelativeRoot());
DockerCommandExecutor.executeDockerCommand(dockerRmCommand, containerId,
null, privOpExecutor, false, nmContext);
}

View File

@ -101,6 +101,21 @@ public class ResourceHandlerModule {
return cGroupsHandler;
}
/**
* Returns relative root for cgroups. Returns null if cGroupsHandler is
* not initialized, or if the path is empty.
*/
public static String getCgroupsRelativeRoot() {
if (cGroupsHandler == null) {
return null;
}
String cGroupPath = cGroupsHandler.getRelativePathForCGroup("");
if (cGroupPath == null || cGroupPath.isEmpty()) {
return null;
}
return cGroupPath.replaceAll("/$", "");
}
public static NetworkPacketTaggingHandlerImpl
getNetworkResourceHandler() {
return networkPacketTaggingHandlerImpl;

View File

@ -1347,7 +1347,8 @@ public class DockerLinuxContainerRuntime implements LinuxContainerRuntime {
DockerCommandExecutor.getContainerStatus(containerId,
privilegedOperationExecutor, nmContext);
if (DockerCommandExecutor.isRemovable(containerStatus)) {
DockerRmCommand dockerRmCommand = new DockerRmCommand(containerId);
DockerRmCommand dockerRmCommand = new DockerRmCommand(containerId,
ResourceHandlerModule.getCgroupsRelativeRoot());
DockerCommandExecutor.executeDockerCommand(dockerRmCommand, containerId,
env, privilegedOperationExecutor, false, nmContext);
}

View File

@ -27,10 +27,16 @@ import java.util.Map;
*/
public class DockerRmCommand extends DockerCommand {
private static final String RM_COMMAND = "rm";
private static final String CGROUP_HIERARCHY = "hierarchy";
private String cGroupArg;
public DockerRmCommand(String containerName) {
public DockerRmCommand(String containerName, String hierarchy) {
super(RM_COMMAND);
super.addCommandArguments("name", containerName);
if ((hierarchy != null) && !hierarchy.isEmpty()) {
super.addCommandArguments(CGROUP_HIERARCHY, hierarchy);
this.cGroupArg = hierarchy;
}
}
@Override
@ -39,6 +45,9 @@ public class DockerRmCommand extends DockerCommand {
String> env, Context nmContext) {
PrivilegedOperation dockerOp = new PrivilegedOperation(
PrivilegedOperation.OperationType.REMOVE_DOCKER_CONTAINER);
if (this.cGroupArg != null) {
dockerOp.appendArgs(cGroupArg);
}
dockerOp.appendArgs(containerName);
return dockerOp;
}

View File

@ -46,6 +46,7 @@
#include <sys/mount.h>
#include <sys/wait.h>
#include <getopt.h>
#include <sys/param.h>
#ifndef HAVE_FCHMODAT
#include "compat/fchmodat.h"
@ -1374,17 +1375,16 @@ int run_docker(const char *command_file) {
return exit_code;
}
int exec_docker_command(char *docker_command, char **argv,
int argc, int optind) {
int exec_docker_command(char *docker_command, char **argv, int argc) {
int i;
char* docker_binary = get_docker_binary(&CFG);
size_t command_size = argc - optind + 2;
size_t command_size = argc + 2;
char **args = alloc_and_clear_memory(command_size + 1, sizeof(char));
char **args = alloc_and_clear_memory(command_size + 1, sizeof(char *));
args[0] = docker_binary;
args[1] = docker_command;
for(i = 2; i < command_size; i++) {
args[i] = (char *) argv[i];
args[i] = (char *) argv[i - 2];
}
args[i] = NULL;
@ -2565,4 +2565,147 @@ char* flatten(char **args) {
return buffer;
}
int clean_docker_cgroups_internal(const char *mount_table,
const char *yarn_hierarchy,
const char* container_id) {
#ifndef __linux
fprintf(LOGFILE, "Failed to clean cgroups, not supported\n");
return -1;
#else
const char * cgroup_mount_type = "cgroup";
char *mnt_type = NULL;
char *mnt_dir = NULL;
char *full_path = NULL;
char *lineptr = NULL;
FILE *fp = NULL;
int rc = 0;
size_t buf_size = 0;
if (!mount_table || mount_table[0] == 0) {
fprintf(ERRORFILE, "clean_docker_cgroups: Invalid mount table\n");
rc = -1;
goto cleanup;
}
if (!yarn_hierarchy || yarn_hierarchy[0] == 0) {
fprintf(ERRORFILE, "clean_docker_cgroups: Invalid yarn_hierarchy\n");
rc = -1;
goto cleanup;
}
if (!validate_container_id(container_id)) {
fprintf(ERRORFILE, "clean_docker_cgroups: Invalid container_id: %s\n",
(container_id == NULL) ? "null" : container_id);
rc = -1;
goto cleanup;
}
fp = fopen(mount_table, "r");
if (fp == NULL) {
fprintf(ERRORFILE, "clean_docker_cgroups: failed to open %s, error %d: %s\n",
mount_table, errno, strerror(errno));
rc = -1;
goto cleanup;
}
// Walk /proc/mounts and find cgroup mounts
while (getline(&lineptr, &buf_size, fp) != -1) {
// Free these from the last iteration, if set
free(mnt_type);
free(mnt_dir);
int ret = 0;
ret = sscanf(lineptr, " %ms %ms %*s %*s %*s %*s", &mnt_type, &mnt_dir);
if (ret != 2) {
fprintf(ERRORFILE, "clean_docker_cgroups: Failed to parse line: %s\n", lineptr);
rc = -1;
break;
}
if ((mnt_type == NULL) || (strcmp(mnt_type, cgroup_mount_type) != 0)) {
continue;
}
if ((mnt_dir == NULL) || (mnt_dir[0] == 0)) {
fprintf(ERRORFILE, "clean_docker_cgroups: skipping mount entry with invalid mnt_dir\n");
continue;
}
free(full_path); // from previous iteration
full_path = make_string("%s/%s/%s", mnt_dir, yarn_hierarchy, container_id);
if (full_path == NULL) {
fprintf(ERRORFILE, "clean_docker_cgroups: Failed to allocate cgroup path.\n");
rc = -1;
break;
}
// Make sure path is clean
if (!verify_path_safety(full_path)) {
fprintf(ERRORFILE,
"clean_docker_cgroups: skipping invalid path: %s\n", full_path);
continue;
}
ret = rmdir(full_path);
if ((ret == -1) && (errno != ENOENT)) {
fprintf(ERRORFILE, "clean_docker_cgroups: Failed to rmdir cgroup, %s (error=%s)\n",
full_path, strerror(errno));
rc = -1;
continue;
}
}
if (ferror(fp)) {
fprintf(ERRORFILE, "clean_docker_cgroups: Error reading %s, error=%d (%s) \n",
mount_table, errno, strerror(errno));
rc = -1;
}
cleanup:
free(lineptr);
free(mnt_type);
free(mnt_dir);
free(full_path);
if (fp != NULL) {
fclose(fp);
}
return rc;
#endif
}
int clean_docker_cgroups(const char *yarn_hierarchy, const char* container_id) {
const char *proc_mount_path = "/proc/mounts";
return clean_docker_cgroups_internal(proc_mount_path, yarn_hierarchy, container_id);
}
int remove_docker_container(char**argv, int argc) {
int exit_code = 0;
const char *yarn_hierarchy = NULL;
const char *container_id = NULL;
int start_index = 0;
if (argc == 2) {
yarn_hierarchy = argv[0];
container_id = argv[1];
// Skip the yarn_hierarchy argument for exec_docker_command
start_index = 1;
}
pid_t child_pid = fork();
if (child_pid == -1) {
fprintf (ERRORFILE,
"Failed to fork for docker remove command\n");
fflush(ERRORFILE);
return DOCKER_RUN_FAILED;
}
if (child_pid == 0) { // child
int rc = exec_docker_command("rm", argv + start_index, argc - start_index);
return rc; // Only get here if exec fails
} else { // parent
exit_code = wait_and_get_exit_code(child_pid);
if (exit_code != 0) {
exit_code = DOCKER_RUN_FAILED;
}
}
// Clean up cgroups if necessary
if (yarn_hierarchy != NULL) {
exit_code = clean_docker_cgroups(yarn_hierarchy, container_id);
}
return exit_code;
}

View File

@ -271,8 +271,7 @@ int run_docker(const char *command_file);
/**
* Run a docker command without a command file
*/
int exec_docker_command(char *docker_command, char **argv,
int argc, int optind);
int exec_docker_command(char *docker_command, char **argv, int argc);
/*
* Compile the regex_str and determine if the input string matches.
@ -292,3 +291,8 @@ struct configuration* get_cfg();
* Flatten docker launch command
*/
char* flatten(char **args);
/**
* Remove docker container
*/
int remove_docker_container(char **argv, int argc);

View File

@ -54,12 +54,14 @@ static void display_usage(FILE *stream) {
if(is_docker_support_enabled()) {
fprintf(stream,
" container-executor --run-docker <command-file>\n"
" container-executor --remove-docker-container <container_id>\n"
" container-executor --remove-docker-container [hierarchy] "
"<container_id>\n"
" container-executor --inspect-docker-container <container_id>\n");
} else {
fprintf(stream,
"[DISABLED] container-executor --run-docker <command-file>\n"
"[DISABLED] container-executor --remove-docker-container <container_id>\n"
"[DISABLED] container-executor --remove-docker-container [hierarchy] "
"<container_id>\n"
"[DISABLED] container-executor --inspect-docker-container "
"<format> ... <container_id>\n");
}
@ -351,7 +353,7 @@ static int validate_arguments(int argc, char **argv , int *operation) {
if (strcmp("--remove-docker-container", argv[1]) == 0) {
if(is_docker_support_enabled()) {
if (argc != 3) {
if ((argc != 3) && (argc != 4)) {
display_usage(stdout);
return INVALID_ARGUMENT_NUMBER;
}
@ -594,10 +596,10 @@ int main(int argc, char **argv) {
exit_code = run_docker(cmd_input.docker_command_file);
break;
case REMOVE_DOCKER_CONTAINER:
exit_code = exec_docker_command("rm", argv, argc, optind);
exit_code = remove_docker_container(argv + optind, argc - optind);
break;
case INSPECT_DOCKER_CONTAINER:
exit_code = exec_docker_command("inspect", argv, argc, optind);
exit_code = exec_docker_command("inspect", argv + optind, argc - optind);
break;
case RUN_AS_USER_INITIALIZE_CONTAINER:
exit_code = set_user(cmd_input.run_as_user_name);

View File

@ -23,6 +23,7 @@
#include "test/test-container-executor-common.h"
#include <inttypes.h>
#include <dirent.h>
#include <errno.h>
#include <fcntl.h>
#include <unistd.h>
@ -32,6 +33,8 @@
#include <string.h>
#include <sys/stat.h>
#include <sys/wait.h>
#include <sys/param.h>
static char* username = NULL;
static char* yarn_username = NULL;
@ -1224,6 +1227,148 @@ void test_is_empty() {
}
}
#define TCE_FAKE_CGROOT TEST_ROOT "/cgroup_root"
#define TCE_NUM_CG_CONTROLLERS 6
extern int clean_docker_cgroups_internal(const char *mount_table,
const char *yarn_hierarchy,
const char* container_id);
void test_cleaning_docker_cgroups() {
const char *controllers[TCE_NUM_CG_CONTROLLERS] = { "blkio", "cpu", "cpuset", "devices", "memory", "systemd" };
const char *yarn_hierarchy = "hadoop-yarn";
const char *fake_mount_table = TEST_ROOT "/fake_mounts";
const char *container_id = "container_1410901177871_0001_01_000005";
const char *other_container_id = "container_e17_1410901177871_0001_01_000005";
char cgroup_paths[TCE_NUM_CG_CONTROLLERS][PATH_MAX];
char container_paths[TCE_NUM_CG_CONTROLLERS][PATH_MAX];
char other_container_paths[TCE_NUM_CG_CONTROLLERS][PATH_MAX];
printf("\nTesting clean_docker_cgroups\n");
// Setup fake mount table
FILE *file;
file = fopen(fake_mount_table, "w");
if (file == NULL) {
printf("Failed to open %s.\n", fake_mount_table);
exit(1);
}
fprintf(file, "rootfs " TEST_ROOT "/fake_root rootfs rw 0 0\n");
fprintf(file, "sysfs " TEST_ROOT "/fake_sys sysfs rw,nosuid,nodev,noexec,relatime 0 0\n");
fprintf(file, "proc " TEST_ROOT "/fake_proc proc rw,nosuid,nodev,noexec,relatime 0 0\n");
for (int i = 0; i < TCE_NUM_CG_CONTROLLERS; i++) {
fprintf(file, "cgroup %s/%s cgroup rw,nosuid,nodev,noexec,relatime,%s 0 0\n",
TCE_FAKE_CGROOT, controllers[i], controllers[i]);
}
fprintf(file, "/dev/vda " TEST_ROOT "/fake_root ext4 rw,relatime,data=ordered 0 0\n");
fclose(file);
// Test with null inputs
int ret = clean_docker_cgroups_internal(NULL, yarn_hierarchy, container_id);
if (ret != -1) {
printf("FAIL: clean_docker_cgroups_internal with NULL mount table should fail\n");
exit(1);
}
ret = clean_docker_cgroups_internal(fake_mount_table, NULL, container_id);
if (ret != -1) {
printf("FAIL: clean_docker_cgroups_internal with NULL yarn_hierarchy should fail\n");
exit(1);
}
ret = clean_docker_cgroups_internal(fake_mount_table, yarn_hierarchy, NULL);
if (ret != -1) {
printf("FAIL: clean_docker_cgroups_internal with NULL container_id should fail\n");
exit(1);
}
// Test with invalid container_id
ret = clean_docker_cgroups_internal(fake_mount_table, yarn_hierarchy, "not_a_container_123");
if (ret != -1) {
printf("FAIL: clean_docker_cgroups_internal with invalid container_id should fail\n");
exit(1);
}
if (mkdir(TCE_FAKE_CGROOT, 0755) != 0) {
printf("FAIL: failed to mkdir " TCE_FAKE_CGROOT "\n");
exit(1);
}
for (int i = 0; i < TCE_NUM_CG_CONTROLLERS; i++) {
snprintf(cgroup_paths[i], PATH_MAX, TCE_FAKE_CGROOT "/%s/%s", controllers[i], yarn_hierarchy);
if (mkdirs(cgroup_paths[i], 0755) != 0) {
printf("FAIL: failed to mkdir %s\n", cgroup_paths[i]);
exit(1);
}
}
for (int i = 0; i < TCE_NUM_CG_CONTROLLERS; i++) {
DIR *dir = NULL;
dir = opendir(cgroup_paths[i]);
if (dir == NULL) {
printf("FAIL: failed to open dir %s\n", cgroup_paths[i]);
exit(1);
}
closedir(dir);
}
// Test before creating any containers
ret = clean_docker_cgroups_internal(fake_mount_table, yarn_hierarchy, container_id);
if (ret != 0) {
printf("FAIL: failed to clean cgroups: mt=%s, yh=%s, cId=%s\n",
fake_mount_table, yarn_hierarchy, container_id);
}
// make sure hadoop-yarn dirs are still there
for (int i = 0; i < TCE_NUM_CG_CONTROLLERS; i++) {
DIR *dir = NULL;
dir = opendir(cgroup_paths[i]);
if (dir == NULL) {
printf("FAIL: failed to open dir %s\n", cgroup_paths[i]);
exit(1);
}
closedir(dir);
}
// Create container dirs
for (int i = 0; i < TCE_NUM_CG_CONTROLLERS; i++) {
snprintf(container_paths[i], PATH_MAX, TCE_FAKE_CGROOT "/%s/%s/%s",
controllers[i], yarn_hierarchy, container_id);
if (mkdirs(container_paths[i], 0755) != 0) {
printf("FAIL: failed to mkdir %s\n", container_paths[i]);
exit(1);
}
snprintf(other_container_paths[i], PATH_MAX, TCE_FAKE_CGROOT "/%s/%s/%s",
controllers[i], yarn_hierarchy, other_container_id);
if (mkdirs(other_container_paths[i], 0755) != 0) {
printf("FAIL: failed to mkdir %s\n", other_container_paths[i]);
exit(1);
}
}
ret = clean_docker_cgroups_internal(fake_mount_table, yarn_hierarchy, container_id);
// make sure hadoop-yarn dirs are still there
for (int i = 0; i < TCE_NUM_CG_CONTROLLERS; i++) {
DIR *dir = NULL;
dir = opendir(cgroup_paths[i]);
if (dir == NULL) {
printf("FAIL: failed to open dir %s\n", cgroup_paths[i]);
exit(1);
}
closedir(dir);
}
// make sure container dirs deleted
for (int i = 0; i < TCE_NUM_CG_CONTROLLERS; i++) {
DIR *dir = NULL;
dir = opendir(container_paths[i]);
if (dir != NULL) {
printf("FAIL: container cgroup %s not deleted\n", container_paths[i]);
exit(1);
}
closedir(dir);
}
// make sure other container dirs are still there
for (int i = 0; i < TCE_NUM_CG_CONTROLLERS; i++) {
DIR *dir = NULL;
dir = opendir(other_container_paths[i]);
if (dir == NULL) {
printf("FAIL: container cgroup %s should not be deleted\n", other_container_paths[i]);
exit(1);
}
closedir(dir);
}
}
// This test is expected to be executed either by a regular
// user or by root. If executed by a regular user it doesn't
// test all the functions that would depend on changing the
@ -1328,6 +1473,8 @@ int main(int argc, char **argv) {
test_check_user(0);
test_cleaning_docker_cgroups();
#ifdef __APPLE__
printf("OS X: disabling CrashReporter\n");
/*

View File

@ -66,6 +66,7 @@ public class TestDockerCommandExecutor {
"container_e11_1861047502093_13763105_01_000001";
private static final String MOCK_LOCAL_IMAGE_NAME = "local_image_name";
private static final String MOCK_IMAGE_NAME = "image_name";
private static final String MOCK_CGROUP_HIERARCHY = "hadoop-yarn";
private PrivilegedOperationExecutor mockExecutor;
private CGroupsHandler mockCGroupsHandler;
@ -148,7 +149,8 @@ public class TestDockerCommandExecutor {
@Test
public void testExecuteDockerRm() throws Exception {
DockerRmCommand dockerCommand = new DockerRmCommand(MOCK_CONTAINER_ID);
DockerRmCommand dockerCommand =
new DockerRmCommand(MOCK_CONTAINER_ID, null);
DockerCommandExecutor.executeDockerCommand(dockerCommand, MOCK_CONTAINER_ID,
env, mockExecutor, false, nmContext);
List<PrivilegedOperation> ops = MockPrivilegedOperationCaptor
@ -163,6 +165,25 @@ public class TestDockerCommandExecutor {
assertEquals(MOCK_CONTAINER_ID, args.get(0));
}
@Test
public void testExecuteDockerRmWithCgroup() throws Exception {
DockerRmCommand dockerCommand =
new DockerRmCommand(MOCK_CONTAINER_ID, MOCK_CGROUP_HIERARCHY);
DockerCommandExecutor.executeDockerCommand(dockerCommand, MOCK_CONTAINER_ID,
env, mockExecutor, false, nmContext);
List<PrivilegedOperation> ops = MockPrivilegedOperationCaptor
.capturePrivilegedOperations(mockExecutor, 1, true);
PrivilegedOperation privOp = ops.get(0);
List<String> args = privOp.getArguments();
assertEquals(1, ops.size());
assertEquals(PrivilegedOperation.OperationType.
REMOVE_DOCKER_CONTAINER.name(),
privOp.getOperationType().name());
assertEquals(2, args.size());
assertEquals(MOCK_CGROUP_HIERARCHY, args.get(0));
assertEquals(MOCK_CONTAINER_ID, args.get(1));
}
@Test
public void testExecuteDockerStop() throws Exception {
DockerStopCommand dockerCommand = new DockerStopCommand(MOCK_CONTAINER_ID);

View File

@ -29,12 +29,19 @@ import org.junit.Test;
public class TestDockerRmCommand {
private DockerRmCommand dockerRmCommand;
private DockerRmCommand dockerRmCommandWithCgroupArg;
private DockerRmCommand dockerRmCommandWithEmptyCgroupArg;
private static final String CONTAINER_NAME = "foo";
private static final String CGROUP_HIERARCHY_NAME = "hadoop-yarn";
@Before
public void setUp() {
dockerRmCommand = new DockerRmCommand(CONTAINER_NAME);
dockerRmCommand = new DockerRmCommand(CONTAINER_NAME, null);
dockerRmCommandWithCgroupArg =
new DockerRmCommand(CONTAINER_NAME, CGROUP_HIERARCHY_NAME);
dockerRmCommandWithEmptyCgroupArg =
new DockerRmCommand(CONTAINER_NAME, "");
}
@Test
@ -51,4 +58,30 @@ public class TestDockerRmCommand {
assertEquals(2, dockerRmCommand.getDockerCommandWithArguments().size());
}
@Test
public void testGetCommandWithCgroup() {
assertEquals("rm", StringUtils.join(",",
dockerRmCommandWithCgroupArg.getDockerCommandWithArguments()
.get("docker-command")));
assertEquals("foo", StringUtils.join(",",
dockerRmCommandWithCgroupArg.getDockerCommandWithArguments()
.get("name")));
assertEquals(CGROUP_HIERARCHY_NAME, StringUtils.join(",",
dockerRmCommandWithCgroupArg.getDockerCommandWithArguments()
.get("hierarchy")));
assertEquals(3,
dockerRmCommandWithCgroupArg.getDockerCommandWithArguments().size());
}
@Test
public void testGetCommandWithEmptyCgroup() {
assertEquals("rm", StringUtils.join(",",
dockerRmCommandWithEmptyCgroupArg
.getDockerCommandWithArguments().get("docker-command")));
assertEquals("foo", StringUtils.join(",",
dockerRmCommandWithEmptyCgroupArg
.getDockerCommandWithArguments().get("name")));
assertEquals(2, dockerRmCommandWithEmptyCgroupArg.
getDockerCommandWithArguments().size());
}
}