YARN-9187. Backport YARN-6852 for GPU-specific native changes to branch-2
This commit is contained in:
parent
078dfb09fe
commit
56259bcecb
|
@ -101,9 +101,11 @@ add_library(container
|
||||||
main/native/container-executor/impl/container-executor.c
|
main/native/container-executor/impl/container-executor.c
|
||||||
main/native/container-executor/impl/get_executable.c
|
main/native/container-executor/impl/get_executable.c
|
||||||
main/native/container-executor/impl/utils/string-utils.c
|
main/native/container-executor/impl/utils/string-utils.c
|
||||||
main/native/container-executor/impl/utils/path-utils.c
|
|
||||||
main/native/container-executor/impl/modules/common/module-configs.c
|
|
||||||
main/native/container-executor/impl/utils/docker-util.c
|
main/native/container-executor/impl/utils/docker-util.c
|
||||||
|
main/native/container-executor/impl/utils/path-utils.c
|
||||||
|
main/native/container-executor/impl/modules/cgroups/cgroups-operations.c
|
||||||
|
main/native/container-executor/impl/modules/common/module-configs.c
|
||||||
|
main/native/container-executor/impl/modules/gpu/gpu-module.c
|
||||||
)
|
)
|
||||||
|
|
||||||
add_executable(container-executor
|
add_executable(container-executor
|
||||||
|
@ -135,6 +137,8 @@ add_executable(cetest
|
||||||
main/native/container-executor/test/utils/test-string-utils.cc
|
main/native/container-executor/test/utils/test-string-utils.cc
|
||||||
main/native/container-executor/test/utils/test-path-utils.cc
|
main/native/container-executor/test/utils/test-path-utils.cc
|
||||||
main/native/container-executor/test/test_util.cc
|
main/native/container-executor/test/test_util.cc
|
||||||
main/native/container-executor/test/utils/test_docker_util.cc)
|
main/native/container-executor/test/utils/test_docker_util.cc
|
||||||
|
main/native/container-executor/test/modules/cgroups/test-cgroups-module.cc
|
||||||
|
main/native/container-executor/test/modules/gpu/test-gpu-module.cc)
|
||||||
target_link_libraries(cetest gtest container)
|
target_link_libraries(cetest gtest container)
|
||||||
output_directory(cetest test)
|
output_directory(cetest test)
|
||||||
|
|
|
@ -285,3 +285,5 @@ int execute_regex_match(const char *regex_str, const char *input);
|
||||||
* Return 0 on success.
|
* Return 0 on success.
|
||||||
*/
|
*/
|
||||||
int validate_docker_image_name(const char *image_name);
|
int validate_docker_image_name(const char *image_name);
|
||||||
|
|
||||||
|
struct configuration* get_cfg();
|
||||||
|
|
|
@ -22,6 +22,8 @@
|
||||||
#include "util.h"
|
#include "util.h"
|
||||||
#include "get_executable.h"
|
#include "get_executable.h"
|
||||||
#include "utils/string-utils.h"
|
#include "utils/string-utils.h"
|
||||||
|
#include "modules/gpu/gpu-module.h"
|
||||||
|
#include "modules/cgroups/cgroups-operations.h"
|
||||||
|
|
||||||
#include <errno.h>
|
#include <errno.h>
|
||||||
#include <grp.h>
|
#include <grp.h>
|
||||||
|
@ -241,6 +243,14 @@ static int validate_arguments(int argc, char **argv , int *operation) {
|
||||||
return INVALID_ARGUMENT_NUMBER;
|
return INVALID_ARGUMENT_NUMBER;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Check if it is a known module, if yes, redirect to module
|
||||||
|
*/
|
||||||
|
if (strcmp("--module-gpu", argv[1]) == 0) {
|
||||||
|
return handle_gpu_request(&update_cgroups_parameters, "gpu", argc - 1,
|
||||||
|
&argv[1]);
|
||||||
|
}
|
||||||
|
|
||||||
if (strcmp("--checksetup", argv[1]) == 0) {
|
if (strcmp("--checksetup", argv[1]) == 0) {
|
||||||
*operation = CHECK_SETUP;
|
*operation = CHECK_SETUP;
|
||||||
return 0;
|
return 0;
|
||||||
|
@ -325,6 +335,7 @@ static int validate_arguments(int argc, char **argv , int *operation) {
|
||||||
return FEATURE_DISABLED;
|
return FEATURE_DISABLED;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Now we have to validate 'run as user' operations that don't use
|
/* Now we have to validate 'run as user' operations that don't use
|
||||||
a 'long option' - we should fix this at some point. The validation/argument
|
a 'long option' - we should fix this at some point. The validation/argument
|
||||||
parsing here is extensive enough that it done in a separate function */
|
parsing here is extensive enough that it done in a separate function */
|
||||||
|
|
|
@ -0,0 +1,161 @@
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "configuration.h"
|
||||||
|
#include "container-executor.h"
|
||||||
|
#include "utils/string-utils.h"
|
||||||
|
#include "utils/path-utils.h"
|
||||||
|
#include "modules/common/module-configs.h"
|
||||||
|
#include "modules/common/constants.h"
|
||||||
|
#include "modules/cgroups/cgroups-operations.h"
|
||||||
|
#include "util.h"
|
||||||
|
|
||||||
|
#include <string.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <sys/stat.h>
|
||||||
|
|
||||||
|
#define MAX_PATH_LEN 4096
|
||||||
|
|
||||||
|
static const struct section* cgroup_cfg_section = NULL;
|
||||||
|
|
||||||
|
void reload_cgroups_configuration() {
|
||||||
|
cgroup_cfg_section = get_configuration_section(CGROUPS_SECTION_NAME, get_cfg());
|
||||||
|
}
|
||||||
|
|
||||||
|
char* get_cgroups_path_to_write(
|
||||||
|
const char* hierarchy_name,
|
||||||
|
const char* param_name,
|
||||||
|
const char* group_id) {
|
||||||
|
int failed = 0;
|
||||||
|
char* buffer = NULL;
|
||||||
|
const char* cgroups_root = get_section_value(CGROUPS_ROOT_KEY,
|
||||||
|
cgroup_cfg_section);
|
||||||
|
const char* yarn_hierarchy_name = get_section_value(
|
||||||
|
CGROUPS_YARN_HIERARCHY_KEY, cgroup_cfg_section);
|
||||||
|
|
||||||
|
// Make sure it is defined.
|
||||||
|
if (!cgroups_root || cgroups_root[0] == 0) {
|
||||||
|
fprintf(ERRORFILE, "%s is not defined in container-executor.cfg\n",
|
||||||
|
CGROUPS_ROOT_KEY);
|
||||||
|
failed = 1;
|
||||||
|
goto cleanup;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Make sure it is defined.
|
||||||
|
if (!yarn_hierarchy_name || yarn_hierarchy_name[0] == 0) {
|
||||||
|
fprintf(ERRORFILE, "%s is not defined in container-executor.cfg\n",
|
||||||
|
CGROUPS_YARN_HIERARCHY_KEY);
|
||||||
|
failed = 1;
|
||||||
|
goto cleanup;
|
||||||
|
}
|
||||||
|
|
||||||
|
buffer = malloc(MAX_PATH_LEN + 1);
|
||||||
|
if (!buffer) {
|
||||||
|
fprintf(ERRORFILE, "Failed to allocate memory for output path.\n");
|
||||||
|
failed = 1;
|
||||||
|
goto cleanup;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Make a path.
|
||||||
|
// CGroups path should not be too long.
|
||||||
|
if (snprintf(buffer, MAX_PATH_LEN, "%s/%s/%s/%s/%s.%s",
|
||||||
|
cgroups_root, hierarchy_name, yarn_hierarchy_name,
|
||||||
|
group_id, hierarchy_name, param_name) < 0) {
|
||||||
|
fprintf(ERRORFILE, "Failed to print output path.\n");
|
||||||
|
failed = 1;
|
||||||
|
goto cleanup;
|
||||||
|
}
|
||||||
|
|
||||||
|
cleanup:
|
||||||
|
if (failed) {
|
||||||
|
if (buffer) {
|
||||||
|
free(buffer);
|
||||||
|
}
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
return buffer;
|
||||||
|
}
|
||||||
|
|
||||||
|
int update_cgroups_parameters(
|
||||||
|
const char* hierarchy_name,
|
||||||
|
const char* param_name,
|
||||||
|
const char* group_id,
|
||||||
|
const char* value) {
|
||||||
|
#ifndef __linux
|
||||||
|
fprintf(ERRORFILE, "Failed to update cgroups parameters, not supported\n");
|
||||||
|
return -1;
|
||||||
|
#endif
|
||||||
|
int failure = 0;
|
||||||
|
|
||||||
|
if (!cgroup_cfg_section) {
|
||||||
|
reload_cgroups_configuration();
|
||||||
|
}
|
||||||
|
|
||||||
|
char* full_path = get_cgroups_path_to_write(hierarchy_name, param_name,
|
||||||
|
group_id);
|
||||||
|
|
||||||
|
if (!full_path) {
|
||||||
|
fprintf(ERRORFILE,
|
||||||
|
"Failed to get cgroups path to write, it should be a configuration issue");
|
||||||
|
failure = 1;
|
||||||
|
goto cleanup;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!verify_path_safety(full_path)) {
|
||||||
|
failure = 1;
|
||||||
|
goto cleanup;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Make sure file exists
|
||||||
|
struct stat sb;
|
||||||
|
if (stat(full_path, &sb) != 0) {
|
||||||
|
fprintf(ERRORFILE, "CGroups: Could not find file to write, %s", full_path);
|
||||||
|
failure = 1;
|
||||||
|
goto cleanup;
|
||||||
|
}
|
||||||
|
|
||||||
|
fprintf(ERRORFILE, "CGroups: Updating cgroups, path=%s, value=%s",
|
||||||
|
full_path, value);
|
||||||
|
|
||||||
|
// Write values to file
|
||||||
|
FILE *f;
|
||||||
|
f = fopen(full_path, "a");
|
||||||
|
if (!f) {
|
||||||
|
fprintf(ERRORFILE, "CGroups: Failed to open cgroups file, %s", full_path);
|
||||||
|
failure = 1;
|
||||||
|
goto cleanup;
|
||||||
|
}
|
||||||
|
if (fprintf(f, "%s", value) < 0) {
|
||||||
|
fprintf(ERRORFILE, "CGroups: Failed to write cgroups file, %s", full_path);
|
||||||
|
fclose(f);
|
||||||
|
failure = 1;
|
||||||
|
goto cleanup;
|
||||||
|
}
|
||||||
|
if (fclose(f) != 0) {
|
||||||
|
fprintf(ERRORFILE, "CGroups: Failed to close cgroups file, %s", full_path);
|
||||||
|
failure = 1;
|
||||||
|
goto cleanup;
|
||||||
|
}
|
||||||
|
|
||||||
|
cleanup:
|
||||||
|
if (full_path) {
|
||||||
|
free(full_path);
|
||||||
|
}
|
||||||
|
return -failure;
|
||||||
|
}
|
|
@ -0,0 +1,55 @@
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef _CGROUPS_OPERATIONS_H_
|
||||||
|
#define _CGROUPS_OPERATIONS_H_
|
||||||
|
|
||||||
|
#define CGROUPS_SECTION_NAME "cgroups"
|
||||||
|
#define CGROUPS_ROOT_KEY "root"
|
||||||
|
#define CGROUPS_YARN_HIERARCHY_KEY "yarn-hierarchy"
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Handle CGroups parameter update requests:
|
||||||
|
* - hierarchy_name: e.g. devices / cpu,cpuacct
|
||||||
|
* - param_name: e.g. deny
|
||||||
|
* - group_id: e.g. container_x_y
|
||||||
|
* - value: e.g. "a *:* rwm"
|
||||||
|
*
|
||||||
|
* return 0 if succeeded
|
||||||
|
*/
|
||||||
|
int update_cgroups_parameters(
|
||||||
|
const char* hierarchy_name,
|
||||||
|
const char* param_name,
|
||||||
|
const char* group_id,
|
||||||
|
const char* value);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get CGroups path to update. Visible for testing.
|
||||||
|
* Returns a malloc'd path string (caller frees), or NULL on failure
|
||||||
|
*/
|
||||||
|
char* get_cgroups_path_to_write(
|
||||||
|
const char* hierarchy_name,
|
||||||
|
const char* param_name,
|
||||||
|
const char* group_id);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Reload config from filesystem, visible for testing.
|
||||||
|
*/
|
||||||
|
void reload_cgroups_configuration();
|
||||||
|
|
||||||
|
#endif
|
|
@ -0,0 +1,229 @@
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "configuration.h"
|
||||||
|
#include "container-executor.h"
|
||||||
|
#include "utils/string-utils.h"
|
||||||
|
#include "modules/gpu/gpu-module.h"
|
||||||
|
#include "modules/cgroups/cgroups-operations.h"
|
||||||
|
#include "modules/common/module-configs.h"
|
||||||
|
#include "modules/common/constants.h"
|
||||||
|
#include "util.h"
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <getopt.h>
|
||||||
|
#include <unistd.h>
|
||||||
|
|
||||||
|
#define EXCLUDED_GPUS_OPTION "excluded_gpus"
|
||||||
|
#define CONTAINER_ID_OPTION "container_id"
|
||||||
|
#define DEFAULT_NVIDIA_MAJOR_NUMBER 195
|
||||||
|
#define MAX_CONTAINER_ID_LEN 128
|
||||||
|
|
||||||
|
static const struct section* cfg_section;
|
||||||
|
|
||||||
|
static int internal_handle_gpu_request(
|
||||||
|
update_cgroups_parameters_func update_cgroups_parameters_func_p,
|
||||||
|
size_t n_minor_devices_to_block, int minor_devices[],
|
||||||
|
const char* container_id) {
|
||||||
|
char* allowed_minor_numbers_str = NULL;
|
||||||
|
int* allowed_minor_numbers = NULL;
|
||||||
|
size_t n_allowed_minor_numbers = 0;
|
||||||
|
int return_code = 0;
|
||||||
|
|
||||||
|
if (n_minor_devices_to_block == 0) {
|
||||||
|
// no device to block, just return;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get major device number from cfg, if not set, major number of (Nvidia)
|
||||||
|
// will be the default value.
|
||||||
|
int major_device_number;
|
||||||
|
char* major_number_str = get_section_value(GPU_MAJOR_NUMBER_CONFIG_KEY,
|
||||||
|
cfg_section);
|
||||||
|
if (!major_number_str || 0 == major_number_str[0]) {
|
||||||
|
// Default major number of Nvidia devices
|
||||||
|
major_device_number = DEFAULT_NVIDIA_MAJOR_NUMBER;
|
||||||
|
} else {
|
||||||
|
major_device_number = strtol(major_number_str, NULL, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get allowed minor device numbers from cfg, if not set, means all minor
|
||||||
|
// devices can be used by YARN
|
||||||
|
allowed_minor_numbers_str = get_section_value(
|
||||||
|
GPU_ALLOWED_DEVICES_MINOR_NUMBERS,
|
||||||
|
cfg_section);
|
||||||
|
if (!allowed_minor_numbers_str || 0 == allowed_minor_numbers_str[0]) {
|
||||||
|
allowed_minor_numbers = NULL;
|
||||||
|
} else {
|
||||||
|
int rc = get_numbers_split_by_comma(allowed_minor_numbers_str,
|
||||||
|
&allowed_minor_numbers,
|
||||||
|
&n_allowed_minor_numbers);
|
||||||
|
if (0 != rc) {
|
||||||
|
fprintf(ERRORFILE,
|
||||||
|
"Failed to get allowed minor device numbers from cfg, value=%s\n",
|
||||||
|
allowed_minor_numbers_str);
|
||||||
|
return_code = -1;
|
||||||
|
goto cleanup;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Make sure we're trying to black devices allowed in config
|
||||||
|
for (int i = 0; i < n_minor_devices_to_block; i++) {
|
||||||
|
int found = 0;
|
||||||
|
for (int j = 0; j < n_allowed_minor_numbers; j++) {
|
||||||
|
if (minor_devices[i] == allowed_minor_numbers[j]) {
|
||||||
|
found = 1;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!found) {
|
||||||
|
fprintf(ERRORFILE,
|
||||||
|
"Trying to blacklist device with minor-number=%d which is not on allowed list\n",
|
||||||
|
minor_devices[i]);
|
||||||
|
return_code = -1;
|
||||||
|
goto cleanup;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Use cgroup helpers to blacklist devices
|
||||||
|
for (int i = 0; i < n_minor_devices_to_block; i++) {
|
||||||
|
char param_value[128];
|
||||||
|
memset(param_value, 0, sizeof(param_value));
|
||||||
|
snprintf(param_value, sizeof(param_value), "c %d:%d rwm",
|
||||||
|
major_device_number, i);
|
||||||
|
|
||||||
|
int rc = update_cgroups_parameters_func_p("devices", "deny",
|
||||||
|
container_id, param_value);
|
||||||
|
|
||||||
|
if (0 != rc) {
|
||||||
|
fprintf(ERRORFILE, "CGroups: Failed to update cgroups\n");
|
||||||
|
return_code = -1;
|
||||||
|
goto cleanup;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
cleanup:
|
||||||
|
if (major_number_str) {
|
||||||
|
free(major_number_str);
|
||||||
|
}
|
||||||
|
if (allowed_minor_numbers) {
|
||||||
|
free(allowed_minor_numbers);
|
||||||
|
}
|
||||||
|
if (allowed_minor_numbers_str) {
|
||||||
|
free(allowed_minor_numbers_str);
|
||||||
|
}
|
||||||
|
|
||||||
|
return return_code;
|
||||||
|
}
|
||||||
|
|
||||||
|
void reload_gpu_configuration() {
|
||||||
|
cfg_section = get_configuration_section(GPU_MODULE_SECTION_NAME, get_cfg());
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Format of GPU request commandline:
|
||||||
|
*
|
||||||
|
* c-e gpu --excluded_gpus 0,1,3 --container_id container_x_y
|
||||||
|
*/
|
||||||
|
int handle_gpu_request(update_cgroups_parameters_func func,
|
||||||
|
const char* module_name, int module_argc, char** module_argv) {
|
||||||
|
if (!cfg_section) {
|
||||||
|
reload_gpu_configuration();
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!module_enabled(cfg_section, GPU_MODULE_SECTION_NAME)) {
|
||||||
|
fprintf(ERRORFILE,
|
||||||
|
"Please make sure gpu module is enabled before using it.\n");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
static struct option long_options[] = {
|
||||||
|
{EXCLUDED_GPUS_OPTION, required_argument, 0, 'e' },
|
||||||
|
{CONTAINER_ID_OPTION, required_argument, 0, 'c' },
|
||||||
|
{0, 0, 0, 0}
|
||||||
|
};
|
||||||
|
|
||||||
|
int rc = 0;
|
||||||
|
int c = 0;
|
||||||
|
int option_index = 0;
|
||||||
|
|
||||||
|
int* minor_devices = NULL;
|
||||||
|
char container_id[MAX_CONTAINER_ID_LEN];
|
||||||
|
memset(container_id, 0, sizeof(container_id));
|
||||||
|
size_t n_minor_devices_to_block = 0;
|
||||||
|
int failed = 0;
|
||||||
|
|
||||||
|
optind = 1;
|
||||||
|
while((c = getopt_long(module_argc, module_argv, "e:c:",
|
||||||
|
long_options, &option_index)) != -1) {
|
||||||
|
switch(c) {
|
||||||
|
case 'e':
|
||||||
|
rc = get_numbers_split_by_comma(optarg, &minor_devices,
|
||||||
|
&n_minor_devices_to_block);
|
||||||
|
if (0 != rc) {
|
||||||
|
fprintf(ERRORFILE,
|
||||||
|
"Failed to get minor devices number from command line, value=%s\n",
|
||||||
|
optarg);
|
||||||
|
failed = 1;
|
||||||
|
goto cleanup;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case 'c':
|
||||||
|
if (!validate_container_id(optarg)) {
|
||||||
|
fprintf(ERRORFILE,
|
||||||
|
"Specified container_id=%s is invalid\n", optarg);
|
||||||
|
failed = 1;
|
||||||
|
goto cleanup;
|
||||||
|
}
|
||||||
|
strncpy(container_id, optarg, MAX_CONTAINER_ID_LEN);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
fprintf(ERRORFILE,
|
||||||
|
"Unknown option in gpu command character %d %c, optionindex = %d\n",
|
||||||
|
c, c, optind);
|
||||||
|
failed = 1;
|
||||||
|
goto cleanup;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (0 == container_id[0]) {
|
||||||
|
fprintf(ERRORFILE,
|
||||||
|
"[%s] --container_id must be specified.\n", __func__);
|
||||||
|
failed = 1;
|
||||||
|
goto cleanup;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!minor_devices) {
|
||||||
|
// Minor devices is null, skip following call.
|
||||||
|
fprintf(ERRORFILE, "is not specified, skip cgroups call.\n");
|
||||||
|
goto cleanup;
|
||||||
|
}
|
||||||
|
|
||||||
|
failed = internal_handle_gpu_request(func, n_minor_devices_to_block,
|
||||||
|
minor_devices,
|
||||||
|
container_id);
|
||||||
|
|
||||||
|
cleanup:
|
||||||
|
if (minor_devices) {
|
||||||
|
free(minor_devices);
|
||||||
|
}
|
||||||
|
return failed;
|
||||||
|
}
|
|
@ -0,0 +1,45 @@
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifdef __FreeBSD__
|
||||||
|
#define _WITH_GETLINE
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef _MODULES_GPU_GPU_MUDULE_H_
|
||||||
|
#define _MODULES_GPU_GPU_MUDULE_H_
|
||||||
|
|
||||||
|
#define GPU_MAJOR_NUMBER_CONFIG_KEY "gpu.major-device-number"
|
||||||
|
#define GPU_ALLOWED_DEVICES_MINOR_NUMBERS "gpu.allowed-device-minor-numbers"
|
||||||
|
#define GPU_MODULE_SECTION_NAME "gpu"
|
||||||
|
|
||||||
|
// For unit test stubbing
|
||||||
|
typedef int (*update_cgroups_parameters_func)(const char*, const char*,
|
||||||
|
const char*, const char*);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Handle gpu requests
|
||||||
|
*/
|
||||||
|
int handle_gpu_request(update_cgroups_parameters_func func,
|
||||||
|
const char* module_name, int module_argc, char** module_argv);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Reload config from filesystem, visible for testing.
|
||||||
|
*/
|
||||||
|
void reload_gpu_configuration();
|
||||||
|
|
||||||
|
#endif
|
|
@ -0,0 +1,121 @@
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <errno.h>
|
||||||
|
#include <fcntl.h>
|
||||||
|
#include <inttypes.h>
|
||||||
|
#include <signal.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <sys/stat.h>
|
||||||
|
#include <sys/wait.h>
|
||||||
|
#include <unistd.h>
|
||||||
|
|
||||||
|
#include <gtest/gtest.h>
|
||||||
|
#include <sstream>
|
||||||
|
|
||||||
|
extern "C" {
|
||||||
|
#include "configuration.h"
|
||||||
|
#include "container-executor.h"
|
||||||
|
#include "modules/cgroups/cgroups-operations.h"
|
||||||
|
#include "test/test-container-executor-common.h"
|
||||||
|
#include "util.h"
|
||||||
|
}
|
||||||
|
|
||||||
|
namespace ContainerExecutor {
|
||||||
|
|
||||||
|
class TestCGroupsModule : public ::testing::Test {
|
||||||
|
protected:
|
||||||
|
virtual void SetUp() {
|
||||||
|
if (mkdirs(TEST_ROOT, 0755) != 0) {
|
||||||
|
fprintf(ERRORFILE, "Failed to mkdir TEST_ROOT: %s\n", TEST_ROOT);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
LOGFILE = stdout;
|
||||||
|
ERRORFILE = stderr;
|
||||||
|
}
|
||||||
|
|
||||||
|
virtual void TearDown() {}
|
||||||
|
};
|
||||||
|
|
||||||
|
// A [cgroups] section without "root" must not yield a path.
TEST_F(TestCGroupsModule, test_cgroups_get_path_without_define_root) {
  // Write config file.
  const char *filename = TEST_ROOT "/test_cgroups_get_path_without_root.cfg";
  FILE *file = fopen(filename, "w");

  // Fail only this test (as the sibling tests do) instead of exiting the
  // whole test binary.
  ASSERT_TRUE(file != NULL) << "FAIL: Could not open configuration file: "
                            << filename << "\n";
  fprintf(file, "[cgroups]\n");
  fprintf(file, "yarn-hierarchy=yarn\n");
  fclose(file);

  // Read config file
  read_executor_config(filename);
  reload_cgroups_configuration();

  char* path = get_cgroups_path_to_write("devices", "deny", "container_1");

  ASSERT_TRUE(NULL == path) << "Should fail.\n";
}
|
||||||
|
|
||||||
|
// A [cgroups] section without "yarn-hierarchy" must not yield a path.
TEST_F(TestCGroupsModule, test_cgroups_get_path_without_define_yarn_hierarchy) {
  // Write config file. Use a file name of its own so this test does not
  // overwrite the config written by the "without root" test.
  const char *filename =
      TEST_ROOT "/test_cgroups_get_path_without_yarn_hierarchy.cfg";
  FILE *file = fopen(filename, "w");

  ASSERT_TRUE(file != NULL) << "FAIL: Could not open configuration file: "
                            << filename << "\n";
  fprintf(file, "[cgroups]\n");
  fprintf(file, "root=/sys/fs/cgroups\n");
  fclose(file);

  // Read config file
  read_executor_config(filename);
  reload_cgroups_configuration();
  char* path = get_cgroups_path_to_write("devices", "deny", "container_1");

  ASSERT_TRUE(NULL == path) << "Should fail.\n";
}
|
||||||
|
|
||||||
|
// With both "root" and "yarn-hierarchy" configured (trailing blanks
// included), the full parameter path is produced.
TEST_F(TestCGroupsModule, test_cgroups_get_path_succeeded) {
  // Write config file.
  const char *filename = TEST_ROOT "/test_cgroups_get_path.cfg";
  FILE *file = fopen(filename, "w");

  ASSERT_TRUE(file != NULL) << "FAIL: Could not open configuration file\n";
  fprintf(file, "[cgroups]\n");
  fprintf(file, "root=/sys/fs/cgroups \n");
  fprintf(file, "yarn-hierarchy=yarn \n");
  fclose(file);

  // Read config file
  read_executor_config(filename);
  reload_cgroups_configuration();

  char* path = get_cgroups_path_to_write("devices", "deny", "container_1");
  ASSERT_TRUE(NULL != path) << "Should success.\n";

  const char *EXPECTED =
      "/sys/fs/cgroups/devices/yarn/container_1/devices.deny";

  // EXPECT (not ASSERT) so the malloc'd path is released even on mismatch.
  EXPECT_STREQ(EXPECTED, path)
      << "Return cgroup-path-to-write is not expected\n";
  free(path);
}
|
||||||
|
} // namespace ContainerExecutor
|
|
@ -0,0 +1,203 @@
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
#include <errno.h>
|
||||||
|
#include <fcntl.h>
|
||||||
|
#include <inttypes.h>
|
||||||
|
#include <signal.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <sys/stat.h>
|
||||||
|
#include <sys/wait.h>
|
||||||
|
#include <unistd.h>
|
||||||
|
|
||||||
|
#include <gtest/gtest.h>
|
||||||
|
#include <sstream>
|
||||||
|
|
||||||
|
extern "C" {
|
||||||
|
#include "configuration.h"
|
||||||
|
#include "container-executor.h"
|
||||||
|
#include "modules/cgroups/cgroups-operations.h"
|
||||||
|
#include "modules/gpu/gpu-module.h"
|
||||||
|
#include "test/test-container-executor-common.h"
|
||||||
|
#include "util.h"
|
||||||
|
}
|
||||||
|
|
||||||
|
namespace ContainerExecutor {
|
||||||
|
|
||||||
|
class TestGpuModule : public ::testing::Test {
|
||||||
|
protected:
|
||||||
|
virtual void SetUp() {
|
||||||
|
if (mkdirs(TEST_ROOT, 0755) != 0) {
|
||||||
|
fprintf(ERRORFILE, "Failed to mkdir TEST_ROOT: %s\n", TEST_ROOT);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
LOGFILE = stdout;
|
||||||
|
ERRORFILE = stderr;
|
||||||
|
}
|
||||||
|
|
||||||
|
virtual void TearDown() {
|
||||||
|
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
static std::vector<const char*> cgroups_parameters_invoked;
|
||||||
|
|
||||||
|
static int mock_update_cgroups_parameters(
|
||||||
|
const char* controller_name,
|
||||||
|
const char* param_name,
|
||||||
|
const char* group_id,
|
||||||
|
const char* value) {
|
||||||
|
char* buf = (char*) malloc(128);
|
||||||
|
strcpy(buf, controller_name);
|
||||||
|
cgroups_parameters_invoked.push_back(buf);
|
||||||
|
|
||||||
|
buf = (char*) malloc(128);
|
||||||
|
strcpy(buf, param_name);
|
||||||
|
cgroups_parameters_invoked.push_back(buf);
|
||||||
|
|
||||||
|
buf = (char*) malloc(128);
|
||||||
|
strcpy(buf, group_id);
|
||||||
|
cgroups_parameters_invoked.push_back(buf);
|
||||||
|
|
||||||
|
buf = (char*) malloc(128);
|
||||||
|
strcpy(buf, value);
|
||||||
|
cgroups_parameters_invoked.push_back(buf);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void verify_param_updated_to_cgroups(
|
||||||
|
int argc, const char** argv) {
|
||||||
|
ASSERT_EQ(argc, cgroups_parameters_invoked.size());
|
||||||
|
|
||||||
|
int offset = 0;
|
||||||
|
while (offset < argc) {
|
||||||
|
ASSERT_STREQ(argv[offset], cgroups_parameters_invoked[offset]);
|
||||||
|
offset++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Write a config file holding only a [gpu] section with module.enabled set
// according to `enabled`, then load it and refresh the gpu module's section.
static void write_and_load_gpu_module_to_cfg(const char* cfg_filepath, int enabled) {
  FILE *cfg = fopen(cfg_filepath, "w");
  if (NULL == cfg) {
    printf("FAIL: Could not open configuration file: %s\n", cfg_filepath);
    exit(1);
  }

  const char *enabled_line =
      enabled ? "module.enabled=true\n" : "module.enabled=false\n";
  fputs("[gpu]\n", cfg);
  fputs(enabled_line, cfg);
  fclose(cfg);

  // Read config file
  read_executor_config(cfg_filepath);
  reload_gpu_configuration();
}
|
||||||
|
|
||||||
|
static void test_gpu_module_enabled_disabled(int enabled) {
|
||||||
|
// Write config file.
|
||||||
|
const char *filename = TEST_ROOT "/test_cgroups_module_enabled_disabled.cfg";
|
||||||
|
write_and_load_gpu_module_to_cfg(filename, enabled);
|
||||||
|
|
||||||
|
char* argv[] = { (char*) "--module-gpu", (char*) "--excluded_gpus", (char*) "0,1",
|
||||||
|
(char*) "--container_id",
|
||||||
|
(char*) "container_1498064906505_0001_01_000001" };
|
||||||
|
|
||||||
|
int rc = handle_gpu_request(&mock_update_cgroups_parameters,
|
||||||
|
"gpu", 5, argv);
|
||||||
|
|
||||||
|
int EXPECTED_RC;
|
||||||
|
if (enabled) {
|
||||||
|
EXPECTED_RC = 0;
|
||||||
|
} else {
|
||||||
|
EXPECTED_RC = -1;
|
||||||
|
}
|
||||||
|
ASSERT_EQ(EXPECTED_RC, rc);
|
||||||
|
}
|
||||||
|
|
||||||
|
// The gpu module must issue one "devices"/"deny" cgroups update per excluded
// GPU, and none at all when no GPU is excluded.
TEST_F(TestGpuModule, test_verify_gpu_module_calls_cgroup_parameter) {
  // Write config file.
  const char *cfg_path = TEST_ROOT "/test_verify_gpu_module_calls_cgroup_parameter.cfg";
  write_and_load_gpu_module_to_cfg(cfg_path, 1);

  char* container_id = (char*) "container_1498064906505_0001_01_000001";

  /* Test case 1: block 2 devices */
  char* block_two[] = {
      (char*) "--module-gpu",
      (char*) "--excluded_gpus", (char*) "0,1",
      (char*) "--container_id", container_id };
  cgroups_parameters_invoked.clear();
  int rc = handle_gpu_request(&mock_update_cgroups_parameters,
      "gpu", 5, block_two);
  ASSERT_EQ(0, rc) << "Should success.\n";

  // Verify cgroups parameters
  const char* expected_updates[] = {
      "devices", "deny", container_id, "c 195:0 rwm",
      "devices", "deny", container_id, "c 195:1 rwm"};
  verify_param_updated_to_cgroups(8, expected_updates);

  /* Test case 2: block 0 devices */
  char* block_none[] = {
      (char*) "--module-gpu",
      (char*) "--container_id", container_id };
  cgroups_parameters_invoked.clear();
  rc = handle_gpu_request(&mock_update_cgroups_parameters,
      "gpu", 3, block_none);
  ASSERT_EQ(0, rc) << "Should success.\n";

  // Verify cgroups parameters
  verify_param_updated_to_cgroups(0, NULL);
}
|
||||||
|
|
||||||
|
// Requests with a malformed or missing container id must be rejected.
TEST_F(TestGpuModule, test_illegal_cli_parameters) {
  // Write config file.
  const char *cfg_path = TEST_ROOT "/test_illegal_cli_parameters.cfg";
  write_and_load_gpu_module_to_cfg(cfg_path, 1);

  int rc = 0;

  // Illegal container id - 1: not a container id at all
  char* garbage_id[] = {
      (char*) "--module-gpu",
      (char*) "--excluded_gpus", (char*) "0,1",
      (char*) "--container_id", (char*) "xxxx" };
  rc = handle_gpu_request(&mock_update_cgroups_parameters,
      "gpu", 5, garbage_id);
  ASSERT_NE(0, rc) << "Should fail.\n";

  // Illegal container id - 2: incomplete container id
  char* short_id[] = {
      (char*) "--module-gpu",
      (char*) "--excluded_gpus", (char*) "0,1",
      (char*) "--container_id", (char*) "container_1" };
  rc = handle_gpu_request(&mock_update_cgroups_parameters,
      "gpu", 5, short_id);
  ASSERT_NE(0, rc) << "Should fail.\n";

  // Illegal container id - 3: container id not given
  char* missing_id[] = {
      (char*) "--module-gpu",
      (char*) "--excluded_gpus", (char*) "0,1" };
  rc = handle_gpu_request(&mock_update_cgroups_parameters,
      "gpu", 3, missing_id);
  ASSERT_NE(0, rc) << "Should fail.\n";
}
|
||||||
|
|
||||||
|
// A request against a config with module.enabled=false must be refused.
TEST_F(TestGpuModule, test_gpu_module_disabled) {
  const int module_enabled_flag = 0;
  test_gpu_module_enabled_disabled(module_enabled_flag);
}

// The same request against a config with module.enabled=true must succeed.
TEST_F(TestGpuModule, test_gpu_module_enabled) {
  const int module_enabled_flag = 1;
  test_gpu_module_enabled_disabled(module_enabled_flag);
}
|
||||||
|
} // namespace ContainerExecutor
|
|
@ -1404,7 +1404,6 @@ int main(int argc, char **argv) {
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
test_trim_function();
|
test_trim_function();
|
||||||
run("rm -fr " TEST_ROOT);
|
|
||||||
printf("\nFinished tests\n");
|
printf("\nFinished tests\n");
|
||||||
|
|
||||||
free(current_username);
|
free(current_username);
|
||||||
|
|
Loading…
Reference in New Issue