YARN-9187. Backport YARN-6852 for GPU-specific native changes to branch-2
This commit is contained in:
parent
078dfb09fe
commit
56259bcecb
|
@ -101,9 +101,11 @@ add_library(container
|
|||
main/native/container-executor/impl/container-executor.c
|
||||
main/native/container-executor/impl/get_executable.c
|
||||
main/native/container-executor/impl/utils/string-utils.c
|
||||
main/native/container-executor/impl/utils/path-utils.c
|
||||
main/native/container-executor/impl/modules/common/module-configs.c
|
||||
main/native/container-executor/impl/utils/docker-util.c
|
||||
main/native/container-executor/impl/utils/path-utils.c
|
||||
main/native/container-executor/impl/modules/cgroups/cgroups-operations.c
|
||||
main/native/container-executor/impl/modules/common/module-configs.c
|
||||
main/native/container-executor/impl/modules/gpu/gpu-module.c
|
||||
)
|
||||
|
||||
add_executable(container-executor
|
||||
|
@ -135,6 +137,8 @@ add_executable(cetest
|
|||
main/native/container-executor/test/utils/test-string-utils.cc
|
||||
main/native/container-executor/test/utils/test-path-utils.cc
|
||||
main/native/container-executor/test/test_util.cc
|
||||
main/native/container-executor/test/utils/test_docker_util.cc)
|
||||
main/native/container-executor/test/utils/test_docker_util.cc
|
||||
main/native/container-executor/test/modules/cgroups/test-cgroups-module.cc
|
||||
main/native/container-executor/test/modules/gpu/test-gpu-module.cc)
|
||||
target_link_libraries(cetest gtest container)
|
||||
output_directory(cetest test)
|
||||
|
|
|
@ -285,3 +285,5 @@ int execute_regex_match(const char *regex_str, const char *input);
|
|||
* Return 0 on success.
|
||||
*/
|
||||
int validate_docker_image_name(const char *image_name);
|
||||
|
||||
struct configuration* get_cfg();
|
||||
|
|
|
@ -22,6 +22,8 @@
|
|||
#include "util.h"
|
||||
#include "get_executable.h"
|
||||
#include "utils/string-utils.h"
|
||||
#include "modules/gpu/gpu-module.h"
|
||||
#include "modules/cgroups/cgroups-operations.h"
|
||||
|
||||
#include <errno.h>
|
||||
#include <grp.h>
|
||||
|
@ -241,6 +243,14 @@ static int validate_arguments(int argc, char **argv , int *operation) {
|
|||
return INVALID_ARGUMENT_NUMBER;
|
||||
}
|
||||
|
||||
/*
|
||||
* Check if it is a known module, if yes, redirect to module
|
||||
*/
|
||||
if (strcmp("--module-gpu", argv[1]) == 0) {
|
||||
return handle_gpu_request(&update_cgroups_parameters, "gpu", argc - 1,
|
||||
&argv[1]);
|
||||
}
|
||||
|
||||
if (strcmp("--checksetup", argv[1]) == 0) {
|
||||
*operation = CHECK_SETUP;
|
||||
return 0;
|
||||
|
@ -325,6 +335,7 @@ static int validate_arguments(int argc, char **argv , int *operation) {
|
|||
return FEATURE_DISABLED;
|
||||
}
|
||||
}
|
||||
|
||||
/* Now we have to validate 'run as user' operations that don't use
|
||||
a 'long option' - we should fix this at some point. The validation/argument
|
||||
parsing here is extensive enough that it done in a separate function */
|
||||
|
|
|
@ -0,0 +1,161 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "configuration.h"
|
||||
#include "container-executor.h"
|
||||
#include "utils/string-utils.h"
|
||||
#include "utils/path-utils.h"
|
||||
#include "modules/common/module-configs.h"
|
||||
#include "modules/common/constants.h"
|
||||
#include "modules/cgroups/cgroups-operations.h"
|
||||
#include "util.h"
|
||||
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
#define MAX_PATH_LEN 4096
|
||||
|
||||
static const struct section* cgroup_cfg_section = NULL;
|
||||
|
||||
/*
 * Refresh the cached cgroups config section: looks up the section named
 * CGROUPS_SECTION_NAME in the configuration returned by get_cfg() and stores
 * the result in the file-local cgroup_cfg_section pointer.
 */
void reload_cgroups_configuration() {
|
||||
cgroup_cfg_section = get_configuration_section(CGROUPS_SECTION_NAME, get_cfg());
|
||||
}
|
||||
|
||||
char* get_cgroups_path_to_write(
|
||||
const char* hierarchy_name,
|
||||
const char* param_name,
|
||||
const char* group_id) {
|
||||
int failed = 0;
|
||||
char* buffer = NULL;
|
||||
const char* cgroups_root = get_section_value(CGROUPS_ROOT_KEY,
|
||||
cgroup_cfg_section);
|
||||
const char* yarn_hierarchy_name = get_section_value(
|
||||
CGROUPS_YARN_HIERARCHY_KEY, cgroup_cfg_section);
|
||||
|
||||
// Make sure it is defined.
|
||||
if (!cgroups_root || cgroups_root[0] == 0) {
|
||||
fprintf(ERRORFILE, "%s is not defined in container-executor.cfg\n",
|
||||
CGROUPS_ROOT_KEY);
|
||||
failed = 1;
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
// Make sure it is defined.
|
||||
if (!yarn_hierarchy_name || yarn_hierarchy_name[0] == 0) {
|
||||
fprintf(ERRORFILE, "%s is not defined in container-executor.cfg\n",
|
||||
CGROUPS_YARN_HIERARCHY_KEY);
|
||||
failed = 1;
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
buffer = malloc(MAX_PATH_LEN + 1);
|
||||
if (!buffer) {
|
||||
fprintf(ERRORFILE, "Failed to allocate memory for output path.\n");
|
||||
failed = 1;
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
// Make a path.
|
||||
// CGroups path should not be too long.
|
||||
if (snprintf(buffer, MAX_PATH_LEN, "%s/%s/%s/%s/%s.%s",
|
||||
cgroups_root, hierarchy_name, yarn_hierarchy_name,
|
||||
group_id, hierarchy_name, param_name) < 0) {
|
||||
fprintf(ERRORFILE, "Failed to print output path.\n");
|
||||
failed = 1;
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
cleanup:
|
||||
if (failed) {
|
||||
if (buffer) {
|
||||
free(buffer);
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
return buffer;
|
||||
}
|
||||
|
||||
int update_cgroups_parameters(
|
||||
const char* hierarchy_name,
|
||||
const char* param_name,
|
||||
const char* group_id,
|
||||
const char* value) {
|
||||
#ifndef __linux
|
||||
fprintf(ERRORFILE, "Failed to update cgroups parameters, not supported\n");
|
||||
return -1;
|
||||
#endif
|
||||
int failure = 0;
|
||||
|
||||
if (!cgroup_cfg_section) {
|
||||
reload_cgroups_configuration();
|
||||
}
|
||||
|
||||
char* full_path = get_cgroups_path_to_write(hierarchy_name, param_name,
|
||||
group_id);
|
||||
|
||||
if (!full_path) {
|
||||
fprintf(ERRORFILE,
|
||||
"Failed to get cgroups path to write, it should be a configuration issue");
|
||||
failure = 1;
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
if (!verify_path_safety(full_path)) {
|
||||
failure = 1;
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
// Make sure file exists
|
||||
struct stat sb;
|
||||
if (stat(full_path, &sb) != 0) {
|
||||
fprintf(ERRORFILE, "CGroups: Could not find file to write, %s", full_path);
|
||||
failure = 1;
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
fprintf(ERRORFILE, "CGroups: Updating cgroups, path=%s, value=%s",
|
||||
full_path, value);
|
||||
|
||||
// Write values to file
|
||||
FILE *f;
|
||||
f = fopen(full_path, "a");
|
||||
if (!f) {
|
||||
fprintf(ERRORFILE, "CGroups: Failed to open cgroups file, %s", full_path);
|
||||
failure = 1;
|
||||
goto cleanup;
|
||||
}
|
||||
if (fprintf(f, "%s", value) < 0) {
|
||||
fprintf(ERRORFILE, "CGroups: Failed to write cgroups file, %s", full_path);
|
||||
fclose(f);
|
||||
failure = 1;
|
||||
goto cleanup;
|
||||
}
|
||||
if (fclose(f) != 0) {
|
||||
fprintf(ERRORFILE, "CGroups: Failed to close cgroups file, %s", full_path);
|
||||
failure = 1;
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
cleanup:
|
||||
if (full_path) {
|
||||
free(full_path);
|
||||
}
|
||||
return -failure;
|
||||
}
|
|
@ -0,0 +1,55 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef _CGROUPS_OPERATIONS_H_
|
||||
#define _CGROUPS_OPERATIONS_H_
|
||||
|
||||
#define CGROUPS_SECTION_NAME "cgroups"
|
||||
#define CGROUPS_ROOT_KEY "root"
|
||||
#define CGROUPS_YARN_HIERARCHY_KEY "yarn-hierarchy"
|
||||
|
||||
/**
|
||||
* Handle CGroups parameter update requests:
|
||||
* - hierarchy_name: e.g. devices / cpu,cpuacct
|
||||
* - param_name: e.g. deny
|
||||
* - group_id: e.g. container_x_y
|
||||
* - value: e.g. "a *:* rwm"
|
||||
*
|
||||
* return 0 if succeeded, -1 otherwise
|
||||
*/
|
||||
int update_cgroups_parameters(
|
||||
const char* hierarchy_name,
|
||||
const char* param_name,
|
||||
const char* group_id,
|
||||
const char* value);
|
||||
|
||||
/**
|
||||
* Get CGroups path to update. Visible for testing.
|
||||
* Returns a newly allocated path string (caller must free it), or NULL on failure
|
||||
*/
|
||||
char* get_cgroups_path_to_write(
|
||||
const char* hierarchy_name,
|
||||
const char* param_name,
|
||||
const char* group_id);
|
||||
|
||||
/**
|
||||
* Reload config from filesystem, visible for testing.
|
||||
*/
|
||||
void reload_cgroups_configuration();
|
||||
|
||||
#endif
|
|
@ -0,0 +1,229 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "configuration.h"
|
||||
#include "container-executor.h"
|
||||
#include "utils/string-utils.h"
|
||||
#include "modules/gpu/gpu-module.h"
|
||||
#include "modules/cgroups/cgroups-operations.h"
|
||||
#include "modules/common/module-configs.h"
|
||||
#include "modules/common/constants.h"
|
||||
#include "util.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include <getopt.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#define EXCLUDED_GPUS_OPTION "excluded_gpus"
|
||||
#define CONTAINER_ID_OPTION "container_id"
|
||||
#define DEFAULT_NVIDIA_MAJOR_NUMBER 195
|
||||
#define MAX_CONTAINER_ID_LEN 128
|
||||
|
||||
static const struct section* cfg_section;
|
||||
|
||||
static int internal_handle_gpu_request(
|
||||
update_cgroups_parameters_func update_cgroups_parameters_func_p,
|
||||
size_t n_minor_devices_to_block, int minor_devices[],
|
||||
const char* container_id) {
|
||||
char* allowed_minor_numbers_str = NULL;
|
||||
int* allowed_minor_numbers = NULL;
|
||||
size_t n_allowed_minor_numbers = 0;
|
||||
int return_code = 0;
|
||||
|
||||
if (n_minor_devices_to_block == 0) {
|
||||
// no device to block, just return;
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Get major device number from cfg, if not set, major number of (Nvidia)
|
||||
// will be the default value.
|
||||
int major_device_number;
|
||||
char* major_number_str = get_section_value(GPU_MAJOR_NUMBER_CONFIG_KEY,
|
||||
cfg_section);
|
||||
if (!major_number_str || 0 == major_number_str[0]) {
|
||||
// Default major number of Nvidia devices
|
||||
major_device_number = DEFAULT_NVIDIA_MAJOR_NUMBER;
|
||||
} else {
|
||||
major_device_number = strtol(major_number_str, NULL, 0);
|
||||
}
|
||||
|
||||
// Get allowed minor device numbers from cfg, if not set, means all minor
|
||||
// devices can be used by YARN
|
||||
allowed_minor_numbers_str = get_section_value(
|
||||
GPU_ALLOWED_DEVICES_MINOR_NUMBERS,
|
||||
cfg_section);
|
||||
if (!allowed_minor_numbers_str || 0 == allowed_minor_numbers_str[0]) {
|
||||
allowed_minor_numbers = NULL;
|
||||
} else {
|
||||
int rc = get_numbers_split_by_comma(allowed_minor_numbers_str,
|
||||
&allowed_minor_numbers,
|
||||
&n_allowed_minor_numbers);
|
||||
if (0 != rc) {
|
||||
fprintf(ERRORFILE,
|
||||
"Failed to get allowed minor device numbers from cfg, value=%s\n",
|
||||
allowed_minor_numbers_str);
|
||||
return_code = -1;
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
// Make sure we're trying to black devices allowed in config
|
||||
for (int i = 0; i < n_minor_devices_to_block; i++) {
|
||||
int found = 0;
|
||||
for (int j = 0; j < n_allowed_minor_numbers; j++) {
|
||||
if (minor_devices[i] == allowed_minor_numbers[j]) {
|
||||
found = 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!found) {
|
||||
fprintf(ERRORFILE,
|
||||
"Trying to blacklist device with minor-number=%d which is not on allowed list\n",
|
||||
minor_devices[i]);
|
||||
return_code = -1;
|
||||
goto cleanup;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Use cgroup helpers to blacklist devices
|
||||
for (int i = 0; i < n_minor_devices_to_block; i++) {
|
||||
char param_value[128];
|
||||
memset(param_value, 0, sizeof(param_value));
|
||||
snprintf(param_value, sizeof(param_value), "c %d:%d rwm",
|
||||
major_device_number, i);
|
||||
|
||||
int rc = update_cgroups_parameters_func_p("devices", "deny",
|
||||
container_id, param_value);
|
||||
|
||||
if (0 != rc) {
|
||||
fprintf(ERRORFILE, "CGroups: Failed to update cgroups\n");
|
||||
return_code = -1;
|
||||
goto cleanup;
|
||||
}
|
||||
}
|
||||
|
||||
cleanup:
|
||||
if (major_number_str) {
|
||||
free(major_number_str);
|
||||
}
|
||||
if (allowed_minor_numbers) {
|
||||
free(allowed_minor_numbers);
|
||||
}
|
||||
if (allowed_minor_numbers_str) {
|
||||
free(allowed_minor_numbers_str);
|
||||
}
|
||||
|
||||
return return_code;
|
||||
}
|
||||
|
||||
/*
 * Refresh the cached GPU module config section: looks up the section named
 * GPU_MODULE_SECTION_NAME in the configuration returned by get_cfg() and
 * stores the result in the file-local cfg_section pointer.
 */
void reload_gpu_configuration() {
|
||||
cfg_section = get_configuration_section(GPU_MODULE_SECTION_NAME, get_cfg());
|
||||
}
|
||||
|
||||
/*
|
||||
* Format of GPU request commandline:
|
||||
*
|
||||
* c-e --module-gpu --excluded_gpus 0,1,3 --container_id container_x_y
|
||||
*/
|
||||
/**
 * Entry point of the GPU module: parse the module command line and deny the
 * container access to the excluded GPU devices.
 *
 * - func: callback used to write cgroups parameters (stubbed in unit tests).
 * - module_name: not used by this function.
 * - module_argc / module_argv: module arguments; element 0 is skipped by
 *   getopt, the rest are --excluded_gpus <comma separated minor numbers> and
 *   --container_id <id>.
 *
 * --container_id is mandatory. When --excluded_gpus is absent nothing is
 * written and 0 is returned.
 *
 * Returns 0 on success and non-zero on failure.
 */
int handle_gpu_request(update_cgroups_parameters_func func,
    const char* module_name, int module_argc, char** module_argv) {
  if (!cfg_section) {
    reload_gpu_configuration();
  }

  if (!module_enabled(cfg_section, GPU_MODULE_SECTION_NAME)) {
    fprintf(ERRORFILE,
      "Please make sure gpu module is enabled before using it.\n");
    return -1;
  }

  static struct option long_options[] = {
    {EXCLUDED_GPUS_OPTION, required_argument, 0, 'e' },
    {CONTAINER_ID_OPTION, required_argument, 0, 'c' },
    {0, 0, 0, 0}
  };

  int rc = 0;
  int c = 0;
  int option_index = 0;

  int* minor_devices = NULL;
  char container_id[MAX_CONTAINER_ID_LEN];
  memset(container_id, 0, sizeof(container_id));
  size_t n_minor_devices_to_block = 0;
  int failed = 0;

  // Restart option scanning: getopt keeps global state between calls.
  optind = 1;
  while((c = getopt_long(module_argc, module_argv, "e:c:",
                         long_options, &option_index)) != -1) {
    switch(c) {
      case 'e':
        // Drop the result of an earlier --excluded_gpus so a repeated option
        // does not leak the previous array.
        free(minor_devices);
        minor_devices = NULL;
        n_minor_devices_to_block = 0;
        rc = get_numbers_split_by_comma(optarg, &minor_devices,
                                        &n_minor_devices_to_block);
        if (0 != rc) {
          fprintf(ERRORFILE,
            "Failed to get minor devices number from command line, value=%s\n",
            optarg);
          failed = 1;
          goto cleanup;
        }
        break;
      case 'c':
        if (!validate_container_id(optarg)) {
          fprintf(ERRORFILE,
            "Specified container_id=%s is invalid\n", optarg);
          failed = 1;
          goto cleanup;
        }
        // Refuse ids that do not fit: a truncated or unterminated id would
        // address the wrong cgroup.
        if (strlen(optarg) >= sizeof(container_id)) {
          fprintf(ERRORFILE,
            "Specified container_id=%s is too long\n", optarg);
          failed = 1;
          goto cleanup;
        }
        snprintf(container_id, sizeof(container_id), "%s", optarg);
        break;
      default:
        fprintf(ERRORFILE,
          "Unknown option in gpu command character %d %c, optionindex = %d\n",
          c, c, optind);
        failed = 1;
        goto cleanup;
    }
  }

  if (0 == container_id[0]) {
    fprintf(ERRORFILE,
      "[%s] --container_id must be specified.\n", __func__);
    failed = 1;
    goto cleanup;
  }

  if (!minor_devices) {
    // Minor devices is null, skip following call.
    fprintf(ERRORFILE,
      "--%s is not specified, skip cgroups call.\n", EXCLUDED_GPUS_OPTION);
    goto cleanup;
  }

  failed = internal_handle_gpu_request(func, n_minor_devices_to_block,
                                       minor_devices,
                                       container_id);

cleanup:
  free(minor_devices);
  return failed;
}
|
|
@ -0,0 +1,45 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifdef __FreeBSD__
|
||||
#define _WITH_GETLINE
|
||||
#endif
|
||||
|
||||
#ifndef _MODULES_GPU_GPU_MUDULE_H_
|
||||
#define _MODULES_GPU_GPU_MUDULE_H_
|
||||
|
||||
#define GPU_MAJOR_NUMBER_CONFIG_KEY "gpu.major-device-number"
|
||||
#define GPU_ALLOWED_DEVICES_MINOR_NUMBERS "gpu.allowed-device-minor-numbers"
|
||||
#define GPU_MODULE_SECTION_NAME "gpu"
|
||||
|
||||
// For unit test stubbing
|
||||
typedef int (*update_cgroups_parameters_func)(const char*, const char*,
|
||||
const char*, const char*);
|
||||
|
||||
/**
|
||||
* Handle gpu requests
|
||||
*/
|
||||
int handle_gpu_request(update_cgroups_parameters_func func,
|
||||
const char* module_name, int module_argc, char** module_argv);
|
||||
|
||||
/**
|
||||
* Reload config from filesystem, visible for testing.
|
||||
*/
|
||||
void reload_gpu_configuration();
|
||||
|
||||
#endif
|
|
@ -0,0 +1,121 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include <errno.h>
|
||||
#include <fcntl.h>
|
||||
#include <inttypes.h>
|
||||
#include <signal.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/wait.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
#include <sstream>
|
||||
|
||||
extern "C" {
|
||||
#include "configuration.h"
|
||||
#include "container-executor.h"
|
||||
#include "modules/cgroups/cgroups-operations.h"
|
||||
#include "test/test-container-executor-common.h"
|
||||
#include "util.h"
|
||||
}
|
||||
|
||||
namespace ContainerExecutor {
|
||||
|
||||
class TestCGroupsModule : public ::testing::Test {
|
||||
protected:
|
||||
virtual void SetUp() {
|
||||
if (mkdirs(TEST_ROOT, 0755) != 0) {
|
||||
fprintf(ERRORFILE, "Failed to mkdir TEST_ROOT: %s\n", TEST_ROOT);
|
||||
exit(1);
|
||||
}
|
||||
LOGFILE = stdout;
|
||||
ERRORFILE = stderr;
|
||||
}
|
||||
|
||||
virtual void TearDown() {}
|
||||
};
|
||||
|
||||
// A [cgroups] section without the "root" key must not produce a path.
TEST_F(TestCGroupsModule, test_cgroups_get_path_without_define_root) {
  // Write config file.
  const char *filename = TEST_ROOT "/test_cgroups_get_path_without_root.cfg";
  FILE *file = fopen(filename, "w");

  // Fail this test rather than aborting the whole test binary.
  ASSERT_TRUE(file != NULL) << "FAIL: Could not open configuration file: "
                            << filename << "\n";
  fprintf(file, "[cgroups]\n");
  fprintf(file, "yarn-hierarchy=yarn\n");
  fclose(file);

  // Read config file
  read_executor_config(filename);
  reload_cgroups_configuration();

  char* path = get_cgroups_path_to_write("devices", "deny", "container_1");

  // Release an unexpectedly returned path before asserting so it cannot leak.
  const bool got_path = (path != NULL);
  free(path);
  ASSERT_FALSE(got_path) << "Should fail.\n";
}
|
||||
|
||||
// A [cgroups] section without the "yarn-hierarchy" key must not produce a path.
TEST_F(TestCGroupsModule, test_cgroups_get_path_without_define_yarn_hierarchy) {
  // Write config file. Use a file name of its own so this test does not
  // overwrite the config written by the "without root" test.
  const char *filename =
      TEST_ROOT "/test_cgroups_get_path_without_yarn_hierarchy.cfg";
  FILE *file = fopen(filename, "w");

  ASSERT_TRUE(file) << "FAIL: Could not open configuration file: " << filename
                    << "\n";
  fprintf(file, "[cgroups]\n");
  fprintf(file, "root=/sys/fs/cgroups\n");
  fclose(file);

  // Read config file
  read_executor_config(filename);
  reload_cgroups_configuration();
  char* path = get_cgroups_path_to_write("devices", "deny", "container_1");

  // Release an unexpectedly returned path before asserting so it cannot leak.
  const bool got_path = (path != NULL);
  free(path);
  ASSERT_FALSE(got_path) << "Should fail.\n";
}
|
||||
|
||||
// With both keys defined (values carry trailing spaces on purpose) the path
// is assembled as <root>/<hierarchy>/<yarn-hierarchy>/<group>/<hierarchy>.<param>.
TEST_F(TestCGroupsModule, test_cgroups_get_path_succeeded) {
  // Write config file.
  const char *filename = TEST_ROOT "/test_cgroups_get_path.cfg";
  FILE *file = fopen(filename, "w");

  ASSERT_TRUE(file) << "FAIL: Could not open configuration file\n";
  fprintf(file, "[cgroups]\n");
  fprintf(file, "root=/sys/fs/cgroups \n");
  fprintf(file, "yarn-hierarchy=yarn \n");
  fclose(file);

  // Read config file
  read_executor_config(filename);
  reload_cgroups_configuration();

  char* path = get_cgroups_path_to_write("devices", "deny", "container_1");
  ASSERT_TRUE(NULL != path) << "Should success.\n";

  const char *EXPECTED =
      "/sys/fs/cgroups/devices/yarn/container_1/devices.deny";

  // Non-fatal check so the returned buffer is always released afterwards.
  EXPECT_STREQ(EXPECTED, path)
      << "Return cgroup-path-to-write is not expected\n";
  free(path);
}
|
||||
} // namespace ContainerExecutor
|
|
@ -0,0 +1,203 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include <errno.h>
|
||||
#include <fcntl.h>
|
||||
#include <inttypes.h>
|
||||
#include <signal.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/wait.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
#include <sstream>
|
||||
|
||||
extern "C" {
|
||||
#include "configuration.h"
|
||||
#include "container-executor.h"
|
||||
#include "modules/cgroups/cgroups-operations.h"
|
||||
#include "modules/gpu/gpu-module.h"
|
||||
#include "test/test-container-executor-common.h"
|
||||
#include "util.h"
|
||||
}
|
||||
|
||||
namespace ContainerExecutor {
|
||||
|
||||
class TestGpuModule : public ::testing::Test {
|
||||
protected:
|
||||
virtual void SetUp() {
|
||||
if (mkdirs(TEST_ROOT, 0755) != 0) {
|
||||
fprintf(ERRORFILE, "Failed to mkdir TEST_ROOT: %s\n", TEST_ROOT);
|
||||
exit(1);
|
||||
}
|
||||
LOGFILE = stdout;
|
||||
ERRORFILE = stderr;
|
||||
}
|
||||
|
||||
virtual void TearDown() {
|
||||
|
||||
}
|
||||
};
|
||||
|
||||
static std::vector<const char*> cgroups_parameters_invoked;
|
||||
|
||||
static int mock_update_cgroups_parameters(
|
||||
const char* controller_name,
|
||||
const char* param_name,
|
||||
const char* group_id,
|
||||
const char* value) {
|
||||
char* buf = (char*) malloc(128);
|
||||
strcpy(buf, controller_name);
|
||||
cgroups_parameters_invoked.push_back(buf);
|
||||
|
||||
buf = (char*) malloc(128);
|
||||
strcpy(buf, param_name);
|
||||
cgroups_parameters_invoked.push_back(buf);
|
||||
|
||||
buf = (char*) malloc(128);
|
||||
strcpy(buf, group_id);
|
||||
cgroups_parameters_invoked.push_back(buf);
|
||||
|
||||
buf = (char*) malloc(128);
|
||||
strcpy(buf, value);
|
||||
cgroups_parameters_invoked.push_back(buf);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void verify_param_updated_to_cgroups(
|
||||
int argc, const char** argv) {
|
||||
ASSERT_EQ(argc, cgroups_parameters_invoked.size());
|
||||
|
||||
int offset = 0;
|
||||
while (offset < argc) {
|
||||
ASSERT_STREQ(argv[offset], cgroups_parameters_invoked[offset]);
|
||||
offset++;
|
||||
}
|
||||
}
|
||||
|
||||
// Write a minimal [gpu] config with module.enabled set according to
// `enabled`, then load it as the executor config and refresh the GPU
// module's cached section. Exits the process if the file cannot be created.
static void write_and_load_gpu_module_to_cfg(const char* cfg_filepath, int enabled) {
  FILE *cfg = fopen(cfg_filepath, "w");
  if (cfg == NULL) {
    printf("FAIL: Could not open configuration file: %s\n", cfg_filepath);
    exit(1);
  }

  fputs("[gpu]\n", cfg);
  fputs(enabled ? "module.enabled=true\n" : "module.enabled=false\n", cfg);
  fclose(cfg);

  // Read config file
  read_executor_config(cfg_filepath);
  reload_gpu_configuration();
}
|
||||
|
||||
static void test_gpu_module_enabled_disabled(int enabled) {
|
||||
// Write config file.
|
||||
const char *filename = TEST_ROOT "/test_cgroups_module_enabled_disabled.cfg";
|
||||
write_and_load_gpu_module_to_cfg(filename, enabled);
|
||||
|
||||
char* argv[] = { (char*) "--module-gpu", (char*) "--excluded_gpus", (char*) "0,1",
|
||||
(char*) "--container_id",
|
||||
(char*) "container_1498064906505_0001_01_000001" };
|
||||
|
||||
int rc = handle_gpu_request(&mock_update_cgroups_parameters,
|
||||
"gpu", 5, argv);
|
||||
|
||||
int EXPECTED_RC;
|
||||
if (enabled) {
|
||||
EXPECTED_RC = 0;
|
||||
} else {
|
||||
EXPECTED_RC = -1;
|
||||
}
|
||||
ASSERT_EQ(EXPECTED_RC, rc);
|
||||
}
|
||||
|
||||
// With the module enabled, each excluded GPU must result in one
// ("devices", "deny", <container id>, "c <major>:<minor> rwm") callback, and
// a request without --excluded_gpus must result in none.
TEST_F(TestGpuModule, test_verify_gpu_module_calls_cgroup_parameter) {
  // Write config file.
  const char *cfg_path = TEST_ROOT "/test_verify_gpu_module_calls_cgroup_parameter.cfg";
  write_and_load_gpu_module_to_cfg(cfg_path, 1);

  char* container_id = (char*) "container_1498064906505_0001_01_000001";

  /* Test case 1: block 2 devices */
  char* block_two_argv[] = {
      (char*) "--module-gpu",
      (char*) "--excluded_gpus", (char*) "0,1",
      (char*) "--container_id", container_id };

  cgroups_parameters_invoked.clear();
  int rc = handle_gpu_request(&mock_update_cgroups_parameters,
                              "gpu", 5, block_two_argv);
  ASSERT_EQ(0, rc) << "Should success.\n";

  // Verify cgroups parameters
  const char* expected_calls[] = {
      "devices", "deny", container_id, "c 195:0 rwm",
      "devices", "deny", container_id, "c 195:1 rwm"};
  verify_param_updated_to_cgroups(8, expected_calls);

  /* Test case 2: block 0 devices */
  char* block_none_argv[] = {
      (char*) "--module-gpu",
      (char*) "--container_id", container_id };

  cgroups_parameters_invoked.clear();
  rc = handle_gpu_request(&mock_update_cgroups_parameters,
                          "gpu", 3, block_none_argv);
  ASSERT_EQ(0, rc) << "Should success.\n";

  // Verify cgroups parameters
  verify_param_updated_to_cgroups(0, NULL);
}
|
||||
|
||||
// Requests with a malformed or missing container id must be rejected.
TEST_F(TestGpuModule, test_illegal_cli_parameters) {
  // Write config file.
  const char *cfg_path = TEST_ROOT "/test_illegal_cli_parameters.cfg";
  write_and_load_gpu_module_to_cfg(cfg_path, 1);

  int rc = 0;

  // Case 1: container id that is not a container id at all
  char* bad_id_argv[] = {
      (char*) "--module-gpu",
      (char*) "--excluded_gpus", (char*) "0,1",
      (char*) "--container_id", (char*) "xxxx" };
  rc = handle_gpu_request(&mock_update_cgroups_parameters,
                          "gpu", 5, bad_id_argv);
  ASSERT_NE(0, rc) << "Should fail.\n";

  // Case 2: container id with the right prefix but incomplete
  char* short_id_argv[] = {
      (char*) "--module-gpu",
      (char*) "--excluded_gpus", (char*) "0,1",
      (char*) "--container_id", (char*) "container_1" };
  rc = handle_gpu_request(&mock_update_cgroups_parameters,
                          "gpu", 5, short_id_argv);
  ASSERT_NE(0, rc) << "Should fail.\n";

  // Case 3: --container_id not given at all
  char* no_id_argv[] = {
      (char*) "--module-gpu",
      (char*) "--excluded_gpus", (char*) "0,1" };
  rc = handle_gpu_request(&mock_update_cgroups_parameters,
                          "gpu", 3, no_id_argv);
  ASSERT_NE(0, rc) << "Should fail.\n";
}
|
||||
|
||||
// Runs the shared enabled/disabled scenario with the module disabled (0).
TEST_F(TestGpuModule, test_gpu_module_disabled) {
|
||||
test_gpu_module_enabled_disabled(0);
|
||||
}
|
||||
|
||||
// Runs the shared enabled/disabled scenario with the module enabled (1).
TEST_F(TestGpuModule, test_gpu_module_enabled) {
|
||||
test_gpu_module_enabled_disabled(1);
|
||||
}
|
||||
} // namespace ContainerExecutor
|
|
@ -1404,7 +1404,6 @@ int main(int argc, char **argv) {
|
|||
#endif
|
||||
|
||||
test_trim_function();
|
||||
run("rm -fr " TEST_ROOT);
|
||||
printf("\nFinished tests\n");
|
||||
|
||||
free(current_username);
|
||||
|
|
Loading…
Reference in New Issue