diff --git a/hadoop-assemblies/src/main/resources/assemblies/hadoop-mapreduce-dist.xml b/hadoop-assemblies/src/main/resources/assemblies/hadoop-mapreduce-dist.xml index e089b836ed2..f9c77b99399 100644 --- a/hadoop-assemblies/src/main/resources/assemblies/hadoop-mapreduce-dist.xml +++ b/hadoop-assemblies/src/main/resources/assemblies/hadoop-mapreduce-dist.xml @@ -9,11 +9,8 @@ - hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/target/classes/bin + hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/target/native/target/usr/local/bin bin - - container-executor - 0755 @@ -54,7 +51,6 @@ **/SecurityAuth.audit* - assembly/** pom.xml build*.xml ivy.xml diff --git a/hadoop-mapreduce-project/CHANGES.txt b/hadoop-mapreduce-project/CHANGES.txt index f1261bd8167..cbb7fe337c0 100644 --- a/hadoop-mapreduce-project/CHANGES.txt +++ b/hadoop-mapreduce-project/CHANGES.txt @@ -394,6 +394,9 @@ Release 0.23.0 - Unreleased MAPREDUCE-3014. Rename and invert logic of '-cbuild' profile to 'native' and off by default. (tucu) + MAPREDUCE-3171. normalize nodemanager native code compilation with common/hdfs + native. (tucu) + OPTIMIZATIONS MAPREDUCE-2026. Make JobTracker.getJobCounters() and diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/pom.xml b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/pom.xml index b3e30bddaef..7017342993c 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/pom.xml +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/pom.xml @@ -47,72 +47,43 @@ 1.0-beta-1 - clean - clean - - - -i - - src/main/c/container-executor - target - - - autoreconf - make-clean - - - - make + compile compile - - src/main/c/container-executor - - - CFLAGS - -DHADOOP_CONF_DIR=${container-executor.conf.dir} - - - - - src/main/c/container-executor - - - src/main/c/container-executor - target - ${project.build.outputDirectory} - autoreconf configure - compile + make-install - make-test + test test - - src/main/c/container-executor - - - CFLAGS - -DHADOOP_CONF_DIR=${container-executor.conf.dir} - - - - - src/main/c/container-executor - - - src/main/c/container-executor - target - ${project.build.outputDirectory} - check - - compile + test + + + ${project.build.directory}/native/container-executor + + -i + + + + + + CFLAGS + -DHADOOP_CONF_DIR=${container-executor.conf.dir} + + + ${project.build.directory}/native/container-executor + /usr/local + + + ${project.build.directory}/native/target + + @@ -191,6 +162,21 @@ run + + compile + generate-sources + + run + + + + + + + + + + diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/c/container-executor/.gitignore b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/c/container-executor/.gitignore deleted file mode 100644 index 573db286fc6..00000000000 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/c/container-executor/.gitignore +++ /dev/null @@ -1,21 +0,0 @@ -Makefile -Makefile.in -aclocal.m4 -compile -config.log -config.status -configure -depcomp -impl/.deps/ -impl/.dirstamp -impl/configuration.o -impl/main.o -impl/container-executor.o -install-sh -libtool -missing -container-executor -test/.deps/ -test-task-controller -test/.dirstamp -test/test-task-controller.o diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/c/container-executor/impl/container-executor.h b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/c/container-executor/impl/container-executor.h deleted file mode 100644 index 3f0e8a5aa2c..00000000000 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/c/container-executor/impl/container-executor.h +++ /dev/null @@ -1,195 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include -#include -#include - -//command definitions -enum command { - INITIALIZE_CONTAINER = 0, - LAUNCH_CONTAINER = 1, - SIGNAL_CONTAINER = 2, - DELETE_AS_USER = 3, -}; - -enum errorcodes { - INVALID_ARGUMENT_NUMBER = 1, - INVALID_USER_NAME, //2 - INVALID_COMMAND_PROVIDED, //3 - // SUPER_USER_NOT_ALLOWED_TO_RUN_TASKS (NOT USED) 4 - INVALID_NM_ROOT_DIRS = 5, - SETUID_OPER_FAILED, //6 - UNABLE_TO_EXECUTE_CONTAINER_SCRIPT, //7 - UNABLE_TO_SIGNAL_CONTAINER, //8 - INVALID_CONTAINER_PID, //9 - // ERROR_RESOLVING_FILE_PATH (NOT_USED) 10 - // RELATIVE_PATH_COMPONENTS_IN_FILE_PATH (NOT USED) 11 - // UNABLE_TO_STAT_FILE (NOT USED) 12 - // FILE_NOT_OWNED_BY_ROOT (NOT USED) 13 - // PREPARE_CONTAINER_DIRECTORIES_FAILED (NOT USED) 14 - // INITIALIZE_CONTAINER_FAILED (NOT USED) 15 - // PREPARE_CONTAINER_LOGS_FAILED (NOT USED) 16 - // INVALID_LOG_DIR (NOT USED) 17 - OUT_OF_MEMORY = 18, - // INITIALIZE_DISTCACHEFILE_FAILED (NOT USED) 19 - INITIALIZE_USER_FAILED = 20, - UNABLE_TO_BUILD_PATH, //21 - INVALID_CONTAINER_EXEC_PERMISSIONS, //22 - // PREPARE_JOB_LOGS_FAILED (NOT USED) 23 - INVALID_CONFIG_FILE = 24, - SETSID_OPER_FAILED = 25, - WRITE_PIDFILE_FAILED = 26 -}; - -#define NM_GROUP_KEY "yarn.nodemanager.linux-container-executor.group" -#define USER_DIR_PATTERN "%s/usercache/%s" -#define NM_APP_DIR_PATTERN USER_DIR_PATTERN "/appcache/%s" -#define CONTAINER_DIR_PATTERN NM_APP_DIR_PATTERN "/%s" -#define CONTAINER_SCRIPT "launch_container.sh" -#define NM_SYS_DIR_KEY "yarn.nodemanager.local-dirs" -#define NM_LOG_DIR_KEY "yarn.nodemanager.log-dirs" -#define CREDENTIALS_FILENAME "container_tokens" -#define MIN_USERID_KEY "min.user.id" -#define BANNED_USERS_KEY "banned.users" - -extern struct passwd *user_detail; - -// the log file for messages -extern FILE *LOGFILE; -// the log file for error messages -extern FILE *ERRORFILE; - - -// get the executable's filename -char* get_executable(); - -/** - * Check the permissions on the container-executor to make sure that security is - * permissible. For this, we need container-executor binary to - * * be user-owned by root - * * be group-owned by a configured special group. - * * others do not have any permissions - * * be setuid/setgid - * @param executable_file the file to check - * @return -1 on error 0 on success. - */ -int check_executor_permissions(char *executable_file); - -// initialize the application directory -int initialize_app(const char *user, const char *app_id, - const char *credentials, char* const* args); - -/* - * Function used to launch a container as the provided user. It does the following : - * 1) Creates container work dir and log dir to be accessible by the child - * 2) Copies the script file from the TT to the work directory - * 3) Sets up the environment - * 4) Does an execlp on the same in order to replace the current image with - * container image. - * @param user the user to become - * @param app_id the application id - * @param container_id the container id - * @param work_dir the working directory for the container. - * @param script_name the name of the script to be run to launch the container. - * @param cred_file the credentials file that needs to be compied to the - * working directory. - * @param pid_file file where pid of process should be written to - * @return -1 or errorcode enum value on error (should never return on success). - */ -int launch_container_as_user(const char * user, const char *app_id, - const char *container_id, const char *work_dir, - const char *script_name, const char *cred_file, - const char *pid_file); - -/** - * Function used to signal a container launched by the user. - * The function sends appropriate signal to the process group - * specified by the pid. - * @param user the user to send the signal as. - * @param pid the process id to send the signal to. - * @param sig the signal to send. - * @return an errorcode enum value on error, or 0 on success. - */ -int signal_container_as_user(const char *user, int pid, int sig); - -// delete a directory (or file) recursively as the user. The directory -// could optionally be relative to the baseDir set of directories (if the same -// directory appears on multiple disk volumes, the disk volumes should be passed -// as the baseDirs). If baseDirs is not specified, then dir_to_be_deleted is -// assumed as the absolute path -int delete_as_user(const char *user, - const char *dir_to_be_deleted, - char* const* baseDirs); - -// set the uid and gid of the node manager. This is used when doing some -// priviledged operations for setting the effective uid and gid. -void set_nm_uid(uid_t user, gid_t group); - -/** - * Is the user a real user account? - * Checks: - * 1. Not root - * 2. UID is above the minimum configured. - * 3. Not in banned user list - * Returns NULL on failure - */ -struct passwd* check_user(const char *user); - -// set the user -int set_user(const char *user); - -// methods to get the directories - -char *get_user_directory(const char *nm_root, const char *user); - -char *get_app_directory(const char * nm_root, const char *user, - const char *app_id); - -char *get_container_work_directory(const char *nm_root, const char *user, - const char *app_id, const char *container_id); - -char *get_container_launcher_file(const char* work_dir); - -char *get_container_credentials_file(const char* work_dir); - -/** - * Get the app log directory under log_root - */ -char* get_app_log_directory(const char* log_root, const char* appid); - -/** - * Ensure that the given path and all of the parent directories are created - * with the desired permissions. - */ -int mkdirs(const char* path, mode_t perm); - -/** - * Function to initialize the user directories of a user. - */ -int initialize_user(const char *user); - -/** - * Create a top level directory for the user. - * It assumes that the parent directory is *not* writable by the user. - * It creates directories with 02700 permissions owned by the user - * and with the group set to the node manager group. - * return non-0 on failure - */ -int create_directory_for_user(const char* path); - -int change_user(uid_t user, gid_t group); diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/c/container-executor/.autom4te.cfg b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/.autom4te.cfg similarity index 100% rename from hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/c/container-executor/.autom4te.cfg rename to hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/.autom4te.cfg diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/c/container-executor/.deps/container-executor.Po b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/.deps/container-executor.Po similarity index 100% rename from hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/c/container-executor/.deps/container-executor.Po rename to hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/.deps/container-executor.Po diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/c/container-executor/Makefile.am b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/Makefile.am similarity index 88% rename from hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/c/container-executor/Makefile.am rename to hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/Makefile.am index 4938bb2f53a..ec6052e99dc 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/c/container-executor/Makefile.am +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/Makefile.am @@ -18,9 +18,9 @@ AM_CFLAGS=-I$(srcdir)/impl -Wall -g -Werror # Define the programs that need to be built bin_PROGRAMS = container-executor -check_PROGRAMS = test-container-executor +check_PROGRAMS = test-task-controller -TESTS = test-container-executor +TESTS = test-task-controller # Define the sources for the common files common_SOURCES = impl/configuration.c impl/container-executor.c @@ -29,4 +29,4 @@ common_SOURCES = impl/configuration.c impl/container-executor.c container_executor_SOURCES = $(common_SOURCES) impl/main.c # Define the sources for the test executable -test_container_executor_SOURCES = $(common_SOURCES) test/test-container-executor.c +test_task_controller_SOURCES = $(common_SOURCES) test/test-task-controller.c diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/c/container-executor/configure.ac b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/configure.ac similarity index 100% rename from hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/c/container-executor/configure.ac rename to hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/configure.ac diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/c/container-executor/impl/configuration.c b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/configuration.c similarity index 91% rename from hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/c/container-executor/impl/configuration.c rename to hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/configuration.c index d85715be7a0..f1f53bfe451 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/c/container-executor/impl/configuration.c +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/configuration.c @@ -69,16 +69,16 @@ void free_configurations() { static int is_only_root_writable(const char *file) { struct stat file_stat; if (stat(file, &file_stat) != 0) { - fprintf(ERRORFILE, "Can't stat file %s - %s\n", file, strerror(errno)); + fprintf(LOGFILE, "Can't stat file %s - %s\n", file, strerror(errno)); return 0; } if (file_stat.st_uid != 0) { - fprintf(ERRORFILE, "File %s must be owned by root, but is owned by %d\n", + fprintf(LOGFILE, "File %s must be owned by root, but is owned by %d\n", file, file_stat.st_uid); return 0; } if ((file_stat.st_mode & (S_IWGRP | S_IWOTH)) != 0) { - fprintf(ERRORFILE, + fprintf(LOGFILE, "File %s must not be world or group writable, but is %03o\n", file, file_stat.st_mode & (~S_IFMT)); return 0; @@ -109,6 +109,7 @@ int check_configuration_permissions(const char* file_name) { //function used to load the configurations present in the secure config void read_config(const char* file_name) { + fprintf(LOGFILE, "Reading task controller config from %s\n" , file_name); FILE *conf_file; char *line; char *equaltok; @@ -117,7 +118,7 @@ void read_config(const char* file_name) { int size_read = 0; if (file_name == NULL) { - fprintf(ERRORFILE, "Null configuration filename passed in\n"); + fprintf(LOGFILE, "Null configuration filename passed in\n"); exit(INVALID_CONFIG_FILE); } @@ -131,33 +132,30 @@ void read_config(const char* file_name) { config.size = 0; conf_file = fopen(file_name, "r"); if (conf_file == NULL) { - fprintf(ERRORFILE, "Invalid conf file provided : %s \n", file_name); + fprintf(LOGFILE, "Invalid conf file provided : %s \n", file_name); exit(INVALID_CONFIG_FILE); } while(!feof(conf_file)) { line = (char *) malloc(linesize); if(line == NULL) { - fprintf(ERRORFILE, "malloc failed while reading configuration file.\n"); + fprintf(LOGFILE, "malloc failed while reading configuration file.\n"); exit(OUT_OF_MEMORY); } size_read = getline(&line,&linesize,conf_file); - //feof returns true only after we read past EOF. //so a file with no new line, at last can reach this place //if size_read returns negative check for eof condition if (size_read == -1) { - free(line); if(!feof(conf_file)){ + fprintf(LOGFILE, "getline returned error.\n"); exit(INVALID_CONFIG_FILE); - } else { + }else { + free(line); break; } } - int eol = strlen(line) - 1; - if(line[eol] == '\n') { - //trim the ending new line - line[eol] = '\0'; - } + //trim the ending new line + line[strlen(line)-1] = '\0'; //comment line if(line[0] == '#') { free(line); @@ -219,15 +217,14 @@ void read_config(const char* file_name) { config.size++; free(line); } - + //close the file fclose(conf_file); if (config.size == 0) { - fprintf(ERRORFILE, "Invalid configuration provided in %s\n", file_name); + fprintf(LOGFILE, "Invalid configuration provided in %s\n", file_name); exit(INVALID_CONFIG_FILE); } - //clean up allocated file name return; //free spaces alloced. diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/c/container-executor/impl/configuration.h b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/configuration.h similarity index 100% rename from hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/c/container-executor/impl/configuration.h rename to hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/configuration.h diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/c/container-executor/impl/container-executor.c b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/container-executor.c similarity index 71% rename from hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/c/container-executor/impl/container-executor.c rename to hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/container-executor.c index 73d160ae66b..e4926734d75 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/c/container-executor/impl/container-executor.c +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/container-executor.c @@ -40,17 +40,13 @@ static const char* DEFAULT_BANNED_USERS[] = {"mapred", "hdfs", "bin", 0}; struct passwd *user_detail = NULL; FILE* LOGFILE = NULL; -FILE* ERRORFILE = NULL; -static uid_t nm_uid = -1; -static gid_t nm_gid = -1; +static uid_t tt_uid = -1; +static gid_t tt_gid = -1; -char *concatenate(char *concat_pattern, char *return_path_name, - int numArgs, ...); - -void set_nm_uid(uid_t user, gid_t group) { - nm_uid = user; - nm_gid = group; +void set_tasktracker_uid(uid_t user, gid_t group) { + tt_uid = user; + tt_gid = group; } /** @@ -62,11 +58,11 @@ char* get_executable() { char *filename = malloc(PATH_MAX); ssize_t len = readlink(buffer, filename, PATH_MAX); if (len == -1) { - fprintf(ERRORFILE, "Can't get executable name from %s - %s\n", buffer, + fprintf(stderr, "Can't get executable name from %s - %s\n", buffer, strerror(errno)); exit(-1); } else if (len >= PATH_MAX) { - fprintf(ERRORFILE, "Executable name %.*s is longer than %d characters.\n", + fprintf(LOGFILE, "Executable name %.*s is longer than %d characters.\n", PATH_MAX, filename, PATH_MAX); exit(-1); } @@ -74,12 +70,20 @@ char* get_executable() { return filename; } -int check_executor_permissions(char *executable_file) { +/** + * Check the permissions on taskcontroller to make sure that security is + * promisable. For this, we need container-executor binary to + * * be user-owned by root + * * be group-owned by a configured special group. + * * others do not have any permissions + * * be setuid/setgid + */ +int check_taskcontroller_permissions(char *executable_file) { errno = 0; char * resolved_path = realpath(executable_file, NULL); if (resolved_path == NULL) { - fprintf(ERRORFILE, + fprintf(LOGFILE, "Error resolving the canonical name for the executable : %s!", strerror(errno)); return -1; @@ -88,7 +92,7 @@ int check_executor_permissions(char *executable_file) { struct stat filestat; errno = 0; if (stat(resolved_path, &filestat) != 0) { - fprintf(ERRORFILE, + fprintf(LOGFILE, "Could not stat the executable : %s!.\n", strerror(errno)); return -1; } @@ -104,7 +108,7 @@ int check_executor_permissions(char *executable_file) { } if (binary_gid != getgid()) { - fprintf(LOGFILE, "The configured nodemanager group %d is different from" + fprintf(LOGFILE, "The configured tasktracker group %d is different from" " the group of the executable %d\n", getgid(), binary_gid); return -1; } @@ -150,60 +154,6 @@ static int change_effective_user(uid_t user, gid_t group) { return 0; } -/** - * Write the pid of the current process into the pid file. - * pid_file: Path to pid file where pid needs to be written to - */ -static int write_pid_to_file_as_nm(const char* pid_file, pid_t pid) { - uid_t user = geteuid(); - gid_t group = getegid(); - if (change_effective_user(nm_uid, nm_gid) != 0) { - return -1; - } - - char *temp_pid_file = concatenate("%s.tmp", "pid_file_path", 1, pid_file); - - // create with 700 - int pid_fd = open(temp_pid_file, O_WRONLY|O_CREAT|O_EXCL, S_IRWXU); - if (pid_fd == -1) { - fprintf(LOGFILE, "Can't open file %s as node manager - %s\n", temp_pid_file, - strerror(errno)); - free(temp_pid_file); - return -1; - } - - // write pid to temp file - char pid_buf[21]; - snprintf(pid_buf, 21, "%d", pid); - ssize_t written = write(pid_fd, pid_buf, strlen(pid_buf)); - close(pid_fd); - if (written == -1) { - fprintf(LOGFILE, "Failed to write pid to file %s as node manager - %s\n", - temp_pid_file, strerror(errno)); - free(temp_pid_file); - return -1; - } - - // rename temp file to actual pid file - // use rename as atomic - if (rename(temp_pid_file, pid_file)) { - fprintf(LOGFILE, "Can't move pid file from %s to %s as node manager - %s\n", - temp_pid_file, pid_file, strerror(errno)); - unlink(temp_pid_file); - free(temp_pid_file); - return -1; - } - - // Revert back to the calling user. - if (change_effective_user(user, group)) { - free(temp_pid_file); - return -1; - } - - free(temp_pid_file); - return 0; -} - /** * Change the real and effective user and group to abandon the super user * priviledges. @@ -273,45 +223,62 @@ char *concatenate(char *concat_pattern, char *return_path_name, } /** - * Get the app-directory path from nm_root, user name and app-id + * Get the job-directory path from tt_root, user name and job-id */ -char *get_app_directory(const char * nm_root, const char *user, - const char *app_id) { - return concatenate(NM_APP_DIR_PATTERN, "app_dir_path", 3, nm_root, user, - app_id); +char *get_job_directory(const char * tt_root, const char *user, + const char *jobid) { + return concatenate(TT_JOB_DIR_PATTERN, "job_dir_path", 3, tt_root, user, + jobid); } /** * Get the user directory of a particular user */ -char *get_user_directory(const char *nm_root, const char *user) { - return concatenate(USER_DIR_PATTERN, "user_dir_path", 2, nm_root, user); +char *get_user_directory(const char *tt_root, const char *user) { + return concatenate(USER_DIR_PATTERN, "user_dir_path", 2, tt_root, user); +} + +char *get_job_work_directory(const char *job_dir) { + return concatenate("%s/work", "job work", 1, job_dir); } /** - * Get the container directory for the given container_id + * Get the attempt directory for the given attempt_id */ -char *get_container_work_directory(const char *nm_root, const char *user, - const char *app_id, const char *container_id) { - return concatenate(CONTAINER_DIR_PATTERN, "container_dir_path", 4, - nm_root, user, app_id, container_id); +char *get_attempt_work_directory(const char *tt_root, const char *user, + const char *job_id, const char *attempt_id) { + return concatenate(ATTEMPT_DIR_PATTERN, "attempt_dir_path", 4, + tt_root, user, job_id, attempt_id); } -char *get_container_launcher_file(const char* work_dir) { - return concatenate("%s/%s", "container launcher", 2, work_dir, CONTAINER_SCRIPT); +char *get_task_launcher_file(const char* work_dir) { + return concatenate("%s/%s", "task launcher", 2, work_dir, TASK_SCRIPT); } -char *get_container_credentials_file(const char* work_dir) { - return concatenate("%s/%s", "container credentials", 2, work_dir, +char *get_task_credentials_file(const char* work_dir) { + return concatenate("%s/%s", "task crednetials", 2, work_dir, CREDENTIALS_FILENAME); } /** - * Get the app log directory under the given log_root + * Get the job log directory under the given log_root */ -char* get_app_log_directory(const char *log_root, const char* app_id) { - return concatenate("%s/%s", "app log dir", 2, log_root, - app_id); +char* get_job_log_directory(const char *log_root, const char* jobid) { + return concatenate("%s/%s", "job log dir", 2, log_root, + jobid); +} + +/* + * Get a user subdirectory. + */ +char *get_user_subdirectory(const char *tt_root, + const char *user, + const char *subdir) { + char * user_dir = get_user_directory(tt_root, user); + char * result = concatenate("%s/%s", "user subdir", 2, + user_dir, subdir); + free(user_dir); + return result; } /** @@ -353,42 +320,43 @@ int mkdirs(const char* path, mode_t perm) { } /** - * Function to prepare the container directories. - * It creates the container work and log directories. + * Function to prepare the attempt directories for the task JVM. + * It creates the task work and log directories. */ -static int create_container_directories(const char* user, const char *app_id, - const char *container_id) { +static int create_attempt_directories(const char* user, const char *job_id, + const char *task_id) { // create dirs as 0750 const mode_t perms = S_IRWXU | S_IRGRP | S_IXGRP; - if (app_id == NULL || container_id == NULL || user == NULL) { + if (job_id == NULL || task_id == NULL || user == NULL) { fprintf(LOGFILE, - "Either app_id, container_id or the user passed is null.\n"); + "Either task_id is null or the user passed is null.\n"); return -1; } int result = -1; - char **local_dir = get_values(NM_SYS_DIR_KEY); + char **local_dir = get_values(TT_SYS_DIR_KEY); if (local_dir == NULL) { - fprintf(LOGFILE, "%s is not configured.\n", NM_SYS_DIR_KEY); + fprintf(LOGFILE, "%s is not configured.\n", TT_SYS_DIR_KEY); return -1; } char **local_dir_ptr; for(local_dir_ptr = local_dir; *local_dir_ptr != NULL; ++local_dir_ptr) { - char *container_dir = get_container_work_directory(*local_dir_ptr, user, app_id, - container_id); - if (container_dir == NULL) { + char *task_dir = get_attempt_work_directory(*local_dir_ptr, user, job_id, + task_id); + if (task_dir == NULL) { free_values(local_dir); return -1; } - if (mkdirs(container_dir, perms) == 0) { + if (mkdirs(task_dir, perms) != 0) { + // continue on to create other task directories + free(task_dir); + } else { result = 0; + free(task_dir); } - // continue on to create other work directories - free(container_dir); - } free_values(local_dir); if (result != 0) { @@ -396,36 +364,34 @@ static int create_container_directories(const char* user, const char *app_id, } result = -1; - // also make the directory for the container logs - char *combined_name = malloc(strlen(app_id) + strlen(container_id) + 2); - if (combined_name == NULL) { - fprintf(LOGFILE, "Malloc of combined name failed\n"); + // also make the directory for the task logs + char *job_task_name = malloc(strlen(job_id) + strlen(task_id) + 2); + if (job_task_name == NULL) { + fprintf(LOGFILE, "Malloc of job task name failed\n"); result = -1; } else { - sprintf(combined_name, "%s/%s", app_id, container_id); + sprintf(job_task_name, "%s/%s", job_id, task_id); - char **log_dir = get_values(NM_LOG_DIR_KEY); + char **log_dir = get_values(TT_LOG_DIR_KEY); if (log_dir == NULL) { - free(combined_name); - fprintf(LOGFILE, "%s is not configured.\n", NM_LOG_DIR_KEY); + fprintf(LOGFILE, "%s is not configured.\n", TT_LOG_DIR_KEY); return -1; } char **log_dir_ptr; for(log_dir_ptr = log_dir; *log_dir_ptr != NULL; ++log_dir_ptr) { - char *container_log_dir = get_app_log_directory(*log_dir_ptr, combined_name); - if (container_log_dir == NULL) { - free(combined_name); + char *job_log_dir = get_job_log_directory(*log_dir_ptr, job_task_name); + if (job_log_dir == NULL) { free_values(log_dir); return -1; - } else if (mkdirs(container_log_dir, perms) != 0) { - free(container_log_dir); + } else if (mkdirs(job_log_dir, perms) != 0) { + free(job_log_dir); } else { result = 0; - free(container_log_dir); + free(job_log_dir); } } - free(combined_name); + free(job_task_name); free_values(log_dir); } return result; @@ -495,14 +461,11 @@ struct passwd* check_user(const char *user) { for(; *banned_user; ++banned_user) { if (strcmp(*banned_user, user) == 0) { free(user_info); - if (banned_users != (char**)DEFAULT_BANNED_USERS) { - free_values(banned_users); - } fprintf(LOGFILE, "Requested user %s is banned\n", user); return NULL; } } - if (banned_users != NULL && banned_users != (char**)DEFAULT_BANNED_USERS) { + if (banned_users != NULL) { free_values(banned_users); } return user_info; @@ -549,7 +512,7 @@ static int change_owner(const char* path, uid_t user, gid_t group) { * Create a top level directory for the user. * It assumes that the parent directory is *not* writable by the user. * It creates directories with 02750 permissions owned by the user - * and with the group set to the node manager group. + * and with the group set to the task tracker group. * return non-0 on failure */ int create_directory_for_user(const char* path) { @@ -561,7 +524,7 @@ int create_directory_for_user(const char* path) { int ret = 0; if(getuid() == root) { - ret = change_effective_user(root, nm_gid); + ret = change_effective_user(root, tt_gid); } if (ret == 0) { @@ -571,8 +534,8 @@ int create_directory_for_user(const char* path) { fprintf(LOGFILE, "Can't chmod %s to add the sticky bit - %s\n", path, strerror(errno)); ret = -1; - } else if (change_owner(path, user, nm_gid) != 0) { - fprintf(LOGFILE, "Failed to chown %s to %d:%d: %s\n", path, user, nm_gid, + } else if (change_owner(path, user, tt_gid) != 0) { + fprintf(LOGFILE, "Failed to chown %s to %d:%d: %s\n", path, user, tt_gid, strerror(errno)); ret = -1; } @@ -591,18 +554,18 @@ int create_directory_for_user(const char* path) { } /** - * Open a file as the node manager and return a file descriptor for it. + * Open a file as the tasktracker and return a file descriptor for it. * Returns -1 on error */ -static int open_file_as_nm(const char* filename) { +static int open_file_as_task_tracker(const char* filename) { uid_t user = geteuid(); gid_t group = getegid(); - if (change_effective_user(nm_uid, nm_gid) != 0) { + if (change_effective_user(tt_uid, tt_gid) != 0) { return -1; } int result = open(filename, O_RDONLY); if (result == -1) { - fprintf(LOGFILE, "Can't open file %s as node manager - %s\n", filename, + fprintf(LOGFILE, "Can't open file %s as task tracker - %s\n", filename, strerror(errno)); } if (change_effective_user(user, group)) { @@ -661,10 +624,10 @@ static int copy_file(int input, const char* in_filename, * Function to initialize the user directories of a user. */ int initialize_user(const char *user) { - char **local_dir = get_values(NM_SYS_DIR_KEY); + char **local_dir = get_values(TT_SYS_DIR_KEY); if (local_dir == NULL) { - fprintf(LOGFILE, "%s is not configured.\n", NM_SYS_DIR_KEY); - return INVALID_NM_ROOT_DIRS; + fprintf(LOGFILE, "%s is not configured.\n", TT_SYS_DIR_KEY); + return INVALID_TT_ROOT; } char *user_dir; @@ -687,12 +650,12 @@ int initialize_user(const char *user) { } /** - * Function to prepare the application directories for the container. + * Function to prepare the job directories for the task JVM. */ -int initialize_app(const char *user, const char *app_id, +int initialize_job(const char *user, const char *jobid, const char* nmPrivate_credentials_file, char* const* args) { - if (app_id == NULL || user == NULL) { - fprintf(LOGFILE, "Either app_id is null or the user passed is null.\n"); + if (jobid == NULL || user == NULL) { + fprintf(LOGFILE, "Either jobid is null or the user passed is null.\n"); return INVALID_ARGUMENT_NUMBER; } @@ -703,35 +666,35 @@ int initialize_app(const char *user, const char *app_id, } ////////////// create the log directories for the app on all disks - char **log_roots = get_values(NM_LOG_DIR_KEY); + char **log_roots = get_values(TT_LOG_DIR_KEY); if (log_roots == NULL) { return INVALID_CONFIG_FILE; } char **log_root; - char *any_one_app_log_dir = NULL; + char *any_one_job_log_dir = NULL; for(log_root=log_roots; *log_root != NULL; ++log_root) { - char *app_log_dir = get_app_log_directory(*log_root, app_id); - if (app_log_dir == NULL) { + char *job_log_dir = get_job_log_directory(*log_root, jobid); + if (job_log_dir == NULL) { // try the next one - } else if (create_directory_for_user(app_log_dir) != 0) { - free(app_log_dir); + } else if (create_directory_for_user(job_log_dir) != 0) { + free(job_log_dir); return -1; - } else if (any_one_app_log_dir == NULL) { - any_one_app_log_dir = app_log_dir; + } else if (any_one_job_log_dir == NULL) { + any_one_job_log_dir = job_log_dir; } else { - free(app_log_dir); + free(job_log_dir); } } free_values(log_roots); - if (any_one_app_log_dir == NULL) { - fprintf(LOGFILE, "Did not create any app-log directories\n"); + if (any_one_job_log_dir == NULL) { + fprintf(LOGFILE, "Did not create any job-log directories\n"); return -1; } - free(any_one_app_log_dir); + free(any_one_job_log_dir); ////////////// End of creating the log directories for the app on all disks // open up the credentials file - int cred_file = open_file_as_nm(nmPrivate_credentials_file); + int cred_file = open_file_as_task_tracker(nmPrivate_credentials_file); if (cred_file == -1) { return -1; } @@ -743,29 +706,29 @@ int initialize_app(const char *user, const char *app_id, // 750 mode_t permissions = S_IRWXU | S_IRGRP | S_IXGRP; - char **nm_roots = get_values(NM_SYS_DIR_KEY); + char **tt_roots = get_values(TT_SYS_DIR_KEY); - if (nm_roots == NULL) { + if (tt_roots == NULL) { return INVALID_CONFIG_FILE; } - char **nm_root; - char *primary_app_dir = NULL; - for(nm_root=nm_roots; *nm_root != NULL; ++nm_root) { - char *app_dir = get_app_directory(*nm_root, user, app_id); - if (app_dir == NULL) { + char **tt_root; + char *primary_job_dir = NULL; + for(tt_root=tt_roots; *tt_root != NULL; ++tt_root) { + char *job_dir = get_job_directory(*tt_root, user, jobid); + if (job_dir == NULL) { // try the next one - } else if (mkdirs(app_dir, permissions) != 0) { - free(app_dir); - } else if (primary_app_dir == NULL) { - primary_app_dir = app_dir; + } else if (mkdirs(job_dir, permissions) != 0) { + free(job_dir); + } else if (primary_job_dir == NULL) { + primary_job_dir = job_dir; } else { - free(app_dir); + free(job_dir); } } - free_values(nm_roots); - if (primary_app_dir == NULL) { - fprintf(LOGFILE, "Did not create any app directories\n"); + free_values(tt_roots); + if (primary_job_dir == NULL) { + fprintf(LOGFILE, "Did not create any job directories\n"); return -1; } @@ -773,7 +736,7 @@ int initialize_app(const char *user, const char *app_id, // TODO: FIXME. The user's copy of creds should go to a path selected by // localDirAllocatoir char *cred_file_name = concatenate("%s/%s", "cred file", 2, - primary_app_dir, basename(nmPrivate_credentials_file_copy)); + primary_job_dir, basename(nmPrivate_credentials_file_copy)); if (cred_file_name == NULL) { free(nmPrivate_credentials_file_copy); return -1; @@ -791,76 +754,67 @@ int initialize_app(const char *user, const char *app_id, if (LOGFILE != stdout) { fclose(stdout); } - if (ERRORFILE != stderr) { - fclose(stderr); - } - if (chdir(primary_app_dir) != 0) { - fprintf(LOGFILE, "Failed to chdir to app dir - %s\n", strerror(errno)); + fclose(stderr); + if (chdir(primary_job_dir) != 0) { + fprintf(LOGFILE, "Failed to chdir to job dir - %s\n", strerror(errno)); return -1; } execvp(args[0], args); - fprintf(ERRORFILE, "Failure to exec app initialization process - %s\n", + fprintf(LOGFILE, "Failure to exec job initialization process - %s\n", strerror(errno)); return -1; } -int launch_container_as_user(const char *user, const char *app_id, - const char *container_id, const char *work_dir, - const char *script_name, const char *cred_file, - const char* pid_file) { +/* + * Function used to launch a task as the provided user. It does the following : + * 1) Creates attempt work dir and log dir to be accessible by the child + * 2) Copies the script file from the TT to the work directory + * 3) Sets up the environment + * 4) Does an execlp on the same in order to replace the current image with + * task image. + */ +int run_task_as_user(const char *user, const char *job_id, + const char *task_id, const char *work_dir, + const char *script_name, const char *cred_file) { int exit_code = -1; char *script_file_dest = NULL; char *cred_file_dest = NULL; - script_file_dest = get_container_launcher_file(work_dir); + script_file_dest = get_task_launcher_file(work_dir); if (script_file_dest == NULL) { exit_code = OUT_OF_MEMORY; goto cleanup; } - cred_file_dest = get_container_credentials_file(work_dir); + cred_file_dest = get_task_credentials_file(work_dir); if (NULL == cred_file_dest) { exit_code = OUT_OF_MEMORY; goto cleanup; } // open launch script - int container_file_source = open_file_as_nm(script_name); - if (container_file_source == -1) { + int task_file_source = open_file_as_task_tracker(script_name); + if (task_file_source == -1) { goto cleanup; } // open credentials - int cred_file_source = open_file_as_nm(cred_file); + int cred_file_source = open_file_as_task_tracker(cred_file); if (cred_file_source == -1) { goto cleanup; } - // setsid - pid_t pid = setsid(); - if (pid == -1) { - exit_code = SETSID_OPER_FAILED; - goto cleanup; - } - - // write pid to pidfile - if (pid_file == NULL - || write_pid_to_file_as_nm(pid_file, pid) != 0) { - exit_code = WRITE_PIDFILE_FAILED; - goto cleanup; - } - // give up root privs if (change_user(user_detail->pw_uid, user_detail->pw_gid) != 0) { exit_code = SETUID_OPER_FAILED; goto cleanup; } - if (create_container_directories(user, app_id, container_id) != 0) { - fprintf(LOGFILE, "Could not create container dirs"); + if (create_attempt_directories(user, job_id, task_id) != 0) { + fprintf(LOGFILE, "Could not create attempt dirs"); goto cleanup; } // 700 - if (copy_file(container_file_source, script_name, script_file_dest,S_IRWXU) != 0) { + if (copy_file(task_file_source, script_name, script_file_dest,S_IRWXU) != 0) { goto cleanup; } @@ -878,9 +832,9 @@ int launch_container_as_user(const char *user, const char *app_id, goto cleanup; } if (execlp(script_file_dest, script_file_dest, NULL) != 0) { - fprintf(LOGFILE, "Couldn't execute the container launch file %s - %s", + fprintf(LOGFILE, "Couldn't execute the task jvm file %s - %s", script_file_dest, strerror(errno)); - exit_code = UNABLE_TO_EXECUTE_CONTAINER_SCRIPT; + exit_code = UNABLE_TO_EXECUTE_TASK_SCRIPT; goto cleanup; } exit_code = 0; @@ -891,9 +845,14 @@ int launch_container_as_user(const char *user, const char *app_id, return exit_code; } -int signal_container_as_user(const char *user, int pid, int sig) { +/** + * Function used to signal a task launched by the user. + * The function sends appropriate signal to the process group + * specified by the task_pid. + */ +int signal_user_task(const char *user, int pid, int sig) { if(pid <= 0) { - return INVALID_CONTAINER_PID; + return INVALID_TASK_PID; } if (change_user(user_detail->pw_uid, user_detail->pw_gid) != 0) { @@ -905,9 +864,9 @@ int signal_container_as_user(const char *user, int pid, int sig) { if (kill(-pid,0) < 0) { if (kill(pid, 0) < 0) { if (errno == ESRCH) { - return INVALID_CONTAINER_PID; + return INVALID_TASK_PID; } - fprintf(LOGFILE, "Error signalling container %d with %d - %s\n", + fprintf(LOGFILE, "Error signalling task %d with %d - %s\n", pid, sig, strerror(errno)); return -1; } else { @@ -920,13 +879,9 @@ int signal_container_as_user(const char *user, int pid, int sig) { fprintf(LOGFILE, "Error signalling process group %d with signal %d - %s\n", -pid, sig, strerror(errno)); - fprintf(stderr, - "Error signalling process group %d with signal %d - %s\n", - -pid, sig, strerror(errno)); - fflush(LOGFILE); - return UNABLE_TO_SIGNAL_CONTAINER; + return UNABLE_TO_KILL_TASK; } else { - return INVALID_CONTAINER_PID; + return INVALID_TASK_PID; } } fprintf(LOGFILE, "Killing process %s%d with %d\n", @@ -935,12 +890,12 @@ int signal_container_as_user(const char *user, int pid, int sig) { } /** - * Delete a final directory as the node manager user. + * Delete a final directory as the task tracker user. */ -static int rmdir_as_nm(const char* path) { +static int rmdir_as_tasktracker(const char* path) { int user_uid = geteuid(); int user_gid = getegid(); - int ret = change_effective_user(nm_uid, nm_gid); + int ret = change_effective_user(tt_uid, tt_gid); if (ret == 0) { if (rmdir(path) != 0) { fprintf(LOGFILE, "rmdir of %s failed - %s\n", path, strerror(errno)); @@ -1061,8 +1016,8 @@ static int delete_path(const char *full_path, if (needs_tt_user) { // If the delete failed, try a final rmdir as root on the top level. // That handles the case where the top level directory is in a directory - // that is owned by the node manager. - exit_code = rmdir_as_nm(full_path); + // that is owned by the task tracker. + exit_code = rmdir_as_tasktracker(full_path); } free(paths[0]); } @@ -1070,7 +1025,7 @@ static int delete_path(const char *full_path, } /** - * Delete the given directory as the user from each of the directories + * Delete the given directory as the user from each of the tt_root directories * user: the user doing the delete * subdir: the subdir to delete (if baseDirs is empty, this is treated as an absolute path) @@ -1103,5 +1058,3 @@ int delete_as_user(const char *user, } return ret; } - - diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/container-executor.h b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/container-executor.h new file mode 100644 index 00000000000..4c54324848e --- /dev/null +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/container-executor.h @@ -0,0 +1,155 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include +#include +#include + +//command definitions +enum command { + INITIALIZE_JOB = 0, + LAUNCH_TASK_JVM = 1, + SIGNAL_TASK = 2, + DELETE_AS_USER = 3, +}; + +enum errorcodes { + INVALID_ARGUMENT_NUMBER = 1, + INVALID_USER_NAME, //2 + INVALID_COMMAND_PROVIDED, //3 + SUPER_USER_NOT_ALLOWED_TO_RUN_TASKS, //4 + INVALID_TT_ROOT, //5 + SETUID_OPER_FAILED, //6 + UNABLE_TO_EXECUTE_TASK_SCRIPT, //7 + UNABLE_TO_KILL_TASK, //8 + INVALID_TASK_PID, //9 + ERROR_RESOLVING_FILE_PATH, //10 + RELATIVE_PATH_COMPONENTS_IN_FILE_PATH, //11 + UNABLE_TO_STAT_FILE, //12 + FILE_NOT_OWNED_BY_TASKTRACKER, //13 + PREPARE_ATTEMPT_DIRECTORIES_FAILED, //14 + INITIALIZE_JOB_FAILED, //15 + PREPARE_TASK_LOGS_FAILED, //16 + INVALID_TT_LOG_DIR, //17 + OUT_OF_MEMORY, //18 + INITIALIZE_DISTCACHEFILE_FAILED, //19 + INITIALIZE_USER_FAILED, //20 + UNABLE_TO_BUILD_PATH, //21 + INVALID_TASKCONTROLLER_PERMISSIONS, //22 + PREPARE_JOB_LOGS_FAILED, //23 + INVALID_CONFIG_FILE, // 24 +}; + +#define TT_GROUP_KEY "mapreduce.tasktracker.group" +#define USER_DIR_PATTERN "%s/usercache/%s" +#define TT_JOB_DIR_PATTERN USER_DIR_PATTERN "/appcache/%s" +#define ATTEMPT_DIR_PATTERN TT_JOB_DIR_PATTERN "/%s" +#define TASK_SCRIPT "task.sh" +#define TT_LOCAL_TASK_DIR_PATTERN TT_JOB_DIR_PATTERN "/%s" +#define TT_SYS_DIR_KEY "mapreduce.cluster.local.dir" +#define TT_LOG_DIR_KEY "hadoop.log.dir" +#define CREDENTIALS_FILENAME "container_tokens" +#define MIN_USERID_KEY "min.user.id" +#define BANNED_USERS_KEY "banned.users" + +extern struct passwd *user_detail; + +// the log file for messages +extern FILE *LOGFILE; + +// get the executable's filename +char* get_executable(); + +int check_taskcontroller_permissions(char *executable_file); + +// initialize the job directory +int initialize_job(const char *user, const char *jobid, + const char *credentials, char* const* args); + +// run the task as the user +int run_task_as_user(const char * user, const char *jobid, const char *taskid, + const char *work_dir, const char *script_name, + const char *cred_file); + +// send a signal as the user +int signal_user_task(const char *user, int pid, int sig); + +// delete a directory (or file) recursively as the user. The directory +// could optionally be relative to the baseDir set of directories (if the same +// directory appears on multiple disk volumes, the disk volumes should be passed +// as the baseDirs). If baseDirs is not specified, then dir_to_be_deleted is +// assumed as the absolute path +int delete_as_user(const char *user, + const char *dir_to_be_deleted, + char* const* baseDirs); + +// set the task tracker's uid and gid +void set_tasktracker_uid(uid_t user, gid_t group); + +/** + * Is the user a real user account? + * Checks: + * 1. Not root + * 2. UID is above the minimum configured. + * 3. Not in banned user list + * Returns NULL on failure + */ +struct passwd* check_user(const char *user); + +// set the user +int set_user(const char *user); + +// methods to get the directories + +char *get_user_directory(const char *tt_root, const char *user); + +char *get_job_directory(const char * tt_root, const char *user, + const char *jobid); + +char *get_attempt_work_directory(const char *tt_root, const char *user, + const char *job_dir, const char *attempt_id); + +char *get_task_launcher_file(const char* work_dir); + +char *get_task_credentials_file(const char* work_dir); + +/** + * Get the job log directory under log_root + */ +char* get_job_log_directory(const char* log_root, const char* jobid); + +/** + * Ensure that the given path and all of the parent directories are created + * with the desired permissions. + */ +int mkdirs(const char* path, mode_t perm); + +/** + * Function to initialize the user directories of a user. + */ +int initialize_user(const char *user); + +/** + * Create a top level directory for the user. + * It assumes that the parent directory is *not* writable by the user. + * It creates directories with 02700 permissions owned by the user + * and with the group set to the task tracker group. + * return non-0 on failure + */ +int create_directory_for_user(const char* path); + +int change_user(uid_t user, gid_t group); diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/c/container-executor/impl/main.c b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/main.c similarity index 61% rename from hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/c/container-executor/impl/main.c rename to hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/main.c index 6e62ef9100f..d5cc531001a 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/c/container-executor/impl/main.c +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/main.c @@ -31,22 +31,18 @@ #define _STRINGIFY(X) #X #define STRINGIFY(X) _STRINGIFY(X) -#define CONF_FILENAME "container-executor.cfg" - -#ifndef HADOOP_CONF_DIR - #error HADOOP_CONF_DIR must be defined -#endif +#define CONF_FILENAME "taskcontroller.cfg" void display_usage(FILE *stream) { fprintf(stream, "Usage: container-executor user command command-args\n"); fprintf(stream, "Commands:\n"); - fprintf(stream, " initialize container: %2d appid tokens cmd app...\n", - INITIALIZE_CONTAINER); - fprintf(stream, " launch container: %2d appid containerid workdir container-script tokens\n", - LAUNCH_CONTAINER); - fprintf(stream, " signal container: %2d container-pid signal\n", - SIGNAL_CONTAINER); + fprintf(stream, " initialize job: %2d jobid tokens cmd args\n", + INITIALIZE_JOB); + fprintf(stream, " launch task: %2d jobid taskid workdir task-script jobTokens\n", + LAUNCH_TASK_JVM); + fprintf(stream, " signal task: %2d task-pid signal\n", + SIGNAL_TASK); fprintf(stream, " delete as user: %2d relative-path\n", DELETE_AS_USER); } @@ -59,14 +55,12 @@ int main(int argc, char **argv) { } LOGFILE = stdout; - ERRORFILE = stderr; int command; - const char * app_id = NULL; - const char * container_id = NULL; + const char * job_id = NULL; + const char * task_id = NULL; const char * cred_file = NULL; const char * script_file = NULL; const char * current_dir = NULL; - const char * pid_file = NULL; int exit_code = 0; @@ -74,46 +68,54 @@ int main(int argc, char **argv) { char *executable_file = get_executable(); +#ifndef HADOOP_CONF_DIR + #error HADOOP_CONF_DIR must be defined +#endif + char *orig_conf_file = STRINGIFY(HADOOP_CONF_DIR) "/" CONF_FILENAME; char *conf_file = realpath(orig_conf_file, NULL); if (conf_file == NULL) { - fprintf(ERRORFILE, "Configuration file %s not found.\n", orig_conf_file); - exit(INVALID_CONFIG_FILE); + fprintf(LOGFILE, "Configuration file %s not found.\n", orig_conf_file); + fflush(LOGFILE); + return INVALID_CONFIG_FILE; } if (check_configuration_permissions(conf_file) != 0) { - exit(INVALID_CONFIG_FILE); + return INVALID_CONFIG_FILE; } read_config(conf_file); free(conf_file); - // look up the node manager group in the config file - char *nm_group = get_value(NM_GROUP_KEY); - if (nm_group == NULL) { - fprintf(ERRORFILE, "Can't get configured value for %s.\n", NM_GROUP_KEY); + // look up the task tracker group in the config file + char *tt_group = get_value(TT_GROUP_KEY); + if (tt_group == NULL) { + fprintf(LOGFILE, "Can't get configured value for %s.\n", TT_GROUP_KEY); + fflush(LOGFILE); exit(INVALID_CONFIG_FILE); } - struct group *group_info = getgrnam(nm_group); + struct group *group_info = getgrnam(tt_group); if (group_info == NULL) { - fprintf(ERRORFILE, "Can't get group information for %s - %s.\n", nm_group, + fprintf(LOGFILE, "Can't get group information for %s - %s.\n", tt_group, strerror(errno)); fflush(LOGFILE); exit(INVALID_CONFIG_FILE); } - set_nm_uid(getuid(), group_info->gr_gid); + set_tasktracker_uid(getuid(), group_info->gr_gid); // if we are running from a setuid executable, make the real uid root setuid(0); - // set the real and effective group id to the node manager group + // set the real and effective group id to the task tracker group setgid(group_info->gr_gid); - if (check_executor_permissions(executable_file) != 0) { - fprintf(ERRORFILE, "Invalid permissions on container-executor binary.\n"); - return INVALID_CONTAINER_EXEC_PERMISSIONS; + if (check_taskcontroller_permissions(executable_file) != 0) { + fprintf(LOGFILE, "Invalid permissions on container-executor binary.\n"); + fflush(LOGFILE); + return INVALID_TASKCONTROLLER_PERMISSIONS; } //checks done for user name if (argv[optind] == NULL) { - fprintf(ERRORFILE, "Invalid user name.\n"); + fprintf(LOGFILE, "Invalid user name \n"); + fflush(LOGFILE); return INVALID_USER_NAME; } int ret = set_user(argv[optind]); @@ -126,60 +128,53 @@ int main(int argc, char **argv) { fprintf(LOGFILE, "main : command provided %d\n",command); fprintf(LOGFILE, "main : user is %s\n", user_detail->pw_name); - fflush(LOGFILE); switch (command) { - case INITIALIZE_CONTAINER: + case INITIALIZE_JOB: if (argc < 6) { - fprintf(ERRORFILE, "Too few arguments (%d vs 6) for initialize container\n", + fprintf(LOGFILE, "Too few arguments (%d vs 6) for initialize job\n", argc); - fflush(ERRORFILE); return INVALID_ARGUMENT_NUMBER; } - app_id = argv[optind++]; + job_id = argv[optind++]; cred_file = argv[optind++]; - exit_code = initialize_app(user_detail->pw_name, app_id, cred_file, + exit_code = initialize_job(user_detail->pw_name, job_id, cred_file, argv + optind); break; - case LAUNCH_CONTAINER: - if (argc < 9) { - fprintf(ERRORFILE, "Too few arguments (%d vs 8) for launch container\n", + case LAUNCH_TASK_JVM: + if (argc < 8) { + fprintf(LOGFILE, "Too few arguments (%d vs 8) for launch task\n", argc); - fflush(ERRORFILE); return INVALID_ARGUMENT_NUMBER; } - app_id = argv[optind++]; - container_id = argv[optind++]; + job_id = argv[optind++]; + task_id = argv[optind++]; current_dir = argv[optind++]; script_file = argv[optind++]; cred_file = argv[optind++]; - pid_file = argv[optind++]; - exit_code = launch_container_as_user(user_detail->pw_name, app_id, container_id, - current_dir, script_file, cred_file, pid_file); + exit_code = run_task_as_user(user_detail->pw_name, job_id, task_id, + current_dir, script_file, cred_file); break; - case SIGNAL_CONTAINER: + case SIGNAL_TASK: if (argc < 5) { - fprintf(ERRORFILE, "Too few arguments (%d vs 5) for signal container\n", + fprintf(LOGFILE, "Too few arguments (%d vs 5) for signal task\n", argc); - fflush(ERRORFILE); return INVALID_ARGUMENT_NUMBER; } else { char* end_ptr = NULL; char* option = argv[optind++]; - int container_pid = strtol(option, &end_ptr, 10); + int task_pid = strtol(option, &end_ptr, 10); if (option == end_ptr || *end_ptr != '\0') { - fprintf(ERRORFILE, "Illegal argument for container pid %s\n", option); - fflush(ERRORFILE); + fprintf(LOGFILE, "Illegal argument for task pid %s\n", option); return INVALID_ARGUMENT_NUMBER; } option = argv[optind++]; int signal = strtol(option, &end_ptr, 10); if (option == end_ptr || *end_ptr != '\0') { - fprintf(ERRORFILE, "Illegal argument for signal %s\n", option); - fflush(ERRORFILE); + fprintf(LOGFILE, "Illegal argument for signal %s\n", option); return INVALID_ARGUMENT_NUMBER; } - exit_code = signal_container_as_user(user_detail->pw_name, container_pid, signal); + exit_code = signal_user_task(user_detail->pw_name, task_pid, signal); } break; case DELETE_AS_USER: @@ -188,11 +183,8 @@ int main(int argc, char **argv) { argv + optind); break; default: - fprintf(ERRORFILE, "Invalid command %d not supported.",command); - fflush(ERRORFILE); exit_code = INVALID_COMMAND_PROVIDED; } fclose(LOGFILE); - fclose(ERRORFILE); return exit_code; } diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/c/container-executor/test/test-container-executor.c b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/test/test-task-controller.c similarity index 60% rename from hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/c/container-executor/test/test-container-executor.c rename to hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/test/test-task-controller.c index 7c62f1ba183..fcacfff6e9a 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/c/container-executor/test/test-container-executor.c +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/test/test-task-controller.c @@ -28,7 +28,7 @@ #include #include -#define TEST_ROOT "/tmp/test-container-controller" +#define TEST_ROOT "/tmp/test-task-controller" #define DONT_TOUCH_FILE "dont-touch-me" static char* username = NULL; @@ -84,40 +84,46 @@ void run(const char *cmd) { int write_config_file(char *file_name) { FILE *file; - int i = 0; file = fopen(file_name, "w"); if (file == NULL) { printf("Failed to open %s.\n", file_name); return EXIT_FAILURE; } - fprintf(file, "yarn.nodemanager.local-dirs=" TEST_ROOT "/local-1"); + fprintf(file, "mapred.local.dir=" TEST_ROOT "/local-1"); + int i; for(i=2; i < 5; ++i) { fprintf(file, "," TEST_ROOT "/local-%d", i); } fprintf(file, "\n"); - fprintf(file, "yarn.nodemanager.log-dirs=" TEST_ROOT "/logs\n"); + fprintf(file, "mapreduce.cluster.local.dir=" TEST_ROOT "/local-1"); + for(i=2; i < 5; ++i) { + fprintf(file, "," TEST_ROOT "/local-%d", i); + } + fprintf(file, "\n"); + + fprintf(file, "hadoop.log.dir=" TEST_ROOT "/logs\n"); fclose(file); return 0; } -void create_nm_roots() { - char** nm_roots = get_values(NM_SYS_DIR_KEY); - char** nm_root; - for(nm_root=nm_roots; *nm_root != NULL; ++nm_root) { - if (mkdir(*nm_root, 0755) != 0) { - printf("FAIL: Can't create directory %s - %s\n", *nm_root, +void create_tt_roots() { + char** tt_roots = get_values("mapred.local.dir"); + char** tt_root; + for(tt_root=tt_roots; *tt_root != NULL; ++tt_root) { + if (mkdir(*tt_root, 0755) != 0) { + printf("FAIL: Can't create directory %s - %s\n", *tt_root, strerror(errno)); exit(1); } char buffer[100000]; - sprintf(buffer, "%s/usercache", *nm_root); + sprintf(buffer, "%s/usercache", *tt_root); if (mkdir(buffer, 0755) != 0) { printf("FAIL: Can't create directory %s - %s\n", buffer, strerror(errno)); exit(1); } } - free_values(nm_roots); + free_values(tt_roots); } void test_get_user_directory() { @@ -130,49 +136,49 @@ void test_get_user_directory() { free(user_dir); } -void test_get_app_directory() { - char *expected = "/tmp/usercache/user/appcache/app_200906101234_0001"; - char *app_dir = (char *) get_app_directory("/tmp", "user", - "app_200906101234_0001"); - if (strcmp(app_dir, expected) != 0) { - printf("test_get_app_directory expected %s got %s\n", expected, app_dir); +void test_get_job_directory() { + char *expected = "/tmp/usercache/user/appcache/job_200906101234_0001"; + char *job_dir = (char *) get_job_directory("/tmp", "user", + "job_200906101234_0001"); + if (strcmp(job_dir, expected) != 0) { + printf("test_get_job_directory expected %s got %s\n", expected, job_dir); exit(1); } - free(app_dir); + free(job_dir); } -void test_get_container_directory() { - char *container_dir = get_container_work_directory("/tmp", "owen", "app_1", - "container_1"); - char *expected = "/tmp/usercache/owen/appcache/app_1/container_1"; - if (strcmp(container_dir, expected) != 0) { - printf("Fail get_container_work_directory got %s expected %s\n", - container_dir, expected); +void test_get_attempt_directory() { + char *attempt_dir = get_attempt_work_directory("/tmp", "owen", "job_1", + "attempt_1"); + char *expected = "/tmp/usercache/owen/appcache/job_1/attempt_1"; + if (strcmp(attempt_dir, expected) != 0) { + printf("Fail get_attempt_work_directory got %s expected %s\n", + attempt_dir, expected); exit(1); } - free(container_dir); + free(attempt_dir); } -void test_get_container_launcher_file() { - char *expected_file = ("/tmp/usercache/user/appcache/app_200906101234_0001" - "/launch_container.sh"); - char *app_dir = get_app_directory("/tmp", "user", - "app_200906101234_0001"); - char *container_file = get_container_launcher_file(app_dir); - if (strcmp(container_file, expected_file) != 0) { - printf("failure to match expected container file %s vs %s\n", container_file, +void test_get_task_launcher_file() { + char *expected_file = ("/tmp/usercache/user/appcache/job_200906101234_0001" + "/task.sh"); + char *job_dir = get_job_directory("/tmp", "user", + "job_200906101234_0001"); + char *task_file = get_task_launcher_file(job_dir); + if (strcmp(task_file, expected_file) != 0) { + printf("failure to match expected task file %s vs %s\n", task_file, expected_file); exit(1); } - free(app_dir); - free(container_file); + free(job_dir); + free(task_file); } -void test_get_app_log_dir() { - char *expected = TEST_ROOT "/logs/userlogs/app_200906101234_0001"; - char *logdir = get_app_log_directory(TEST_ROOT "/logs/userlogs","app_200906101234_0001"); +void test_get_job_log_dir() { + char *expected = TEST_ROOT "/logs/userlogs/job_200906101234_0001"; + char *logdir = get_job_log_directory(TEST_ROOT "/logs/userlogs","job_200906101234_0001"); if (strcmp(logdir, expected) != 0) { - printf("Fail get_app_log_dir got %s expected %s\n", logdir, expected); + printf("Fail get_job_log_dir got %s expected %s\n", logdir, expected); exit(1); } free(logdir); @@ -194,6 +200,10 @@ void test_check_user() { printf("FAIL: failed check for system user root\n"); exit(1); } + if (check_user("mapred") != NULL) { + printf("FAIL: failed check for hadoop user mapred\n"); + exit(1); + } } void test_check_configuration_permissions() { @@ -208,56 +218,56 @@ void test_check_configuration_permissions() { } } -void test_delete_container() { +void test_delete_task() { if (initialize_user(username)) { printf("FAIL: failed to initialize user %s\n", username); exit(1); } - char* app_dir = get_app_directory(TEST_ROOT "/local-2", username, "app_1"); - char* dont_touch = get_app_directory(TEST_ROOT "/local-2", username, + char* job_dir = get_job_directory(TEST_ROOT "/local-2", username, "job_1"); + char* dont_touch = get_job_directory(TEST_ROOT "/local-2", username, DONT_TOUCH_FILE); - char* container_dir = get_container_work_directory(TEST_ROOT "/local-2", - username, "app_1", "container_1"); + char* task_dir = get_attempt_work_directory(TEST_ROOT "/local-2", + username, "job_1", "task_1"); char buffer[100000]; - sprintf(buffer, "mkdir -p %s/who/let/the/dogs/out/who/who", container_dir); + sprintf(buffer, "mkdir -p %s/who/let/the/dogs/out/who/who", task_dir); run(buffer); sprintf(buffer, "touch %s", dont_touch); run(buffer); - // soft link to the canary file from the container directory - sprintf(buffer, "ln -s %s %s/who/softlink", dont_touch, container_dir); + // soft link to the canary file from the task directory + sprintf(buffer, "ln -s %s %s/who/softlink", dont_touch, task_dir); run(buffer); - // hard link to the canary file from the container directory - sprintf(buffer, "ln %s %s/who/hardlink", dont_touch, container_dir); + // hard link to the canary file from the task directory + sprintf(buffer, "ln %s %s/who/hardlink", dont_touch, task_dir); run(buffer); - // create a dot file in the container directory - sprintf(buffer, "touch %s/who/let/.dotfile", container_dir); + // create a dot file in the task directory + sprintf(buffer, "touch %s/who/let/.dotfile", task_dir); run(buffer); // create a no permission file - sprintf(buffer, "touch %s/who/let/protect", container_dir); + sprintf(buffer, "touch %s/who/let/protect", task_dir); run(buffer); - sprintf(buffer, "chmod 000 %s/who/let/protect", container_dir); + sprintf(buffer, "chmod 000 %s/who/let/protect", task_dir); run(buffer); // create a no permission directory - sprintf(buffer, "chmod 000 %s/who/let", container_dir); + sprintf(buffer, "chmod 000 %s/who/let", task_dir); run(buffer); - // delete container directory - char * dirs[] = {app_dir, 0}; - int ret = delete_as_user(username, "container_1" , dirs); + // delete task directory + char * dirs[] = {job_dir, 0}; + int ret = delete_as_user(username, "task_1" , dirs); if (ret != 0) { printf("FAIL: return code from delete_as_user is %d\n", ret); exit(1); } - // check to make sure the container directory is gone - if (access(container_dir, R_OK) == 0) { - printf("FAIL: failed to delete the directory - %s\n", container_dir); + // check to make sure the task directory is gone + if (access(task_dir, R_OK) == 0) { + printf("FAIL: failed to delete the directory - %s\n", task_dir); exit(1); } - // check to make sure the app directory is not gone - if (access(app_dir, R_OK) != 0) { - printf("FAIL: accidently deleted the directory - %s\n", app_dir); + // check to make sure the job directory is not gone + if (access(job_dir, R_OK) != 0) { + printf("FAIL: accidently deleted the directory - %s\n", job_dir); exit(1); } // but that the canary is not gone @@ -265,60 +275,60 @@ void test_delete_container() { printf("FAIL: accidently deleted file %s\n", dont_touch); exit(1); } - sprintf(buffer, "chmod -R 700 %s", app_dir); + sprintf(buffer, "chmod -R 700 %s", job_dir); run(buffer); - sprintf(buffer, "rm -fr %s", app_dir); + sprintf(buffer, "rm -fr %s", job_dir); run(buffer); - free(app_dir); - free(container_dir); + free(job_dir); + free(task_dir); free(dont_touch); } -void test_delete_app() { - char* app_dir = get_app_directory(TEST_ROOT "/local-2", username, "app_2"); - char* dont_touch = get_app_directory(TEST_ROOT "/local-2", username, +void test_delete_job() { + char* job_dir = get_job_directory(TEST_ROOT "/local-2", username, "job_2"); + char* dont_touch = get_job_directory(TEST_ROOT "/local-2", username, DONT_TOUCH_FILE); - char* container_dir = get_container_work_directory(TEST_ROOT "/local-2", - username, "app_2", "container_1"); + char* task_dir = get_attempt_work_directory(TEST_ROOT "/local-2", + username, "job_2", "task_1"); char buffer[100000]; - sprintf(buffer, "mkdir -p %s/who/let/the/dogs/out/who/who", container_dir); + sprintf(buffer, "mkdir -p %s/who/let/the/dogs/out/who/who", task_dir); run(buffer); sprintf(buffer, "touch %s", dont_touch); run(buffer); - // soft link to the canary file from the container directory - sprintf(buffer, "ln -s %s %s/who/softlink", dont_touch, container_dir); + // soft link to the canary file from the task directory + sprintf(buffer, "ln -s %s %s/who/softlink", dont_touch, task_dir); run(buffer); - // hard link to the canary file from the container directory - sprintf(buffer, "ln %s %s/who/hardlink", dont_touch, container_dir); + // hard link to the canary file from the task directory + sprintf(buffer, "ln %s %s/who/hardlink", dont_touch, task_dir); run(buffer); - // create a dot file in the container directory - sprintf(buffer, "touch %s/who/let/.dotfile", container_dir); + // create a dot file in the task directory + sprintf(buffer, "touch %s/who/let/.dotfile", task_dir); run(buffer); // create a no permission file - sprintf(buffer, "touch %s/who/let/protect", container_dir); + sprintf(buffer, "touch %s/who/let/protect", task_dir); run(buffer); - sprintf(buffer, "chmod 000 %s/who/let/protect", container_dir); + sprintf(buffer, "chmod 000 %s/who/let/protect", task_dir); run(buffer); // create a no permission directory - sprintf(buffer, "chmod 000 %s/who/let", container_dir); + sprintf(buffer, "chmod 000 %s/who/let", task_dir); run(buffer); - // delete container directory - int ret = delete_as_user(username, app_dir, NULL); + // delete task directory + int ret = delete_as_user(username, job_dir, NULL); if (ret != 0) { printf("FAIL: return code from delete_as_user is %d\n", ret); exit(1); } - // check to make sure the container directory is gone - if (access(container_dir, R_OK) == 0) { - printf("FAIL: failed to delete the directory - %s\n", container_dir); + // check to make sure the task directory is gone + if (access(task_dir, R_OK) == 0) { + printf("FAIL: failed to delete the directory - %s\n", task_dir); exit(1); } - // check to make sure the app directory is gone - if (access(app_dir, R_OK) == 0) { - printf("FAIL: didn't delete the directory - %s\n", app_dir); + // check to make sure the job directory is gone + if (access(job_dir, R_OK) == 0) { + printf("FAIL: didn't delete the directory - %s\n", job_dir); exit(1); } // but that the canary is not gone @@ -326,16 +336,16 @@ void test_delete_app() { printf("FAIL: accidently deleted file %s\n", dont_touch); exit(1); } - free(app_dir); - free(container_dir); + free(job_dir); + free(task_dir); free(dont_touch); } void test_delete_user() { printf("\nTesting delete_user\n"); - char* app_dir = get_app_directory(TEST_ROOT "/local-1", username, "app_3"); - if (mkdirs(app_dir, 0700) != 0) { + char* job_dir = get_job_directory(TEST_ROOT "/local-1", username, "job_3"); + if (mkdirs(job_dir, 0700) != 0) { exit(1); } char buffer[100000]; @@ -355,7 +365,7 @@ void test_delete_user() { printf("FAIL: local-1 directory does not exist\n"); exit(1); } - free(app_dir); + free(job_dir); } void run_test_in_child(const char* test_name, void (*func)()) { @@ -387,8 +397,8 @@ void run_test_in_child(const char* test_name, void (*func)()) { } } -void test_signal_container() { - printf("\nTesting signal_container\n"); +void test_signal_task() { + printf("\nTesting signal_task\n"); fflush(stdout); fflush(stderr); pid_t child = fork(); @@ -402,8 +412,8 @@ void test_signal_container() { sleep(3600); exit(0); } else { - printf("Child container launched as %d\n", child); - if (signal_container_as_user(username, child, SIGQUIT) != 0) { + printf("Child task launched as %d\n", child); + if (signal_user_task(username, child, SIGQUIT) != 0) { exit(1); } int status = 0; @@ -423,8 +433,8 @@ void test_signal_container() { } } -void test_signal_container_group() { - printf("\nTesting group signal_container\n"); +void test_signal_task_group() { + printf("\nTesting group signal_task\n"); fflush(stdout); fflush(stderr); pid_t child = fork(); @@ -439,8 +449,8 @@ void test_signal_container_group() { sleep(3600); exit(0); } - printf("Child container launched as %d\n", child); - if (signal_container_as_user(username, child, SIGKILL) != 0) { + printf("Child task launched as %d\n", child); + if (signal_user_task(username, child, SIGKILL) != 0) { exit(1); } int status = 0; @@ -459,8 +469,8 @@ void test_signal_container_group() { } } -void test_init_app() { - printf("\nTesting init app\n"); +void test_init_job() { + printf("\nTesting init job\n"); if (seteuid(0) != 0) { printf("FAIL: seteuid to root failed - %s\n", strerror(errno)); exit(1); @@ -499,12 +509,12 @@ void test_init_app() { fflush(stderr); pid_t child = fork(); if (child == -1) { - printf("FAIL: failed to fork process for init_app - %s\n", + printf("FAIL: failed to fork process for init_job - %s\n", strerror(errno)); exit(1); } else if (child == 0) { char *final_pgm[] = {"touch", "my-touch-file", 0}; - if (initialize_app(username, "app_4", TEST_ROOT "/creds.txt", final_pgm) != 0) { + if (initialize_job(username, "job_4", TEST_ROOT "/creds.txt", final_pgm) != 0) { printf("FAIL: failed in child\n"); exit(42); } @@ -517,37 +527,37 @@ void test_init_app() { strerror(errno)); exit(1); } - if (access(TEST_ROOT "/logs/userlogs/app_4", R_OK) != 0) { - printf("FAIL: failed to create app log directory\n"); + if (access(TEST_ROOT "/logs/userlogs/job_4", R_OK) != 0) { + printf("FAIL: failed to create job log directory\n"); exit(1); } - char* app_dir = get_app_directory(TEST_ROOT "/local-1", username, "app_4"); - if (access(app_dir, R_OK) != 0) { - printf("FAIL: failed to create app directory %s\n", app_dir); + char* job_dir = get_job_directory(TEST_ROOT "/local-1", username, "job_4"); + if (access(job_dir, R_OK) != 0) { + printf("FAIL: failed to create job directory %s\n", job_dir); exit(1); } char buffer[100000]; - sprintf(buffer, "%s/jobToken", app_dir); + sprintf(buffer, "%s/jobToken", job_dir); if (access(buffer, R_OK) != 0) { printf("FAIL: failed to create credentials %s\n", buffer); exit(1); } - sprintf(buffer, "%s/my-touch-file", app_dir); + sprintf(buffer, "%s/my-touch-file", job_dir); if (access(buffer, R_OK) != 0) { printf("FAIL: failed to create touch file %s\n", buffer); exit(1); } - free(app_dir); - app_dir = get_app_log_directory("logs","app_4"); - if (access(app_dir, R_OK) != 0) { - printf("FAIL: failed to create app log directory %s\n", app_dir); + free(job_dir); + job_dir = get_job_log_directory("logs","job_4"); + if (access(job_dir, R_OK) != 0) { + printf("FAIL: failed to create job log directory %s\n", job_dir); exit(1); } - free(app_dir); + free(job_dir); } -void test_run_container() { - printf("\nTesting run container\n"); +void test_run_task() { + printf("\nTesting run task\n"); if (seteuid(0) != 0) { printf("FAIL: seteuid to root failed - %s\n", strerror(errno)); exit(1); @@ -566,7 +576,7 @@ void test_run_container() { exit(1); } - const char* script_name = TEST_ROOT "/container-script"; + const char* script_name = TEST_ROOT "/task-script"; FILE* script = fopen(script_name, "w"); if (script == NULL) { printf("FAIL: failed to create script file - %s\n", strerror(errno)); @@ -588,17 +598,16 @@ void test_run_container() { } fflush(stdout); fflush(stderr); - char* container_dir = get_container_work_directory(TEST_ROOT "/local-1", - username, "app_4", "container_1"); - const char * pid_file = TEST_ROOT "/pid.txt"; + char* task_dir = get_attempt_work_directory(TEST_ROOT "/local-1", + username, "job_4", "task_1"); pid_t child = fork(); if (child == -1) { - printf("FAIL: failed to fork process for init_app - %s\n", + printf("FAIL: failed to fork process for init_job - %s\n", strerror(errno)); exit(1); } else if (child == 0) { - if (launch_container_as_user(username, "app_4", "container_1", - container_dir, script_name, TEST_ROOT "/creds.txt", pid_file) != 0) { + if (run_task_as_user(username, "job_4", "task_1", + task_dir, script_name, TEST_ROOT "creds.txt") != 0) { printf("FAIL: failed in child\n"); exit(42); } @@ -611,58 +620,31 @@ void test_run_container() { strerror(errno)); exit(1); } - if (access(TEST_ROOT "/logs/userlogs/app_4/container_1", R_OK) != 0) { - printf("FAIL: failed to create container log directory\n"); + if (access(TEST_ROOT "/logs/userlogs/job_4/task_1", R_OK) != 0) { + printf("FAIL: failed to create task log directory\n"); exit(1); } - if (access(container_dir, R_OK) != 0) { - printf("FAIL: failed to create container directory %s\n", container_dir); + if (access(task_dir, R_OK) != 0) { + printf("FAIL: failed to create task directory %s\n", task_dir); exit(1); } char buffer[100000]; - sprintf(buffer, "%s/foobar", container_dir); + sprintf(buffer, "%s/foobar", task_dir); if (access(buffer, R_OK) != 0) { printf("FAIL: failed to create touch file %s\n", buffer); exit(1); } - free(container_dir); - container_dir = get_app_log_directory("logs", "app_4/container_1"); - if (access(container_dir, R_OK) != 0) { - printf("FAIL: failed to create app log directory %s\n", container_dir); - exit(1); - } - free(container_dir); - - if(access(pid_file, R_OK) != 0) { - printf("FAIL: failed to create pid file %s\n", pid_file); - exit(1); - } - int pidfd = open(pid_file, O_RDONLY); - if (pidfd == -1) { - printf("FAIL: failed to open pid file %s - %s\n", pid_file, strerror(errno)); - exit(1); - } - - char pidBuf[100]; - ssize_t bytes = read(pidfd, pidBuf, 100); - if (bytes == -1) { - printf("FAIL: failed to read from pid file %s - %s\n", pid_file, strerror(errno)); - exit(1); - } - - pid_t mypid = child; - char myPidBuf[33]; - snprintf(myPidBuf, 33, "%d", mypid); - if (strncmp(pidBuf, myPidBuf, strlen(myPidBuf)) != 0) { - printf("FAIL: failed to find matching pid in pid file\n"); - printf("FAIL: Expected pid %d : Got %.*s", mypid, (int)bytes, pidBuf); + free(task_dir); + task_dir = get_job_log_directory("logs", "job_4/task_1"); + if (access(task_dir, R_OK) != 0) { + printf("FAIL: failed to create job log directory %s\n", task_dir); exit(1); } + free(task_dir); } int main(int argc, char **argv) { LOGFILE = stdout; - ERRORFILE = stderr; int my_username = 0; // clean up any junk from previous run @@ -677,7 +659,7 @@ int main(int argc, char **argv) { } read_config(TEST_ROOT "/test.cfg"); - create_nm_roots(); + create_tt_roots(); if (getuid() == 0 && argc == 2) { username = argv[1]; @@ -685,7 +667,7 @@ int main(int argc, char **argv) { username = strdup(getpwuid(getuid())->pw_name); my_username = 1; } - set_nm_uid(geteuid(), getegid()); + set_tasktracker_uid(geteuid(), getegid()); if (set_user(username)) { exit(1); @@ -696,25 +678,25 @@ int main(int argc, char **argv) { printf("\nTesting get_user_directory()\n"); test_get_user_directory(); - printf("\nTesting get_app_directory()\n"); - test_get_app_directory(); + printf("\nTesting get_job_directory()\n"); + test_get_job_directory(); - printf("\nTesting get_container_directory()\n"); - test_get_container_directory(); + printf("\nTesting get_attempt_directory()\n"); + test_get_attempt_directory(); - printf("\nTesting get_container_launcher_file()\n"); - test_get_container_launcher_file(); + printf("\nTesting get_task_launcher_file()\n"); + test_get_task_launcher_file(); - printf("\nTesting get_app_log_dir()\n"); - test_get_app_log_dir(); + printf("\nTesting get_job_log_dir()\n"); + test_get_job_log_dir(); test_check_configuration_permissions(); - printf("\nTesting delete_container()\n"); - test_delete_container(); + printf("\nTesting delete_task()\n"); + test_delete_task(); - printf("\nTesting delete_app()\n"); - test_delete_app(); + printf("\nTesting delete_job()\n"); + test_delete_job(); test_delete_user(); @@ -722,15 +704,15 @@ int main(int argc, char **argv) { // the tests that change user need to be run in a subshell, so that // when they change user they don't give up our privs - run_test_in_child("test_signal_container", test_signal_container); - run_test_in_child("test_signal_container_group", test_signal_container_group); + run_test_in_child("test_signal_task", test_signal_task); + run_test_in_child("test_signal_task_group", test_signal_task_group); - // init app and run container can't be run if you aren't testing as root + // init job and run task can't be run if you aren't testing as root if (getuid() == 0) { // these tests do internal forks so that the change_owner and execs // don't mess up our process. - test_init_app(); - test_run_container(); + test_init_job(); + test_run_task(); } seteuid(0);