diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index fe9ab5f2762..20e9f332c63 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -519,6 +519,10 @@ Release 2.8.0 - UNRELEASED YARN-4170. AM need to be notified with priority in AllocateResponse. (Sunil G via jianhe) + YARN-4267. Add additional logging to container launch implementations in + container-executor. (Sidharta Seethana via vvasudev) + + OPTIMIZATIONS YARN-3339. TestDockerContainerExecutor should pull a single image and not diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/container-executor.c b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/container-executor.c index c696273b87b..f721697f251 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/container-executor.c +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/container-executor.c @@ -1225,6 +1225,8 @@ int launch_docker_container_as_user(const char * user, const char *app_id, if (docker_binary == NULL) { docker_binary = "docker"; } + + fprintf(LOGFILE, "Creating script paths...\n"); exit_code = create_script_paths( work_dir, script_name, cred_file, &script_file_dest, &cred_file_dest, &container_file_source, &cred_file_source); @@ -1235,6 +1237,7 @@ int launch_docker_container_as_user(const char * user, const char *app_id, } gid_t user_gid = getegid(); + fprintf(LOGFILE, "Creating local dirs...\n"); exit_code = create_local_dirs(user, app_id, container_id, work_dir, script_name, cred_file, local_dirs, log_dirs, 1, script_file_dest, cred_file_dest, @@ -1245,6 +1248,7 @@ int launch_docker_container_as_user(const char * user, const char *app_id, goto cleanup; } + fprintf(LOGFILE, "Getting exit code file...\n"); exit_code_file = get_exit_code_file(pid_file); if (NULL == exit_code_file) { exit_code = OUT_OF_MEMORY; @@ -1253,6 +1257,7 @@ int launch_docker_container_as_user(const char * user, const char *app_id, goto cleanup; } + fprintf(LOGFILE, "Changing effective user to root...\n"); if (change_effective_user(0, user_gid) != 0) { fprintf(ERRORFILE, "Could not change to effective users %d, %d\n", 0, user_gid); fflush(ERRORFILE); @@ -1261,6 +1266,7 @@ int launch_docker_container_as_user(const char * user, const char *app_id, snprintf(docker_command_with_binary, EXECUTOR_PATH_MAX, "%s %s", docker_binary, docker_command); + fprintf(LOGFILE, "Launching docker container...\n"); FILE* start_docker = popen(docker_command_with_binary, "r"); if (pclose (start_docker) != 0) { @@ -1275,6 +1281,7 @@ int launch_docker_container_as_user(const char * user, const char *app_id, "%s inspect --format {{.State.Pid}} %s", docker_binary, container_id); + fprintf(LOGFILE, "Inspecting docker container...\n"); FILE* inspect_docker = popen(docker_inspect_command, "r"); int pid = 0; int res = fscanf (inspect_docker, "%d", &pid); @@ -1288,6 +1295,7 @@ int launch_docker_container_as_user(const char * user, const char *app_id, } if (pid != 0) { + fprintf(LOGFILE, "Writing to cgroup task files...\n"); // cgroups-based resource enforcement if (resources_key != NULL && ! strcmp(resources_key, "cgroups")) { // write pid to cgroups @@ -1301,7 +1309,9 @@ int launch_docker_container_as_user(const char * user, const char *app_id, } } } + // write pid to pidfile + fprintf(LOGFILE, "Writing pid file...\n"); if (pid_file == NULL || write_pid_to_file_as_nm(pid_file, (pid_t)pid) != 0) { exit_code = WRITE_PIDFILE_FAILED; @@ -1313,6 +1323,7 @@ int launch_docker_container_as_user(const char * user, const char *app_id, snprintf(docker_wait_command, EXECUTOR_PATH_MAX, "%s wait %s", docker_binary, container_id); + fprintf(LOGFILE, "Waiting for docker container to finish...\n"); FILE* wait_docker = popen(docker_wait_command, "r"); res = fscanf (wait_docker, "%d", &exit_code); if (pclose (wait_docker) != 0 || res <= 0) { @@ -1321,6 +1332,8 @@ int launch_docker_container_as_user(const char * user, const char *app_id, fflush(ERRORFILE); } if(exit_code != 0) { + fprintf(ERRORFILE, "Docker container exit code was not zero: %d\n", + exit_code); snprintf(docker_logs_command, EXECUTOR_PATH_MAX, "%s logs --tail=250 %s", docker_binary, container_id); FILE* logs = popen(docker_logs_command, "r"); @@ -1350,6 +1363,7 @@ int launch_docker_container_as_user(const char * user, const char *app_id, } } + fprintf(LOGFILE, "Removing docker container post-exit...\n"); snprintf(docker_rm_command, EXECUTOR_PATH_MAX, "%s rm %s", docker_binary, container_id); FILE* rm_docker = popen(docker_rm_command, "w"); @@ -1399,7 +1413,7 @@ int launch_container_as_user(const char *user, const char *app_id, char *cred_file_dest = NULL; char *exit_code_file = NULL; - + fprintf(LOGFILE, "Getting exit code file...\n"); exit_code_file = get_exit_code_file(pid_file); if (NULL == exit_code_file) { exit_code = OUT_OF_MEMORY; @@ -1408,6 +1422,8 @@ int launch_container_as_user(const char *user, const char *app_id, int container_file_source =-1; int cred_file_source = -1; + + fprintf(LOGFILE, "Creating script paths...\n"); exit_code = create_script_paths( work_dir, script_name, cred_file, &script_file_dest, &cred_file_dest, &container_file_source, &cred_file_source); @@ -1431,6 +1447,7 @@ int launch_container_as_user(const char *user, const char *app_id, goto cleanup; } + fprintf(LOGFILE, "Writing pid file...\n"); // write pid to pidfile if (pid_file == NULL || write_pid_to_file_as_nm(pid_file, pid) != 0) { @@ -1438,6 +1455,7 @@ int launch_container_as_user(const char *user, const char *app_id, goto cleanup; } + fprintf(LOGFILE, "Writing to cgroup task files...\n"); // cgroups-based resource enforcement if (resources_key != NULL && ! strcmp(resources_key, "cgroups")) { // write pid to cgroups @@ -1452,6 +1470,7 @@ int launch_container_as_user(const char *user, const char *app_id, } } + fprintf(LOGFILE, "Creating local dirs...\n"); exit_code = create_local_dirs(user, app_id, container_id, work_dir, script_name, cred_file, local_dirs, log_dirs, 0, script_file_dest, cred_file_dest, @@ -1462,6 +1481,8 @@ int launch_container_as_user(const char *user, const char *app_id, goto cleanup; } + fprintf(LOGFILE, "Launching container...\n"); + #if HAVE_FCLOSEALL fcloseall(); #else