YARN-4267. Add additional logging to container launch implementations in container-executor. Contributed by Sidharta Seethana.

(cherry picked from commit 151eab2a12)
This commit is contained in:
Varun Vasudev 2015-10-19 20:56:01 +05:30
parent 196d549e2b
commit e4429de86f
2 changed files with 26 additions and 1 deletion

View File

@ -467,6 +467,10 @@ Release 2.8.0 - UNRELEASED
YARN-4170. AM need to be notified with priority in AllocateResponse.
(Sunil G via jianhe)
YARN-4267. Add additional logging to container launch implementations in
container-executor. (Sidharta Seethana via vvasudev)
OPTIMIZATIONS
YARN-3339. TestDockerContainerExecutor should pull a single image and not

View File

@ -1225,6 +1225,8 @@ int launch_docker_container_as_user(const char * user, const char *app_id,
if (docker_binary == NULL) { if (docker_binary == NULL) {
docker_binary = "docker"; docker_binary = "docker";
} }
fprintf(LOGFILE, "Creating script paths...\n");
exit_code = create_script_paths( exit_code = create_script_paths(
work_dir, script_name, cred_file, &script_file_dest, &cred_file_dest, work_dir, script_name, cred_file, &script_file_dest, &cred_file_dest,
&container_file_source, &cred_file_source); &container_file_source, &cred_file_source);
@ -1235,6 +1237,7 @@ int launch_docker_container_as_user(const char * user, const char *app_id,
} }
gid_t user_gid = getegid(); gid_t user_gid = getegid();
fprintf(LOGFILE, "Creating local dirs...\n");
exit_code = create_local_dirs(user, app_id, container_id, exit_code = create_local_dirs(user, app_id, container_id,
work_dir, script_name, cred_file, local_dirs, log_dirs, work_dir, script_name, cred_file, local_dirs, log_dirs,
1, script_file_dest, cred_file_dest, 1, script_file_dest, cred_file_dest,
@ -1245,6 +1248,7 @@ int launch_docker_container_as_user(const char * user, const char *app_id,
goto cleanup; goto cleanup;
} }
fprintf(LOGFILE, "Getting exit code file...\n");
exit_code_file = get_exit_code_file(pid_file); exit_code_file = get_exit_code_file(pid_file);
if (NULL == exit_code_file) { if (NULL == exit_code_file) {
exit_code = OUT_OF_MEMORY; exit_code = OUT_OF_MEMORY;
@ -1253,6 +1257,7 @@ int launch_docker_container_as_user(const char * user, const char *app_id,
goto cleanup; goto cleanup;
} }
fprintf(LOGFILE, "Changing effective user to root...\n");
if (change_effective_user(0, user_gid) != 0) { if (change_effective_user(0, user_gid) != 0) {
fprintf(ERRORFILE, "Could not change to effective users %d, %d\n", 0, user_gid); fprintf(ERRORFILE, "Could not change to effective users %d, %d\n", 0, user_gid);
fflush(ERRORFILE); fflush(ERRORFILE);
@ -1261,6 +1266,7 @@ int launch_docker_container_as_user(const char * user, const char *app_id,
snprintf(docker_command_with_binary, EXECUTOR_PATH_MAX, "%s %s", docker_binary, docker_command); snprintf(docker_command_with_binary, EXECUTOR_PATH_MAX, "%s %s", docker_binary, docker_command);
fprintf(LOGFILE, "Launching docker container...\n");
FILE* start_docker = popen(docker_command_with_binary, "r"); FILE* start_docker = popen(docker_command_with_binary, "r");
if (pclose (start_docker) != 0) if (pclose (start_docker) != 0)
{ {
@ -1275,6 +1281,7 @@ int launch_docker_container_as_user(const char * user, const char *app_id,
"%s inspect --format {{.State.Pid}} %s", "%s inspect --format {{.State.Pid}} %s",
docker_binary, container_id); docker_binary, container_id);
fprintf(LOGFILE, "Inspecting docker container...\n");
FILE* inspect_docker = popen(docker_inspect_command, "r"); FILE* inspect_docker = popen(docker_inspect_command, "r");
int pid = 0; int pid = 0;
int res = fscanf (inspect_docker, "%d", &pid); int res = fscanf (inspect_docker, "%d", &pid);
@ -1288,6 +1295,7 @@ int launch_docker_container_as_user(const char * user, const char *app_id,
} }
if (pid != 0) { if (pid != 0) {
fprintf(LOGFILE, "Writing to cgroup task files...\n");
// cgroups-based resource enforcement // cgroups-based resource enforcement
if (resources_key != NULL && ! strcmp(resources_key, "cgroups")) { if (resources_key != NULL && ! strcmp(resources_key, "cgroups")) {
// write pid to cgroups // write pid to cgroups
@ -1301,7 +1309,9 @@ int launch_docker_container_as_user(const char * user, const char *app_id,
} }
} }
} }
// write pid to pidfile // write pid to pidfile
fprintf(LOGFILE, "Writing pid file...\n");
if (pid_file == NULL if (pid_file == NULL
|| write_pid_to_file_as_nm(pid_file, (pid_t)pid) != 0) { || write_pid_to_file_as_nm(pid_file, (pid_t)pid) != 0) {
exit_code = WRITE_PIDFILE_FAILED; exit_code = WRITE_PIDFILE_FAILED;
@ -1313,6 +1323,7 @@ int launch_docker_container_as_user(const char * user, const char *app_id,
snprintf(docker_wait_command, EXECUTOR_PATH_MAX, snprintf(docker_wait_command, EXECUTOR_PATH_MAX,
"%s wait %s", docker_binary, container_id); "%s wait %s", docker_binary, container_id);
fprintf(LOGFILE, "Waiting for docker container to finish...\n");
FILE* wait_docker = popen(docker_wait_command, "r"); FILE* wait_docker = popen(docker_wait_command, "r");
res = fscanf (wait_docker, "%d", &exit_code); res = fscanf (wait_docker, "%d", &exit_code);
if (pclose (wait_docker) != 0 || res <= 0) { if (pclose (wait_docker) != 0 || res <= 0) {
@ -1321,6 +1332,8 @@ int launch_docker_container_as_user(const char * user, const char *app_id,
fflush(ERRORFILE); fflush(ERRORFILE);
} }
if(exit_code != 0) { if(exit_code != 0) {
fprintf(ERRORFILE, "Docker container exit code was not zero: %d\n",
exit_code);
snprintf(docker_logs_command, EXECUTOR_PATH_MAX, "%s logs --tail=250 %s", snprintf(docker_logs_command, EXECUTOR_PATH_MAX, "%s logs --tail=250 %s",
docker_binary, container_id); docker_binary, container_id);
FILE* logs = popen(docker_logs_command, "r"); FILE* logs = popen(docker_logs_command, "r");
@ -1350,6 +1363,7 @@ int launch_docker_container_as_user(const char * user, const char *app_id,
} }
} }
fprintf(LOGFILE, "Removing docker container post-exit...\n");
snprintf(docker_rm_command, EXECUTOR_PATH_MAX, snprintf(docker_rm_command, EXECUTOR_PATH_MAX,
"%s rm %s", docker_binary, container_id); "%s rm %s", docker_binary, container_id);
FILE* rm_docker = popen(docker_rm_command, "w"); FILE* rm_docker = popen(docker_rm_command, "w");
@ -1399,7 +1413,7 @@ int launch_container_as_user(const char *user, const char *app_id,
char *cred_file_dest = NULL; char *cred_file_dest = NULL;
char *exit_code_file = NULL; char *exit_code_file = NULL;
fprintf(LOGFILE, "Getting exit code file...\n");
exit_code_file = get_exit_code_file(pid_file); exit_code_file = get_exit_code_file(pid_file);
if (NULL == exit_code_file) { if (NULL == exit_code_file) {
exit_code = OUT_OF_MEMORY; exit_code = OUT_OF_MEMORY;
@ -1408,6 +1422,8 @@ int launch_container_as_user(const char *user, const char *app_id,
int container_file_source =-1; int container_file_source =-1;
int cred_file_source = -1; int cred_file_source = -1;
fprintf(LOGFILE, "Creating script paths...\n");
exit_code = create_script_paths( exit_code = create_script_paths(
work_dir, script_name, cred_file, &script_file_dest, &cred_file_dest, work_dir, script_name, cred_file, &script_file_dest, &cred_file_dest,
&container_file_source, &cred_file_source); &container_file_source, &cred_file_source);
@ -1431,6 +1447,7 @@ int launch_container_as_user(const char *user, const char *app_id,
goto cleanup; goto cleanup;
} }
fprintf(LOGFILE, "Writing pid file...\n");
// write pid to pidfile // write pid to pidfile
if (pid_file == NULL if (pid_file == NULL
|| write_pid_to_file_as_nm(pid_file, pid) != 0) { || write_pid_to_file_as_nm(pid_file, pid) != 0) {
@ -1438,6 +1455,7 @@ int launch_container_as_user(const char *user, const char *app_id,
goto cleanup; goto cleanup;
} }
fprintf(LOGFILE, "Writing to cgroup task files...\n");
// cgroups-based resource enforcement // cgroups-based resource enforcement
if (resources_key != NULL && ! strcmp(resources_key, "cgroups")) { if (resources_key != NULL && ! strcmp(resources_key, "cgroups")) {
// write pid to cgroups // write pid to cgroups
@ -1452,6 +1470,7 @@ int launch_container_as_user(const char *user, const char *app_id,
} }
} }
fprintf(LOGFILE, "Creating local dirs...\n");
exit_code = create_local_dirs(user, app_id, container_id, exit_code = create_local_dirs(user, app_id, container_id,
work_dir, script_name, cred_file, local_dirs, log_dirs, work_dir, script_name, cred_file, local_dirs, log_dirs,
0, script_file_dest, cred_file_dest, 0, script_file_dest, cred_file_dest,
@ -1462,6 +1481,8 @@ int launch_container_as_user(const char *user, const char *app_id,
goto cleanup; goto cleanup;
} }
fprintf(LOGFILE, "Launching container...\n");
#if HAVE_FCLOSEALL #if HAVE_FCLOSEALL
fcloseall(); fcloseall();
#else #else