YARN-4267. Add additional logging to container launch implementations in container-executor. Contributed by Sidharta Seethana.

This commit is contained in:
Varun Vasudev 2015-10-19 20:56:01 +05:30
parent e39ae0e676
commit 151eab2a12
2 changed files with 26 additions and 1 deletion

View File

@ -519,6 +519,10 @@ Release 2.8.0 - UNRELEASED
YARN-4170. AM need to be notified with priority in AllocateResponse.
(Sunil G via jianhe)
YARN-4267. Add additional logging to container launch implementations in
container-executor. (Sidharta Seethana via vvasudev)
OPTIMIZATIONS
YARN-3339. TestDockerContainerExecutor should pull a single image and not

View File

@ -1225,6 +1225,8 @@ int launch_docker_container_as_user(const char * user, const char *app_id,
if (docker_binary == NULL) {
docker_binary = "docker";
}
fprintf(LOGFILE, "Creating script paths...\n");
exit_code = create_script_paths(
work_dir, script_name, cred_file, &script_file_dest, &cred_file_dest,
&container_file_source, &cred_file_source);
@ -1235,6 +1237,7 @@ int launch_docker_container_as_user(const char * user, const char *app_id,
}
gid_t user_gid = getegid();
fprintf(LOGFILE, "Creating local dirs...\n");
exit_code = create_local_dirs(user, app_id, container_id,
work_dir, script_name, cred_file, local_dirs, log_dirs,
1, script_file_dest, cred_file_dest,
@ -1245,6 +1248,7 @@ int launch_docker_container_as_user(const char * user, const char *app_id,
goto cleanup;
}
fprintf(LOGFILE, "Getting exit code file...\n");
exit_code_file = get_exit_code_file(pid_file);
if (NULL == exit_code_file) {
exit_code = OUT_OF_MEMORY;
@ -1253,6 +1257,7 @@ int launch_docker_container_as_user(const char * user, const char *app_id,
goto cleanup;
}
fprintf(LOGFILE, "Changing effective user to root...\n");
if (change_effective_user(0, user_gid) != 0) {
fprintf(ERRORFILE, "Could not change to effective users %d, %d\n", 0, user_gid);
fflush(ERRORFILE);
@ -1261,6 +1266,7 @@ int launch_docker_container_as_user(const char * user, const char *app_id,
snprintf(docker_command_with_binary, EXECUTOR_PATH_MAX, "%s %s", docker_binary, docker_command);
fprintf(LOGFILE, "Launching docker container...\n");
FILE* start_docker = popen(docker_command_with_binary, "r");
if (pclose (start_docker) != 0)
{
@ -1275,6 +1281,7 @@ int launch_docker_container_as_user(const char * user, const char *app_id,
"%s inspect --format {{.State.Pid}} %s",
docker_binary, container_id);
fprintf(LOGFILE, "Inspecting docker container...\n");
FILE* inspect_docker = popen(docker_inspect_command, "r");
int pid = 0;
int res = fscanf (inspect_docker, "%d", &pid);
@ -1288,6 +1295,7 @@ int launch_docker_container_as_user(const char * user, const char *app_id,
}
if (pid != 0) {
fprintf(LOGFILE, "Writing to cgroup task files...\n");
// cgroups-based resource enforcement
if (resources_key != NULL && ! strcmp(resources_key, "cgroups")) {
// write pid to cgroups
@ -1301,7 +1309,9 @@ int launch_docker_container_as_user(const char * user, const char *app_id,
}
}
}
// write pid to pidfile
fprintf(LOGFILE, "Writing pid file...\n");
if (pid_file == NULL
|| write_pid_to_file_as_nm(pid_file, (pid_t)pid) != 0) {
exit_code = WRITE_PIDFILE_FAILED;
@ -1313,6 +1323,7 @@ int launch_docker_container_as_user(const char * user, const char *app_id,
snprintf(docker_wait_command, EXECUTOR_PATH_MAX,
"%s wait %s", docker_binary, container_id);
fprintf(LOGFILE, "Waiting for docker container to finish...\n");
FILE* wait_docker = popen(docker_wait_command, "r");
res = fscanf (wait_docker, "%d", &exit_code);
if (pclose (wait_docker) != 0 || res <= 0) {
@ -1321,6 +1332,8 @@ int launch_docker_container_as_user(const char * user, const char *app_id,
fflush(ERRORFILE);
}
if(exit_code != 0) {
fprintf(ERRORFILE, "Docker container exit code was not zero: %d\n",
exit_code);
snprintf(docker_logs_command, EXECUTOR_PATH_MAX, "%s logs --tail=250 %s",
docker_binary, container_id);
FILE* logs = popen(docker_logs_command, "r");
@ -1350,6 +1363,7 @@ int launch_docker_container_as_user(const char * user, const char *app_id,
}
}
fprintf(LOGFILE, "Removing docker container post-exit...\n");
snprintf(docker_rm_command, EXECUTOR_PATH_MAX,
"%s rm %s", docker_binary, container_id);
FILE* rm_docker = popen(docker_rm_command, "w");
@ -1399,7 +1413,7 @@ int launch_container_as_user(const char *user, const char *app_id,
char *cred_file_dest = NULL;
char *exit_code_file = NULL;
fprintf(LOGFILE, "Getting exit code file...\n");
exit_code_file = get_exit_code_file(pid_file);
if (NULL == exit_code_file) {
exit_code = OUT_OF_MEMORY;
@ -1408,6 +1422,8 @@ int launch_container_as_user(const char *user, const char *app_id,
int container_file_source =-1;
int cred_file_source = -1;
fprintf(LOGFILE, "Creating script paths...\n");
exit_code = create_script_paths(
work_dir, script_name, cred_file, &script_file_dest, &cred_file_dest,
&container_file_source, &cred_file_source);
@ -1431,6 +1447,7 @@ int launch_container_as_user(const char *user, const char *app_id,
goto cleanup;
}
fprintf(LOGFILE, "Writing pid file...\n");
// write pid to pidfile
if (pid_file == NULL
|| write_pid_to_file_as_nm(pid_file, pid) != 0) {
@ -1438,6 +1455,7 @@ int launch_container_as_user(const char *user, const char *app_id,
goto cleanup;
}
fprintf(LOGFILE, "Writing to cgroup task files...\n");
// cgroups-based resource enforcement
if (resources_key != NULL && ! strcmp(resources_key, "cgroups")) {
// write pid to cgroups
@ -1452,6 +1470,7 @@ int launch_container_as_user(const char *user, const char *app_id,
}
}
fprintf(LOGFILE, "Creating local dirs...\n");
exit_code = create_local_dirs(user, app_id, container_id,
work_dir, script_name, cred_file, local_dirs, log_dirs,
0, script_file_dest, cred_file_dest,
@ -1462,6 +1481,8 @@ int launch_container_as_user(const char *user, const char *app_id,
goto cleanup;
}
fprintf(LOGFILE, "Launching container...\n");
#if HAVE_FCLOSEALL
fcloseall();
#else