YARN-8640. Restore previous state in container-executor after failure. Contributed by Jim Brennan

(cherry picked from commit d1d129aa9d)
This commit is contained in:
Jason Lowe 2018-08-14 10:21:03 -05:00
parent 74d4fd975a
commit 95cd6de5c6
1 changed files with 43 additions and 28 deletions

View File

@ -213,10 +213,12 @@ static int change_effective_user(uid_t user, gid_t group) {
* cgroup_file: Path to cgroup file where pid needs to be written to. * cgroup_file: Path to cgroup file where pid needs to be written to.
*/ */
static int write_pid_to_cgroup_as_root(const char* cgroup_file, pid_t pid) { static int write_pid_to_cgroup_as_root(const char* cgroup_file, pid_t pid) {
int rc = 0;
uid_t user = geteuid(); uid_t user = geteuid();
gid_t group = getegid(); gid_t group = getegid();
if (change_effective_user(0, 0) != 0) { if (change_effective_user(0, 0) != 0) {
return -1; rc = -1;
goto cleanup;
} }
// open // open
@ -224,7 +226,8 @@ static int write_pid_to_cgroup_as_root(const char* cgroup_file, pid_t pid) {
if (cgroup_fd == -1) { if (cgroup_fd == -1) {
fprintf(LOGFILE, "Can't open file %s as node manager - %s\n", cgroup_file, fprintf(LOGFILE, "Can't open file %s as node manager - %s\n", cgroup_file,
strerror(errno)); strerror(errno));
return -1; rc = -1;
goto cleanup;
} }
// write pid // write pid
@ -235,15 +238,17 @@ static int write_pid_to_cgroup_as_root(const char* cgroup_file, pid_t pid) {
if (written == -1) { if (written == -1) {
fprintf(LOGFILE, "Failed to write pid to file %s - %s\n", fprintf(LOGFILE, "Failed to write pid to file %s - %s\n",
cgroup_file, strerror(errno)); cgroup_file, strerror(errno));
return -1; rc = -1;
goto cleanup;
} }
cleanup:
// Revert back to the calling user. // Revert back to the calling user.
if (change_effective_user(user, group)) { if (change_effective_user(user, group)) {
return -1; rc = -1;
} }
return 0; return rc;
} }
#endif #endif
@ -252,15 +257,18 @@ static int write_pid_to_cgroup_as_root(const char* cgroup_file, pid_t pid) {
* pid_file: Path to pid file where pid needs to be written to * pid_file: Path to pid file where pid needs to be written to
*/ */
static int write_pid_to_file_as_nm(const char* pid_file, pid_t pid) { static int write_pid_to_file_as_nm(const char* pid_file, pid_t pid) {
int rc = 0;
char *temp_pid_file = NULL;
uid_t user = geteuid(); uid_t user = geteuid();
gid_t group = getegid(); gid_t group = getegid();
if (change_effective_user(nm_uid, nm_gid) != 0) { if (change_effective_user(nm_uid, nm_gid) != 0) {
fprintf(ERRORFILE, "Could not change to effective users %d, %d\n", nm_uid, nm_gid); fprintf(ERRORFILE, "Could not change to effective users %d, %d\n", nm_uid, nm_gid);
fflush(ERRORFILE); fflush(ERRORFILE);
return -1; rc = -1;
goto cleanup;
} }
char *temp_pid_file = concatenate("%s.tmp", "pid_file_path", 1, pid_file); temp_pid_file = concatenate("%s.tmp", "pid_file_path", 1, pid_file);
fprintf(LOGFILE, "Writing to tmp file %s\n", temp_pid_file); fprintf(LOGFILE, "Writing to tmp file %s\n", temp_pid_file);
fflush(LOGFILE); fflush(LOGFILE);
// create with 700 // create with 700
@ -269,8 +277,8 @@ static int write_pid_to_file_as_nm(const char* pid_file, pid_t pid) {
fprintf(LOGFILE, "Can't open file %s as node manager - %s\n", temp_pid_file, fprintf(LOGFILE, "Can't open file %s as node manager - %s\n", temp_pid_file,
strerror(errno)); strerror(errno));
fflush(LOGFILE); fflush(LOGFILE);
free(temp_pid_file); rc = -1;
return -1; goto cleanup;
} }
// write pid to temp file // write pid to temp file
@ -282,8 +290,8 @@ static int write_pid_to_file_as_nm(const char* pid_file, pid_t pid) {
fprintf(LOGFILE, "Failed to write pid to file %s as node manager - %s\n", fprintf(LOGFILE, "Failed to write pid to file %s as node manager - %s\n",
temp_pid_file, strerror(errno)); temp_pid_file, strerror(errno));
fflush(LOGFILE); fflush(LOGFILE);
free(temp_pid_file); rc = -1;
return -1; goto cleanup;
} }
// rename temp file to actual pid file // rename temp file to actual pid file
@ -293,36 +301,41 @@ static int write_pid_to_file_as_nm(const char* pid_file, pid_t pid) {
temp_pid_file, pid_file, strerror(errno)); temp_pid_file, pid_file, strerror(errno));
fflush(LOGFILE); fflush(LOGFILE);
unlink(temp_pid_file); unlink(temp_pid_file);
free(temp_pid_file); rc = -1;
return -1; goto cleanup;
} }
cleanup:
// Revert back to the calling user. // Revert back to the calling user.
if (change_effective_user(user, group)) { if (change_effective_user(user, group)) {
free(temp_pid_file); rc = -1;
return -1;
} }
free(temp_pid_file); free(temp_pid_file);
return 0; return rc;
} }
/** /**
* Write the exit code of the container into the exit code file * Write the exit code of the container into the exit code file
* exit_code_file: Path to exit code file where exit code needs to be written * exit_code_file: Path to exit code file where exit code needs to be written
*/ */
static int write_exit_code_file_as_nm(const char* exit_code_file, int exit_code) { static int write_exit_code_file_as_nm(const char* exit_code_file,
int exit_code) {
char *tmp_ecode_file = NULL;
int rc = 0;
uid_t user = geteuid(); uid_t user = geteuid();
gid_t group = getegid(); gid_t group = getegid();
if (change_effective_user(nm_uid, nm_gid) != 0) { if (change_effective_user(nm_uid, nm_gid) != 0) {
fprintf(ERRORFILE, "Could not change to effective users %d, %d\n", nm_uid, nm_gid); fprintf(ERRORFILE, "Could not change to effective users %d, %d\n", nm_uid, nm_gid);
fflush(ERRORFILE); fflush(ERRORFILE);
return -1; rc = -1;
goto cleanup;
} }
char *tmp_ecode_file = concatenate("%s.tmp", "exit_code_path", 1, tmp_ecode_file = concatenate("%s.tmp", "exit_code_path", 1,
exit_code_file); exit_code_file);
if (tmp_ecode_file == NULL) { if (tmp_ecode_file == NULL) {
return -1; rc = -1;
goto cleanup;
} }
// create with 700 // create with 700
@ -330,8 +343,8 @@ static int write_exit_code_file_as_nm(const char* exit_code_file, int exit_code)
if (ecode_fd == -1) { if (ecode_fd == -1) {
fprintf(LOGFILE, "Can't open file %s - %s\n", tmp_ecode_file, fprintf(LOGFILE, "Can't open file %s - %s\n", tmp_ecode_file,
strerror(errno)); strerror(errno));
free(tmp_ecode_file); rc = -1;
return -1; goto cleanup;
} }
char ecode_buf[21]; char ecode_buf[21];
@ -341,8 +354,8 @@ static int write_exit_code_file_as_nm(const char* exit_code_file, int exit_code)
if (written == -1) { if (written == -1) {
fprintf(LOGFILE, "Failed to write exit code to file %s - %s\n", fprintf(LOGFILE, "Failed to write exit code to file %s - %s\n",
tmp_ecode_file, strerror(errno)); tmp_ecode_file, strerror(errno));
free(tmp_ecode_file); rc = -1;
return -1; goto cleanup;
} }
// rename temp file to actual exit code file // rename temp file to actual exit code file
@ -351,19 +364,21 @@ static int write_exit_code_file_as_nm(const char* exit_code_file, int exit_code)
fprintf(LOGFILE, "Can't move exit code file from %s to %s - %s\n", fprintf(LOGFILE, "Can't move exit code file from %s to %s - %s\n",
tmp_ecode_file, exit_code_file, strerror(errno)); tmp_ecode_file, exit_code_file, strerror(errno));
unlink(tmp_ecode_file); unlink(tmp_ecode_file);
free(tmp_ecode_file); rc = -1;
return -1; goto cleanup;
} }
cleanup:
// always change back // always change back
if (change_effective_user(user, group) != 0) { if (change_effective_user(user, group) != 0) {
fprintf(ERRORFILE, fprintf(ERRORFILE,
"Could not change to effective users %d, %d\n", user, group); "Could not change to effective users %d, %d\n", user, group);
fflush(ERRORFILE); fflush(ERRORFILE);
return -1; rc = -1;
} }
free(tmp_ecode_file); free(tmp_ecode_file);
return 0; return rc;
} }
static int wait_and_get_exit_code(pid_t pid) { static int wait_and_get_exit_code(pid_t pid) {