HADOOP-15527. Improve delay check for stopping processes.

Contributed by Vinod Kumar Vavilapalli

(cherry picked from commit 108da85320)
This commit is contained in:
Eric Yang 2018-06-12 20:40:32 -04:00
parent 6f31faf92b
commit f516a7a85f
2 changed files with 56 additions and 2 deletions

View File

@ -2030,6 +2030,35 @@ function hadoop_start_secure_daemon_wrapper
return 0 return 0
} }
## @description Wait until the given process exits or the timeout elapses,
## @description whichever comes first. Liveness is polled once per second.
## @audience private
## @stability evolving
## @param pid
## @param timeout
function wait_process_to_die_or_timeout
{
  local pid=$1
  local timeout=$2
  # Loop counter; kept local so polling never clobbers a caller's `i`
  local i

  # Normalize timeout: a fractional value is rounded to a whole number
  # of seconds, because the loop below sleeps in 1 second steps
  timeout=$(printf "%.0f\n" "${timeout}")
  if [[ ${timeout} -lt 1 ]]; then
    # minimum 1 second
    timeout=1
  fi

  # Probe with signal 0 (nothing is delivered to the process) and stop
  # waiting as soon as the probe fails, instead of always sleeping for
  # the whole timeout
  for (( i=0; i < timeout; i++ ))
  do
    if kill -0 "${pid}" > /dev/null 2>&1; then
      sleep 1
    else
      break
    fi
  done
}
## @description Stop the non-privileged `command` daemon with that ## @description Stop the non-privileged `command` daemon with that
## @description that is running at `pidfile`. ## @description that is running at `pidfile`.
## @audience public ## @audience public
@ -2050,11 +2079,14 @@ function hadoop_stop_daemon
pid=$(cat "$pidfile") pid=$(cat "$pidfile")
kill "${pid}" >/dev/null 2>&1 kill "${pid}" >/dev/null 2>&1
sleep "${HADOOP_STOP_TIMEOUT}"
wait_process_to_die_or_timeout "${pid}" "${HADOOP_STOP_TIMEOUT}"
if kill -0 "${pid}" > /dev/null 2>&1; then if kill -0 "${pid}" > /dev/null 2>&1; then
hadoop_error "WARNING: ${cmd} did not stop gracefully after ${HADOOP_STOP_TIMEOUT} seconds: Trying to kill with kill -9" hadoop_error "WARNING: ${cmd} did not stop gracefully after ${HADOOP_STOP_TIMEOUT} seconds: Trying to kill with kill -9"
kill -9 "${pid}" >/dev/null 2>&1 kill -9 "${pid}" >/dev/null 2>&1
fi fi
wait_process_to_die_or_timeout "${pid}" "${HADOOP_STOP_TIMEOUT}"
if ps -p "${pid}" > /dev/null 2>&1; then if ps -p "${pid}" > /dev/null 2>&1; then
hadoop_error "ERROR: Unable to kill ${pid}" hadoop_error "ERROR: Unable to kill ${pid}"
else else

View File

@ -15,7 +15,7 @@
load hadoop-functions_test_helper load hadoop-functions_test_helper
@test "hadoop_stop_daemon" { @test "hadoop_stop_daemon_changing_pid" {
old_pid=12345 old_pid=12345
new_pid=54321 new_pid=54321
HADOOP_STOP_TIMEOUT=3 HADOOP_STOP_TIMEOUT=3
@ -29,3 +29,25 @@ load hadoop-functions_test_helper
[ -f pidfile ] [ -f pidfile ]
[ "$(cat pidfile)" = "${new_pid}" ] [ "$(cat pidfile)" = "${new_pid}" ]
} }
# Starts a background helper process that records its pid in a pidfile, asks
# hadoop_stop_daemon to stop it, then checks that the process is gone and the
# pidfile has been removed.
@test "hadoop_stop_daemon_force_kill" {
  HADOOP_STOP_TIMEOUT=4

  # Run the following in a sub-shell so that its termination doesn't affect the test
  (sh "${TESTBINDIR}/process_with_sigterm_trap.sh" "${TMP}/pidfile" &)

  # Wait for the process to go into tight loop
  sleep 1

  [ -f "${TMP}/pidfile" ]
  pid=$(cat "${TMP}/pidfile")

  run hadoop_stop_daemon my_command "${TMP}/pidfile" 2>&1

  # The process should no longer be alive.
  # A bare `! cmd` line is exempt from errexit, so it could never fail this
  # test; capture the exit status with `run` and assert on it explicitly.
  run kill -0 "${pid}"
  [ "${status}" -ne 0 ]

  # The PID file should be gone
  [ ! -f "${TMP}/pidfile" ]
}