Revert "HBASE-25663 Make graceful_stop localhostname compare match even if fqdn (#3048)"

This reverts commit f4e1ab7b1d.
This commit is contained in:
stack 2021-03-15 21:29:00 -07:00
parent 5457554826
commit c36e40eb1c
2 changed files with 53 additions and 21 deletions

View File

@ -19,6 +19,7 @@
# */ # */
# Move regions off a server then stop it. Optionally restart and reload. # Move regions off a server then stop it. Optionally restart and reload.
# Turn off the balancer before running this script.
function usage { function usage {
echo "Usage: graceful_stop.sh [--config <conf-dir>] [-e] [--restart [--reload]] [--thrift] \ echo "Usage: graceful_stop.sh [--config <conf-dir>] [-e] [--restart [--reload]] [--thrift] \
[--rest] [-n |--noack] [--maxthreads <number of threads>] [--movetimeout <timeout in seconds>] \ [--rest] [-n |--noack] [--maxthreads <number of threads>] [--movetimeout <timeout in seconds>] \
@ -32,7 +33,7 @@ moving regions"
echo " maxthreads xx Limit the number of threads used by the region mover. Default value is 1." echo " maxthreads xx Limit the number of threads used by the region mover. Default value is 1."
echo " movetimeout xx Timeout for moving regions. If regions are not moved by the timeout value,\ echo " movetimeout xx Timeout for moving regions. If regions are not moved by the timeout value,\
exit with error. Default value is INT_MAX." exit with error. Default value is INT_MAX."
echo " hostname Hostname to stop; match what HBase uses; pass 'localhost' if local to avoid ssh" echo " hostname Hostname of server we are to stop"
echo " e|failfast Set -e so exit immediately if any command exits with non-zero status" echo " e|failfast Set -e so exit immediately if any command exits with non-zero status"
echo " nob|nobalancer Do not manage balancer states. This is only used as optimization in \ echo " nob|nobalancer Do not manage balancer states. This is only used as optimization in \
rolling_restart.sh to avoid multiple calls to hbase shell" rolling_restart.sh to avoid multiple calls to hbase shell"
@ -101,6 +102,13 @@ fi
hostname=$1 hostname=$1
filename="/tmp/$hostname" filename="/tmp/$hostname"
local=
localhostname=`/bin/hostname`
if [ "$localhostname" == "$hostname" ]; then
local=true
fi
if [ "$nob" == "true" ]; then if [ "$nob" == "true" ]; then
log "[ $0 ] skipping disabling balancer -nob argument is used" log "[ $0 ] skipping disabling balancer -nob argument is used"
HBASE_BALANCER_STATE=false HBASE_BALANCER_STATE=false
@ -111,7 +119,7 @@ else
fi fi
unload_args="--filename $filename --maxthreads $maxthreads $noack --operation unload \ unload_args="--filename $filename --maxthreads $maxthreads $noack --operation unload \
--timeout $movetimeout --regionserverhost $hostname" --timeout $movetimeout --regionserverhost $hostname"
if [ "$designatedfile" != "" ]; then if [ "$designatedfile" != "" ]; then
unload_args="$unload_args --designatedfile $designatedfile" unload_args="$unload_args --designatedfile $designatedfile"
@ -131,26 +139,50 @@ hosts="/tmp/$(basename $0).$$.tmp"
echo $hostname >> $hosts echo $hostname >> $hosts
if [ "$thrift" != "" ]; then if [ "$thrift" != "" ]; then
log "Stopping thrift server on $hostname" log "Stopping thrift server on $hostname"
if [ "$local" == true ]; then
"$bin"/hbase-daemon.sh --config ${HBASE_CONF_DIR} stop thrift
else
"$bin"/hbase-daemons.sh --config ${HBASE_CONF_DIR} --hosts ${hosts} stop thrift "$bin"/hbase-daemons.sh --config ${HBASE_CONF_DIR} --hosts ${hosts} stop thrift
fi
fi fi
if [ "$rest" != "" ]; then if [ "$rest" != "" ]; then
log "Stopping rest server on $hostname" log "Stopping rest server on $hostname"
if [ "$local" == true ]; then
"$bin"/hbase-daemon.sh --config ${HBASE_CONF_DIR} stop rest
else
"$bin"/hbase-daemons.sh --config ${HBASE_CONF_DIR} --hosts ${hosts} stop rest "$bin"/hbase-daemons.sh --config ${HBASE_CONF_DIR} --hosts ${hosts} stop rest
fi
fi fi
log "Stopping regionserver on $hostname" log "Stopping regionserver on $hostname"
"$bin"/hbase-daemons.sh --config ${HBASE_CONF_DIR} --hosts ${hosts} stop regionserver if [ "$local" == true ]; then
"$bin"/hbase-daemon.sh --config ${HBASE_CONF_DIR} stop regionserver
else
"$bin"/hbase-daemons.sh --config ${HBASE_CONF_DIR} --hosts ${hosts} stop regionserver
fi
if [ "$restart" != "" ]; then if [ "$restart" != "" ]; then
log "Restarting regionserver on $hostname" log "Restarting regionserver on $hostname"
if [ "$local" == true ]; then
"$bin"/hbase-daemon.sh --config ${HBASE_CONF_DIR} start regionserver
else
"$bin"/hbase-daemons.sh --config ${HBASE_CONF_DIR} --hosts ${hosts} start regionserver "$bin"/hbase-daemons.sh --config ${HBASE_CONF_DIR} --hosts ${hosts} start regionserver
fi
if [ "$thrift" != "" ]; then if [ "$thrift" != "" ]; then
log "Restarting thrift server on $hostname" log "Restarting thrift server on $hostname"
# -b 0.0.0.0 says listen on all interfaces rather than just default. # -b 0.0.0.0 says listen on all interfaces rather than just default.
if [ "$local" == true ]; then
"$bin"/hbase-daemon.sh --config ${HBASE_CONF_DIR} start thrift -b 0.0.0.0
else
"$bin"/hbase-daemons.sh --config ${HBASE_CONF_DIR} --hosts ${hosts} start thrift -b 0.0.0.0 "$bin"/hbase-daemons.sh --config ${HBASE_CONF_DIR} --hosts ${hosts} start thrift -b 0.0.0.0
fi fi
fi
if [ "$rest" != "" ]; then if [ "$rest" != "" ]; then
log "Restarting rest server on $hostname" log "Restarting rest server on $hostname"
if [ "$local" == true ]; then
"$bin"/hbase-daemon.sh --config ${HBASE_CONF_DIR} start rest
else
"$bin"/hbase-daemons.sh --config ${HBASE_CONF_DIR} --hosts ${hosts} start rest "$bin"/hbase-daemons.sh --config ${HBASE_CONF_DIR} --hosts ${hosts} start rest
fi fi
fi
if [ "$reload" != "" ]; then if [ "$reload" != "" ]; then
log "Reloading $hostname region(s)" log "Reloading $hostname region(s)"
HBASE_NOEXEC=true "$bin"/hbase --config ${HBASE_CONF_DIR} \ HBASE_NOEXEC=true "$bin"/hbase --config ${HBASE_CONF_DIR} \
@ -169,4 +201,4 @@ else
fi fi
# Cleanup tmp files. # Cleanup tmp files.
trap "rm -f /tmp/$(basename $0).*.tmp &> /dev/null" EXIT trap "rm -f "/tmp/$(basename $0).*.tmp" &> /dev/null" EXIT

View File

@ -1364,9 +1364,10 @@ Copy the script if you need to make use of it in a version of hbase previous to
A downside to the above stop of a RegionServer is that regions could be offline for a good period of time. A downside to the above stop of a RegionServer is that regions could be offline for a good period of time.
Regions are closed in order. Regions are closed in order.
If there are many regions on the server, the first region to close may not be back online until all regions close and If there are many regions on the server, the first region to close may not be back online until all regions close and after the master notices the RegionServer's znode is gone.
after the master notices the RegionServer's znode is gone. A node can be asked to gradually shed its load and In Apache HBase 0.90.2, we added a facility for having a node gradually shed its load and then shut itself down.
then shut itself down using the _graceful_stop.sh_ script. Here is its usage: Apache HBase 0.90.2 added the _graceful_stop.sh_ script.
Here is its usage:
---- ----
$ ./bin/graceful_stop.sh $ ./bin/graceful_stop.sh
@ -1392,17 +1393,16 @@ To decommission a loaded RegionServer, run the following: +$
[NOTE] [NOTE]
==== ====
The `HOSTNAME` passed to _graceful_stop.sh_ must match the hostname that hbase is using to identify RegionServers. The `HOSTNAME` passed to _graceful_stop.sh_ must match the hostname that hbase is using to identify RegionServers.
HBase uses fully-qualified domain names usually. Check the list of RegionServers in the master UI for how HBase Check the list of RegionServers in the master UI for how HBase is referring to servers.
is referring to servers. Whatever HBase is using, this is what you should pass the _graceful_stop.sh_ decommission script. It's usually hostname but can also be FQDN.
If you pass IPs, the script is not yet smart enough to make a hostname (or FQDN) of it and so it will fail when it checks Whatever HBase is using, this is what you should pass the _graceful_stop.sh_ decommission script.
if server is currently running; the graceful unloading of regions will not run. If you pass IPs, the script is not yet smart enough to make a hostname (or FQDN) of it and so it will fail when it checks if server is currently running; the graceful unloading of regions will not run.
==== ====
The _graceful_stop.sh_ script will move the regions off the decommissioned RegionServer one at a time to minimize region churn. The _graceful_stop.sh_ script will move the regions off the decommissioned RegionServer one at a time to minimize region churn.
It will verify the region deployed in the new location before it moves the next region and so on until the decommissioned It will verify the region deployed in the new location before it moves the next region and so on until the decommissioned server is carrying zero regions.
server is carrying zero regions. At this point, the _graceful_stop.sh_ tells the RegionServer `stop`. At this point, the _graceful_stop.sh_ tells the RegionServer `stop`.
The master will at this point notice the RegionServer gone but all regions will have already been redeployed and because the The master will at this point notice the RegionServer gone but all regions will have already been redeployed and because the RegionServer went down cleanly, there will be no WAL logs to split.
RegionServer went down cleanly, there will be no WAL logs to split.
[[lb]] [[lb]]
.Load Balancer .Load Balancer