HBASE-3511. Allow rolling restart to apply to only RS or only masters

git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1068158 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Todd Lipcon 2011-02-07 21:57:47 +00:00
parent e0dbeec47f
commit 69eec35098
2 changed files with 80 additions and 45 deletions

View File

@ -101,6 +101,7 @@ Release 0.90.1 - Unreleased
IMPROVEMENTS
HBASE-3470 Check that hbase-default.xml is loaded from within jar
HBASE-3508 LruBlockCache statistics thread should have a name
HBASE-3511 Allow rolling restart to apply to only RS or only masters
Release 0.90.0 - January 19th, 2011
INCOMPATIBLE CHANGES

View File

@ -34,7 +34,7 @@
#
# Modelled after $HADOOP_HOME/bin/slaves.sh.
# Usage text; the second assignment overrides the first, so it is the one
# in effect for the rest of the script.
usage="Usage: $0 [--config <hbase-confdir>] commands..."
usage="Usage: $0 [--config <hbase-confdir>] [--rs-only] [--master-only]"
# Turn the directory this script was invoked from into an absolute path.
bin=$(dirname "${0}")
bin=$(cd "${bin}" >/dev/null; pwd)
@ -48,58 +48,92 @@ then
exit $errCode
fi
# Print the usage text held in $usage and exit with status 1.
# Globals: usage (read)
# Outputs: the usage text, verbatim, on stdout
usage() {
  # The expansion must be quoted: the text contains bracketed words such as
  # [--rs-only], which an unquoted expansion treats as glob patterns and may
  # replace with matching file names from the current directory.
  printf '%s\n' "$usage"
  exit 1
}
# Which daemon groups to roll; both are selected unless a flag narrows it.
RR_RS=1
RR_MASTER=1

# Parse command-line flags.
#   --rs-only | -r   roll regionservers only (RR_MASTER=0)
#   --master-only    roll masters only (RR_RS=0)
# Any other argument is reported, the usage text is shown, and the script
# exits with status 1.
for x in "$@"; do
  case "$x" in
    --rs-only|-r)
      RR_RS=1
      RR_MASTER=0
      ;;
    --master-only)
      RR_RS=0
      RR_MASTER=1
      ;;
    *)
      # Quoted so that an argument such as '*' is echoed verbatim instead of
      # being glob-expanded or word-split.
      printf 'Bad argument: %s\n' "$x"
      usage
      # Safeguard in case usage ever returns instead of exiting.
      exit 1
      ;;
  esac
done
# Main restart flow.
# Reads hbase.cluster.distributed through HBaseConfTool. When it is 'false'
# only the single master daemon is restarted (and selective flags are
# rejected); otherwise masters and/or regionservers are rolled according to
# RR_MASTER and RR_RS.
distMode=`$bin/hbase org.apache.hadoop.hbase.util.HBaseConfTool hbase.cluster.distributed`
if [ "$distMode" == 'false' ]; then
if [ $RR_RS -ne 1 ] || [ $RR_MASTER -ne 1 ]; then
echo Cant do selective rolling restart if not running distributed
exit 1
fi
"$bin"/hbase-daemon.sh restart master
else
# NOTE(review): the master stop/wait/start statements below appear twice in
# this listing, once before and once inside the RR_MASTER guard; presumably
# the first copy is the pre-change side of the diff -- confirm.
# stop all masters before re-start to avoid races for master znode
"$bin"/hbase-daemon.sh --config "${HBASE_CONF_DIR}" stop master
"$bin"/hbase-daemons.sh --config "${HBASE_CONF_DIR}" \
--hosts "${HBASE_BACKUP_MASTERS}" stop master-backup
if [ $RR_MASTER -eq 1 ]; then
# stop all masters before re-start to avoid races for master znode
"$bin"/hbase-daemon.sh --config "${HBASE_CONF_DIR}" stop master
"$bin"/hbase-daemons.sh --config "${HBASE_CONF_DIR}" \
--hosts "${HBASE_BACKUP_MASTERS}" stop master-backup
# make sure the master znode has been deleted before continuing
zparent=`$bin/hbase org.apache.hadoop.hbase.util.HBaseConfTool zookeeper.znode.parent`
if [ "$zparent" == "null" ]; then zparent="/hbase"; fi
zmaster=`$bin/hbase org.apache.hadoop.hbase.util.HBaseConfTool zookeeper.znode.master`
if [ "$zmaster" == "null" ]; then zmaster="master"; fi
zmaster=$zparent/$zmaster
echo -n "Waiting for Master ZNode ${zmaster} to expire"
# Poll through the absolute $bin path (not a cwd-relative bin/hbase) so the
# wait works no matter which directory the script is started from.
while "$bin"/hbase zkcli stat $zmaster >/dev/null 2>&1; do
echo -n "."
sleep 1
done
echo #force a newline
# make sure the master znode has been deleted before continuing
zparent=`$bin/hbase org.apache.hadoop.hbase.util.HBaseConfTool zookeeper.znode.parent`
if [ "$zparent" == "null" ]; then zparent="/hbase"; fi
zmaster=`$bin/hbase org.apache.hadoop.hbase.util.HBaseConfTool zookeeper.znode.master`
if [ "$zmaster" == "null" ]; then zmaster="master"; fi
zmaster=$zparent/$zmaster
echo -n "Waiting for Master ZNode ${zmaster} to expire"
# Poll through the absolute $bin path (not a cwd-relative bin/hbase) so the
# wait works no matter which directory the script is started from.
while "$bin"/hbase zkcli stat $zmaster >/dev/null 2>&1; do
echo -n "."
sleep 1
done
echo #force a newline
# all masters are down, now restart
"$bin"/hbase-daemon.sh --config "${HBASE_CONF_DIR}" start master
"$bin"/hbase-daemons.sh --config "${HBASE_CONF_DIR}" \
--hosts "${HBASE_BACKUP_MASTERS}" start master-backup
# all masters are down, now restart
"$bin"/hbase-daemon.sh --config "${HBASE_CONF_DIR}" start master
"$bin"/hbase-daemons.sh --config "${HBASE_CONF_DIR}" \
--hosts "${HBASE_BACKUP_MASTERS}" start master-backup
echo "Wait a minute for master to come up join cluster"
sleep 60
echo "Wait a minute for master to come up join cluster"
sleep 60
# Master joining cluster will start in cleaning out regions in transition.
# Wait until the master has cleaned out regions in transition before
# giving it a bunch of work to do; master is vulnerable during startup
# NOTE(review): if the zkcli/grep/sed pipeline prints nothing, ${unassigned}
# is empty, `test 0 -eq` fails with an error and the loop keeps polling --
# confirm that waiting indefinitely is the intended behavior in that case.
zunassigned=`$bin/hbase org.apache.hadoop.hbase.util.HBaseConfTool zookeeper.znode.unassigned`
if [ "$zunassigned" == "null" ]; then zunassigned="unassigned"; fi
zunassigned="$zparent/$zunassigned"
echo -n "Waiting for ${zunassigned} to empty"
while true ; do
unassigned=`$bin/hbase zkcli stat ${zunassigned} 2>&1 |grep -e 'numChildren = '|sed -e 's,numChildren = ,,'`
if test 0 -eq ${unassigned}
then
break
else
echo -n " ${unassigned}"
fi
sleep 1
done
# unlike the masters, roll all regionservers one-at-a-time
export HBASE_SLAVE_PARALLEL=false
"$bin"/hbase-daemons.sh --config "${HBASE_CONF_DIR}" \
--hosts "${HBASE_REGIONSERVERS}" restart regionserver
# Master joining cluster will start in cleaning out regions in transition.
# Wait until the master has cleaned out regions in transition before
# giving it a bunch of work to do; master is vulnerable during startup
# NOTE(review): if the zkcli/grep/sed pipeline prints nothing, ${unassigned}
# is empty, `test 0 -eq` fails with an error and the loop keeps polling --
# confirm that waiting indefinitely is the intended behavior in that case.
zunassigned=`$bin/hbase org.apache.hadoop.hbase.util.HBaseConfTool zookeeper.znode.unassigned`
if [ "$zunassigned" == "null" ]; then zunassigned="unassigned"; fi
zunassigned="$zparent/$zunassigned"
echo -n "Waiting for ${zunassigned} to empty"
while true ; do
unassigned=`$bin/hbase zkcli stat ${zunassigned} 2>&1 |grep -e 'numChildren = '|sed -e 's,numChildren = ,,'`
if test 0 -eq ${unassigned}
then
break
else
echo -n " ${unassigned}"
fi
sleep 1
done
fi
if [ $RR_RS -eq 1 ]; then
# unlike the masters, roll all regionservers one-at-a-time
export HBASE_SLAVE_PARALLEL=false
"$bin"/hbase-daemons.sh --config "${HBASE_CONF_DIR}" \
--hosts "${HBASE_REGIONSERVERS}" restart regionserver
fi
fi