#!/usr/bin/env bash
#
#/**
# * Licensed to the Apache Software Foundation (ASF) under one
# * or more contributor license agreements. See the NOTICE file
# * distributed with this work for additional information
# * regarding copyright ownership. The ASF licenses this file
# * to you under the Apache License, Version 2.0 (the
# * "License"); you may not use this file except in compliance
# * with the License. You may obtain a copy of the License at
# *
# *     http://www.apache.org/licenses/LICENSE-2.0
# *
# * Unless required by applicable law or agreed to in writing, software
# * distributed under the License is distributed on an "AS IS" BASIS,
# * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# * See the License for the specific language governing permissions and
# * limitations under the License.
# */
#
# Perform a rolling restart of the HBase master(s) and/or regionservers.
#
# Environment Variables
#
#   HBASE_REGIONSERVERS    File naming remote hosts.
#     Default is ${HADOOP_CONF_DIR}/regionservers
#   HADOOP_CONF_DIR  Alternate conf dir. Default is ${HADOOP_HOME}/conf.
#   HBASE_CONF_DIR  Alternate hbase conf dir. Default is ${HBASE_HOME}/conf.
#   HBASE_SLAVE_SLEEP Seconds to sleep between spawning remote commands.
#   HBASE_SLAVE_TIMEOUT Seconds to wait for timing out a remote command.
#   HBASE_SSH_OPTS Options passed to ssh when running remote commands.
#
# Modelled after $HADOOP_HOME/bin/slaves.sh.
# One-line usage summary, printed for --help and after an argument error.
# --maxthreads and --movetimeout each consume the following word as a value.
usage_str="Usage: $(basename "$0") [--config <hbase-confdir>] [--autostart-window-size <window size in hours>] \
[--autostart-window-retry-limit <retry count limit for autostart>] [--autostart] [--rs-only] [--master-only] \
[--graceful] [--maxthreads xx] [--noack] [--movetimeout xx]"

# Print the usage summary on stdout.
# Globals: usage_str (read)
usage() {
  printf '%s\n' "${usage_str}"
}
# Resolve the absolute directory holding this script; the helper scripts used
# below are all invoked relative to it. Stop if the directory cannot be
# entered, rather than silently continuing with the current directory.
bin=$(dirname "$0")
bin=$(cd "$bin" >/dev/null && pwd) || {
  echo "Cannot resolve script directory: $(dirname "$0")" >&2
  exit 1
}

# default autostart args value indicating infinite window size and no retry limit
AUTOSTART_WINDOW_SIZE=0
AUTOSTART_WINDOW_RETRY_LIMIT=0

. "$bin"/hbase-config.sh

# Propagate the failure status if sourcing hbase-config.sh did not succeed.
errCode=$?
if [ $errCode -ne 0 ]; then
  exit $errCode
fi
# Defaults used when no selection flag is given.
# RR_RS / RR_MASTER / RR_GRACEFUL choose which restart phases run (1 = run).
RR_RS=1
RR_MASTER=1
RR_GRACEFUL=0
# Handed to graceful_stop.sh as --maxthreads and --movetimeout.
RR_MAXTHREADS=1
RR_MOVE_TIMEOUT=2147483647
# Sub-commands handed to hbase-daemon.sh / hbase-daemons.sh; --autostart
# replaces them with their auto(re)start variants.
START_CMD_NON_DIST_MODE=restart
START_CMD_DIST_MODE=start
RESTART_CMD_REGIONSERVER=restart
# Exit with a diagnostic and the usage text when an option that takes a value
# is the last word on the command line.
# Arguments: the remaining command-line words, starting with the option itself.
_rr_require_value() {
  if [ $# -lt 2 ]; then
    echo "Missing value for argument: $1"
    usage
    exit 1
  fi
}

# Parse the command-line options into the RR_* and *_CMD_* settings.
#   --rs-only | -r      restart regionservers only
#   --master-only       restart masters only
#   --graceful          run only the graceful regionserver restart
#   --autostart         use the autostart/autorestart daemon sub-commands,
#                       built from the AUTOSTART_WINDOW_* values current at
#                       the time the flag is parsed
#   --maxthreads <n>    stored in RR_MAXTHREADS
#   --movetimeout <n>   stored in RR_MOVE_TIMEOUT
#   --noack             stored in RR_NOACK
#   --help | -h         print usage and exit 0
# Any other word prints "Bad argument", the usage text, and exits 1.
_rr_parse_args() {
  while [ $# -gt 0 ]; do
    case "$1" in
      --rs-only|-r)
        RR_RS=1
        RR_MASTER=0
        RR_GRACEFUL=0
        shift
        ;;
      --autostart)
        START_CMD_NON_DIST_MODE="--autostart-window-size ${AUTOSTART_WINDOW_SIZE} --autostart-window-retry-limit ${AUTOSTART_WINDOW_RETRY_LIMIT} autorestart"
        START_CMD_DIST_MODE="--autostart-window-size ${AUTOSTART_WINDOW_SIZE} --autostart-window-retry-limit ${AUTOSTART_WINDOW_RETRY_LIMIT} autostart"
        RESTART_CMD_REGIONSERVER="--autostart-window-size ${AUTOSTART_WINDOW_SIZE} --autostart-window-retry-limit ${AUTOSTART_WINDOW_RETRY_LIMIT} autorestart"
        shift
        ;;
      --master-only)
        RR_RS=0
        RR_MASTER=1
        RR_GRACEFUL=0
        shift
        ;;
      --graceful)
        RR_RS=0
        RR_MASTER=0
        RR_GRACEFUL=1
        shift
        ;;
      --maxthreads)
        _rr_require_value "$@"
        RR_MAXTHREADS=$2
        shift 2
        ;;
      --noack)
        RR_NOACK="--noack"
        shift
        ;;
      --movetimeout)
        _rr_require_value "$@"
        RR_MOVE_TIMEOUT=$2
        shift 2
        ;;
      --help|-h)
        usage
        exit 0
        ;;
      *)
        echo "Bad argument: $1"
        usage
        exit 1
        ;;
    esac
  done
}

_rr_parse_args "$@"
# Every argument has been consumed by now (or the script has exited); drop
# them so the positional parameters end up empty, as they did with the
# inline parsing loop.
shift $#
# quick function to get a value from the HBase config file
# Runs HBaseConfTool for one key and prints the result; when the tool prints
# the literal string "null" the fallback is printed instead.
#   $1 - configuration key
#   $2 - fallback value
# Globals: bin (read)
_rr_conf_value() {
  local value
  value=$("$bin"/hbase org.apache.hadoop.hbase.util.HBaseConfTool "$1")
  if [ "$value" == "null" ]; then
    value=$2
  fi
  printf '%s\n' "$value"
}

# Stop the master and the backup masters, wait for the master znode to
# disappear, start them again, then wait until the region-in-transition
# znode reports no children.
# Globals: bin, zparent, HBASE_CONF_DIR, HBASE_BACKUP_MASTERS,
#          START_CMD_DIST_MODE (all read)
_rr_restart_masters() {
  local zmaster zunassigned unassigned

  # stop all masters before re-start to avoid races for master znode
  "$bin"/hbase-daemon.sh --config "${HBASE_CONF_DIR}" stop master
  "$bin"/hbase-daemons.sh --config "${HBASE_CONF_DIR}" \
    --hosts "${HBASE_BACKUP_MASTERS}" stop master-backup

  # make sure the master znode has been deleted before continuing
  zmaster="$zparent/$(_rr_conf_value zookeeper.znode.master master)"
  echo -n "Waiting for Master ZNode ${zmaster} to expire"
  echo
  while ! "$bin"/hbase zkcli stat "$zmaster" 2>&1 | grep "Node does not exist"; do
    echo -n "."
    sleep 1
  done
  echo #force a newline

  # all masters are down, now restart
  # START_CMD_DIST_MODE is left unquoted on purpose: with --autostart it
  # holds several words that must reach the daemon script as separate args.
  "$bin"/hbase-daemon.sh --config "${HBASE_CONF_DIR}" ${START_CMD_DIST_MODE} master
  "$bin"/hbase-daemons.sh --config "${HBASE_CONF_DIR}" \
    --hosts "${HBASE_BACKUP_MASTERS}" ${START_CMD_DIST_MODE} master-backup

  echo "Wait a minute for master to come up join cluster"
  sleep 60

  # Master joining the cluster will start in cleaning out regions in transition.
  # Wait until the master has cleaned out regions in transition before
  # giving it a bunch of work to do; master is vulnerable during startup
  zunassigned="$zparent/$(_rr_conf_value zookeeper.znode.unassigned region-in-transition)"
  # Checking if /hbase/region-in-transition exist
  if "$bin"/hbase zkcli stat "${zunassigned}" 2>&1 | tail -1 \
      | grep "Node does not exist:" >/dev/null; then
    echo "Znode ${zunassigned} does not exist"
  else
    echo -n "Waiting for ${zunassigned} to empty"
    while true; do
      unassigned=$("$bin"/hbase zkcli stat "${zunassigned}" 2>&1 \
        | grep -e 'numChildren = ' | sed -e 's,numChildren = ,,')
      if [ 0 -eq "${unassigned}" ]; then
        echo
        break
      else
        echo -n " ${unassigned}"
      fi
      sleep 1
    done
  fi
}

# Gracefully restart every regionserver listed under the rs znode, one at a
# time, with the load balancer switched off for the duration.
# Entries that do not look like "host,port,..." (for example the words of an
# error message printed by zkcli) are skipped instead of being handed to
# graceful_stop.sh as hostnames.
# Globals: bin, zparent, HBASE_CONF_DIR, RR_MAXTHREADS, RR_NOACK,
#          RR_MOVE_TIMEOUT (read); HBASE_BALANCER_STATE (written)
_rr_graceful_restart() {
  local masterport zkrs online_regionservers rs hostname port _rest
  local -a rs_entries

  masterport=$(_rr_conf_value hbase.master.port 16000)
  zkrs="$zparent/$(_rr_conf_value zookeeper.znode.rs rs)"
  # Only the last line of the zkcli output is used, with the surrounding
  # brackets stripped.
  online_regionservers=$("$bin"/hbase zkcli ls "$zkrs" 2>&1 | tail -1 | sed "s/\[//" | sed "s/\]//")
  echo "Disabling load balancer"
  HBASE_BALANCER_STATE=$(echo 'balance_switch false' | "$bin"/hbase --config "${HBASE_CONF_DIR}" shell -n | tail -1)
  echo "Previous balancer state was $HBASE_BALANCER_STATE"

  # Split on whitespace without glob expansion.
  read -r -a rs_entries <<< "$online_regionservers"
  for rs in "${rs_entries[@]}"; do
    IFS=, read -r hostname port _rest <<< "$rs"
    case "$port" in
      ''|*[!0-9]*)
        echo "Skipping unparseable regionserver entry: $rs" >&2
        continue
        ;;
    esac
    if [ -z "$hostname" ]; then
      echo "Skipping unparseable regionserver entry: $rs" >&2
      continue
    fi
    # An entry whose port equals the master port is not restarted.
    if [ "$port" -eq "$masterport" ]; then
      echo "Skipping regionserver on master machine $hostname:$port"
      continue
    fi
    echo "Gracefully restarting: $hostname"
    # RR_NOACK is unquoted on purpose: it is empty unless --noack was given.
    "$bin"/graceful_stop.sh --config "${HBASE_CONF_DIR}" --restart --reload -nob --maxthreads \
      "${RR_MAXTHREADS}" ${RR_NOACK} --movetimeout "${RR_MOVE_TIMEOUT}" "$hostname"
    sleep 1
  done

  if [ "$HBASE_BALANCER_STATE" != "false" ]; then
    echo "Restoring balancer state to $HBASE_BALANCER_STATE"
    echo "balance_switch $HBASE_BALANCER_STATE" | "$bin"/hbase --config "${HBASE_CONF_DIR}" shell &> /dev/null
  fi
}

# HBASE-6504 - only take the first line of the output in case verbose gc is on
distMode=$(HBASE_CONF_DIR=${HBASE_CONF_DIR} "$bin"/hbase org.apache.hadoop.hbase.util.HBaseConfTool hbase.cluster.distributed | head -n 1)
if [ "$distMode" == 'false' ]; then
  # Non-distributed mode: only a full restart through the master is possible.
  if [ "$RR_RS" -ne 1 ] || [ "$RR_MASTER" -ne 1 ]; then
    echo "Cant do selective rolling restart if not running distributed"
    exit 1
  fi
  "$bin"/hbase-daemon.sh ${START_CMD_NON_DIST_MODE} master
else
  zparent=$(_rr_conf_value zookeeper.znode.parent /hbase)

  if [ "$RR_MASTER" -eq 1 ]; then
    _rr_restart_masters
  fi

  if [ "$RR_RS" -eq 1 ]; then
    # unlike the masters, roll all regionservers one-at-a-time
    export HBASE_SLAVE_PARALLEL=false
    "$bin"/hbase-daemons.sh --config "${HBASE_CONF_DIR}" \
      --hosts "${HBASE_REGIONSERVERS}" ${RESTART_CMD_REGIONSERVER} regionserver
  fi

  if [ "$RR_GRACEFUL" -eq 1 ]; then
    # gracefully restart all online regionservers
    _rr_graceful_restart
  fi
fi