HBASE-5939 Add an autorestart option in the start scripts
git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1337418 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
83ab15e2ed
commit
e8f27c7408
|
@ -33,7 +33,7 @@
|
||||||
# Modelled after $HADOOP_HOME/bin/hadoop-daemon.sh
|
# Modelled after $HADOOP_HOME/bin/hadoop-daemon.sh
|
||||||
|
|
||||||
usage="Usage: hbase-daemon.sh [--config <conf-dir>]\
|
usage="Usage: hbase-daemon.sh [--config <conf-dir>]\
|
||||||
(start|stop|restart) <hbase-command> \
|
(start|stop|restart|autorestart) <hbase-command> \
|
||||||
<args...>"
|
<args...>"
|
||||||
|
|
||||||
# if no args specified, show usage
|
# if no args specified, show usage
|
||||||
|
@ -80,6 +80,17 @@ cleanZNode() {
|
||||||
fi
|
fi
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Guard used by the start/autorestart entry points: refuse to launch $command
# when a previous instance is still alive.
#
# Globals read:
#   HBASE_PID_DIR - directory holding pid files (created if missing)
#   pid           - path of the pid file for $command
#   command       - daemon name, used only in the message
# Outputs: a one-line notice on stdout when an instance is already running.
# Exits:   terminates the calling script with status 1 in that case;
#          otherwise returns 0 so the caller can proceed with the launch.
check_before_start(){
    #check if the process is not running
    mkdir -p "$HBASE_PID_DIR"
    # Quote the path: an unquoted $pid makes `[ -f ... ]` fail with
    # "too many arguments" when the pid directory contains whitespace, which
    # silently skipped this guard.
    if [ -f "$pid" ]; then
      local running_pid=""
      # Read the pid once; `read` trims surrounding whitespace, and `|| true`
      # tolerates an empty file or one without a trailing newline.
      read -r running_pid _ < "$pid" || true
      # kill -0 sends no signal; it only checks that the process exists.
      if [ -n "$running_pid" ] && kill -0 "$running_pid" > /dev/null 2>&1; then
        echo "$command running as process $running_pid. Stop it first."
        exit 1
      fi
    fi
}
|
||||||
|
|
||||||
wait_until_done ()
|
wait_until_done ()
|
||||||
{
|
{
|
||||||
p=$1
|
p=$1
|
||||||
|
@ -122,15 +133,18 @@ if [ "$JAVA_HOME" = "" ]; then
|
||||||
echo "Error: JAVA_HOME is not set."
|
echo "Error: JAVA_HOME is not set."
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
JAVA=$JAVA_HOME/bin/java
|
JAVA=$JAVA_HOME/bin/java
|
||||||
export HBASE_LOG_PREFIX=hbase-$HBASE_IDENT_STRING-$command-$HOSTNAME
|
export HBASE_LOG_PREFIX=hbase-$HBASE_IDENT_STRING-$command-$HOSTNAME
|
||||||
export HBASE_LOGFILE=$HBASE_LOG_PREFIX.log
|
export HBASE_LOGFILE=$HBASE_LOG_PREFIX.log
|
||||||
export HBASE_ROOT_LOGGER=${HBASE_ROOT_LOGGER:-"INFO,RFA"}
|
export HBASE_ROOT_LOGGER=${HBASE_ROOT_LOGGER:-"INFO,RFA"}
|
||||||
logout=$HBASE_LOG_DIR/$HBASE_LOG_PREFIX.out
|
logout=$HBASE_LOG_DIR/$HBASE_LOG_PREFIX.out
|
||||||
loggc=$HBASE_LOG_DIR/$HBASE_LOG_PREFIX.gc
|
loggc=$HBASE_LOG_DIR/$HBASE_LOG_PREFIX.gc
|
||||||
loglog="${HBASE_LOG_DIR}/${HBASE_LOGFILE}"
|
loglog="${HBASE_LOG_DIR}/${HBASE_LOGFILE}"
|
||||||
pid=$HBASE_PID_DIR/hbase-$HBASE_IDENT_STRING-$command.pid
|
pid=$HBASE_PID_DIR/hbase-$HBASE_IDENT_STRING-$command.pid
|
||||||
export HBASE_ZNODE_FILE=$HBASE_PID_DIR/hbase-$HBASE_IDENT_STRING-$command.znode
|
export HBASE_ZNODE_FILE=$HBASE_PID_DIR/hbase-$HBASE_IDENT_STRING-$command.znode
|
||||||
|
export HBASE_START_FILE=$HBASE_PID_DIR/hbase-$HBASE_IDENT_STRING-$command.autorestart
|
||||||
|
|
||||||
|
|
||||||
if [ "$HBASE_USE_GC_LOGFILE" = "true" ]; then
|
if [ "$HBASE_USE_GC_LOGFILE" = "true" ]; then
|
||||||
export HBASE_GC_OPTS=" -Xloggc:${loggc}"
|
export HBASE_GC_OPTS=" -Xloggc:${loggc}"
|
||||||
|
@ -146,18 +160,17 @@ args=$@
|
||||||
|
|
||||||
case $startStop in
|
case $startStop in
|
||||||
|
|
||||||
(start)
|
(start)
|
||||||
mkdir -p "$HBASE_PID_DIR"
|
check_before_start
|
||||||
if [ -f $pid ]; then
|
|
||||||
if kill -0 `cat $pid` > /dev/null 2>&1; then
|
|
||||||
echo $command running as process `cat $pid`. Stop it first.
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
fi
|
|
||||||
nohup $thiscmd --config "${HBASE_CONF_DIR}" internal_start $command $args < /dev/null > /dev/null 2>&1 &
|
nohup $thiscmd --config "${HBASE_CONF_DIR}" internal_start $command $args < /dev/null > /dev/null 2>&1 &
|
||||||
;;
|
;;
|
||||||
|
|
||||||
(internal_start)
|
(autorestart)
|
||||||
|
check_before_start
|
||||||
|
nohup $thiscmd --config "${HBASE_CONF_DIR}" internal_autorestart $command $args < /dev/null > /dev/null 2>&1 &
|
||||||
|
;;
|
||||||
|
|
||||||
|
(internal_start)
|
||||||
hbase_rotate_log $logout
|
hbase_rotate_log $logout
|
||||||
hbase_rotate_log $loggc
|
hbase_rotate_log $loggc
|
||||||
echo starting $command, logging to $logout
|
echo starting $command, logging to $logout
|
||||||
|
@ -171,31 +184,74 @@ case $startStop in
|
||||||
sleep 1; head "$logout"
|
sleep 1; head "$logout"
|
||||||
wait
|
wait
|
||||||
cleanZNode
|
cleanZNode
|
||||||
|
;;
|
||||||
|
|
||||||
|
(internal_autorestart)
|
||||||
|
touch "$HBASE_START_FILE"
|
||||||
|
#keep starting the command until asked to stop. Reloop on software crash
|
||||||
|
while true
|
||||||
|
do
|
||||||
|
lastLaunchDate=`date +%s`
|
||||||
|
$thiscmd --config "${HBASE_CONF_DIR}" internal_start $command $args
|
||||||
|
|
||||||
|
#if the file does not exist it means that the stop command removed it (a deliberate stop), so do not restart
|
||||||
|
if [ ! -f "$HBASE_START_FILE" ]; then
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
#if the cluster is being stopped then do not restart it again.
|
||||||
|
zparent=`$bin/hbase org.apache.hadoop.hbase.util.HBaseConfTool zookeeper.znode.parent`
|
||||||
|
if [ "$zparent" == "null" ]; then zparent="/hbase"; fi
|
||||||
|
zshutdown=`$bin/hbase org.apache.hadoop.hbase.util.HBaseConfTool zookeeper.znode.state`
|
||||||
|
if [ "$zshutdown" == "null" ]; then zshutdown="shutdown"; fi
|
||||||
|
zFullShutdown=$zparent/$zshutdown
|
||||||
|
$bin/hbase zkcli stat $zFullShutdown 2>&1 | grep "Node does not exist" 1>/dev/null 2>&1
|
||||||
|
#grep returns 0 if it found something, 1 otherwise
|
||||||
|
if [ $? -eq 0 ]; then
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
#If ZooKeeper cannot be found, then do not restart
|
||||||
|
$bin/hbase zkcli stat $zFullShutdown 2>&1 | grep Exception | grep ConnectionLoss 1>/dev/null 2>&1
|
||||||
|
if [ $? -eq 0 ]; then
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
#if it was launched less than 5 minutes ago, then wait for 5 minutes before starting it again.
|
||||||
|
curDate=`date +%s`
|
||||||
|
limitDate=`expr $lastLaunchDate + 300`
|
||||||
|
if [ $limitDate -gt $curDate ]; then
|
||||||
|
sleep 300
|
||||||
|
fi
|
||||||
|
done
|
||||||
;;
|
;;
|
||||||
|
|
||||||
(stop)
|
(stop)
|
||||||
|
rm -f "$HBASE_START_FILE"
|
||||||
if [ -f $pid ]; then
|
if [ -f $pid ]; then
|
||||||
# kill -0 == see if the PID exists
|
pidToKill=`cat $pid`
|
||||||
if kill -0 `cat $pid` > /dev/null 2>&1; then
|
# kill -0 == see if the PID exists
|
||||||
|
if kill -0 $pidToKill > /dev/null 2>&1; then
|
||||||
echo -n stopping $command
|
echo -n stopping $command
|
||||||
echo "`date` Terminating $command" >> $loglog
|
echo "`date` Terminating $command" >> $loglog
|
||||||
kill `cat $pid` > /dev/null 2>&1
|
kill $pidToKill > /dev/null 2>&1
|
||||||
while kill -0 `cat $pid` > /dev/null 2>&1; do
|
while kill -0 $pidToKill > /dev/null 2>&1;
|
||||||
echo -n "."
|
do
|
||||||
sleep 1;
|
echo -n "."
|
||||||
done
|
sleep 1;
|
||||||
|
done
|
||||||
rm $pid
|
rm $pid
|
||||||
echo
|
echo
|
||||||
else
|
else
|
||||||
retval=$?
|
retval=$?
|
||||||
echo no $command to stop because kill -0 of pid `cat $pid` failed with status $retval
|
echo no $command to stop because kill -0 of pid $pidToKill failed with status $retval
|
||||||
fi
|
fi
|
||||||
else
|
else
|
||||||
echo no $command to stop because no pid file $pid
|
echo no $command to stop because no pid file $pid
|
||||||
fi
|
fi
|
||||||
;;
|
;;
|
||||||
|
|
||||||
(restart)
|
(restart)
|
||||||
# stop the command
|
# stop the command
|
||||||
$thiscmd --config "${HBASE_CONF_DIR}" stop $command $args &
|
$thiscmd --config "${HBASE_CONF_DIR}" stop $command $args &
|
||||||
wait_until_done $!
|
wait_until_done $!
|
||||||
|
@ -207,12 +263,10 @@ case $startStop in
|
||||||
# start the command
|
# start the command
|
||||||
$thiscmd --config "${HBASE_CONF_DIR}" start $command $args &
|
$thiscmd --config "${HBASE_CONF_DIR}" start $command $args &
|
||||||
wait_until_done $!
|
wait_until_done $!
|
||||||
;;
|
;;
|
||||||
|
|
||||||
|
|
||||||
(*)
|
|
||||||
echo $usage
|
|
||||||
exit 1
|
|
||||||
;;
|
|
||||||
|
|
||||||
|
(*)
|
||||||
|
echo $usage
|
||||||
|
exit 1
|
||||||
|
;;
|
||||||
esac
|
esac
|
||||||
|
|
|
@ -38,17 +38,25 @@ then
|
||||||
exit $errCode
|
exit $errCode
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
|
||||||
|
if [ "$1" = "autorestart" ]
|
||||||
|
then
|
||||||
|
commandToRun="autorestart"
|
||||||
|
else
|
||||||
|
commandToRun="start"
|
||||||
|
fi
|
||||||
|
|
||||||
distMode=`$bin/hbase --config "$HBASE_CONF_DIR" org.apache.hadoop.hbase.util.HBaseConfTool hbase.cluster.distributed`
|
distMode=`$bin/hbase --config "$HBASE_CONF_DIR" org.apache.hadoop.hbase.util.HBaseConfTool hbase.cluster.distributed`
|
||||||
|
|
||||||
|
|
||||||
if [ "$distMode" == 'false' ]
|
if [ "$distMode" == 'false' ]
|
||||||
then
|
then
|
||||||
"$bin"/hbase-daemon.sh start master
|
"$bin"/hbase-daemon.sh $commandToRun master
|
||||||
else
|
else
|
||||||
"$bin"/hbase-daemons.sh --config "${HBASE_CONF_DIR}" start zookeeper
|
"$bin"/hbase-daemons.sh --config "${HBASE_CONF_DIR}" $commandToRun zookeeper
|
||||||
"$bin"/hbase-daemon.sh --config "${HBASE_CONF_DIR}" start master
|
"$bin"/hbase-daemon.sh --config "${HBASE_CONF_DIR}" $commandToRun master
|
||||||
"$bin"/hbase-daemons.sh --config "${HBASE_CONF_DIR}" \
|
"$bin"/hbase-daemons.sh --config "${HBASE_CONF_DIR}" \
|
||||||
--hosts "${HBASE_REGIONSERVERS}" start regionserver
|
--hosts "${HBASE_REGIONSERVERS}" $commandToRun regionserver
|
||||||
"$bin"/hbase-daemons.sh --config "${HBASE_CONF_DIR}" \
|
"$bin"/hbase-daemons.sh --config "${HBASE_CONF_DIR}" \
|
||||||
--hosts "${HBASE_BACKUP_MASTERS}" start master-backup
|
--hosts "${HBASE_BACKUP_MASTERS}" $commandToRun master-backup
|
||||||
fi
|
fi
|
||||||
|
|
Loading…
Reference in New Issue