HBASE-5844 Delete the region server's znode after a region server crash

git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1334028 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael Stack 2012-05-04 15:19:47 +00:00
parent d9c3cf0913
commit 49731c73dc
2 changed files with 69 additions and 5 deletions

View File

@ -71,6 +71,15 @@ hbase_rotate_log ()
fi fi
} }
# Delete the znode whose path is recorded in $HBASE_ZNODE_FILE, then remove
# that file. Does nothing when the file does not exist.
# All expansions are quoted: with an unquoted empty variable `[ -f ]` is true
# and `cat` would block reading stdin; with whitespace in the path the test
# fails outright and the file would never be cleaned up.
cleanZNode() {
  if [ -f "$HBASE_ZNODE_FILE" ]; then
    # call ZK to delete the node
    ZNODE=`cat "$HBASE_ZNODE_FILE"`
    # An empty file gives us no path to delete; skip the ZK call in that case.
    if [ -n "$ZNODE" ]; then
      "$bin"/hbase zkcli delete "$ZNODE" > /dev/null 2>&1
    fi
    rm -f "$HBASE_ZNODE_FILE"
  fi
}
wait_until_done () wait_until_done ()
{ {
p=$1 p=$1
@ -121,6 +130,7 @@ logout=$HBASE_LOG_DIR/$HBASE_LOG_PREFIX.out
loggc=$HBASE_LOG_DIR/$HBASE_LOG_PREFIX.gc loggc=$HBASE_LOG_DIR/$HBASE_LOG_PREFIX.gc
loglog="${HBASE_LOG_DIR}/${HBASE_LOGFILE}" loglog="${HBASE_LOG_DIR}/${HBASE_LOGFILE}"
pid=$HBASE_PID_DIR/hbase-$HBASE_IDENT_STRING-$command.pid pid=$HBASE_PID_DIR/hbase-$HBASE_IDENT_STRING-$command.pid
export HBASE_ZNODE_FILE=$HBASE_PID_DIR/hbase-$HBASE_IDENT_STRING-$command.znode
if [ "$HBASE_USE_GC_LOGFILE" = "true" ]; then if [ "$HBASE_USE_GC_LOGFILE" = "true" ]; then
export HBASE_GC_OPTS=" -Xloggc:${loggc}" export HBASE_GC_OPTS=" -Xloggc:${loggc}"
@ -131,6 +141,9 @@ if [ "$HBASE_NICENESS" = "" ]; then
export HBASE_NICENESS=0 export HBASE_NICENESS=0
fi fi
thiscmd=$0
args=$@
case $startStop in case $startStop in
(start) (start)
@ -141,18 +154,23 @@ case $startStop in
exit 1 exit 1
fi fi
fi fi
nohup $thiscmd --config "${HBASE_CONF_DIR}" internal_start $command $args < /dev/null > /dev/null 2>&1 &
;;
(internal_start)
hbase_rotate_log $logout hbase_rotate_log $logout
hbase_rotate_log $loggc hbase_rotate_log $loggc
echo starting $command, logging to $logout echo starting $command, logging to $logout
# Add to the command log file vital stats on our environment. # Add to the command log file vital stats on our environment.
echo "`date` Starting $command on `hostname`" >> $loglog echo "`date` Starting $command on `hostname`" >> $loglog
echo "`ulimit -a`" >> $loglog 2>&1 echo "`ulimit -a`" >> $loglog 2>&1
nohup nice -n $HBASE_NICENESS "$HBASE_HOME"/bin/hbase \ nice -n $HBASE_NICENESS "$HBASE_HOME"/bin/hbase \
--config "${HBASE_CONF_DIR}" \ --config "${HBASE_CONF_DIR}" \
$command "$@" $startStop > "$logout" 2>&1 < /dev/null & $command "$@" start > "$logout" &
echo $! > $pid echo $! > $pid
sleep 1; head "$logout" sleep 1; head "$logout"
wait
cleanZNode
;; ;;
(stop) (stop)
@ -178,8 +196,6 @@ case $startStop in
;; ;;
(restart) (restart)
thiscmd=$0
args=$@
# stop the command # stop the command
$thiscmd --config "${HBASE_CONF_DIR}" stop $command $args & $thiscmd --config "${HBASE_CONF_DIR}" stop $command $args &
wait_until_done $! wait_until_done $!
@ -193,6 +209,7 @@ case $startStop in
wait_until_done $! wait_until_done $!
;; ;;
(*) (*)
echo $usage echo $usage
exit 1 exit 1

View File

@ -19,6 +19,9 @@
*/ */
package org.apache.hadoop.hbase.regionserver; package org.apache.hadoop.hbase.regionserver;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException; import java.io.IOException;
import java.io.StringWriter; import java.io.StringWriter;
import java.lang.Thread.UncaughtExceptionHandler; import java.lang.Thread.UncaughtExceptionHandler;
@ -725,6 +728,9 @@ public class HRegionServer extends RegionServer
} catch (KeeperException e) { } catch (KeeperException e) {
LOG.warn("Failed deleting my ephemeral node", e); LOG.warn("Failed deleting my ephemeral node", e);
} }
// We may have failed to delete the znode at the previous step, but
// we delete the file anyway: a second attempt to delete the znode is likely to fail again.
deleteMyEphemeralNodeOnDisk();
this.zooKeeper.close(); this.zooKeeper.close();
LOG.info("stopping server " + this.serverNameFromMasterPOV + LOG.info("stopping server " + this.serverNameFromMasterPOV +
"; zookeeper connection closed."); "; zookeeper connection closed.");
@ -841,7 +847,8 @@ public class HRegionServer extends RegionServer
// No regions in RIT, we could stop waiting now. // No regions in RIT, we could stop waiting now.
if (this.regionsInTransitionInRS.isEmpty()) { if (this.regionsInTransitionInRS.isEmpty()) {
if (!isOnlineRegionsEmpty()) { if (!isOnlineRegionsEmpty()) {
LOG.info("We were exiting though online regions are not empty, because some regions failed closing"); LOG.info("We were exiting though online regions are not empty," +
" because some regions failed closing");
} }
break; break;
} }
@ -911,6 +918,9 @@ public class HRegionServer extends RegionServer
// Set our ephemeral znode up in zookeeper now we have a name. // Set our ephemeral znode up in zookeeper now we have a name.
createMyEphemeralNode(); createMyEphemeralNode();
// Save it in a file; this lets us detect afterwards whether we crashed
writeMyEphemeralNodeOnDisk();
// Master sent us hbase.rootdir to use. Should be fully qualified // Master sent us hbase.rootdir to use. Should be fully qualified
// path with file system specification included. Set 'fs.defaultFS' // path with file system specification included. Set 'fs.defaultFS'
// to match the filesystem on hbase.rootdir else underlying hadoop hdfs // to match the filesystem on hbase.rootdir else underlying hadoop hdfs
@ -945,11 +955,48 @@ public class HRegionServer extends RegionServer
return ZKUtil.joinZNode(this.zooKeeper.rsZNode, getServerName().toString()); return ZKUtil.joinZNode(this.zooKeeper.rsZNode, getServerName().toString());
} }
/**
 * Looks up the name of the file in which this server records its znode path.
 *
 * @return the value of the HBASE_ZNODE_FILE environment variable, or null
 *         when that variable is not set
 */
private String getMyEphemeralNodeFileName() {
  final String znodeFile = System.getenv("HBASE_ZNODE_FILE");
  return znodeFile;
}
private void createMyEphemeralNode() throws KeeperException { private void createMyEphemeralNode() throws KeeperException {
ZKUtil.createEphemeralNodeAndWatch(this.zooKeeper, getMyEphemeralNodePath(), ZKUtil.createEphemeralNodeAndWatch(this.zooKeeper, getMyEphemeralNodePath(),
HConstants.EMPTY_BYTE_ARRAY); HConstants.EMPTY_BYTE_ARRAY);
} }
/**
 * Writes the path of this server's ephemeral znode, followed by a newline,
 * to the file named by the HBASE_ZNODE_FILE environment variable.
 * <p>
 * This is best effort: when the variable is not set, or the file cannot be
 * written, a warning is logged and the server carries on. The file is only
 * a marker, so failing to write it must not stop the server.
 *
 * @throws IOException not thrown by this implementation; the clause is kept
 *         so existing callers continue to compile unchanged
 */
private void writeMyEphemeralNodeOnDisk() throws IOException {
  String fileName = getMyEphemeralNodeFileName();
  if (fileName == null) {
    LOG.warn("No filename given to save the znode used, it won't be saved " +
      "(Environment variable HBASE_ZNODE_FILE is not set).");
    return;
  }

  try {
    FileWriter fstream = new FileWriter(fileName);
    BufferedWriter out = new BufferedWriter(fstream);
    try {
      out.write(getMyEphemeralNodePath() + "\n");
    } finally {
      // Close the underlying stream even if flushing the buffer fails.
      try {
        out.close();
      } finally {
        fstream.close();
      }
    }
  } catch (IOException e) {
    LOG.warn("Can't write the znode file " + fileName +
      ", the znode used won't be saved on disk", e);
  }
}
/**
 * Removes the file named by the HBASE_ZNODE_FILE environment variable, in
 * which the path of this server's ephemeral znode was saved.
 * <p>
 * Does nothing when the variable is not set. A failed deletion is logged
 * rather than thrown; a file that was already absent is not reported.
 */
private void deleteMyEphemeralNodeOnDisk() {
  String fileName = getMyEphemeralNodeFileName();
  if (fileName == null) {
    return;
  }

  File f = new File(fileName);
  // delete() also returns false when the file never existed, so only warn
  // if the file is really still there.
  if (!f.delete() && f.exists()) {
    LOG.warn("Failed deleting the znode file " + fileName);
  }
}
private void deleteMyEphemeralNode() throws KeeperException { private void deleteMyEphemeralNode() throws KeeperException {
ZKUtil.deleteNode(this.zooKeeper, getMyEphemeralNodePath()); ZKUtil.deleteNode(this.zooKeeper, getMyEphemeralNodePath());
} }