HBASE-5844 Delete the region server's znode after a region server crash
git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1334028 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
d9c3cf0913
commit
49731c73dc
|
@ -71,6 +71,15 @@ hbase_rotate_log ()
|
||||||
fi
|
fi
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Delete the znode recorded in $HBASE_ZNODE_FILE, then remove that file.
# The server process writes the path of its ephemeral znode into this file;
# if the file is still present once the server process has ended, the znode
# is deleted through the ZooKeeper CLI on the server's behalf.
# Does nothing when the file does not exist.
cleanZNode() {
  # Expansions are quoted so that an empty/unset HBASE_ZNODE_FILE cannot turn
  # the test into `[ -f ]` (which is always true) and so that paths containing
  # whitespace are not split into several arguments.
  if [ -f "$HBASE_ZNODE_FILE" ]; then
    # call ZK to delete the node
    ZNODE=`cat "$HBASE_ZNODE_FILE"`
    # Best effort: output and failures of the ZK client are discarded.
    "$bin"/hbase zkcli delete "$ZNODE" > /dev/null 2>&1
    rm "$HBASE_ZNODE_FILE"
  fi
}
|
||||||
|
|
||||||
wait_until_done ()
|
wait_until_done ()
|
||||||
{
|
{
|
||||||
p=$1
|
p=$1
|
||||||
|
@ -121,6 +130,7 @@ logout=$HBASE_LOG_DIR/$HBASE_LOG_PREFIX.out
|
||||||
loggc=$HBASE_LOG_DIR/$HBASE_LOG_PREFIX.gc
|
loggc=$HBASE_LOG_DIR/$HBASE_LOG_PREFIX.gc
|
||||||
loglog="${HBASE_LOG_DIR}/${HBASE_LOGFILE}"
|
loglog="${HBASE_LOG_DIR}/${HBASE_LOGFILE}"
|
||||||
pid=$HBASE_PID_DIR/hbase-$HBASE_IDENT_STRING-$command.pid
|
pid=$HBASE_PID_DIR/hbase-$HBASE_IDENT_STRING-$command.pid
|
||||||
|
export HBASE_ZNODE_FILE=$HBASE_PID_DIR/hbase-$HBASE_IDENT_STRING-$command.znode
|
||||||
|
|
||||||
if [ "$HBASE_USE_GC_LOGFILE" = "true" ]; then
|
if [ "$HBASE_USE_GC_LOGFILE" = "true" ]; then
|
||||||
export HBASE_GC_OPTS=" -Xloggc:${loggc}"
|
export HBASE_GC_OPTS=" -Xloggc:${loggc}"
|
||||||
|
@ -131,6 +141,9 @@ if [ "$HBASE_NICENESS" = "" ]; then
|
||||||
export HBASE_NICENESS=0
|
export HBASE_NICENESS=0
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
thiscmd=$0
|
||||||
|
args=$@
|
||||||
|
|
||||||
case $startStop in
|
case $startStop in
|
||||||
|
|
||||||
(start)
|
(start)
|
||||||
|
@ -141,18 +154,23 @@ case $startStop in
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
fi
|
fi
|
||||||
|
nohup $thiscmd --config "${HBASE_CONF_DIR}" internal_start $command $args < /dev/null > /dev/null 2>&1 &
|
||||||
|
;;
|
||||||
|
|
||||||
|
(internal_start)
|
||||||
hbase_rotate_log $logout
|
hbase_rotate_log $logout
|
||||||
hbase_rotate_log $loggc
|
hbase_rotate_log $loggc
|
||||||
echo starting $command, logging to $logout
|
echo starting $command, logging to $logout
|
||||||
# Add to the command log file vital stats on our environment.
|
# Add to the command log file vital stats on our environment.
|
||||||
echo "`date` Starting $command on `hostname`" >> $loglog
|
echo "`date` Starting $command on `hostname`" >> $loglog
|
||||||
echo "`ulimit -a`" >> $loglog 2>&1
|
echo "`ulimit -a`" >> $loglog 2>&1
|
||||||
nohup nice -n $HBASE_NICENESS "$HBASE_HOME"/bin/hbase \
|
nice -n $HBASE_NICENESS "$HBASE_HOME"/bin/hbase \
|
||||||
--config "${HBASE_CONF_DIR}" \
|
--config "${HBASE_CONF_DIR}" \
|
||||||
$command "$@" $startStop > "$logout" 2>&1 < /dev/null &
|
$command "$@" start > "$logout" &
|
||||||
echo $! > $pid
|
echo $! > $pid
|
||||||
sleep 1; head "$logout"
|
sleep 1; head "$logout"
|
||||||
|
wait
|
||||||
|
cleanZNode
|
||||||
;;
|
;;
|
||||||
|
|
||||||
(stop)
|
(stop)
|
||||||
|
@ -178,8 +196,6 @@ case $startStop in
|
||||||
;;
|
;;
|
||||||
|
|
||||||
(restart)
|
(restart)
|
||||||
thiscmd=$0
|
|
||||||
args=$@
|
|
||||||
# stop the command
|
# stop the command
|
||||||
$thiscmd --config "${HBASE_CONF_DIR}" stop $command $args &
|
$thiscmd --config "${HBASE_CONF_DIR}" stop $command $args &
|
||||||
wait_until_done $!
|
wait_until_done $!
|
||||||
|
@ -193,6 +209,7 @@ case $startStop in
|
||||||
wait_until_done $!
|
wait_until_done $!
|
||||||
;;
|
;;
|
||||||
|
|
||||||
|
|
||||||
(*)
|
(*)
|
||||||
echo $usage
|
echo $usage
|
||||||
exit 1
|
exit 1
|
||||||
|
|
|
@ -19,6 +19,9 @@
|
||||||
*/
|
*/
|
||||||
package org.apache.hadoop.hbase.regionserver;
|
package org.apache.hadoop.hbase.regionserver;
|
||||||
|
|
||||||
|
import java.io.BufferedWriter;
|
||||||
|
import java.io.File;
|
||||||
|
import java.io.FileWriter;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.StringWriter;
|
import java.io.StringWriter;
|
||||||
import java.lang.Thread.UncaughtExceptionHandler;
|
import java.lang.Thread.UncaughtExceptionHandler;
|
||||||
|
@ -725,6 +728,9 @@ public class HRegionServer extends RegionServer
|
||||||
} catch (KeeperException e) {
|
} catch (KeeperException e) {
|
||||||
LOG.warn("Failed deleting my ephemeral node", e);
|
LOG.warn("Failed deleting my ephemeral node", e);
|
||||||
}
|
}
|
||||||
|
// We may have failed to delete the znode at the previous step, but
|
||||||
|
// we delete the file anyway: a second attempt to delete the znode is likely to fail again.
|
||||||
|
deleteMyEphemeralNodeOnDisk();
|
||||||
this.zooKeeper.close();
|
this.zooKeeper.close();
|
||||||
LOG.info("stopping server " + this.serverNameFromMasterPOV +
|
LOG.info("stopping server " + this.serverNameFromMasterPOV +
|
||||||
"; zookeeper connection closed.");
|
"; zookeeper connection closed.");
|
||||||
|
@ -841,7 +847,8 @@ public class HRegionServer extends RegionServer
|
||||||
// No regions in RIT, we could stop waiting now.
|
// No regions in RIT, we could stop waiting now.
|
||||||
if (this.regionsInTransitionInRS.isEmpty()) {
|
if (this.regionsInTransitionInRS.isEmpty()) {
|
||||||
if (!isOnlineRegionsEmpty()) {
|
if (!isOnlineRegionsEmpty()) {
|
||||||
LOG.info("We were exiting though online regions are not empty, because some regions failed closing");
|
LOG.info("We were exiting though online regions are not empty," +
|
||||||
|
" because some regions failed closing");
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@ -911,6 +918,9 @@ public class HRegionServer extends RegionServer
|
||||||
// Set our ephemeral znode up in zookeeper now we have a name.
|
// Set our ephemeral znode up in zookeeper now we have a name.
|
||||||
createMyEphemeralNode();
|
createMyEphemeralNode();
|
||||||
|
|
||||||
|
// Save the znode path in a file; if the file is left behind, we know we crashed
|
||||||
|
writeMyEphemeralNodeOnDisk();
|
||||||
|
|
||||||
// Master sent us hbase.rootdir to use. Should be fully qualified
|
// Master sent us hbase.rootdir to use. Should be fully qualified
|
||||||
// path with file system specification included. Set 'fs.defaultFS'
|
// path with file system specification included. Set 'fs.defaultFS'
|
||||||
// to match the filesystem on hbase.rootdir else underlying hadoop hdfs
|
// to match the filesystem on hbase.rootdir else underlying hadoop hdfs
|
||||||
|
@ -945,11 +955,48 @@ public class HRegionServer extends RegionServer
|
||||||
return ZKUtil.joinZNode(this.zooKeeper.rsZNode, getServerName().toString());
|
return ZKUtil.joinZNode(this.zooKeeper.rsZNode, getServerName().toString());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private String getMyEphemeralNodeFileName() {
|
||||||
|
return System.getenv().get("HBASE_ZNODE_FILE");
|
||||||
|
}
|
||||||
|
|
||||||
private void createMyEphemeralNode() throws KeeperException {
|
private void createMyEphemeralNode() throws KeeperException {
|
||||||
ZKUtil.createEphemeralNodeAndWatch(this.zooKeeper, getMyEphemeralNodePath(),
|
ZKUtil.createEphemeralNodeAndWatch(this.zooKeeper, getMyEphemeralNodePath(),
|
||||||
HConstants.EMPTY_BYTE_ARRAY);
|
HConstants.EMPTY_BYTE_ARRAY);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private void writeMyEphemeralNodeOnDisk() throws IOException {
|
||||||
|
String fileName = getMyEphemeralNodeFileName();
|
||||||
|
|
||||||
|
if (fileName == null) {
|
||||||
|
LOG.warn("No filename given to save the znode used, it won't be saved " +
|
||||||
|
"(Environment variable HBASE_ZNODE_FILE is not set).");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
FileWriter fstream = new FileWriter(fileName);
|
||||||
|
BufferedWriter out = new BufferedWriter(fstream);
|
||||||
|
try {
|
||||||
|
out.write(getMyEphemeralNodePath() + "\n");
|
||||||
|
} finally {
|
||||||
|
try {
|
||||||
|
out.close();
|
||||||
|
} finally {
|
||||||
|
fstream.close();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void deleteMyEphemeralNodeOnDisk(){
|
||||||
|
String fileName = getMyEphemeralNodeFileName();
|
||||||
|
|
||||||
|
if (fileName == null){
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
File f = new File(fileName);
|
||||||
|
f.delete();
|
||||||
|
}
|
||||||
|
|
||||||
private void deleteMyEphemeralNode() throws KeeperException {
|
private void deleteMyEphemeralNode() throws KeeperException {
|
||||||
ZKUtil.deleteNode(this.zooKeeper, getMyEphemeralNodePath());
|
ZKUtil.deleteNode(this.zooKeeper, getMyEphemeralNodePath());
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue