HBASE-5926 Delete the master znode after a master crash
git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1340185 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
60d28c645d
commit
4c61d4a965
|
@ -73,9 +73,13 @@ hbase_rotate_log ()
|
||||||
|
|
||||||
cleanZNode() {
|
cleanZNode() {
|
||||||
if [ -f $HBASE_ZNODE_FILE ]; then
|
if [ -f $HBASE_ZNODE_FILE ]; then
|
||||||
#call ZK to delete the node
|
if [ "$command" = "master" ]; then
|
||||||
ZNODE=`cat $HBASE_ZNODE_FILE`
|
$bin/hbase master clear > /dev/null 2>&1
|
||||||
$bin/hbase zkcli delete $ZNODE > /dev/null 2>&1
|
else
|
||||||
|
#call ZK to delete the node
|
||||||
|
ZNODE=`cat $HBASE_ZNODE_FILE`
|
||||||
|
$bin/hbase zkcli delete $ZNODE > /dev/null 2>&1
|
||||||
|
fi
|
||||||
rm $HBASE_ZNODE_FILE
|
rm $HBASE_ZNODE_FILE
|
||||||
fi
|
fi
|
||||||
}
|
}
|
||||||
|
|
|
@ -25,6 +25,7 @@ import java.util.concurrent.atomic.AtomicBoolean;
|
||||||
import org.apache.commons.logging.Log;
|
import org.apache.commons.logging.Log;
|
||||||
import org.apache.commons.logging.LogFactory;
|
import org.apache.commons.logging.LogFactory;
|
||||||
import org.apache.hadoop.classification.InterfaceAudience;
|
import org.apache.hadoop.classification.InterfaceAudience;
|
||||||
|
import org.apache.hadoop.hbase.ZNodeClearer;
|
||||||
import org.apache.hadoop.hbase.DeserializationException;
|
import org.apache.hadoop.hbase.DeserializationException;
|
||||||
import org.apache.hadoop.hbase.Server;
|
import org.apache.hadoop.hbase.Server;
|
||||||
import org.apache.hadoop.hbase.ServerName;
|
import org.apache.hadoop.hbase.ServerName;
|
||||||
|
@ -141,12 +142,17 @@ class ActiveMasterManager extends ZooKeeperListener {
|
||||||
try {
|
try {
|
||||||
String backupZNode =
|
String backupZNode =
|
||||||
ZKUtil.joinZNode(this.watcher.backupMasterAddressesZNode, this.sn.toString());
|
ZKUtil.joinZNode(this.watcher.backupMasterAddressesZNode, this.sn.toString());
|
||||||
if (MasterAddressTracker.setMasterAddress(this.watcher, this.watcher.getMasterAddressZNode(), this.sn)) {
|
if (MasterAddressTracker.setMasterAddress(this.watcher,
|
||||||
|
this.watcher.getMasterAddressZNode(), this.sn)) {
|
||||||
|
|
||||||
// If we were a backup master before, delete our ZNode from the backup
|
// If we were a backup master before, delete our ZNode from the backup
|
||||||
// master directory since we are the active now
|
// master directory since we are the active now
|
||||||
LOG.info("Deleting ZNode for " + backupZNode + " from backup master directory");
|
LOG.info("Deleting ZNode for " + backupZNode + " from backup master directory");
|
||||||
ZKUtil.deleteNodeFailSilent(this.watcher, backupZNode);
|
ZKUtil.deleteNodeFailSilent(this.watcher, backupZNode);
|
||||||
|
|
||||||
|
// Save the znode in a file; this allows the launch scripts to check whether we crashed
|
||||||
|
ZNodeClearer.writeMyEphemeralNodeOnDisk(this.sn.toString());
|
||||||
|
|
||||||
// We are the master, return
|
// We are the master, return
|
||||||
startupStatus.setStatus("Successfully registered as active master.");
|
startupStatus.setStatus("Successfully registered as active master.");
|
||||||
this.clusterHasActiveMaster.set(true);
|
this.clusterHasActiveMaster.set(true);
|
||||||
|
@ -189,6 +195,10 @@ class ActiveMasterManager extends ZooKeeperListener {
|
||||||
currentMaster + "; master was restarted? Deleting node.");
|
currentMaster + "; master was restarted? Deleting node.");
|
||||||
// Hurry along the expiration of the znode.
|
// Hurry along the expiration of the znode.
|
||||||
ZKUtil.deleteNode(this.watcher, this.watcher.getMasterAddressZNode());
|
ZKUtil.deleteNode(this.watcher, this.watcher.getMasterAddressZNode());
|
||||||
|
|
||||||
|
// We may have failed to delete the znode at the previous step, but
|
||||||
|
// we delete the file anyway: a second attempt to delete the znode is likely to fail again.
|
||||||
|
ZNodeClearer.deleteMyEphemeralNodeOnDisk();
|
||||||
} else {
|
} else {
|
||||||
msg = "Another master is the active master, " + currentMaster +
|
msg = "Another master is the active master, " + currentMaster +
|
||||||
"; waiting to become the next active master";
|
"; waiting to become the next active master";
|
||||||
|
@ -249,6 +259,9 @@ class ActiveMasterManager extends ZooKeeperListener {
|
||||||
}
|
}
|
||||||
if (activeMaster != null && activeMaster.equals(this.sn)) {
|
if (activeMaster != null && activeMaster.equals(this.sn)) {
|
||||||
ZKUtil.deleteNode(watcher, watcher.getMasterAddressZNode());
|
ZKUtil.deleteNode(watcher, watcher.getMasterAddressZNode());
|
||||||
|
// We may have failed to delete the znode at the previous step, but
|
||||||
|
// we delete the file anyway: a second attempt to delete the znode is likely to fail again.
|
||||||
|
ZNodeClearer.deleteMyEphemeralNodeOnDisk();
|
||||||
}
|
}
|
||||||
} catch (KeeperException e) {
|
} catch (KeeperException e) {
|
||||||
LOG.error(this.watcher.prefix("Error deleting our own master address node"), e);
|
LOG.error(this.watcher.prefix("Error deleting our own master address node"), e);
|
||||||
|
|
|
@ -31,6 +31,7 @@ import org.apache.commons.logging.Log;
|
||||||
import org.apache.commons.logging.LogFactory;
|
import org.apache.commons.logging.LogFactory;
|
||||||
import org.apache.hadoop.classification.InterfaceAudience;
|
import org.apache.hadoop.classification.InterfaceAudience;
|
||||||
import org.apache.hadoop.conf.Configuration;
|
import org.apache.hadoop.conf.Configuration;
|
||||||
|
import org.apache.hadoop.hbase.ZNodeClearer;
|
||||||
import org.apache.hadoop.hbase.HConstants;
|
import org.apache.hadoop.hbase.HConstants;
|
||||||
import org.apache.hadoop.hbase.LocalHBaseCluster;
|
import org.apache.hadoop.hbase.LocalHBaseCluster;
|
||||||
import org.apache.hadoop.hbase.MasterNotRunningException;
|
import org.apache.hadoop.hbase.MasterNotRunningException;
|
||||||
|
@ -47,9 +48,10 @@ public class HMasterCommandLine extends ServerCommandLine {
|
||||||
private static final Log LOG = LogFactory.getLog(HMasterCommandLine.class);
|
private static final Log LOG = LogFactory.getLog(HMasterCommandLine.class);
|
||||||
|
|
||||||
private static final String USAGE =
|
private static final String USAGE =
|
||||||
"Usage: Master [opts] start|stop\n" +
|
"Usage: Master [opts] start|stop|clear\n" +
|
||||||
" start Start Master. If local mode, start Master and RegionServer in same JVM\n" +
|
" start Start Master. If local mode, start Master and RegionServer in same JVM\n" +
|
||||||
" stop Start cluster shutdown; Master signals RegionServer shutdown\n" +
|
" stop Start cluster shutdown; Master signals RegionServer shutdown\n" +
|
||||||
|
" clear Delete the master znode in ZooKeeper after a master crashes\n "+
|
||||||
" where [opts] are:\n" +
|
" where [opts] are:\n" +
|
||||||
" --minServers=<servers> Minimum RegionServers needed to host user tables.\n" +
|
" --minServers=<servers> Minimum RegionServers needed to host user tables.\n" +
|
||||||
" --backup Master should start in backup mode";
|
" --backup Master should start in backup mode";
|
||||||
|
@ -105,6 +107,8 @@ public class HMasterCommandLine extends ServerCommandLine {
|
||||||
return startMaster();
|
return startMaster();
|
||||||
} else if ("stop".equals(command)) {
|
} else if ("stop".equals(command)) {
|
||||||
return stopMaster();
|
return stopMaster();
|
||||||
|
} else if ("clear".equals(command)) {
|
||||||
|
return (ZNodeClearer.clear(getConf()) ? 0 : -1);
|
||||||
} else {
|
} else {
|
||||||
usage("Invalid command: " + command);
|
usage("Invalid command: " + command);
|
||||||
return -1;
|
return -1;
|
||||||
|
|
|
@ -19,9 +19,6 @@
|
||||||
*/
|
*/
|
||||||
package org.apache.hadoop.hbase.regionserver;
|
package org.apache.hadoop.hbase.regionserver;
|
||||||
|
|
||||||
import java.io.BufferedWriter;
|
|
||||||
import java.io.File;
|
|
||||||
import java.io.FileWriter;
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.StringWriter;
|
import java.io.StringWriter;
|
||||||
import java.lang.Thread.UncaughtExceptionHandler;
|
import java.lang.Thread.UncaughtExceptionHandler;
|
||||||
|
@ -82,6 +79,7 @@ import org.apache.hadoop.hbase.TableDescriptors;
|
||||||
import org.apache.hadoop.hbase.UnknownRowLockException;
|
import org.apache.hadoop.hbase.UnknownRowLockException;
|
||||||
import org.apache.hadoop.hbase.UnknownScannerException;
|
import org.apache.hadoop.hbase.UnknownScannerException;
|
||||||
import org.apache.hadoop.hbase.YouAreDeadException;
|
import org.apache.hadoop.hbase.YouAreDeadException;
|
||||||
|
import org.apache.hadoop.hbase.ZNodeClearer;
|
||||||
import org.apache.hadoop.hbase.catalog.CatalogTracker;
|
import org.apache.hadoop.hbase.catalog.CatalogTracker;
|
||||||
import org.apache.hadoop.hbase.catalog.MetaEditor;
|
import org.apache.hadoop.hbase.catalog.MetaEditor;
|
||||||
import org.apache.hadoop.hbase.catalog.MetaReader;
|
import org.apache.hadoop.hbase.catalog.MetaReader;
|
||||||
|
@ -865,7 +863,7 @@ public class HRegionServer implements ClientProtocol,
|
||||||
}
|
}
|
||||||
// We may have failed to delete the znode at the previous step, but
|
// We may have failed to delete the znode at the previous step, but
|
||||||
// we delete the file anyway: a second attempt to delete the znode is likely to fail again.
|
// we delete the file anyway: a second attempt to delete the znode is likely to fail again.
|
||||||
deleteMyEphemeralNodeOnDisk();
|
ZNodeClearer.deleteMyEphemeralNodeOnDisk();
|
||||||
this.zooKeeper.close();
|
this.zooKeeper.close();
|
||||||
LOG.info("stopping server " + this.serverNameFromMasterPOV +
|
LOG.info("stopping server " + this.serverNameFromMasterPOV +
|
||||||
"; zookeeper connection closed.");
|
"; zookeeper connection closed.");
|
||||||
|
@ -1054,7 +1052,7 @@ public class HRegionServer implements ClientProtocol,
|
||||||
createMyEphemeralNode();
|
createMyEphemeralNode();
|
||||||
|
|
||||||
// Save it in a file, this will allow to see if we crash
|
// Save it in a file, this will allow to see if we crash
|
||||||
writeMyEphemeralNodeOnDisk();
|
ZNodeClearer.writeMyEphemeralNodeOnDisk(getMyEphemeralNodePath());
|
||||||
|
|
||||||
// Master sent us hbase.rootdir to use. Should be fully qualified
|
// Master sent us hbase.rootdir to use. Should be fully qualified
|
||||||
// path with file system specification included. Set 'fs.defaultFS'
|
// path with file system specification included. Set 'fs.defaultFS'
|
||||||
|
@ -1086,52 +1084,11 @@ public class HRegionServer implements ClientProtocol,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private String getMyEphemeralNodePath() {
|
|
||||||
return ZKUtil.joinZNode(this.zooKeeper.rsZNode, getServerName().toString());
|
|
||||||
}
|
|
||||||
|
|
||||||
private String getMyEphemeralNodeFileName() {
|
|
||||||
return System.getenv().get("HBASE_ZNODE_FILE");
|
|
||||||
}
|
|
||||||
|
|
||||||
private void createMyEphemeralNode() throws KeeperException {
|
private void createMyEphemeralNode() throws KeeperException {
|
||||||
ZKUtil.createEphemeralNodeAndWatch(this.zooKeeper, getMyEphemeralNodePath(),
|
ZKUtil.createEphemeralNodeAndWatch(this.zooKeeper, getMyEphemeralNodePath(),
|
||||||
HConstants.EMPTY_BYTE_ARRAY);
|
HConstants.EMPTY_BYTE_ARRAY);
|
||||||
}
|
}
|
||||||
|
|
||||||
private void writeMyEphemeralNodeOnDisk() throws IOException {
|
|
||||||
String fileName = getMyEphemeralNodeFileName();
|
|
||||||
|
|
||||||
if (fileName == null) {
|
|
||||||
LOG.warn("No filename given to save the znode used, it won't be saved " +
|
|
||||||
"(Environment variable HBASE_ZNODE_FILE is not set).");
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
FileWriter fstream = new FileWriter(fileName);
|
|
||||||
BufferedWriter out = new BufferedWriter(fstream);
|
|
||||||
try {
|
|
||||||
out.write(getMyEphemeralNodePath() + "\n");
|
|
||||||
} finally {
|
|
||||||
try {
|
|
||||||
out.close();
|
|
||||||
} finally {
|
|
||||||
fstream.close();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private void deleteMyEphemeralNodeOnDisk(){
|
|
||||||
String fileName = getMyEphemeralNodeFileName();
|
|
||||||
|
|
||||||
if (fileName == null){
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
File f = new File(fileName);
|
|
||||||
f.delete();
|
|
||||||
}
|
|
||||||
|
|
||||||
private void deleteMyEphemeralNode() throws KeeperException {
|
private void deleteMyEphemeralNode() throws KeeperException {
|
||||||
ZKUtil.deleteNode(this.zooKeeper, getMyEphemeralNodePath());
|
ZKUtil.deleteNode(this.zooKeeper, getMyEphemeralNodePath());
|
||||||
}
|
}
|
||||||
|
@ -3915,4 +3872,8 @@ public class HRegionServer implements ClientProtocol,
|
||||||
return stoppable.isStopped();
|
return stoppable.isStopped();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private String getMyEphemeralNodePath() {
|
||||||
|
return ZKUtil.joinZNode(this.zooKeeper.rsZNode, getServerName().toString());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -27,6 +27,7 @@ import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
|
||||||
import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos;
|
import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos;
|
||||||
import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos;
|
import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos;
|
||||||
import org.apache.zookeeper.KeeperException;
|
import org.apache.zookeeper.KeeperException;
|
||||||
|
import org.apache.zookeeper.data.Stat;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Manages the location of the current active Master for the RegionServer.
|
* Manages the location of the current active Master for the RegionServer.
|
||||||
|
@ -153,4 +154,28 @@ public class MasterAddressTracker extends ZooKeeperNodeTracker {
|
||||||
mbuilder.setMaster(snbuilder.build());
|
mbuilder.setMaster(snbuilder.build());
|
||||||
return ProtobufUtil.prependPBMagic(mbuilder.build().toByteArray());
|
return ProtobufUtil.prependPBMagic(mbuilder.build().toByteArray());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Deletes the master znode if its content is the same as the given content.
|
||||||
|
*/
|
||||||
|
public static boolean deleteIfEquals(ZooKeeperWatcher zkw, final String content) {
|
||||||
|
if (content == null){
|
||||||
|
throw new IllegalArgumentException("Content must not be null");
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
Stat stat = new Stat();
|
||||||
|
byte[] data = ZKUtil.getDataNoWatch(zkw, zkw.getMasterAddressZNode(), stat);
|
||||||
|
ServerName sn = ServerName.parseFrom(data);
|
||||||
|
if (sn != null && content.equals(sn.toString())) {
|
||||||
|
return (ZKUtil.deleteNode(zkw, zkw.getMasterAddressZNode(), stat.getVersion()));
|
||||||
|
}
|
||||||
|
} catch (KeeperException e) {
|
||||||
|
LOG.warn("Can't get or delete the master znode", e);
|
||||||
|
} catch (DeserializationException e) {
|
||||||
|
LOG.warn("Can't get or delete the master znode", e);
|
||||||
|
}
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
}
|
}
|
|
@ -26,16 +26,20 @@ import static org.junit.Assert.assertNull;
|
||||||
import static org.junit.Assert.assertTrue;
|
import static org.junit.Assert.assertTrue;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.lang.reflect.Method;
|
||||||
import java.util.Random;
|
import java.util.Random;
|
||||||
import java.util.concurrent.Semaphore;
|
import java.util.concurrent.Semaphore;
|
||||||
|
|
||||||
|
import junit.framework.Assert;
|
||||||
import org.apache.commons.logging.Log;
|
import org.apache.commons.logging.Log;
|
||||||
import org.apache.commons.logging.LogFactory;
|
import org.apache.commons.logging.LogFactory;
|
||||||
import org.apache.hadoop.hbase.*;
|
import org.apache.hadoop.hbase.*;
|
||||||
|
import org.apache.hadoop.hbase.client.HConnectionManager;
|
||||||
import org.apache.hadoop.hbase.master.TestActiveMasterManager.NodeDeletionListener;
|
import org.apache.hadoop.hbase.master.TestActiveMasterManager.NodeDeletionListener;
|
||||||
import org.apache.hadoop.hbase.util.Bytes;
|
import org.apache.hadoop.hbase.util.Bytes;
|
||||||
import org.apache.hadoop.hbase.util.Threads;
|
import org.apache.hadoop.hbase.util.Threads;
|
||||||
import org.apache.zookeeper.CreateMode;
|
import org.apache.zookeeper.CreateMode;
|
||||||
|
import org.apache.zookeeper.KeeperException;
|
||||||
import org.apache.zookeeper.WatchedEvent;
|
import org.apache.zookeeper.WatchedEvent;
|
||||||
import org.apache.zookeeper.Watcher;
|
import org.apache.zookeeper.Watcher;
|
||||||
import org.apache.zookeeper.ZooDefs.Ids;
|
import org.apache.zookeeper.ZooDefs.Ids;
|
||||||
|
@ -313,6 +317,38 @@ public class TestZooKeeperNodeTracker {
|
||||||
public void process(WatchedEvent event) {}
|
public void process(WatchedEvent event) {}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testCleanZNode() throws Exception {
|
||||||
|
ZooKeeperWatcher zkw = new ZooKeeperWatcher(TEST_UTIL.getConfiguration(),
|
||||||
|
"testNodeTracker", new TestZooKeeperNodeTracker.StubAbortable());
|
||||||
|
|
||||||
|
final ServerName sn = new ServerName("127.0.0.1:52",45L);
|
||||||
|
|
||||||
|
ZKUtil.createAndFailSilent(zkw,
|
||||||
|
TEST_UTIL.getConfiguration().get(HConstants.ZOOKEEPER_ZNODE_PARENT,
|
||||||
|
HConstants.DEFAULT_ZOOKEEPER_ZNODE_PARENT));
|
||||||
|
|
||||||
|
final String nodeName = zkw.getMasterAddressZNode();
|
||||||
|
|
||||||
|
// Check that we handle the case where the znode has no data
|
||||||
|
ZKUtil.createAndFailSilent(zkw, nodeName);
|
||||||
|
MasterAddressTracker.deleteIfEquals(zkw, sn.toString());
|
||||||
|
Assert.assertFalse(ZKUtil.getData(zkw, nodeName) == null);
|
||||||
|
|
||||||
|
// Check that we don't delete if we're not supposed to
|
||||||
|
ZKUtil.setData(zkw, nodeName, MasterAddressTracker.toByteArray(sn));
|
||||||
|
MasterAddressTracker.deleteIfEquals(zkw, new ServerName("127.0.0.2:52",45L).toString());
|
||||||
|
Assert.assertFalse(ZKUtil.getData(zkw, nodeName) == null);
|
||||||
|
|
||||||
|
// Check that we delete when we're supposed to
|
||||||
|
ZKUtil.setData(zkw, nodeName,MasterAddressTracker.toByteArray(sn));
|
||||||
|
MasterAddressTracker.deleteIfEquals(zkw, sn.toString());
|
||||||
|
Assert.assertTrue( ZKUtil.getData(zkw, nodeName)== null );
|
||||||
|
|
||||||
|
// Check that we support the case when the znode does not exist
|
||||||
|
MasterAddressTracker.deleteIfEquals(zkw, sn.toString()); // must not throw an exception
|
||||||
|
}
|
||||||
|
|
||||||
@org.junit.Rule
|
@org.junit.Rule
|
||||||
public org.apache.hadoop.hbase.ResourceCheckerJUnitRule cu =
|
public org.apache.hadoop.hbase.ResourceCheckerJUnitRule cu =
|
||||||
new org.apache.hadoop.hbase.ResourceCheckerJUnitRule();
|
new org.apache.hadoop.hbase.ResourceCheckerJUnitRule();
|
||||||
|
|
Loading…
Reference in New Issue