diff --git a/src/main/java/org/apache/hadoop/hbase/ClusterStatus.java b/src/main/java/org/apache/hadoop/hbase/ClusterStatus.java index b84942951ba..7618e30baf5 100644 --- a/src/main/java/org/apache/hadoop/hbase/ClusterStatus.java +++ b/src/main/java/org/apache/hadoop/hbase/ClusterStatus.java @@ -33,6 +33,7 @@ import java.util.TreeMap; import org.apache.hadoop.hbase.master.AssignmentManager.RegionState; import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.io.VersionMismatchException; import org.apache.hadoop.io.VersionedWritable; /** @@ -42,13 +43,15 @@ import org.apache.hadoop.io.VersionedWritable; * */ public class ClusterStatus extends VersionedWritable { @@ -56,16 +59,21 @@ public class ClusterStatus extends VersionedWritable { * Version for object serialization. Incremented for changes in serialized * representation. *
- *   <dt>0</dt> <dd>initial version</dd>
- *   <dt>1</dt> <dd>added cluster ID</dd>
+ *   <dt>0</dt> <dd>Initial version</dd>
+ *   <dt>1</dt> <dd>Added cluster ID</dd>
  *   <dt>2</dt> <dd>Added Map of ServerName to ServerLoad</dd>
+ *   <dt>3</dt> <dd>Added master and backupMasters</dd>
  * </dl>
*/ + private static final byte VERSION_MASTER_BACKUPMASTERS = 2; private static final byte VERSION = 2; + private static final String UNKNOWN_SERVERNAME = "unknown"; private String hbaseVersion; private Map liveServers; private Collection deadServers; + private ServerName master; + private Collection backupMasters; private Map intransition; private String clusterId; private String[] masterCoprocessors; @@ -79,11 +87,16 @@ public class ClusterStatus extends VersionedWritable { public ClusterStatus(final String hbaseVersion, final String clusterid, final Map servers, - final Collection deadServers, final Map rit, + final Collection deadServers, + final ServerName master, + final Collection backupMasters, + final Map rit, final String[] masterCoprocessors) { this.hbaseVersion = hbaseVersion; this.liveServers = servers; this.deadServers = deadServers; + this.master = master; + this.backupMasters = backupMasters; this.intransition = rit; this.clusterId = clusterid; this.masterCoprocessors = masterCoprocessors; @@ -160,8 +173,11 @@ public class ClusterStatus extends VersionedWritable { return (getVersion() == ((ClusterStatus)o).getVersion()) && getHBaseVersion().equals(((ClusterStatus)o).getHBaseVersion()) && this.liveServers.equals(((ClusterStatus)o).liveServers) && - deadServers.equals(((ClusterStatus)o).deadServers) && - Arrays.equals(this.masterCoprocessors, ((ClusterStatus)o).masterCoprocessors); + this.deadServers.equals(((ClusterStatus)o).deadServers) && + Arrays.equals(this.masterCoprocessors, + ((ClusterStatus)o).masterCoprocessors) && + this.master.equals(((ClusterStatus)o).master) && + this.backupMasters.equals(((ClusterStatus)o).backupMasters); } /** @@ -169,7 +185,8 @@ public class ClusterStatus extends VersionedWritable { */ public int hashCode() { return VERSION + hbaseVersion.hashCode() + this.liveServers.hashCode() + - deadServers.hashCode(); + this.deadServers.hashCode() + this.master.hashCode() + + this.backupMasters.hashCode(); } /** @return the object version number */ @@ -195,6 +212,28 @@ public class ClusterStatus extends VersionedWritable { return Collections.unmodifiableCollection(this.liveServers.keySet()); } + /** + * Returns detailed information about the current master {@link ServerName}. + * @return current master information if it exists + */ + public ServerName getMaster() { + return this.master; + } + + /** + * @return the number of backup masters in the cluster + */ + public int getBackupMastersSize() { + return this.backupMasters.size(); + } + + /** + * @return the names of backup masters + */ + public Collection getBackupMasters() { + return Collections.unmodifiableCollection(this.backupMasters); + } + /** * @param sn * @return Server's load or null if not found. @@ -241,10 +280,26 @@ public class ClusterStatus extends VersionedWritable { for(String masterCoprocessor: masterCoprocessors) { out.writeUTF(masterCoprocessor); } + Bytes.writeByteArray(out, this.master.getVersionedBytes()); + out.writeInt(this.backupMasters.size()); + for (ServerName backupMaster: this.backupMasters) { + Bytes.writeByteArray(out, backupMaster.getVersionedBytes()); + } } public void readFields(DataInput in) throws IOException { - super.readFields(in); + int version = getVersion(); + try { + super.readFields(in); + } catch (VersionMismatchException e) { + /* + * No API in VersionMismatchException to get the expected and found + * versions. We use the only tool available to us: toString(), whose + * output has a dependency on hadoop-common. Boo. 
+ */ + int startIndex = e.toString().lastIndexOf('v') + 1; + version = Integer.parseInt(e.toString().substring(startIndex)); + } hbaseVersion = in.readUTF(); int count = in.readInt(); this.liveServers = new HashMap(count); @@ -273,5 +328,21 @@ public class ClusterStatus extends VersionedWritable { for(int i = 0; i < masterCoprocessorsLength; i++) { masterCoprocessors[i] = in.readUTF(); } + // Only read extra fields for master and backup masters if + // version indicates that we should do so, else use defaults + if (version >= VERSION_MASTER_BACKUPMASTERS) { + this.master = ServerName.parseVersionedServerName( + Bytes.readByteArray(in)); + count = in.readInt(); + this.backupMasters = new ArrayList(count); + for (int i = 0; i < count; i++) { + this.backupMasters.add(ServerName.parseVersionedServerName( + Bytes.readByteArray(in))); + } + } else { + this.master = new ServerName(UNKNOWN_SERVERNAME, -1, + ServerName.NON_STARTCODE); + this.backupMasters = new ArrayList(0); + } } } diff --git a/src/main/java/org/apache/hadoop/hbase/master/ActiveMasterManager.java b/src/main/java/org/apache/hadoop/hbase/master/ActiveMasterManager.java index 2f60b23165a..fe4710b87e3 100644 --- a/src/main/java/org/apache/hadoop/hbase/master/ActiveMasterManager.java +++ b/src/main/java/org/apache/hadoop/hbase/master/ActiveMasterManager.java @@ -23,6 +23,7 @@ import java.util.concurrent.atomic.AtomicBoolean; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.Server; import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.monitoring.MonitoredTask; @@ -131,8 +132,16 @@ class ActiveMasterManager extends ZooKeeperListener { // Try to become the active master, watch if there is another master. // Write out our ServerName as versioned bytes. try { + String backupZNode = ZKUtil.joinZNode( + this.watcher.backupMasterAddressesZNode, this.sn.toString()); if (ZKUtil.createEphemeralNodeAndWatch(this.watcher, - this.watcher.masterAddressZNode, sn.getVersionedBytes())) { + this.watcher.masterAddressZNode, this.sn.getVersionedBytes())) { + // If we were a backup master before, delete our ZNode from the backup + // master directory since we are the active now + LOG.info("Deleting ZNode for " + backupZNode + + " from backup master directory"); + ZKUtil.deleteNodeFailSilent(this.watcher, backupZNode); + // We are the master, return startupStatus.setStatus("Successfully registered as active master."); this.clusterHasActiveMaster.set(true); @@ -144,22 +153,41 @@ class ActiveMasterManager extends ZooKeeperListener { // There is another active master running elsewhere or this is a restart // and the master ephemeral node has not expired yet. this.clusterHasActiveMaster.set(true); + + /* + * Add a ZNode for ourselves in the backup master directory since we are + * not the active master. + * + * If we become the active master later, ActiveMasterManager will delete + * this node explicitly. If we crash before then, ZooKeeper will delete + * this node for us since it is ephemeral. 
+ */ + LOG.info("Adding ZNode for " + backupZNode + + " in backup master directory"); + ZKUtil.createEphemeralNodeAndWatch(this.watcher, backupZNode, + HConstants.EMPTY_BYTE_ARRAY); + + String msg; byte [] bytes = ZKUtil.getDataAndWatch(this.watcher, this.watcher.masterAddressZNode); - ServerName currentMaster = ServerName.parseVersionedServerName(bytes); - if (ServerName.isSameHostnameAndPort(currentMaster, this.sn)) { - String msg = ("Current master has this master's address, " + currentMaster + - "; master was restarted? Waiting on znode to expire..."); - LOG.info(msg); - startupStatus.setStatus(msg); - // Hurry along the expiration of the znode. - ZKUtil.deleteNode(this.watcher, this.watcher.masterAddressZNode); + if (bytes == null) { + msg = ("A master was detected, but went down before its address " + + "could be read. Attempting to become the next active master"); } else { - String msg = "Another master is the active master, " + currentMaster + - "; waiting to become the next active master"; - LOG.info(msg); - startupStatus.setStatus(msg); + ServerName currentMaster = ServerName.parseVersionedServerName(bytes); + if (ServerName.isSameHostnameAndPort(currentMaster, this.sn)) { + msg = ("Current master has this master's address, " + + currentMaster + "; master was restarted? Waiting on znode " + + "to expire..."); + // Hurry along the expiration of the znode. + ZKUtil.deleteNode(this.watcher, this.watcher.masterAddressZNode); + } else { + msg = "Another master is the active master, " + currentMaster + + "; waiting to become the next active master"; + } } + LOG.info(msg); + startupStatus.setStatus(msg); } catch (KeeperException ke) { master.abort("Received an unexpected KeeperException, aborting", ke); return false; diff --git a/src/main/java/org/apache/hadoop/hbase/master/HMaster.java b/src/main/java/org/apache/hadoop/hbase/master/HMaster.java index 9d21903cbaf..4913d3441b9 100644 --- a/src/main/java/org/apache/hadoop/hbase/master/HMaster.java +++ b/src/main/java/org/apache/hadoop/hbase/master/HMaster.java @@ -26,6 +26,7 @@ import java.net.InetAddress; import java.net.InetSocketAddress; import java.util.ArrayList; import java.util.HashMap; +import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; @@ -98,6 +99,7 @@ import org.apache.hadoop.hbase.zookeeper.ClusterStatusTracker; import org.apache.hadoop.hbase.zookeeper.DrainingServerTracker; import org.apache.hadoop.hbase.zookeeper.MasterSchemaChangeTracker; import org.apache.hadoop.hbase.zookeeper.RegionServerTracker; +import org.apache.hadoop.hbase.zookeeper.ZKUtil; import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher; import org.apache.hadoop.io.MapWritable; import org.apache.hadoop.io.Text; @@ -1379,10 +1381,27 @@ implements HMasterInterface, HMasterRegionInterface, MasterServices, Server { * @return cluster status */ public ClusterStatus getClusterStatus() { + // Build Set of backup masters from ZK nodes + List backupMasterStrings; + try { + backupMasterStrings = ZKUtil.listChildrenNoWatch(this.zooKeeper, + this.zooKeeper.backupMasterAddressesZNode); + } catch (KeeperException e) { + LOG.warn(this.zooKeeper.prefix("Unable to list backup servers"), e); + backupMasterStrings = new ArrayList(0); + } + List backupMasters = new ArrayList( + backupMasterStrings.size()); + for (String s: backupMasterStrings) { + backupMasters.add(new ServerName(s)); + } + return new ClusterStatus(VersionInfo.getVersion(), this.fileSystemManager.getClusterId(), this.serverManager.getOnlineServers(), 
this.serverManager.getDeadServers(), + this.serverName, + backupMasters, this.assignmentManager.getRegionsInTransition(), this.getCoprocessors()); } diff --git a/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java b/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java index f6f3f71b76a..e55b906cba3 100644 --- a/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java +++ b/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java @@ -218,6 +218,18 @@ public class HBaseFsck { } } + // Print the current master name and state + errors.print("Master: " + status.getMaster()); + + // Print the list of all backup masters + Collection backupMasters = status.getBackupMasters(); + errors.print("Number of backup masters: " + backupMasters.size()); + if (details) { + for (ServerName name: backupMasters) { + errors.print(" " + name); + } + } + // Determine what's deployed processRegionServers(regionServers); diff --git a/src/main/java/org/apache/hadoop/hbase/zookeeper/ZKUtil.java b/src/main/java/org/apache/hadoop/hbase/zookeeper/ZKUtil.java index 111f76e5ec1..719a17691ab 100644 --- a/src/main/java/org/apache/hadoop/hbase/zookeeper/ZKUtil.java +++ b/src/main/java/org/apache/hadoop/hbase/zookeeper/ZKUtil.java @@ -1034,8 +1034,13 @@ public class ZKUtil { StringBuilder sb = new StringBuilder(); try { sb.append("HBase is rooted at ").append(zkw.baseZNode); - sb.append("\nMaster address: ").append( + sb.append("\nActive master address: ").append( ServerName.parseVersionedServerName(getData(zkw, zkw.masterAddressZNode))); + sb.append("\nBackup master addresses:"); + for (String child : listChildrenNoWatch(zkw, + zkw.backupMasterAddressesZNode)) { + sb.append("\n ").append(child); + } sb.append("\nRegion server holding ROOT: ").append( Bytes.toStringBinary(getData(zkw, zkw.rootServerZNode))); sb.append("\nRegion servers:"); diff --git a/src/main/java/org/apache/hadoop/hbase/zookeeper/ZooKeeperWatcher.java b/src/main/java/org/apache/hadoop/hbase/zookeeper/ZooKeeperWatcher.java index 3e3d131e418..45d7d6a88df 100644 --- a/src/main/java/org/apache/hadoop/hbase/zookeeper/ZooKeeperWatcher.java +++ b/src/main/java/org/apache/hadoop/hbase/zookeeper/ZooKeeperWatcher.java @@ -90,6 +90,8 @@ public class ZooKeeperWatcher implements Watcher, Abortable { public String drainingZNode; // znode of currently active master public String masterAddressZNode; + // znode of this master in backup master directory, if not the active master + public String backupMasterAddressesZNode; // znode containing the current cluster state public String clusterStateZNode; // znode used for region transitioning and assignment @@ -165,6 +167,7 @@ public class ZooKeeperWatcher implements Watcher, Abortable { ZKUtil.createAndFailSilent(this, tableZNode); ZKUtil.createAndFailSilent(this, splitLogZNode); ZKUtil.createAndFailSilent(this, schemaZNode); + ZKUtil.createAndFailSilent(this, backupMasterAddressesZNode); } catch (KeeperException e) { throw new ZooKeeperConnectionException( prefix("Unexpected KeeperException creating base node"), e); @@ -204,6 +207,8 @@ public class ZooKeeperWatcher implements Watcher, Abortable { conf.get("zookeeper.znode.draining.rs", "draining")); masterAddressZNode = ZKUtil.joinZNode(baseZNode, conf.get("zookeeper.znode.master", "master")); + backupMasterAddressesZNode = ZKUtil.joinZNode(baseZNode, + conf.get("zookeeper.znode.backup.masters", "backup-masters")); clusterStateZNode = ZKUtil.joinZNode(baseZNode, conf.get("zookeeper.znode.state", "shutdown")); assignmentZNode = ZKUtil.joinZNode(baseZNode, diff --git 
a/src/test/java/org/apache/hadoop/hbase/master/TestActiveMasterManager.java b/src/test/java/org/apache/hadoop/hbase/master/TestActiveMasterManager.java index 16e4744d52e..c00b08c16ec 100644 --- a/src/test/java/org/apache/hadoop/hbase/master/TestActiveMasterManager.java +++ b/src/test/java/org/apache/hadoop/hbase/master/TestActiveMasterManager.java @@ -63,8 +63,7 @@ public class TestActiveMasterManager { @Test public void testRestartMaster() throws IOException, KeeperException { ZooKeeperWatcher zk = new ZooKeeperWatcher(TEST_UTIL.getConfiguration(), - "testActiveMasterManagerFromZK", null); - ZKUtil.createAndFailSilent(zk, zk.baseZNode); + "testActiveMasterManagerFromZK", null, true); try { ZKUtil.deleteNode(zk, zk.masterAddressZNode); } catch(KeeperException.NoNodeException nne) {} @@ -103,8 +102,7 @@ public class TestActiveMasterManager { @Test public void testActiveMasterManagerFromZK() throws Exception { ZooKeeperWatcher zk = new ZooKeeperWatcher(TEST_UTIL.getConfiguration(), - "testActiveMasterManagerFromZK", null); - ZKUtil.createAndFailSilent(zk, zk.baseZNode); + "testActiveMasterManagerFromZK", null, true); try { ZKUtil.deleteNode(zk, zk.masterAddressZNode); } catch(KeeperException.NoNodeException nne) {} diff --git a/src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java b/src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java index bc98fb0cc01..63882e199fe 100644 --- a/src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java +++ b/src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java @@ -21,6 +21,7 @@ package org.apache.hadoop.hbase.master; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertTrue; import java.util.ArrayList; @@ -89,16 +90,25 @@ public class TestMasterFailover { int numActive = 0; int activeIndex = -1; ServerName activeName = null; + HMaster active = null; for (int i = 0; i < masterThreads.size(); i++) { if (masterThreads.get(i).getMaster().isActiveMaster()) { numActive++; activeIndex = i; - activeName = masterThreads.get(i).getMaster().getServerName(); + active = masterThreads.get(activeIndex).getMaster(); + activeName = active.getServerName(); } } assertEquals(1, numActive); assertEquals(NUM_MASTERS, masterThreads.size()); + // Check that ClusterStatus reports the correct active and backup masters + assertNotNull(active); + ClusterStatus status = active.getClusterStatus(); + assertTrue(status.getMaster().equals(activeName)); + assertEquals(2, status.getBackupMastersSize()); + assertEquals(2, status.getBackupMasters().size()); + // attempt to stop one of the inactive masters LOG.debug("\n\nStopping a backup master\n"); int backupIndex = (activeIndex == 0 ? 
1 : activeIndex - 1); @@ -111,6 +121,7 @@ public class TestMasterFailover { assertTrue(activeName.equals( masterThreads.get(i).getMaster().getServerName())); activeIndex = i; + active = masterThreads.get(activeIndex).getMaster(); } } assertEquals(1, numActive); @@ -119,6 +130,13 @@ public class TestMasterFailover { LOG.info("Active master managing " + rsCount + " regions servers"); assertEquals(3, rsCount); + // Check that ClusterStatus reports the correct active and backup masters + assertNotNull(active); + status = active.getClusterStatus(); + assertTrue(status.getMaster().equals(activeName)); + assertEquals(1, status.getBackupMastersSize()); + assertEquals(1, status.getBackupMasters().size()); + // kill the active master LOG.debug("\n\nStopping the active master\n"); cluster.stopMaster(activeIndex, false); @@ -132,10 +150,17 @@ public class TestMasterFailover { assertEquals(1, masterThreads.size()); // and he should be active - HMaster active = masterThreads.get(0).getMaster(); - int rss = active.getClusterStatus().getServersSize(); - LOG.info("Active master managing " + rss + " regions servers"); + active = masterThreads.get(0).getMaster(); + assertNotNull(active); + status = active.getClusterStatus(); + ServerName mastername = status.getMaster(); + assertTrue(mastername.equals(active.getServerName())); assertTrue(active.isActiveMaster()); + assertEquals(0, status.getBackupMastersSize()); + assertEquals(0, status.getBackupMasters().size()); + int rss = status.getServersSize(); + LOG.info("Active master " + mastername.getHostname() + " managing " + + rss + " region servers"); assertEquals(3, rss); // Stop the cluster
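
For reviewers who want to exercise the new accessors end to end, here is a minimal client-side sketch that reads the active and backup masters through HBaseAdmin.getClusterStatus(). The PrintMasters class and its standalone main() are illustrative only; the rest relies on the existing 0.92/0.94-era client API plus the getters added by this patch.

import java.util.Collection;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.ClusterStatus;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.client.HBaseAdmin;

public class PrintMasters {
  public static void main(String[] args) throws Exception {
    // Picks up hbase-site.xml from the classpath, as usual for client tools.
    Configuration conf = HBaseConfiguration.create();
    HBaseAdmin admin = new HBaseAdmin(conf);
    try {
      ClusterStatus status = admin.getClusterStatus();
      // Accessors introduced by this patch.
      System.out.println("Active master: " + status.getMaster());
      System.out.println("Backup masters (" + status.getBackupMastersSize() + "):");
      for (ServerName backup : status.getBackupMasters()) {
        System.out.println("  " + backup);
      }
    } finally {
      admin.close();
    }
  }
}

Against a cluster running without this patch, the readFields() fallback above kicks in and the tool would report the UNKNOWN_SERVERNAME placeholder for the master and an empty backup-master list rather than failing on deserialization.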