HBASE-5209 HConnection/HMasterInterface should allow for way to get hostname of currently active master in multi-master HBase setup
git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1290942 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
d69299dfdc
commit
145d25dfc2
|
@ -33,6 +33,7 @@ import java.util.TreeMap;
|
|||
|
||||
import org.apache.hadoop.hbase.master.AssignmentManager.RegionState;
|
||||
import org.apache.hadoop.hbase.util.Bytes;
|
||||
import org.apache.hadoop.io.VersionMismatchException;
|
||||
import org.apache.hadoop.io.VersionedWritable;
|
||||
|
||||
/**
|
||||
|
@ -42,13 +43,15 @@ import org.apache.hadoop.io.VersionedWritable;
|
|||
* <ul>
|
||||
* <li>The count and names of region servers in the cluster.</li>
|
||||
* <li>The count and names of dead region servers in the cluster.</li>
|
||||
* <li>The name of the active master for the cluster.</li>
|
||||
* <li>The name(s) of the backup master(s) for the cluster, if they exist.</li>
|
||||
* <li>The average cluster load.</li>
|
||||
* <li>The number of regions deployed on the cluster.</li>
|
||||
* <li>The number of requests since last report.</li>
|
||||
* <li>Detailed region server loading and resource usage information,
|
||||
* per server and per region.</li>
|
||||
* <li>Regions in transition at master</li>
|
||||
* <li>The unique cluster ID</li>
|
||||
* <li>Regions in transition at master</li>
|
||||
* <li>The unique cluster ID</li>
|
||||
* </ul>
|
||||
*/
|
||||
public class ClusterStatus extends VersionedWritable {
|
||||
|
@ -56,16 +59,21 @@ public class ClusterStatus extends VersionedWritable {
|
|||
* Version for object serialization. Incremented for changes in serialized
|
||||
* representation.
|
||||
* <dl>
|
||||
* <dt>0</dt> <dd>initial version</dd>
|
||||
* <dt>1</dt> <dd>added cluster ID</dd>
|
||||
* <dt>0</dt> <dd>Initial version</dd>
|
||||
* <dt>1</dt> <dd>Added cluster ID</dd>
|
||||
* <dt>2</dt> <dd>Added Map of ServerName to ServerLoad</dd>
|
||||
* <dt>3</dt> <dd>Added master and backupMasters</dd>
|
||||
* </dl>
|
||||
*/
|
||||
private static final byte VERSION_MASTER_BACKUPMASTERS = 2;
|
||||
private static final byte VERSION = 2;
|
||||
private static final String UNKNOWN_SERVERNAME = "unknown";
|
||||
|
||||
private String hbaseVersion;
|
||||
private Map<ServerName, HServerLoad> liveServers;
|
||||
private Collection<ServerName> deadServers;
|
||||
private ServerName master;
|
||||
private Collection<ServerName> backupMasters;
|
||||
private Map<String, RegionState> intransition;
|
||||
private String clusterId;
|
||||
private String[] masterCoprocessors;
|
||||
|
@ -79,11 +87,16 @@ public class ClusterStatus extends VersionedWritable {
|
|||
|
||||
public ClusterStatus(final String hbaseVersion, final String clusterid,
|
||||
final Map<ServerName, HServerLoad> servers,
|
||||
final Collection<ServerName> deadServers, final Map<String, RegionState> rit,
|
||||
final Collection<ServerName> deadServers,
|
||||
final ServerName master,
|
||||
final Collection<ServerName> backupMasters,
|
||||
final Map<String, RegionState> rit,
|
||||
final String[] masterCoprocessors) {
|
||||
this.hbaseVersion = hbaseVersion;
|
||||
this.liveServers = servers;
|
||||
this.deadServers = deadServers;
|
||||
this.master = master;
|
||||
this.backupMasters = backupMasters;
|
||||
this.intransition = rit;
|
||||
this.clusterId = clusterid;
|
||||
this.masterCoprocessors = masterCoprocessors;
|
||||
|
@ -160,8 +173,11 @@ public class ClusterStatus extends VersionedWritable {
|
|||
return (getVersion() == ((ClusterStatus)o).getVersion()) &&
|
||||
getHBaseVersion().equals(((ClusterStatus)o).getHBaseVersion()) &&
|
||||
this.liveServers.equals(((ClusterStatus)o).liveServers) &&
|
||||
deadServers.equals(((ClusterStatus)o).deadServers) &&
|
||||
Arrays.equals(this.masterCoprocessors, ((ClusterStatus)o).masterCoprocessors);
|
||||
this.deadServers.equals(((ClusterStatus)o).deadServers) &&
|
||||
Arrays.equals(this.masterCoprocessors,
|
||||
((ClusterStatus)o).masterCoprocessors) &&
|
||||
this.master.equals(((ClusterStatus)o).master) &&
|
||||
this.backupMasters.equals(((ClusterStatus)o).backupMasters);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -169,7 +185,8 @@ public class ClusterStatus extends VersionedWritable {
|
|||
*/
|
||||
public int hashCode() {
|
||||
return VERSION + hbaseVersion.hashCode() + this.liveServers.hashCode() +
|
||||
deadServers.hashCode();
|
||||
this.deadServers.hashCode() + this.master.hashCode() +
|
||||
this.backupMasters.hashCode();
|
||||
}
|
||||
|
||||
/** @return the object version number */
|
||||
|
@ -195,6 +212,28 @@ public class ClusterStatus extends VersionedWritable {
|
|||
return Collections.unmodifiableCollection(this.liveServers.keySet());
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns detailed information about the current master {@link ServerName}.
|
||||
* @return current master information if it exists
|
||||
*/
|
||||
public ServerName getMaster() {
|
||||
return this.master;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the number of backup masters in the cluster
|
||||
*/
|
||||
public int getBackupMastersSize() {
|
||||
return this.backupMasters.size();
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the names of backup masters
|
||||
*/
|
||||
public Collection<ServerName> getBackupMasters() {
|
||||
return Collections.unmodifiableCollection(this.backupMasters);
|
||||
}
|
||||
|
||||
/**
|
||||
* @param sn
|
||||
* @return Server's load or null if not found.
|
||||
|
@ -241,10 +280,26 @@ public class ClusterStatus extends VersionedWritable {
|
|||
for(String masterCoprocessor: masterCoprocessors) {
|
||||
out.writeUTF(masterCoprocessor);
|
||||
}
|
||||
Bytes.writeByteArray(out, this.master.getVersionedBytes());
|
||||
out.writeInt(this.backupMasters.size());
|
||||
for (ServerName backupMaster: this.backupMasters) {
|
||||
Bytes.writeByteArray(out, backupMaster.getVersionedBytes());
|
||||
}
|
||||
}
|
||||
|
||||
public void readFields(DataInput in) throws IOException {
|
||||
super.readFields(in);
|
||||
int version = getVersion();
|
||||
try {
|
||||
super.readFields(in);
|
||||
} catch (VersionMismatchException e) {
|
||||
/*
|
||||
* No API in VersionMismatchException to get the expected and found
|
||||
* versions. We use the only tool available to us: toString(), whose
|
||||
* output has a dependency on hadoop-common. Boo.
|
||||
*/
|
||||
int startIndex = e.toString().lastIndexOf('v') + 1;
|
||||
version = Integer.parseInt(e.toString().substring(startIndex));
|
||||
}
|
||||
hbaseVersion = in.readUTF();
|
||||
int count = in.readInt();
|
||||
this.liveServers = new HashMap<ServerName, HServerLoad>(count);
|
||||
|
@ -273,5 +328,21 @@ public class ClusterStatus extends VersionedWritable {
|
|||
for(int i = 0; i < masterCoprocessorsLength; i++) {
|
||||
masterCoprocessors[i] = in.readUTF();
|
||||
}
|
||||
// Only read extra fields for master and backup masters if
|
||||
// version indicates that we should do so, else use defaults
|
||||
if (version >= VERSION_MASTER_BACKUPMASTERS) {
|
||||
this.master = ServerName.parseVersionedServerName(
|
||||
Bytes.readByteArray(in));
|
||||
count = in.readInt();
|
||||
this.backupMasters = new ArrayList<ServerName>(count);
|
||||
for (int i = 0; i < count; i++) {
|
||||
this.backupMasters.add(ServerName.parseVersionedServerName(
|
||||
Bytes.readByteArray(in)));
|
||||
}
|
||||
} else {
|
||||
this.master = new ServerName(UNKNOWN_SERVERNAME, -1,
|
||||
ServerName.NON_STARTCODE);
|
||||
this.backupMasters = new ArrayList<ServerName>(0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -23,6 +23,7 @@ import java.util.concurrent.atomic.AtomicBoolean;
|
|||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.hadoop.hbase.HConstants;
|
||||
import org.apache.hadoop.hbase.Server;
|
||||
import org.apache.hadoop.hbase.ServerName;
|
||||
import org.apache.hadoop.hbase.monitoring.MonitoredTask;
|
||||
|
@ -131,8 +132,16 @@ class ActiveMasterManager extends ZooKeeperListener {
|
|||
// Try to become the active master, watch if there is another master.
|
||||
// Write out our ServerName as versioned bytes.
|
||||
try {
|
||||
String backupZNode = ZKUtil.joinZNode(
|
||||
this.watcher.backupMasterAddressesZNode, this.sn.toString());
|
||||
if (ZKUtil.createEphemeralNodeAndWatch(this.watcher,
|
||||
this.watcher.masterAddressZNode, sn.getVersionedBytes())) {
|
||||
this.watcher.masterAddressZNode, this.sn.getVersionedBytes())) {
|
||||
// If we were a backup master before, delete our ZNode from the backup
|
||||
// master directory since we are the active now
|
||||
LOG.info("Deleting ZNode for " + backupZNode +
|
||||
" from backup master directory");
|
||||
ZKUtil.deleteNodeFailSilent(this.watcher, backupZNode);
|
||||
|
||||
// We are the master, return
|
||||
startupStatus.setStatus("Successfully registered as active master.");
|
||||
this.clusterHasActiveMaster.set(true);
|
||||
|
@ -144,22 +153,41 @@ class ActiveMasterManager extends ZooKeeperListener {
|
|||
// There is another active master running elsewhere or this is a restart
|
||||
// and the master ephemeral node has not expired yet.
|
||||
this.clusterHasActiveMaster.set(true);
|
||||
|
||||
/*
|
||||
* Add a ZNode for ourselves in the backup master directory since we are
|
||||
* not the active master.
|
||||
*
|
||||
* If we become the active master later, ActiveMasterManager will delete
|
||||
* this node explicitly. If we crash before then, ZooKeeper will delete
|
||||
* this node for us since it is ephemeral.
|
||||
*/
|
||||
LOG.info("Adding ZNode for " + backupZNode +
|
||||
" in backup master directory");
|
||||
ZKUtil.createEphemeralNodeAndWatch(this.watcher, backupZNode,
|
||||
HConstants.EMPTY_BYTE_ARRAY);
|
||||
|
||||
String msg;
|
||||
byte [] bytes =
|
||||
ZKUtil.getDataAndWatch(this.watcher, this.watcher.masterAddressZNode);
|
||||
ServerName currentMaster = ServerName.parseVersionedServerName(bytes);
|
||||
if (ServerName.isSameHostnameAndPort(currentMaster, this.sn)) {
|
||||
String msg = ("Current master has this master's address, " + currentMaster +
|
||||
"; master was restarted? Waiting on znode to expire...");
|
||||
LOG.info(msg);
|
||||
startupStatus.setStatus(msg);
|
||||
// Hurry along the expiration of the znode.
|
||||
ZKUtil.deleteNode(this.watcher, this.watcher.masterAddressZNode);
|
||||
if (bytes == null) {
|
||||
msg = ("A master was detected, but went down before its address " +
|
||||
"could be read. Attempting to become the next active master");
|
||||
} else {
|
||||
String msg = "Another master is the active master, " + currentMaster +
|
||||
"; waiting to become the next active master";
|
||||
LOG.info(msg);
|
||||
startupStatus.setStatus(msg);
|
||||
ServerName currentMaster = ServerName.parseVersionedServerName(bytes);
|
||||
if (ServerName.isSameHostnameAndPort(currentMaster, this.sn)) {
|
||||
msg = ("Current master has this master's address, " +
|
||||
currentMaster + "; master was restarted? Waiting on znode " +
|
||||
"to expire...");
|
||||
// Hurry along the expiration of the znode.
|
||||
ZKUtil.deleteNode(this.watcher, this.watcher.masterAddressZNode);
|
||||
} else {
|
||||
msg = "Another master is the active master, " + currentMaster +
|
||||
"; waiting to become the next active master";
|
||||
}
|
||||
}
|
||||
LOG.info(msg);
|
||||
startupStatus.setStatus(msg);
|
||||
} catch (KeeperException ke) {
|
||||
master.abort("Received an unexpected KeeperException, aborting", ke);
|
||||
return false;
|
||||
|
|
|
@ -26,6 +26,7 @@ import java.net.InetAddress;
|
|||
import java.net.InetSocketAddress;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
@ -98,6 +99,7 @@ import org.apache.hadoop.hbase.zookeeper.ClusterStatusTracker;
|
|||
import org.apache.hadoop.hbase.zookeeper.DrainingServerTracker;
|
||||
import org.apache.hadoop.hbase.zookeeper.MasterSchemaChangeTracker;
|
||||
import org.apache.hadoop.hbase.zookeeper.RegionServerTracker;
|
||||
import org.apache.hadoop.hbase.zookeeper.ZKUtil;
|
||||
import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
|
||||
import org.apache.hadoop.io.MapWritable;
|
||||
import org.apache.hadoop.io.Text;
|
||||
|
@ -1379,10 +1381,27 @@ implements HMasterInterface, HMasterRegionInterface, MasterServices, Server {
|
|||
* @return cluster status
|
||||
*/
|
||||
public ClusterStatus getClusterStatus() {
|
||||
// Build Set of backup masters from ZK nodes
|
||||
List<String> backupMasterStrings;
|
||||
try {
|
||||
backupMasterStrings = ZKUtil.listChildrenNoWatch(this.zooKeeper,
|
||||
this.zooKeeper.backupMasterAddressesZNode);
|
||||
} catch (KeeperException e) {
|
||||
LOG.warn(this.zooKeeper.prefix("Unable to list backup servers"), e);
|
||||
backupMasterStrings = new ArrayList<String>(0);
|
||||
}
|
||||
List<ServerName> backupMasters = new ArrayList<ServerName>(
|
||||
backupMasterStrings.size());
|
||||
for (String s: backupMasterStrings) {
|
||||
backupMasters.add(new ServerName(s));
|
||||
}
|
||||
|
||||
return new ClusterStatus(VersionInfo.getVersion(),
|
||||
this.fileSystemManager.getClusterId(),
|
||||
this.serverManager.getOnlineServers(),
|
||||
this.serverManager.getDeadServers(),
|
||||
this.serverName,
|
||||
backupMasters,
|
||||
this.assignmentManager.getRegionsInTransition(),
|
||||
this.getCoprocessors());
|
||||
}
|
||||
|
|
|
@ -218,6 +218,18 @@ public class HBaseFsck {
|
|||
}
|
||||
}
|
||||
|
||||
// Print the current master name and state
|
||||
errors.print("Master: " + status.getMaster());
|
||||
|
||||
// Print the list of all backup masters
|
||||
Collection<ServerName> backupMasters = status.getBackupMasters();
|
||||
errors.print("Number of backup masters: " + backupMasters.size());
|
||||
if (details) {
|
||||
for (ServerName name: backupMasters) {
|
||||
errors.print(" " + name);
|
||||
}
|
||||
}
|
||||
|
||||
// Determine what's deployed
|
||||
processRegionServers(regionServers);
|
||||
|
||||
|
|
|
@ -1034,8 +1034,13 @@ public class ZKUtil {
|
|||
StringBuilder sb = new StringBuilder();
|
||||
try {
|
||||
sb.append("HBase is rooted at ").append(zkw.baseZNode);
|
||||
sb.append("\nMaster address: ").append(
|
||||
sb.append("\nActive master address: ").append(
|
||||
ServerName.parseVersionedServerName(getData(zkw, zkw.masterAddressZNode)));
|
||||
sb.append("\nBackup master addresses:");
|
||||
for (String child : listChildrenNoWatch(zkw,
|
||||
zkw.backupMasterAddressesZNode)) {
|
||||
sb.append("\n ").append(child);
|
||||
}
|
||||
sb.append("\nRegion server holding ROOT: ").append(
|
||||
Bytes.toStringBinary(getData(zkw, zkw.rootServerZNode)));
|
||||
sb.append("\nRegion servers:");
|
||||
|
|
|
@ -90,6 +90,8 @@ public class ZooKeeperWatcher implements Watcher, Abortable {
|
|||
public String drainingZNode;
|
||||
// znode of currently active master
|
||||
public String masterAddressZNode;
|
||||
// znode of this master in backup master directory, if not the active master
|
||||
public String backupMasterAddressesZNode;
|
||||
// znode containing the current cluster state
|
||||
public String clusterStateZNode;
|
||||
// znode used for region transitioning and assignment
|
||||
|
@ -165,6 +167,7 @@ public class ZooKeeperWatcher implements Watcher, Abortable {
|
|||
ZKUtil.createAndFailSilent(this, tableZNode);
|
||||
ZKUtil.createAndFailSilent(this, splitLogZNode);
|
||||
ZKUtil.createAndFailSilent(this, schemaZNode);
|
||||
ZKUtil.createAndFailSilent(this, backupMasterAddressesZNode);
|
||||
} catch (KeeperException e) {
|
||||
throw new ZooKeeperConnectionException(
|
||||
prefix("Unexpected KeeperException creating base node"), e);
|
||||
|
@ -204,6 +207,8 @@ public class ZooKeeperWatcher implements Watcher, Abortable {
|
|||
conf.get("zookeeper.znode.draining.rs", "draining"));
|
||||
masterAddressZNode = ZKUtil.joinZNode(baseZNode,
|
||||
conf.get("zookeeper.znode.master", "master"));
|
||||
backupMasterAddressesZNode = ZKUtil.joinZNode(baseZNode,
|
||||
conf.get("zookeeper.znode.backup.masters", "backup-masters"));
|
||||
clusterStateZNode = ZKUtil.joinZNode(baseZNode,
|
||||
conf.get("zookeeper.znode.state", "shutdown"));
|
||||
assignmentZNode = ZKUtil.joinZNode(baseZNode,
|
||||
|
|
|
@ -63,8 +63,7 @@ public class TestActiveMasterManager {
|
|||
|
||||
@Test public void testRestartMaster() throws IOException, KeeperException {
|
||||
ZooKeeperWatcher zk = new ZooKeeperWatcher(TEST_UTIL.getConfiguration(),
|
||||
"testActiveMasterManagerFromZK", null);
|
||||
ZKUtil.createAndFailSilent(zk, zk.baseZNode);
|
||||
"testActiveMasterManagerFromZK", null, true);
|
||||
try {
|
||||
ZKUtil.deleteNode(zk, zk.masterAddressZNode);
|
||||
} catch(KeeperException.NoNodeException nne) {}
|
||||
|
@ -103,8 +102,7 @@ public class TestActiveMasterManager {
|
|||
@Test
|
||||
public void testActiveMasterManagerFromZK() throws Exception {
|
||||
ZooKeeperWatcher zk = new ZooKeeperWatcher(TEST_UTIL.getConfiguration(),
|
||||
"testActiveMasterManagerFromZK", null);
|
||||
ZKUtil.createAndFailSilent(zk, zk.baseZNode);
|
||||
"testActiveMasterManagerFromZK", null, true);
|
||||
try {
|
||||
ZKUtil.deleteNode(zk, zk.masterAddressZNode);
|
||||
} catch(KeeperException.NoNodeException nne) {}
|
||||
|
|
|
@ -21,6 +21,7 @@ package org.apache.hadoop.hbase.master;
|
|||
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import static org.junit.Assert.assertFalse;
|
||||
import static org.junit.Assert.assertNotNull;
|
||||
import static org.junit.Assert.assertTrue;
|
||||
|
||||
import java.util.ArrayList;
|
||||
|
@ -89,16 +90,25 @@ public class TestMasterFailover {
|
|||
int numActive = 0;
|
||||
int activeIndex = -1;
|
||||
ServerName activeName = null;
|
||||
HMaster active = null;
|
||||
for (int i = 0; i < masterThreads.size(); i++) {
|
||||
if (masterThreads.get(i).getMaster().isActiveMaster()) {
|
||||
numActive++;
|
||||
activeIndex = i;
|
||||
activeName = masterThreads.get(i).getMaster().getServerName();
|
||||
active = masterThreads.get(activeIndex).getMaster();
|
||||
activeName = active.getServerName();
|
||||
}
|
||||
}
|
||||
assertEquals(1, numActive);
|
||||
assertEquals(NUM_MASTERS, masterThreads.size());
|
||||
|
||||
// Check that ClusterStatus reports the correct active and backup masters
|
||||
assertNotNull(active);
|
||||
ClusterStatus status = active.getClusterStatus();
|
||||
assertTrue(status.getMaster().equals(activeName));
|
||||
assertEquals(2, status.getBackupMastersSize());
|
||||
assertEquals(2, status.getBackupMasters().size());
|
||||
|
||||
// attempt to stop one of the inactive masters
|
||||
LOG.debug("\n\nStopping a backup master\n");
|
||||
int backupIndex = (activeIndex == 0 ? 1 : activeIndex - 1);
|
||||
|
@ -111,6 +121,7 @@ public class TestMasterFailover {
|
|||
assertTrue(activeName.equals(
|
||||
masterThreads.get(i).getMaster().getServerName()));
|
||||
activeIndex = i;
|
||||
active = masterThreads.get(activeIndex).getMaster();
|
||||
}
|
||||
}
|
||||
assertEquals(1, numActive);
|
||||
|
@ -119,6 +130,13 @@ public class TestMasterFailover {
|
|||
LOG.info("Active master managing " + rsCount + " regions servers");
|
||||
assertEquals(3, rsCount);
|
||||
|
||||
// Check that ClusterStatus reports the correct active and backup masters
|
||||
assertNotNull(active);
|
||||
status = active.getClusterStatus();
|
||||
assertTrue(status.getMaster().equals(activeName));
|
||||
assertEquals(1, status.getBackupMastersSize());
|
||||
assertEquals(1, status.getBackupMasters().size());
|
||||
|
||||
// kill the active master
|
||||
LOG.debug("\n\nStopping the active master\n");
|
||||
cluster.stopMaster(activeIndex, false);
|
||||
|
@ -132,10 +150,17 @@ public class TestMasterFailover {
|
|||
assertEquals(1, masterThreads.size());
|
||||
|
||||
// and he should be active
|
||||
HMaster active = masterThreads.get(0).getMaster();
|
||||
int rss = active.getClusterStatus().getServersSize();
|
||||
LOG.info("Active master managing " + rss + " regions servers");
|
||||
active = masterThreads.get(0).getMaster();
|
||||
assertNotNull(active);
|
||||
status = active.getClusterStatus();
|
||||
ServerName mastername = status.getMaster();
|
||||
assertTrue(mastername.equals(active.getServerName()));
|
||||
assertTrue(active.isActiveMaster());
|
||||
assertEquals(0, status.getBackupMastersSize());
|
||||
assertEquals(0, status.getBackupMasters().size());
|
||||
int rss = status.getServersSize();
|
||||
LOG.info("Active master " + mastername.getHostname() + " managing " +
|
||||
rss + " region servers");
|
||||
assertEquals(3, rss);
|
||||
|
||||
// Stop the cluster
|
||||
|
|
Loading…
Reference in New Issue