HBASE-1921 When the Master's session times out and there's only one, cluster is wedged

git-svn-id: https://svn.apache.org/repos/asf/hadoop/hbase/trunk@830820 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Jean-Daniel Cryans 2009-10-29 01:24:03 +00:00
parent 67c1fc3436
commit c7df216c11
5 changed files with 84 additions and 49 deletions

View File

@ -143,6 +143,7 @@ Release 0.21.0 - Unreleased
HBASE-1756 Refactor HLog (changing package first) HBASE-1756 Refactor HLog (changing package first)
HBASE-1926 Remove unused xmlenc jar from trunk HBASE-1926 Remove unused xmlenc jar from trunk
HBASE-1936 HLog group commit HBASE-1936 HLog group commit
HBASE-1921 When the Master's session times out and there's only one, cluster is wedged
OPTIMIZATIONS OPTIMIZATIONS
HBASE-410 [testing] Speed up the test suite HBASE-410 [testing] Speed up the test suite

View File

@ -126,7 +126,7 @@ public class HMaster extends Thread implements HConstants, HMasterInterface,
// Metrics is set when we call run. // Metrics is set when we call run.
private final MasterMetrics metrics; private final MasterMetrics metrics;
// Our zk client. // Our zk client.
private final ZooKeeperWrapper zooKeeperWrapper; private ZooKeeperWrapper zooKeeperWrapper;
// Watcher for master address and for cluster shutdown. // Watcher for master address and for cluster shutdown.
private final ZKMasterAddressWatcher zkMasterAddressWatcher; private final ZKMasterAddressWatcher zkMasterAddressWatcher;
// A Sleeper that sleeps for threadWakeFrequency; sleep if nothing todo. // A Sleeper that sleeps for threadWakeFrequency; sleep if nothing todo.
@ -187,7 +187,7 @@ public class HMaster extends Thread implements HConstants, HMasterInterface,
this.zooKeeperWrapper = new ZooKeeperWrapper(conf, this); this.zooKeeperWrapper = new ZooKeeperWrapper(conf, this);
this.zkMasterAddressWatcher = this.zkMasterAddressWatcher =
new ZKMasterAddressWatcher(this.zooKeeperWrapper, this.shutdownRequested); new ZKMasterAddressWatcher(this.zooKeeperWrapper, this.shutdownRequested);
this.zkMasterAddressWatcher.writeAddressToZooKeeper(this.address); this.zkMasterAddressWatcher.writeAddressToZooKeeper(this.address, true);
serverManager = new ServerManager(this); serverManager = new ServerManager(this);
regionManager = new RegionManager(this); regionManager = new RegionManager(this);
@ -1131,10 +1131,28 @@ public class HMaster extends Thread implements HConstants, HMasterInterface,
(event.getType().equals(EventType.NodeDeleted) && (event.getType().equals(EventType.NodeDeleted) &&
event.getPath().equals(this.zooKeeperWrapper.getMasterElectionZNode())) && event.getPath().equals(this.zooKeeperWrapper.getMasterElectionZNode())) &&
!shutdownRequested.get()) { !shutdownRequested.get()) {
LOG.error("Master lost its znode, killing itself now");
LOG.info("Master lost its znode, trying to get a new one");
// Can we still be the master? If not, goodbye
zooKeeperWrapper.close();
try {
zooKeeperWrapper = new ZooKeeperWrapper(conf, this);
this.zkMasterAddressWatcher.setZookeeper(zooKeeperWrapper);
if(!this.zkMasterAddressWatcher.
writeAddressToZooKeeper(this.address,false)) {
throw new Exception("Another Master is currently active");
}
// Verify the cluster to see if anything happened while we were away
joinCluster();
} catch (Exception e) {
LOG.error("Killing master because of", e);
System.exit(1); System.exit(1);
} }
} }
}
private static void printUsageAndExit() { private static void printUsageAndExit() {
System.err.println("Usage: Master start|stop"); System.err.println("Usage: Master start|stop");

View File

@ -125,8 +125,6 @@ public class RegionManager implements HConstants {
regionsToFlush = Collections.synchronizedSortedMap( regionsToFlush = Collections.synchronizedSortedMap(
new TreeMap<byte[],Pair<HRegionInfo,HServerAddress>> new TreeMap<byte[],Pair<HRegionInfo,HServerAddress>>
(Bytes.BYTES_COMPARATOR)); (Bytes.BYTES_COMPARATOR));
private final ZooKeeperWrapper zooKeeperWrapper;
private final int zooKeeperNumRetries; private final int zooKeeperNumRetries;
private final int zooKeeperPause; private final int zooKeeperPause;
@ -143,7 +141,6 @@ public class RegionManager implements HConstants {
// Scans the meta table // Scans the meta table
metaScannerThread = new MetaScanner(master); metaScannerThread = new MetaScanner(master);
zooKeeperWrapper = master.getZooKeeperWrapper();
zooKeeperNumRetries = conf.getInt(ZOOKEEPER_RETRIES, DEFAULT_ZOOKEEPER_RETRIES); zooKeeperNumRetries = conf.getInt(ZOOKEEPER_RETRIES, DEFAULT_ZOOKEEPER_RETRIES);
zooKeeperPause = conf.getInt(ZOOKEEPER_PAUSE, DEFAULT_ZOOKEEPER_PAUSE); zooKeeperPause = conf.getInt(ZOOKEEPER_PAUSE, DEFAULT_ZOOKEEPER_PAUSE);
@ -602,8 +599,8 @@ public class RegionManager implements HConstants {
} catch(Exception iex) { } catch(Exception iex) {
LOG.warn("meta scanner", iex); LOG.warn("meta scanner", iex);
} }
zooKeeperWrapper.clearRSDirectory(); master.getZooKeeperWrapper().clearRSDirectory();
zooKeeperWrapper.close(); master.getZooKeeperWrapper().close();
} }
/** /**
@ -1121,7 +1118,7 @@ public class RegionManager implements HConstants {
private void writeRootRegionLocationToZooKeeper(HServerAddress address) { private void writeRootRegionLocationToZooKeeper(HServerAddress address) {
for (int attempt = 0; attempt < zooKeeperNumRetries; ++attempt) { for (int attempt = 0; attempt < zooKeeperNumRetries; ++attempt) {
if (zooKeeperWrapper.writeRootRegionLocation(address)) { if (master.getZooKeeperWrapper().writeRootRegionLocation(address)) {
return; return;
} }

View File

@ -41,13 +41,14 @@ import org.apache.zookeeper.Watcher.Event.EventType;
*/ */
class ZKMasterAddressWatcher implements Watcher { class ZKMasterAddressWatcher implements Watcher {
private static final Log LOG = LogFactory.getLog(ZKMasterAddressWatcher.class); private static final Log LOG = LogFactory.getLog(ZKMasterAddressWatcher.class);
private final ZooKeeperWrapper zookeeper;
private ZooKeeperWrapper zookeeper;
private final AtomicBoolean requestShutdown; private final AtomicBoolean requestShutdown;
/** /**
* Create this watcher using passed ZooKeeperWrapper instance. * Create this watcher using passed ZooKeeperWrapper instance.
* @param zk ZooKeeper * @param zk ZooKeeper
* @param requestShutdown Flag to set to request shutdown. * @param flag Flag to set to request shutdown.
*/ */
ZKMasterAddressWatcher(final ZooKeeperWrapper zk, final AtomicBoolean flag) { ZKMasterAddressWatcher(final ZooKeeperWrapper zk, final AtomicBoolean flag) {
this.requestShutdown = flag; this.requestShutdown = flag;
@ -98,17 +99,30 @@ class ZKMasterAddressWatcher implements Watcher {
* address (or until cluster shutdown). * address (or until cluster shutdown).
* @param address Address whose format is HServerAddress.toString * @param address Address whose format is HServerAddress.toString
*/ */
void writeAddressToZooKeeper(final HServerAddress address) { boolean writeAddressToZooKeeper(
while (true) { final HServerAddress address, boolean retry) {
do {
waitForMasterAddressAvailability(); waitForMasterAddressAvailability();
// Check if we need to shutdown instead of taking control // Check if we need to shutdown instead of taking control
if (this.requestShutdown.get()) return; if (this.requestShutdown.get()) {
LOG.debug("Won't start Master because cluster is shuting down");
return false;
}
if(this.zookeeper.writeMasterAddress(address)) { if(this.zookeeper.writeMasterAddress(address)) {
this.zookeeper.setClusterState(true); this.zookeeper.setClusterState(true);
// Watch our own node // Watch our own node
this.zookeeper.readMasterAddress(this); this.zookeeper.readMasterAddress(this);
return; return true;
} }
} while(retry);
return false;
} }
/**
* Reset the ZK in case a new connection is required
* @param zookeeper new instance
*/
public void setZookeeper(ZooKeeperWrapper zookeeper) {
this.zookeeper = zookeeper;
} }
} }

View File

@ -95,21 +95,29 @@ public class TestZooKeeper extends HBaseClusterTestCase {
connection.relocateRegion(HConstants.ROOT_TABLE_NAME, HConstants.EMPTY_BYTE_ARRAY); connection.relocateRegion(HConstants.ROOT_TABLE_NAME, HConstants.EMPTY_BYTE_ARRAY);
} }
public void testRegionServerSessionExpired() { public void testRegionServerSessionExpired() throws Exception{
try {
this.conf.setBoolean("hbase.regionserver.restart.on.zk.expire", true); this.conf.setBoolean("hbase.regionserver.restart.on.zk.expire", true);
new HTable(conf, HConstants.META_TABLE_NAME); new HTable(conf, HConstants.META_TABLE_NAME);
HRegionServer rs = cluster.getRegionServer(0);
sessionExpirationHelper(rs.getZooKeeperWrapper());
}
/**
* Test for the master's ZooKeeper session expiring: opens the META table,
* fetches the cluster's master and hands the master's ZooKeeperWrapper to
* sessionExpirationHelper.
* @throws Exception anything thrown while opening the table or by the helper
*/
public void testMasterSessionExpired() throws Exception {
// The HTable instance is discarded; presumably this only confirms META is
// reachable before the session is expired -- TODO confirm.
new HTable(conf, HConstants.META_TABLE_NAME);
HMaster master = cluster.getMaster();
sessionExpirationHelper(master.getZooKeeperWrapper());
}
public void sessionExpirationHelper(ZooKeeperWrapper nodeZK) throws Exception{
ZooKeeperWrapper zkw = new ZooKeeperWrapper(conf, EmptyWatcher.instance); ZooKeeperWrapper zkw = new ZooKeeperWrapper(conf, EmptyWatcher.instance);
String quorumServers = zkw.getQuorumServers(); String quorumServers = zkw.getQuorumServers();
int sessionTimeout = 5 * 1000; // 5 seconds int sessionTimeout = 5 * 1000; // 5 seconds
HRegionServer rs = cluster.getRegionServer(0); byte[] password = nodeZK.getSessionPassword();
ZooKeeperWrapper rsZK = rs.getZooKeeperWrapper(); long sessionID = nodeZK.getSessionID();
long sessionID = rsZK.getSessionID();
byte[] password = rsZK.getSessionPassword();
ZooKeeper zk = new ZooKeeper(quorumServers, sessionTimeout, EmptyWatcher.instance, sessionID, password); ZooKeeper zk = new ZooKeeper(quorumServers,
sessionTimeout, EmptyWatcher.instance, sessionID, password);
zk.close(); zk.close();
Thread.sleep(sessionTimeout * 3L); Thread.sleep(sessionTimeout * 3L);
@ -126,10 +134,7 @@ public class TestZooKeeper extends HBaseClusterTestCase {
Put put = new Put(Bytes.toBytes("testrow")); Put put = new Put(Bytes.toBytes("testrow"));
put.add(Bytes.toBytes("fam"), Bytes.toBytes("col"), Bytes.toBytes("testdata")); put.add(Bytes.toBytes("fam"), Bytes.toBytes("col"), Bytes.toBytes("testdata"));
table.put(put); table.put(put);
} catch (Exception e) {
e.printStackTrace();
fail();
}
} }
public void testMultipleZK() { public void testMultipleZK() {