HBASE-1921 When the Master's session times out and there's only one, cluster is wedged
git-svn-id: https://svn.apache.org/repos/asf/hadoop/hbase/trunk@830820 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
67c1fc3436
commit
c7df216c11
|
@ -143,6 +143,7 @@ Release 0.21.0 - Unreleased
|
|||
HBASE-1756 Refactor HLog (changing package first)
|
||||
HBASE-1926 Remove unused xmlenc jar from trunk
|
||||
HBASE-1936 HLog group commit
|
||||
HBASE-1921 When the Master's session times out and there's only one, cluster is wedged
|
||||
|
||||
OPTIMIZATIONS
|
||||
HBASE-410 [testing] Speed up the test suite
|
||||
|
|
|
@ -126,7 +126,7 @@ public class HMaster extends Thread implements HConstants, HMasterInterface,
|
|||
// Metrics is set when we call run.
|
||||
private final MasterMetrics metrics;
|
||||
// Our zk client.
|
||||
private final ZooKeeperWrapper zooKeeperWrapper;
|
||||
private ZooKeeperWrapper zooKeeperWrapper;
|
||||
// Watcher for master address and for cluster shutdown.
|
||||
private final ZKMasterAddressWatcher zkMasterAddressWatcher;
|
||||
// A Sleeper that sleeps for threadWakeFrequency; sleep if nothing to do.
|
||||
|
@ -187,7 +187,7 @@ public class HMaster extends Thread implements HConstants, HMasterInterface,
|
|||
this.zooKeeperWrapper = new ZooKeeperWrapper(conf, this);
|
||||
this.zkMasterAddressWatcher =
|
||||
new ZKMasterAddressWatcher(this.zooKeeperWrapper, this.shutdownRequested);
|
||||
this.zkMasterAddressWatcher.writeAddressToZooKeeper(this.address);
|
||||
this.zkMasterAddressWatcher.writeAddressToZooKeeper(this.address, true);
|
||||
|
||||
serverManager = new ServerManager(this);
|
||||
regionManager = new RegionManager(this);
|
||||
|
@ -1131,8 +1131,26 @@ public class HMaster extends Thread implements HConstants, HMasterInterface,
|
|||
(event.getType().equals(EventType.NodeDeleted) &&
|
||||
event.getPath().equals(this.zooKeeperWrapper.getMasterElectionZNode())) &&
|
||||
!shutdownRequested.get()) {
|
||||
LOG.error("Master lost its znode, killing itself now");
|
||||
System.exit(1);
|
||||
|
||||
LOG.info("Master lost its znode, trying to get a new one");
|
||||
|
||||
// Can we still be the master? If not, goodbye
|
||||
|
||||
zooKeeperWrapper.close();
|
||||
try {
|
||||
zooKeeperWrapper = new ZooKeeperWrapper(conf, this);
|
||||
this.zkMasterAddressWatcher.setZookeeper(zooKeeperWrapper);
|
||||
if(!this.zkMasterAddressWatcher.
|
||||
writeAddressToZooKeeper(this.address,false)) {
|
||||
throw new Exception("Another Master is currently active");
|
||||
}
|
||||
|
||||
// Verify the cluster to see if anything happened while we were away
|
||||
joinCluster();
|
||||
} catch (Exception e) {
|
||||
LOG.error("Killing master because of", e);
|
||||
System.exit(1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -125,8 +125,6 @@ public class RegionManager implements HConstants {
|
|||
regionsToFlush = Collections.synchronizedSortedMap(
|
||||
new TreeMap<byte[],Pair<HRegionInfo,HServerAddress>>
|
||||
(Bytes.BYTES_COMPARATOR));
|
||||
|
||||
private final ZooKeeperWrapper zooKeeperWrapper;
|
||||
private final int zooKeeperNumRetries;
|
||||
private final int zooKeeperPause;
|
||||
|
||||
|
@ -143,7 +141,6 @@ public class RegionManager implements HConstants {
|
|||
// Scans the meta table
|
||||
metaScannerThread = new MetaScanner(master);
|
||||
|
||||
zooKeeperWrapper = master.getZooKeeperWrapper();
|
||||
zooKeeperNumRetries = conf.getInt(ZOOKEEPER_RETRIES, DEFAULT_ZOOKEEPER_RETRIES);
|
||||
zooKeeperPause = conf.getInt(ZOOKEEPER_PAUSE, DEFAULT_ZOOKEEPER_PAUSE);
|
||||
|
||||
|
@ -602,8 +599,8 @@ public class RegionManager implements HConstants {
|
|||
} catch(Exception iex) {
|
||||
LOG.warn("meta scanner", iex);
|
||||
}
|
||||
zooKeeperWrapper.clearRSDirectory();
|
||||
zooKeeperWrapper.close();
|
||||
master.getZooKeeperWrapper().clearRSDirectory();
|
||||
master.getZooKeeperWrapper().close();
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -1121,7 +1118,7 @@ public class RegionManager implements HConstants {
|
|||
|
||||
private void writeRootRegionLocationToZooKeeper(HServerAddress address) {
|
||||
for (int attempt = 0; attempt < zooKeeperNumRetries; ++attempt) {
|
||||
if (zooKeeperWrapper.writeRootRegionLocation(address)) {
|
||||
if (master.getZooKeeperWrapper().writeRootRegionLocation(address)) {
|
||||
return;
|
||||
}
|
||||
|
||||
|
|
|
@ -41,13 +41,14 @@ import org.apache.zookeeper.Watcher.Event.EventType;
|
|||
*/
|
||||
class ZKMasterAddressWatcher implements Watcher {
|
||||
private static final Log LOG = LogFactory.getLog(ZKMasterAddressWatcher.class);
|
||||
private final ZooKeeperWrapper zookeeper;
|
||||
|
||||
private ZooKeeperWrapper zookeeper;
|
||||
private final AtomicBoolean requestShutdown;
|
||||
|
||||
/**
|
||||
* Create this watcher using passed ZooKeeperWrapper instance.
|
||||
* @param zk ZooKeeper
|
||||
* @param requestShutdown Flag to set to request shutdown.
|
||||
* @param flag Flag to set to request shutdown.
|
||||
*/
|
||||
ZKMasterAddressWatcher(final ZooKeeperWrapper zk, final AtomicBoolean flag) {
|
||||
this.requestShutdown = flag;
|
||||
|
@ -98,17 +99,30 @@ class ZKMasterAddressWatcher implements Watcher {
|
|||
* address (or until cluster shutdown).
|
||||
* @param address Address whose format is HServerAddress.toString
|
||||
*/
|
||||
void writeAddressToZooKeeper(final HServerAddress address) {
|
||||
while (true) {
|
||||
boolean writeAddressToZooKeeper(
|
||||
final HServerAddress address, boolean retry) {
|
||||
do {
|
||||
waitForMasterAddressAvailability();
|
||||
// Check if we need to shutdown instead of taking control
|
||||
if (this.requestShutdown.get()) return;
|
||||
if (this.requestShutdown.get()) {
|
||||
LOG.debug("Won't start Master because cluster is shuting down");
|
||||
return false;
|
||||
}
|
||||
if(this.zookeeper.writeMasterAddress(address)) {
|
||||
this.zookeeper.setClusterState(true);
|
||||
// Watch our own node
|
||||
this.zookeeper.readMasterAddress(this);
|
||||
return;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
} while(retry);
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Replace the ZooKeeperWrapper used by this watcher when a new connection is required
|
||||
* @param zookeeper new instance
|
||||
*/
|
||||
public void setZookeeper(ZooKeeperWrapper zookeeper) {
|
||||
this.zookeeper = zookeeper;
|
||||
}
|
||||
}
|
|
@ -95,41 +95,46 @@ public class TestZooKeeper extends HBaseClusterTestCase {
|
|||
connection.relocateRegion(HConstants.ROOT_TABLE_NAME, HConstants.EMPTY_BYTE_ARRAY);
|
||||
}
|
||||
|
||||
public void testRegionServerSessionExpired() {
|
||||
try {
|
||||
this.conf.setBoolean("hbase.regionserver.restart.on.zk.expire", true);
|
||||
new HTable(conf, HConstants.META_TABLE_NAME);
|
||||
public void testRegionServerSessionExpired() throws Exception{
|
||||
this.conf.setBoolean("hbase.regionserver.restart.on.zk.expire", true);
|
||||
new HTable(conf, HConstants.META_TABLE_NAME);
|
||||
HRegionServer rs = cluster.getRegionServer(0);
|
||||
sessionExpirationHelper(rs.getZooKeeperWrapper());
|
||||
}
|
||||
|
||||
ZooKeeperWrapper zkw = new ZooKeeperWrapper(conf, EmptyWatcher.instance);
|
||||
String quorumServers = zkw.getQuorumServers();
|
||||
int sessionTimeout = 5 * 1000; // 5 seconds
|
||||
public void testMasterSessionExpired() throws Exception {
|
||||
new HTable(conf, HConstants.META_TABLE_NAME);
|
||||
HMaster master = cluster.getMaster();
|
||||
sessionExpirationHelper(master.getZooKeeperWrapper());
|
||||
}
|
||||
|
||||
HRegionServer rs = cluster.getRegionServer(0);
|
||||
ZooKeeperWrapper rsZK = rs.getZooKeeperWrapper();
|
||||
long sessionID = rsZK.getSessionID();
|
||||
byte[] password = rsZK.getSessionPassword();
|
||||
public void sessionExpirationHelper(ZooKeeperWrapper nodeZK) throws Exception{
|
||||
ZooKeeperWrapper zkw = new ZooKeeperWrapper(conf, EmptyWatcher.instance);
|
||||
String quorumServers = zkw.getQuorumServers();
|
||||
int sessionTimeout = 5 * 1000; // 5 seconds
|
||||
|
||||
ZooKeeper zk = new ZooKeeper(quorumServers, sessionTimeout, EmptyWatcher.instance, sessionID, password);
|
||||
zk.close();
|
||||
byte[] password = nodeZK.getSessionPassword();
|
||||
long sessionID = nodeZK.getSessionID();
|
||||
|
||||
Thread.sleep(sessionTimeout * 3L);
|
||||
ZooKeeper zk = new ZooKeeper(quorumServers,
|
||||
sessionTimeout, EmptyWatcher.instance, sessionID, password);
|
||||
zk.close();
|
||||
|
||||
new HTable(conf, HConstants.META_TABLE_NAME);
|
||||
Thread.sleep(sessionTimeout * 3L);
|
||||
|
||||
HBaseAdmin admin = new HBaseAdmin(conf);
|
||||
HTableDescriptor desc = new HTableDescriptor("test");
|
||||
HColumnDescriptor family = new HColumnDescriptor("fam");
|
||||
desc.addFamily(family);
|
||||
admin.createTable(desc);
|
||||
new HTable(conf, HConstants.META_TABLE_NAME);
|
||||
|
||||
HBaseAdmin admin = new HBaseAdmin(conf);
|
||||
HTableDescriptor desc = new HTableDescriptor("test");
|
||||
HColumnDescriptor family = new HColumnDescriptor("fam");
|
||||
desc.addFamily(family);
|
||||
admin.createTable(desc);
|
||||
|
||||
HTable table = new HTable("test");
|
||||
Put put = new Put(Bytes.toBytes("testrow"));
|
||||
put.add(Bytes.toBytes("fam"), Bytes.toBytes("col"), Bytes.toBytes("testdata"));
|
||||
table.put(put);
|
||||
|
||||
HTable table = new HTable("test");
|
||||
Put put = new Put(Bytes.toBytes("testrow"));
|
||||
put.add(Bytes.toBytes("fam"), Bytes.toBytes("col"), Bytes.toBytes("testdata"));
|
||||
table.put(put);
|
||||
} catch (Exception e) {
|
||||
e.printStackTrace();
|
||||
fail();
|
||||
}
|
||||
}
|
||||
|
||||
public void testMultipleZK() {
|
||||
|
|
Loading…
Reference in New Issue