HBASE-1921 When the Master's session times out and there's only one, cluster is wedged
git-svn-id: https://svn.apache.org/repos/asf/hadoop/hbase/trunk@830820 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
67c1fc3436
commit
c7df216c11
|
@ -143,6 +143,7 @@ Release 0.21.0 - Unreleased
|
||||||
HBASE-1756 Refactor HLog (changing package first)
|
HBASE-1756 Refactor HLog (changing package first)
|
||||||
HBASE-1926 Remove unused xmlenc jar from trunk
|
HBASE-1926 Remove unused xmlenc jar from trunk
|
||||||
HBASE-1936 HLog group commit
|
HBASE-1936 HLog group commit
|
||||||
|
HBASE-1921 When the Master's session times out and there's only one, cluster is wedged
|
||||||
|
|
||||||
OPTIMIZATIONS
|
OPTIMIZATIONS
|
||||||
HBASE-410 [testing] Speed up the test suite
|
HBASE-410 [testing] Speed up the test suite
|
||||||
|
|
|
@ -126,7 +126,7 @@ public class HMaster extends Thread implements HConstants, HMasterInterface,
|
||||||
// Metrics is set when we call run.
|
// Metrics is set when we call run.
|
||||||
private final MasterMetrics metrics;
|
private final MasterMetrics metrics;
|
||||||
// Our zk client.
|
// Our zk client.
|
||||||
private final ZooKeeperWrapper zooKeeperWrapper;
|
private ZooKeeperWrapper zooKeeperWrapper;
|
||||||
// Watcher for master address and for cluster shutdown.
|
// Watcher for master address and for cluster shutdown.
|
||||||
private final ZKMasterAddressWatcher zkMasterAddressWatcher;
|
private final ZKMasterAddressWatcher zkMasterAddressWatcher;
|
||||||
// A Sleeper that sleeps for threadWakeFrequency; sleep if nothing todo.
|
// A Sleeper that sleeps for threadWakeFrequency; sleep if nothing todo.
|
||||||
|
@ -187,7 +187,7 @@ public class HMaster extends Thread implements HConstants, HMasterInterface,
|
||||||
this.zooKeeperWrapper = new ZooKeeperWrapper(conf, this);
|
this.zooKeeperWrapper = new ZooKeeperWrapper(conf, this);
|
||||||
this.zkMasterAddressWatcher =
|
this.zkMasterAddressWatcher =
|
||||||
new ZKMasterAddressWatcher(this.zooKeeperWrapper, this.shutdownRequested);
|
new ZKMasterAddressWatcher(this.zooKeeperWrapper, this.shutdownRequested);
|
||||||
this.zkMasterAddressWatcher.writeAddressToZooKeeper(this.address);
|
this.zkMasterAddressWatcher.writeAddressToZooKeeper(this.address, true);
|
||||||
|
|
||||||
serverManager = new ServerManager(this);
|
serverManager = new ServerManager(this);
|
||||||
regionManager = new RegionManager(this);
|
regionManager = new RegionManager(this);
|
||||||
|
@ -1131,10 +1131,28 @@ public class HMaster extends Thread implements HConstants, HMasterInterface,
|
||||||
(event.getType().equals(EventType.NodeDeleted) &&
|
(event.getType().equals(EventType.NodeDeleted) &&
|
||||||
event.getPath().equals(this.zooKeeperWrapper.getMasterElectionZNode())) &&
|
event.getPath().equals(this.zooKeeperWrapper.getMasterElectionZNode())) &&
|
||||||
!shutdownRequested.get()) {
|
!shutdownRequested.get()) {
|
||||||
LOG.error("Master lost its znode, killing itself now");
|
|
||||||
|
LOG.info("Master lost its znode, trying to get a new one");
|
||||||
|
|
||||||
|
// Can we still be the master? If not, goodbye
|
||||||
|
|
||||||
|
zooKeeperWrapper.close();
|
||||||
|
try {
|
||||||
|
zooKeeperWrapper = new ZooKeeperWrapper(conf, this);
|
||||||
|
this.zkMasterAddressWatcher.setZookeeper(zooKeeperWrapper);
|
||||||
|
if(!this.zkMasterAddressWatcher.
|
||||||
|
writeAddressToZooKeeper(this.address,false)) {
|
||||||
|
throw new Exception("Another Master is currently active");
|
||||||
|
}
|
||||||
|
|
||||||
|
// Verify the cluster to see if anything happened while we were away
|
||||||
|
joinCluster();
|
||||||
|
} catch (Exception e) {
|
||||||
|
LOG.error("Killing master because of", e);
|
||||||
System.exit(1);
|
System.exit(1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
private static void printUsageAndExit() {
|
private static void printUsageAndExit() {
|
||||||
System.err.println("Usage: Master start|stop");
|
System.err.println("Usage: Master start|stop");
|
||||||
|
|
|
@ -125,8 +125,6 @@ public class RegionManager implements HConstants {
|
||||||
regionsToFlush = Collections.synchronizedSortedMap(
|
regionsToFlush = Collections.synchronizedSortedMap(
|
||||||
new TreeMap<byte[],Pair<HRegionInfo,HServerAddress>>
|
new TreeMap<byte[],Pair<HRegionInfo,HServerAddress>>
|
||||||
(Bytes.BYTES_COMPARATOR));
|
(Bytes.BYTES_COMPARATOR));
|
||||||
|
|
||||||
private final ZooKeeperWrapper zooKeeperWrapper;
|
|
||||||
private final int zooKeeperNumRetries;
|
private final int zooKeeperNumRetries;
|
||||||
private final int zooKeeperPause;
|
private final int zooKeeperPause;
|
||||||
|
|
||||||
|
@ -143,7 +141,6 @@ public class RegionManager implements HConstants {
|
||||||
// Scans the meta table
|
// Scans the meta table
|
||||||
metaScannerThread = new MetaScanner(master);
|
metaScannerThread = new MetaScanner(master);
|
||||||
|
|
||||||
zooKeeperWrapper = master.getZooKeeperWrapper();
|
|
||||||
zooKeeperNumRetries = conf.getInt(ZOOKEEPER_RETRIES, DEFAULT_ZOOKEEPER_RETRIES);
|
zooKeeperNumRetries = conf.getInt(ZOOKEEPER_RETRIES, DEFAULT_ZOOKEEPER_RETRIES);
|
||||||
zooKeeperPause = conf.getInt(ZOOKEEPER_PAUSE, DEFAULT_ZOOKEEPER_PAUSE);
|
zooKeeperPause = conf.getInt(ZOOKEEPER_PAUSE, DEFAULT_ZOOKEEPER_PAUSE);
|
||||||
|
|
||||||
|
@ -602,8 +599,8 @@ public class RegionManager implements HConstants {
|
||||||
} catch(Exception iex) {
|
} catch(Exception iex) {
|
||||||
LOG.warn("meta scanner", iex);
|
LOG.warn("meta scanner", iex);
|
||||||
}
|
}
|
||||||
zooKeeperWrapper.clearRSDirectory();
|
master.getZooKeeperWrapper().clearRSDirectory();
|
||||||
zooKeeperWrapper.close();
|
master.getZooKeeperWrapper().close();
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -1121,7 +1118,7 @@ public class RegionManager implements HConstants {
|
||||||
|
|
||||||
private void writeRootRegionLocationToZooKeeper(HServerAddress address) {
|
private void writeRootRegionLocationToZooKeeper(HServerAddress address) {
|
||||||
for (int attempt = 0; attempt < zooKeeperNumRetries; ++attempt) {
|
for (int attempt = 0; attempt < zooKeeperNumRetries; ++attempt) {
|
||||||
if (zooKeeperWrapper.writeRootRegionLocation(address)) {
|
if (master.getZooKeeperWrapper().writeRootRegionLocation(address)) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -41,13 +41,14 @@ import org.apache.zookeeper.Watcher.Event.EventType;
|
||||||
*/
|
*/
|
||||||
class ZKMasterAddressWatcher implements Watcher {
|
class ZKMasterAddressWatcher implements Watcher {
|
||||||
private static final Log LOG = LogFactory.getLog(ZKMasterAddressWatcher.class);
|
private static final Log LOG = LogFactory.getLog(ZKMasterAddressWatcher.class);
|
||||||
private final ZooKeeperWrapper zookeeper;
|
|
||||||
|
private ZooKeeperWrapper zookeeper;
|
||||||
private final AtomicBoolean requestShutdown;
|
private final AtomicBoolean requestShutdown;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Create this watcher using passed ZooKeeperWrapper instance.
|
* Create this watcher using passed ZooKeeperWrapper instance.
|
||||||
* @param zk ZooKeeper
|
* @param zk ZooKeeper
|
||||||
* @param requestShutdown Flag to set to request shutdown.
|
* @param flag Flag to set to request shutdown.
|
||||||
*/
|
*/
|
||||||
ZKMasterAddressWatcher(final ZooKeeperWrapper zk, final AtomicBoolean flag) {
|
ZKMasterAddressWatcher(final ZooKeeperWrapper zk, final AtomicBoolean flag) {
|
||||||
this.requestShutdown = flag;
|
this.requestShutdown = flag;
|
||||||
|
@ -98,17 +99,30 @@ class ZKMasterAddressWatcher implements Watcher {
|
||||||
* address (or until cluster shutdown).
|
* address (or until cluster shutdown).
|
||||||
* @param address Address whose format is HServerAddress.toString
|
* @param address Address whose format is HServerAddress.toString
|
||||||
*/
|
*/
|
||||||
void writeAddressToZooKeeper(final HServerAddress address) {
|
boolean writeAddressToZooKeeper(
|
||||||
while (true) {
|
final HServerAddress address, boolean retry) {
|
||||||
|
do {
|
||||||
waitForMasterAddressAvailability();
|
waitForMasterAddressAvailability();
|
||||||
// Check if we need to shutdown instead of taking control
|
// Check if we need to shutdown instead of taking control
|
||||||
if (this.requestShutdown.get()) return;
|
if (this.requestShutdown.get()) {
|
||||||
|
LOG.debug("Won't start Master because cluster is shuting down");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
if(this.zookeeper.writeMasterAddress(address)) {
|
if(this.zookeeper.writeMasterAddress(address)) {
|
||||||
this.zookeeper.setClusterState(true);
|
this.zookeeper.setClusterState(true);
|
||||||
// Watch our own node
|
// Watch our own node
|
||||||
this.zookeeper.readMasterAddress(this);
|
this.zookeeper.readMasterAddress(this);
|
||||||
return;
|
return true;
|
||||||
}
|
}
|
||||||
|
} while(retry);
|
||||||
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Reset the ZK in case a new connection is required
|
||||||
|
* @param zookeeper new instance
|
||||||
|
*/
|
||||||
|
public void setZookeeper(ZooKeeperWrapper zookeeper) {
|
||||||
|
this.zookeeper = zookeeper;
|
||||||
}
|
}
|
||||||
}
|
}
|
|
@ -95,21 +95,29 @@ public class TestZooKeeper extends HBaseClusterTestCase {
|
||||||
connection.relocateRegion(HConstants.ROOT_TABLE_NAME, HConstants.EMPTY_BYTE_ARRAY);
|
connection.relocateRegion(HConstants.ROOT_TABLE_NAME, HConstants.EMPTY_BYTE_ARRAY);
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testRegionServerSessionExpired() {
|
public void testRegionServerSessionExpired() throws Exception{
|
||||||
try {
|
|
||||||
this.conf.setBoolean("hbase.regionserver.restart.on.zk.expire", true);
|
this.conf.setBoolean("hbase.regionserver.restart.on.zk.expire", true);
|
||||||
new HTable(conf, HConstants.META_TABLE_NAME);
|
new HTable(conf, HConstants.META_TABLE_NAME);
|
||||||
|
HRegionServer rs = cluster.getRegionServer(0);
|
||||||
|
sessionExpirationHelper(rs.getZooKeeperWrapper());
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testMasterSessionExpired() throws Exception {
|
||||||
|
new HTable(conf, HConstants.META_TABLE_NAME);
|
||||||
|
HMaster master = cluster.getMaster();
|
||||||
|
sessionExpirationHelper(master.getZooKeeperWrapper());
|
||||||
|
}
|
||||||
|
|
||||||
|
public void sessionExpirationHelper(ZooKeeperWrapper nodeZK) throws Exception{
|
||||||
ZooKeeperWrapper zkw = new ZooKeeperWrapper(conf, EmptyWatcher.instance);
|
ZooKeeperWrapper zkw = new ZooKeeperWrapper(conf, EmptyWatcher.instance);
|
||||||
String quorumServers = zkw.getQuorumServers();
|
String quorumServers = zkw.getQuorumServers();
|
||||||
int sessionTimeout = 5 * 1000; // 5 seconds
|
int sessionTimeout = 5 * 1000; // 5 seconds
|
||||||
|
|
||||||
HRegionServer rs = cluster.getRegionServer(0);
|
byte[] password = nodeZK.getSessionPassword();
|
||||||
ZooKeeperWrapper rsZK = rs.getZooKeeperWrapper();
|
long sessionID = nodeZK.getSessionID();
|
||||||
long sessionID = rsZK.getSessionID();
|
|
||||||
byte[] password = rsZK.getSessionPassword();
|
|
||||||
|
|
||||||
ZooKeeper zk = new ZooKeeper(quorumServers, sessionTimeout, EmptyWatcher.instance, sessionID, password);
|
ZooKeeper zk = new ZooKeeper(quorumServers,
|
||||||
|
sessionTimeout, EmptyWatcher.instance, sessionID, password);
|
||||||
zk.close();
|
zk.close();
|
||||||
|
|
||||||
Thread.sleep(sessionTimeout * 3L);
|
Thread.sleep(sessionTimeout * 3L);
|
||||||
|
@ -126,10 +134,7 @@ public class TestZooKeeper extends HBaseClusterTestCase {
|
||||||
Put put = new Put(Bytes.toBytes("testrow"));
|
Put put = new Put(Bytes.toBytes("testrow"));
|
||||||
put.add(Bytes.toBytes("fam"), Bytes.toBytes("col"), Bytes.toBytes("testdata"));
|
put.add(Bytes.toBytes("fam"), Bytes.toBytes("col"), Bytes.toBytes("testdata"));
|
||||||
table.put(put);
|
table.put(put);
|
||||||
} catch (Exception e) {
|
|
||||||
e.printStackTrace();
|
|
||||||
fail();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testMultipleZK() {
|
public void testMultipleZK() {
|
||||||
|
|
Loading…
Reference in New Issue