HBASE-1232 zookeeper client won't reconnect if there is a problem
git-svn-id: https://svn.apache.org/repos/asf/hadoop/hbase/trunk@759821 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
c150f02537
commit
e1888e57f5
@ -66,6 +66,8 @@ Release 0.20.0 - Unreleased
|
|||||||
HBASE-1293 hfile doesn't recycle decompressors (Ryan Rawson via Andrew
|
HBASE-1293 hfile doesn't recycle decompressors (Ryan Rawson via Andrew
|
||||||
Purtell)
|
Purtell)
|
||||||
HBASE-1150 HMsg carries safemode flag; remove (Nitay Joffe via Stack)
|
HBASE-1150 HMsg carries safemode flag; remove (Nitay Joffe via Stack)
|
||||||
|
HBASE-1232 zookeeper client won't reconnect if there is a problem (Nitay
|
||||||
|
Joffe via Andrew Purtell)
|
||||||
|
|
||||||
IMPROVEMENTS
|
IMPROVEMENTS
|
||||||
HBASE-1089 Add count of regions on filesystem to master UI; add percentage
|
HBASE-1089 Add count of regions on filesystem to master UI; add percentage
|
||||||
|
@ -29,12 +29,20 @@ import org.apache.hadoop.hbase.MasterNotRunningException;
|
|||||||
import org.apache.hadoop.hbase.io.BatchUpdate;
|
import org.apache.hadoop.hbase.io.BatchUpdate;
|
||||||
import org.apache.hadoop.hbase.ipc.HMasterInterface;
|
import org.apache.hadoop.hbase.ipc.HMasterInterface;
|
||||||
import org.apache.hadoop.hbase.ipc.HRegionInterface;
|
import org.apache.hadoop.hbase.ipc.HRegionInterface;
|
||||||
|
import org.apache.hadoop.hbase.zookeeper.ZooKeeperWrapper;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Cluster connection.
|
* Cluster connection.
|
||||||
* {@link HConnectionManager} manages instances of this class.
|
* {@link HConnectionManager} manages instances of this class.
|
||||||
*/
|
*/
|
||||||
public interface HConnection {
|
public interface HConnection {
|
||||||
|
/**
|
||||||
|
* Retrieve ZooKeeperWrapper used by the connection.
|
||||||
|
* @return ZooKeeperWrapper handle being used by the connection.
|
||||||
|
* @throws IOException
|
||||||
|
*/
|
||||||
|
public ZooKeeperWrapper getZooKeeperWrapper() throws IOException;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @return proxy connection to master server for this instance
|
* @return proxy connection to master server for this instance
|
||||||
* @throws MasterNotRunningException
|
* @throws MasterNotRunningException
|
||||||
|
@ -57,6 +57,9 @@ import org.apache.hadoop.hbase.util.SoftValueSortedMap;
|
|||||||
import org.apache.hadoop.hbase.util.Writables;
|
import org.apache.hadoop.hbase.util.Writables;
|
||||||
import org.apache.hadoop.hbase.zookeeper.ZooKeeperWrapper;
|
import org.apache.hadoop.hbase.zookeeper.ZooKeeperWrapper;
|
||||||
import org.apache.hadoop.ipc.RemoteException;
|
import org.apache.hadoop.ipc.RemoteException;
|
||||||
|
import org.apache.zookeeper.WatchedEvent;
|
||||||
|
import org.apache.zookeeper.Watcher;
|
||||||
|
import org.apache.zookeeper.Watcher.Event.KeeperState;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* A non-instantiable class that manages connections to multiple tables in
|
* A non-instantiable class that manages connections to multiple tables in
|
||||||
@ -114,7 +117,7 @@ public class HConnectionManager implements HConstants {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* Encapsulates finding the servers for an HBase instance */
|
/* Encapsulates finding the servers for an HBase instance */
|
||||||
private static class TableServers implements ServerConnection, HConstants {
|
private static class TableServers implements ServerConnection, HConstants, Watcher {
|
||||||
private static final Log LOG = LogFactory.getLog(TableServers.class);
|
private static final Log LOG = LogFactory.getLog(TableServers.class);
|
||||||
private final Class<? extends HRegionInterface> serverInterfaceClass;
|
private final Class<? extends HRegionInterface> serverInterfaceClass;
|
||||||
private final long pause;
|
private final long pause;
|
||||||
@ -182,6 +185,29 @@ public class HConnectionManager implements HConstants {
|
|||||||
return this.pause * HConstants.RETRY_BACKOFF[ntries];
|
return this.pause * HConstants.RETRY_BACKOFF[ntries];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Called by ZooKeeper when an event occurs on our connection. We use this to
|
||||||
|
* detect our session expiring. When our session expires, we have lost our
|
||||||
|
* connection to ZooKeeper. Our handle is dead, and we need to recreate it.
|
||||||
|
*
|
||||||
|
* See http://hadoop.apache.org/zookeeper/docs/current/zookeeperProgrammers.html#ch_zkSessions
|
||||||
|
* for more information.
|
||||||
|
*
|
||||||
|
* @param event WatchedEvent witnessed by ZooKeeper.
|
||||||
|
*/
|
||||||
|
public void process(WatchedEvent event) {
|
||||||
|
KeeperState state = event.getState();
|
||||||
|
LOG.debug("Got ZooKeeper event, state: " + state + ", type: " +
|
||||||
|
event.getType() + ", path: " + event.getPath());
|
||||||
|
if (state == KeeperState.Expired) {
|
||||||
|
resetZooKeeper();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private synchronized void resetZooKeeper() {
|
||||||
|
zooKeeperWrapper = null;
|
||||||
|
}
|
||||||
|
|
||||||
// Used by master and region servers during safe mode only
|
// Used by master and region servers during safe mode only
|
||||||
public void unsetRootRegionLocation() {
|
public void unsetRootRegionLocation() {
|
||||||
this.rootRegionLocation = null;
|
this.rootRegionLocation = null;
|
||||||
@ -197,8 +223,9 @@ public class HConnectionManager implements HConstants {
|
|||||||
}
|
}
|
||||||
|
|
||||||
public HMasterInterface getMaster() throws MasterNotRunningException {
|
public HMasterInterface getMaster() throws MasterNotRunningException {
|
||||||
|
ZooKeeperWrapper zk = null;
|
||||||
try {
|
try {
|
||||||
getZooKeeperWrapper();
|
zk = getZooKeeperWrapper();
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
throw new MasterNotRunningException(e);
|
throw new MasterNotRunningException(e);
|
||||||
}
|
}
|
||||||
@ -212,7 +239,7 @@ public class HConnectionManager implements HConstants {
|
|||||||
tries++) {
|
tries++) {
|
||||||
|
|
||||||
try {
|
try {
|
||||||
masterLocation = zooKeeperWrapper.readMasterAddressOrThrow();
|
masterLocation = zk.readMasterAddressOrThrow();
|
||||||
|
|
||||||
HMasterInterface tryMaster = (HMasterInterface)HBaseRPC.getProxy(
|
HMasterInterface tryMaster = (HMasterInterface)HBaseRPC.getProxy(
|
||||||
HMasterInterface.class, HBaseRPCProtocolVersion.versionID,
|
HMasterInterface.class, HBaseRPCProtocolVersion.versionID,
|
||||||
@ -758,9 +785,9 @@ public class HConnectionManager implements HConstants {
|
|||||||
return server;
|
return server;
|
||||||
}
|
}
|
||||||
|
|
||||||
private synchronized ZooKeeperWrapper getZooKeeperWrapper() throws IOException {
|
public synchronized ZooKeeperWrapper getZooKeeperWrapper() throws IOException {
|
||||||
if (zooKeeperWrapper == null) {
|
if (zooKeeperWrapper == null) {
|
||||||
zooKeeperWrapper = new ZooKeeperWrapper(conf);
|
zooKeeperWrapper = new ZooKeeperWrapper(conf, this);
|
||||||
}
|
}
|
||||||
return zooKeeperWrapper;
|
return zooKeeperWrapper;
|
||||||
}
|
}
|
||||||
@ -778,7 +805,7 @@ public class HConnectionManager implements HConstants {
|
|||||||
|
|
||||||
// We lazily instantiate the ZooKeeper object because we don't want to
|
// We lazily instantiate the ZooKeeper object because we don't want to
|
||||||
// make the constructor have to throw IOException or handle it itself.
|
// make the constructor have to throw IOException or handle it itself.
|
||||||
ZooKeeperWrapper zooKeeperWrapper = getZooKeeperWrapper();
|
ZooKeeperWrapper zk = getZooKeeperWrapper();
|
||||||
|
|
||||||
HServerAddress rootRegionAddress = null;
|
HServerAddress rootRegionAddress = null;
|
||||||
for (int tries = 0; tries < numRetries; tries++) {
|
for (int tries = 0; tries < numRetries; tries++) {
|
||||||
@ -787,9 +814,9 @@ public class HConnectionManager implements HConstants {
|
|||||||
while (rootRegionAddress == null && localTimeouts < numRetries) {
|
while (rootRegionAddress == null && localTimeouts < numRetries) {
|
||||||
// Don't read root region until we're out of safe mode so we know
|
// Don't read root region until we're out of safe mode so we know
|
||||||
// that the meta regions have been assigned.
|
// that the meta regions have been assigned.
|
||||||
boolean outOfSafeMode = zooKeeperWrapper.checkOutOfSafeMode();
|
boolean outOfSafeMode = zk.checkOutOfSafeMode();
|
||||||
if (outOfSafeMode) {
|
if (outOfSafeMode) {
|
||||||
rootRegionAddress = zooKeeperWrapper.readRootRegionLocation();
|
rootRegionAddress = zk.readRootRegionLocation();
|
||||||
}
|
}
|
||||||
if (rootRegionAddress == null) {
|
if (rootRegionAddress == null) {
|
||||||
try {
|
try {
|
||||||
|
@ -113,6 +113,24 @@ public class ZooKeeperWrapper implements HConstants {
|
|||||||
masterElectionZNode = getZNode(parentZNode, masterAddressZNodeName);
|
masterElectionZNode = getZNode(parentZNode, masterAddressZNodeName);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This is for testing KeeperException.SessionExpiredException.
|
||||||
|
* See HBASE-1232.
|
||||||
|
* @return long session ID of this ZooKeeper session.
|
||||||
|
*/
|
||||||
|
public long getSessionID() {
|
||||||
|
return zooKeeper.getSessionId();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This is for testing KeeperException.SessionExpiredException.
|
||||||
|
* See HBASE-1232.
|
||||||
|
* @return byte[] password of this ZooKeeper session.
|
||||||
|
*/
|
||||||
|
public byte[] getSessionPassword() {
|
||||||
|
return zooKeeper.getSessionPasswd();
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* This is for tests to directly set the ZooKeeper quorum servers.
|
* This is for tests to directly set the ZooKeeper quorum servers.
|
||||||
* @param servers comma separated host:port ZooKeeper quorum servers.
|
* @param servers comma separated host:port ZooKeeper quorum servers.
|
||||||
|
@ -564,7 +564,7 @@ public abstract class HBaseTestCase extends TestCase {
|
|||||||
*
|
*
|
||||||
* Sets the configuration parameter TEST_DIRECTORY_KEY if not already set.
|
* Sets the configuration parameter TEST_DIRECTORY_KEY if not already set.
|
||||||
* Sets the boolean debugging if "DEBUGGING" is set in the environment.
|
* Sets the boolean debugging if "DEBUGGING" is set in the environment.
|
||||||
* If debugging is enabled, reconfigures loggin so that the root log level is
|
* If debugging is enabled, reconfigures logging so that the root log level is
|
||||||
* set to WARN and the logging level for the package is set to DEBUG.
|
* set to WARN and the logging level for the package is set to DEBUG.
|
||||||
*/
|
*/
|
||||||
public static void initialize() {
|
public static void initialize() {
|
||||||
|
@ -58,16 +58,15 @@ public class MiniZooKeeperCluster {
|
|||||||
private boolean started;
|
private boolean started;
|
||||||
private int numPeers;
|
private int numPeers;
|
||||||
private File baseDir;
|
private File baseDir;
|
||||||
|
private String quorumServers;
|
||||||
|
|
||||||
// for distributed mode.
|
// for distributed mode.
|
||||||
private QuorumPeer[] quorumPeers;
|
private QuorumPeer[] quorumPeers;
|
||||||
// for standalone mode.
|
// for standalone mode.
|
||||||
private NIOServerCnxn.Factory standaloneServerFactory;
|
private NIOServerCnxn.Factory standaloneServerFactory;
|
||||||
|
|
||||||
/**
|
/** Create mini ZooKeeper cluster. */
|
||||||
* @throws IOException
|
public MiniZooKeeperCluster() {
|
||||||
*/
|
|
||||||
public MiniZooKeeperCluster() throws IOException {
|
|
||||||
this.started = false;
|
this.started = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -81,6 +80,13 @@ public class MiniZooKeeperCluster {
|
|||||||
FileTxnLog.setPreallocSize(100);
|
FileTxnLog.setPreallocSize(100);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return String ZooKeeper quorum servers.
|
||||||
|
*/
|
||||||
|
public String getQuorumServers() {
|
||||||
|
return quorumServers;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @param numPeers
|
* @param numPeers
|
||||||
* @param baseDir
|
* @param baseDir
|
||||||
@ -116,7 +122,8 @@ public class MiniZooKeeperCluster {
|
|||||||
standaloneServerFactory = new NIOServerCnxn.Factory(CLIENT_PORT_START);
|
standaloneServerFactory = new NIOServerCnxn.Factory(CLIENT_PORT_START);
|
||||||
standaloneServerFactory.startup(server);
|
standaloneServerFactory.startup(server);
|
||||||
|
|
||||||
ZooKeeperWrapper.setQuorumServers("localhost:" + CLIENT_PORT_START);
|
quorumServers = "localhost:" + CLIENT_PORT_START;
|
||||||
|
ZooKeeperWrapper.setQuorumServers(quorumServers);
|
||||||
|
|
||||||
if (!waitForServerUp(CLIENT_PORT_START, CONNECTION_TIMEOUT)) {
|
if (!waitForServerUp(CLIENT_PORT_START, CONNECTION_TIMEOUT)) {
|
||||||
throw new IOException("Waiting for startup of standalone server");
|
throw new IOException("Waiting for startup of standalone server");
|
||||||
@ -152,8 +159,8 @@ public class MiniZooKeeperCluster {
|
|||||||
serversBuffer.append("localhost:" + port);
|
serversBuffer.append("localhost:" + port);
|
||||||
}
|
}
|
||||||
|
|
||||||
String servers = serversBuffer.toString();
|
quorumServers = serversBuffer.toString();
|
||||||
ZooKeeperWrapper.setQuorumServers(servers);
|
ZooKeeperWrapper.setQuorumServers(quorumServers);
|
||||||
|
|
||||||
// Start quorum peer threads.
|
// Start quorum peer threads.
|
||||||
for (QuorumPeer qp : quorumPeers) {
|
for (QuorumPeer qp : quorumPeers) {
|
||||||
|
@ -21,14 +21,24 @@ package org.apache.hadoop.hbase;
|
|||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
|
||||||
|
import org.apache.hadoop.hbase.client.HConnection;
|
||||||
|
import org.apache.hadoop.hbase.client.HConnectionManager;
|
||||||
import org.apache.hadoop.hbase.client.HTable;
|
import org.apache.hadoop.hbase.client.HTable;
|
||||||
import org.apache.hadoop.hbase.master.HMaster;
|
import org.apache.hadoop.hbase.master.HMaster;
|
||||||
import org.apache.hadoop.hbase.zookeeper.ZooKeeperWrapper;
|
import org.apache.hadoop.hbase.zookeeper.ZooKeeperWrapper;
|
||||||
|
import org.apache.zookeeper.WatchedEvent;
|
||||||
|
import org.apache.zookeeper.Watcher;
|
||||||
|
import org.apache.zookeeper.ZooKeeper;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
public class TestZooKeeper extends HBaseClusterTestCase {
|
public class TestZooKeeper extends HBaseClusterTestCase {
|
||||||
|
private static class EmptyWatcher implements Watcher {
|
||||||
|
public EmptyWatcher() {}
|
||||||
|
public void process(WatchedEvent event) {}
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected void setUp() throws Exception {
|
protected void setUp() throws Exception {
|
||||||
setOpenMetaTable(false);
|
setOpenMetaTable(false);
|
||||||
@ -71,4 +81,29 @@ public class TestZooKeeper extends HBaseClusterTestCase {
|
|||||||
ZooKeeperWrapper zooKeeper = new ZooKeeperWrapper(conf);
|
ZooKeeperWrapper zooKeeper = new ZooKeeperWrapper(conf);
|
||||||
assertTrue(zooKeeper.writeOutOfSafeMode());
|
assertTrue(zooKeeper.writeOutOfSafeMode());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* See HBASE-1232 and http://wiki.apache.org/hadoop/ZooKeeper/FAQ#4.
|
||||||
|
* @throws IOException
|
||||||
|
* @throws InterruptedException
|
||||||
|
*/
|
||||||
|
public void testClientSessionExpired() throws IOException, InterruptedException {
|
||||||
|
new HTable(conf, HConstants.META_TABLE_NAME);
|
||||||
|
|
||||||
|
String quorumServers = zooKeeperCluster.getQuorumServers();
|
||||||
|
int sessionTimeout = conf.getInt("zookeeper.session.timeout", 2 * 1000);
|
||||||
|
Watcher watcher = new EmptyWatcher();
|
||||||
|
HConnection connection = HConnectionManager.getConnection(conf);
|
||||||
|
ZooKeeperWrapper connectionZK = connection.getZooKeeperWrapper();
|
||||||
|
long sessionID = connectionZK.getSessionID();
|
||||||
|
byte[] password = connectionZK.getSessionPassword();
|
||||||
|
|
||||||
|
ZooKeeper zk = new ZooKeeper(quorumServers, sessionTimeout, watcher, sessionID, password);
|
||||||
|
zk.close();
|
||||||
|
|
||||||
|
Thread.sleep(sessionTimeout * 3);
|
||||||
|
|
||||||
|
System.err.println("ZooKeeper should have timed out");
|
||||||
|
connection.relocateRegion(HConstants.ROOT_TABLE_NAME, HConstants.EMPTY_BYTE_ARRAY);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user