HBASE-3062 ZooKeeper KeeperException$ConnectionLossException is a "recoverable" exception; we should retry for a while, at least on server startup.
git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1006202 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
b7f3cfbf05
commit
71d6b124dd
|
@ -571,6 +571,9 @@ Release 0.21.0 - Unreleased
|
||||||
HBASE-3008 Memstore.updateColumnValue passes wrong flag to heapSizeChange
|
HBASE-3008 Memstore.updateColumnValue passes wrong flag to heapSizeChange
|
||||||
(Causes memstore size to go negative)
|
(Causes memstore size to go negative)
|
||||||
HBASE-3089 REST tests are broken locally and up in hudson
|
HBASE-3089 REST tests are broken locally and up in hudson
|
||||||
|
HBASE-3062 ZooKeeper KeeperException$ConnectionLossException is a
|
||||||
|
"recoverable" exception; we should retry a while on server
|
||||||
|
startup at least.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -402,12 +402,18 @@ public class HRegionServer implements HRegionInterface, HBaseRPCErrorHandler,
|
||||||
* @throws InterruptedException
|
* @throws InterruptedException
|
||||||
*/
|
*/
|
||||||
private void initialize() throws IOException, InterruptedException {
|
private void initialize() throws IOException, InterruptedException {
|
||||||
|
try {
|
||||||
initializeZooKeeper();
|
initializeZooKeeper();
|
||||||
initializeThreads();
|
initializeThreads();
|
||||||
int nbBlocks = conf.getInt("hbase.regionserver.nbreservationblocks", 4);
|
int nbBlocks = conf.getInt("hbase.regionserver.nbreservationblocks", 4);
|
||||||
for (int i = 0; i < nbBlocks; i++) {
|
for (int i = 0; i < nbBlocks; i++) {
|
||||||
reservedSpace.add(new byte[HConstants.DEFAULT_SIZE_RESERVATION_BLOCK]);
|
reservedSpace.add(new byte[HConstants.DEFAULT_SIZE_RESERVATION_BLOCK]);
|
||||||
}
|
}
|
||||||
|
} catch (Throwable t) {
|
||||||
|
// Call stop if error or process will stick around for ever since server
|
||||||
|
// puts up non-daemon threads.
|
||||||
|
this.server.stop();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
@ -104,7 +104,30 @@ public class ZooKeeperWatcher implements Watcher {
|
||||||
try {
|
try {
|
||||||
// Create all the necessary "directories" of znodes
|
// Create all the necessary "directories" of znodes
|
||||||
// TODO: Move this to an init method somewhere so not everyone calls it?
|
// TODO: Move this to an init method somewhere so not everyone calls it?
|
||||||
|
|
||||||
|
// The first call against zk can fail with connection loss. Seems common.
|
||||||
|
// Apparently this is recoverable. Retry a while.
|
||||||
|
// See http://wiki.apache.org/hadoop/ZooKeeper/ErrorHandling
|
||||||
|
// TODO: Generalize out in ZKUtil.
|
||||||
|
long wait = conf.getLong("hbase.zookeeper.recoverable.waittime", 10000);
|
||||||
|
long finished = System.currentTimeMillis() + wait;
|
||||||
|
KeeperException ke = null;
|
||||||
|
do {
|
||||||
|
try {
|
||||||
ZKUtil.createAndFailSilent(this, baseZNode);
|
ZKUtil.createAndFailSilent(this, baseZNode);
|
||||||
|
ke = null;
|
||||||
|
break;
|
||||||
|
} catch (KeeperException.ConnectionLossException e) {
|
||||||
|
if (LOG.isDebugEnabled() && (isFinishedRetryingRecoverable(finished))) {
|
||||||
|
LOG.debug("Retrying zk create for another " +
|
||||||
|
(finished - System.currentTimeMillis()) +
|
||||||
|
"ms; set 'hbase.zookeeper.recoverable.waittime' to change " +
|
||||||
|
"wait time); " + e.getMessage());
|
||||||
|
}
|
||||||
|
ke = e;
|
||||||
|
}
|
||||||
|
} while (isFinishedRetryingRecoverable(finished));
|
||||||
|
if (ke != null) throw ke;
|
||||||
ZKUtil.createAndFailSilent(this, assignmentZNode);
|
ZKUtil.createAndFailSilent(this, assignmentZNode);
|
||||||
ZKUtil.createAndFailSilent(this, rsZNode);
|
ZKUtil.createAndFailSilent(this, rsZNode);
|
||||||
ZKUtil.createAndFailSilent(this, tableZNode);
|
ZKUtil.createAndFailSilent(this, tableZNode);
|
||||||
|
@ -114,6 +137,10 @@ public class ZooKeeperWatcher implements Watcher {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private boolean isFinishedRetryingRecoverable(final long finished) {
|
||||||
|
return System.currentTimeMillis() < finished;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String toString() {
|
public String toString() {
|
||||||
return this.identifier;
|
return this.identifier;
|
||||||
|
|
Loading…
Reference in New Issue