HBASE-5153 Add retry logic in HConnectionImplementation#resetZooKeeperTrackers (Jieshan)
git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1232292 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
e20aa2b3a6
commit
5ce4d352de
@ -862,6 +862,10 @@ Release 0.92.0 - Unreleased
|
|||||||
HBASE-2742 Provide strong authentication with a secure RPC engine
|
HBASE-2742 Provide strong authentication with a secure RPC engine
|
||||||
HBASE-3025 Coprocessor based access control
|
HBASE-3025 Coprocessor based access control
|
||||||
|
|
||||||
|
Release 0.90.7 - Unreleased
|
||||||
|
|
||||||
|
BUG FIXES
|
||||||
|
HBASE-5153 Add retry logic in HConnectionImplementation#resetZooKeeperTrackers (Jieshan)
|
||||||
Release 0.90.6 - Unreleased
|
Release 0.90.6 - Unreleased
|
||||||
|
|
||||||
BUG FIXES
|
BUG FIXES
|
||||||
|
@ -235,8 +235,8 @@ public class CatalogTracker {
|
|||||||
public void start() throws IOException, InterruptedException {
|
public void start() throws IOException, InterruptedException {
|
||||||
LOG.debug("Starting catalog tracker " + this);
|
LOG.debug("Starting catalog tracker " + this);
|
||||||
try {
|
try {
|
||||||
this.rootRegionTracker.start();
|
this.rootRegionTracker.start(true);
|
||||||
this.metaNodeTracker.start();
|
this.metaNodeTracker.start(true);
|
||||||
} catch (RuntimeException e) {
|
} catch (RuntimeException e) {
|
||||||
Throwable t = e.getCause();
|
Throwable t = e.getCause();
|
||||||
this.abortable.abort(e.getMessage(), t);
|
this.abortable.abort(e.getMessage(), t);
|
||||||
|
@ -0,0 +1,36 @@
|
|||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.hadoop.hbase.client;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Thrown when HConnection has been closed.
|
||||||
|
*/
|
||||||
|
public class ClosedConnectionException extends IOException {
|
||||||
|
private static final long serialVersionUID = 8792360655678089586L;
|
||||||
|
|
||||||
|
public ClosedConnectionException() {
|
||||||
|
super();
|
||||||
|
}
|
||||||
|
|
||||||
|
public ClosedConnectionException(String s) {
|
||||||
|
super(s);
|
||||||
|
}
|
||||||
|
}
|
@ -121,7 +121,7 @@ public class HBaseAdmin implements Abortable, Closeable {
|
|||||||
}
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
Thread.sleep(getPauseTime(tries));
|
Thread.sleep(ConnectionUtils.getPauseTime(this.pause, tries));
|
||||||
} catch (InterruptedException e) {
|
} catch (InterruptedException e) {
|
||||||
Thread.currentThread().interrupt();
|
Thread.currentThread().interrupt();
|
||||||
// we should delete connection between client and zookeeper
|
// we should delete connection between client and zookeeper
|
||||||
@ -301,14 +301,6 @@ public class HBaseAdmin implements Abortable, Closeable {
|
|||||||
return this.connection.getHTableDescriptor(tableName);
|
return this.connection.getHTableDescriptor(tableName);
|
||||||
}
|
}
|
||||||
|
|
||||||
private long getPauseTime(int tries) {
|
|
||||||
int triesCount = tries;
|
|
||||||
if (triesCount >= HConstants.RETRY_BACKOFF.length) {
|
|
||||||
triesCount = HConstants.RETRY_BACKOFF.length - 1;
|
|
||||||
}
|
|
||||||
return this.pause * HConstants.RETRY_BACKOFF[triesCount];
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Creates a new table.
|
* Creates a new table.
|
||||||
* Synchronous operation.
|
* Synchronous operation.
|
||||||
@ -429,7 +421,7 @@ public class HBaseAdmin implements Abortable, Closeable {
|
|||||||
" of " + numRegs + " regions are online; retries exhausted.");
|
" of " + numRegs + " regions are online; retries exhausted.");
|
||||||
}
|
}
|
||||||
try { // Sleep
|
try { // Sleep
|
||||||
Thread.sleep(getPauseTime(tries));
|
Thread.sleep(ConnectionUtils.getPauseTime(this.pause, tries));
|
||||||
} catch (InterruptedException e) {
|
} catch (InterruptedException e) {
|
||||||
throw new InterruptedIOException("Interrupted when opening" +
|
throw new InterruptedIOException("Interrupted when opening" +
|
||||||
" regions; " + actualRegCount.get() + " of " + numRegs +
|
" regions; " + actualRegCount.get() + " of " + numRegs +
|
||||||
@ -557,7 +549,7 @@ public class HBaseAdmin implements Abortable, Closeable {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
try {
|
try {
|
||||||
Thread.sleep(getPauseTime(tries));
|
Thread.sleep(ConnectionUtils.getPauseTime(this.pause, tries));
|
||||||
} catch (InterruptedException e) {
|
} catch (InterruptedException e) {
|
||||||
// continue
|
// continue
|
||||||
}
|
}
|
||||||
@ -638,7 +630,7 @@ public class HBaseAdmin implements Abortable, Closeable {
|
|||||||
if (enabled) {
|
if (enabled) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
long sleep = getPauseTime(tries);
|
long sleep = ConnectionUtils.getPauseTime(this.pause, tries);
|
||||||
if (LOG.isDebugEnabled()) {
|
if (LOG.isDebugEnabled()) {
|
||||||
LOG.debug("Sleeping= " + sleep + "ms, waiting for all regions to be " +
|
LOG.debug("Sleeping= " + sleep + "ms, waiting for all regions to be " +
|
||||||
"enabled in " + Bytes.toString(tableName));
|
"enabled in " + Bytes.toString(tableName));
|
||||||
@ -779,7 +771,7 @@ public class HBaseAdmin implements Abortable, Closeable {
|
|||||||
if (disabled) {
|
if (disabled) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
long sleep = getPauseTime(tries);
|
long sleep = ConnectionUtils.getPauseTime(this.pause, tries);
|
||||||
if (LOG.isDebugEnabled()) {
|
if (LOG.isDebugEnabled()) {
|
||||||
LOG.debug("Sleeping= " + sleep + "ms, waiting for all regions to be " +
|
LOG.debug("Sleeping= " + sleep + "ms, waiting for all regions to be " +
|
||||||
"disabled in " + Bytes.toString(tableName));
|
"disabled in " + Bytes.toString(tableName));
|
||||||
|
@ -146,6 +146,12 @@ public interface HConnection extends Abortable, Closeable {
|
|||||||
*/
|
*/
|
||||||
public void clearRegionCache();
|
public void clearRegionCache();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Closes the original connection and creates a new one.
|
||||||
|
* @throws ZooKeeperConnectionException if unable to connect to zookeeper
|
||||||
|
*/
|
||||||
|
public void resetZooKeeperTrackersWithRetries() throws ZooKeeperConnectionException;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Allows flushing the region cache of all locations that pertain to
|
* Allows flushing the region cache of all locations that pertain to
|
||||||
* <code>tableName</code>
|
* <code>tableName</code>
|
||||||
|
@ -574,35 +574,70 @@ public class HConnectionManager {
|
|||||||
HConstants.HBASE_CLIENT_PREFETCH_LIMIT,
|
HConstants.HBASE_CLIENT_PREFETCH_LIMIT,
|
||||||
HConstants.DEFAULT_HBASE_CLIENT_PREFETCH_LIMIT);
|
HConstants.DEFAULT_HBASE_CLIENT_PREFETCH_LIMIT);
|
||||||
|
|
||||||
setupZookeeperTrackers();
|
setupZookeeperTrackers(true);
|
||||||
|
|
||||||
this.master = null;
|
this.master = null;
|
||||||
this.masterChecked = false;
|
this.masterChecked = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
private synchronized void setupZookeeperTrackers()
|
private synchronized boolean setupZookeeperTrackers(boolean allowAbort)
|
||||||
throws ZooKeeperConnectionException{
|
throws ZooKeeperConnectionException{
|
||||||
// initialize zookeeper and master address manager
|
// initialize zookeeper and master address manager
|
||||||
this.zooKeeper = getZooKeeperWatcher();
|
this.zooKeeper = getZooKeeperWatcher();
|
||||||
masterAddressTracker = new MasterAddressTracker(this.zooKeeper, this);
|
this.masterAddressTracker = new MasterAddressTracker(this.zooKeeper, this);
|
||||||
masterAddressTracker.start();
|
|
||||||
|
|
||||||
this.rootRegionTracker = new RootRegionTracker(this.zooKeeper, this);
|
this.rootRegionTracker = new RootRegionTracker(this.zooKeeper, this);
|
||||||
this.rootRegionTracker.start();
|
if (!this.masterAddressTracker.start(allowAbort)) {
|
||||||
|
this.masterAddressTracker.stop();
|
||||||
|
this.masterAddressTracker = null;
|
||||||
|
this.zooKeeper = null;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (!this.rootRegionTracker.start(allowAbort)) {
|
||||||
|
this.masterAddressTracker.stop();
|
||||||
|
this.rootRegionTracker.stop();
|
||||||
|
this.masterAddressTracker = null;
|
||||||
|
this.rootRegionTracker = null;
|
||||||
|
this.zooKeeper = null;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
this.clusterId = new ClusterId(this.zooKeeper, this);
|
this.clusterId = new ClusterId(this.zooKeeper, this);
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
private synchronized void resetZooKeeperTrackers()
|
@Override
|
||||||
|
public synchronized void resetZooKeeperTrackersWithRetries()
|
||||||
throws ZooKeeperConnectionException {
|
throws ZooKeeperConnectionException {
|
||||||
LOG.info("Trying to reconnect to zookeeper");
|
LOG.info("Trying to reconnect to zookeeper");
|
||||||
masterAddressTracker.stop();
|
if (this.masterAddressTracker != null) {
|
||||||
masterAddressTracker = null;
|
this.masterAddressTracker.stop();
|
||||||
rootRegionTracker.stop();
|
this.masterAddressTracker = null;
|
||||||
rootRegionTracker = null;
|
}
|
||||||
clusterId = null;
|
if (this.rootRegionTracker != null) {
|
||||||
|
this.rootRegionTracker.stop();
|
||||||
|
this.rootRegionTracker = null;
|
||||||
|
}
|
||||||
this.zooKeeper = null;
|
this.zooKeeper = null;
|
||||||
setupZookeeperTrackers();
|
this.clusterId = null;
|
||||||
|
for (int tries = 0; tries < this.numRetries; tries++) {
|
||||||
|
boolean isLastTime = (tries == (this.numRetries - 1));
|
||||||
|
try {
|
||||||
|
if (setupZookeeperTrackers(isLastTime)) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
} catch (ZooKeeperConnectionException zkce) {
|
||||||
|
if (isLastTime) {
|
||||||
|
throw zkce;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
LOG.info("Tried to reconnect to zookeeper but failed, already tried "
|
||||||
|
+ tries + " times.");
|
||||||
|
try {
|
||||||
|
Thread.sleep(ConnectionUtils.getPauseTime(this.pause, tries));
|
||||||
|
} catch (InterruptedException e1) {
|
||||||
|
Thread.currentThread().interrupt();
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public Configuration getConfiguration() {
|
public Configuration getConfiguration() {
|
||||||
@ -802,7 +837,9 @@ public class HConnectionManager {
|
|||||||
private HRegionLocation locateRegion(final byte [] tableName,
|
private HRegionLocation locateRegion(final byte [] tableName,
|
||||||
final byte [] row, boolean useCache)
|
final byte [] row, boolean useCache)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
if (this.closed) throw new IOException(toString() + " closed");
|
if (this.closed) {
|
||||||
|
throw new ClosedConnectionException(toString() + " closed");
|
||||||
|
}
|
||||||
if (tableName == null || tableName.length == 0) {
|
if (tableName == null || tableName.length == 0) {
|
||||||
throw new IllegalArgumentException(
|
throw new IllegalArgumentException(
|
||||||
"table name cannot be null or zero length");
|
"table name cannot be null or zero length");
|
||||||
@ -1024,7 +1061,8 @@ public class HConnectionManager {
|
|||||||
((metaLocation == null)? "null": "{" + metaLocation + "}") +
|
((metaLocation == null)? "null": "{" + metaLocation + "}") +
|
||||||
", attempt=" + tries + " of " +
|
", attempt=" + tries + " of " +
|
||||||
this.numRetries + " failed; retrying after sleep of " +
|
this.numRetries + " failed; retrying after sleep of " +
|
||||||
ConnectionUtils.getPauseTime(this.pause, tries) + " because: " + e.getMessage());
|
ConnectionUtils.getPauseTime(this.pause, tries) + " because: "
|
||||||
|
+ e.getMessage());
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
throw e;
|
throw e;
|
||||||
@ -1331,11 +1369,17 @@ public class HConnectionManager {
|
|||||||
|
|
||||||
public <T> T getRegionServerWithRetries(ServerCallable<T> callable)
|
public <T> T getRegionServerWithRetries(ServerCallable<T> callable)
|
||||||
throws IOException, RuntimeException {
|
throws IOException, RuntimeException {
|
||||||
|
if (this.closed) {
|
||||||
|
throw new ClosedConnectionException(toString() + " closed");
|
||||||
|
}
|
||||||
return callable.withRetries();
|
return callable.withRetries();
|
||||||
}
|
}
|
||||||
|
|
||||||
public <T> T getRegionServerWithoutRetries(ServerCallable<T> callable)
|
public <T> T getRegionServerWithoutRetries(ServerCallable<T> callable)
|
||||||
throws IOException, RuntimeException {
|
throws IOException, RuntimeException {
|
||||||
|
if (this.closed) {
|
||||||
|
throw new ClosedConnectionException(toString() + " closed");
|
||||||
|
}
|
||||||
return callable.withoutRetries();
|
return callable.withoutRetries();
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1659,11 +1703,12 @@ public class HConnectionManager {
|
|||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void abort(final String msg, Throwable t) {
|
public void abort(final String msg, Throwable t) {
|
||||||
if (t instanceof KeeperException.SessionExpiredException) {
|
if (t instanceof KeeperException.SessionExpiredException
|
||||||
|
|| t instanceof KeeperException.ConnectionLossException) {
|
||||||
try {
|
try {
|
||||||
LOG.info("This client just lost it's session with ZooKeeper, trying" +
|
LOG.info("This client just lost it's session with ZooKeeper, trying" +
|
||||||
" to reconnect.");
|
" to reconnect.");
|
||||||
resetZooKeeperTrackers();
|
resetZooKeeperTrackersWithRetries();
|
||||||
LOG.info("Reconnected successfully. This disconnect could have been" +
|
LOG.info("Reconnected successfully. This disconnect could have been" +
|
||||||
" caused by a network partition or a long-running GC pause," +
|
" caused by a network partition or a long-running GC pause," +
|
||||||
" either way it's recommended that you verify your environment.");
|
" either way it's recommended that you verify your environment.");
|
||||||
|
@ -416,7 +416,7 @@ implements HMasterInterface, HMasterRegionInterface, MasterServices, Server {
|
|||||||
// Set the cluster as up. If new RSs, they'll be waiting on this before
|
// Set the cluster as up. If new RSs, they'll be waiting on this before
|
||||||
// going ahead with their startup.
|
// going ahead with their startup.
|
||||||
this.clusterStatusTracker = new ClusterStatusTracker(getZooKeeper(), this);
|
this.clusterStatusTracker = new ClusterStatusTracker(getZooKeeper(), this);
|
||||||
this.clusterStatusTracker.start();
|
this.clusterStatusTracker.start(true);
|
||||||
boolean wasUp = this.clusterStatusTracker.isClusterUp();
|
boolean wasUp = this.clusterStatusTracker.isClusterUp();
|
||||||
if (!wasUp) this.clusterStatusTracker.setClusterUp();
|
if (!wasUp) this.clusterStatusTracker.setClusterUp();
|
||||||
|
|
||||||
@ -424,7 +424,7 @@ implements HMasterInterface, HMasterRegionInterface, MasterServices, Server {
|
|||||||
this.schemaChangeTracker = new MasterSchemaChangeTracker(getZooKeeper(),
|
this.schemaChangeTracker = new MasterSchemaChangeTracker(getZooKeeper(),
|
||||||
this, this,
|
this, this,
|
||||||
conf.getInt("hbase.instant.schema.alter.timeout", 60000));
|
conf.getInt("hbase.instant.schema.alter.timeout", 60000));
|
||||||
this.schemaChangeTracker.start();
|
this.schemaChangeTracker.start(true);
|
||||||
|
|
||||||
LOG.info("Server active/primary master; " + this.serverName +
|
LOG.info("Server active/primary master; " + this.serverName +
|
||||||
", sessionid=0x" +
|
", sessionid=0x" +
|
||||||
|
@ -568,13 +568,13 @@ public class HRegionServer implements HRegionInterface, HBaseRPCErrorHandler,
|
|||||||
// block until a master is available. No point in starting up if no master
|
// block until a master is available. No point in starting up if no master
|
||||||
// running.
|
// running.
|
||||||
this.masterAddressManager = new MasterAddressTracker(this.zooKeeper, this);
|
this.masterAddressManager = new MasterAddressTracker(this.zooKeeper, this);
|
||||||
this.masterAddressManager.start();
|
this.masterAddressManager.start(true);
|
||||||
blockAndCheckIfStopped(this.masterAddressManager);
|
blockAndCheckIfStopped(this.masterAddressManager);
|
||||||
|
|
||||||
// Wait on cluster being up. Master will set this flag up in zookeeper
|
// Wait on cluster being up. Master will set this flag up in zookeeper
|
||||||
// when ready.
|
// when ready.
|
||||||
this.clusterStatusTracker = new ClusterStatusTracker(this.zooKeeper, this);
|
this.clusterStatusTracker = new ClusterStatusTracker(this.zooKeeper, this);
|
||||||
this.clusterStatusTracker.start();
|
this.clusterStatusTracker.start(true);
|
||||||
blockAndCheckIfStopped(this.clusterStatusTracker);
|
blockAndCheckIfStopped(this.clusterStatusTracker);
|
||||||
|
|
||||||
// Create the catalog tracker and start it;
|
// Create the catalog tracker and start it;
|
||||||
@ -585,7 +585,7 @@ public class HRegionServer implements HRegionInterface, HBaseRPCErrorHandler,
|
|||||||
// Schema change tracker
|
// Schema change tracker
|
||||||
this.schemaChangeTracker = new SchemaChangeTracker(this.zooKeeper,
|
this.schemaChangeTracker = new SchemaChangeTracker(this.zooKeeper,
|
||||||
this, this);
|
this, this);
|
||||||
this.schemaChangeTracker.start();
|
this.schemaChangeTracker.start(true);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -162,7 +162,7 @@ public class ReplicationZookeeper {
|
|||||||
// Set a tracker on replicationStateNodeNode
|
// Set a tracker on replicationStateNodeNode
|
||||||
this.statusTracker =
|
this.statusTracker =
|
||||||
new ReplicationStatusTracker(this.zookeeper, abortable);
|
new ReplicationStatusTracker(this.zookeeper, abortable);
|
||||||
statusTracker.start();
|
statusTracker.start(true);
|
||||||
readReplicationStateZnode();
|
readReplicationStateZnode();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -626,7 +626,7 @@ public class HBaseFsck {
|
|||||||
}
|
}
|
||||||
|
|
||||||
});
|
});
|
||||||
rootRegionTracker.start();
|
rootRegionTracker.start(true);
|
||||||
ServerName sn = null;
|
ServerName sn = null;
|
||||||
try {
|
try {
|
||||||
sn = rootRegionTracker.getRootRegionLocation();
|
sn = rootRegionTracker.getRootRegionLocation();
|
||||||
|
@ -60,15 +60,20 @@ public class MasterSchemaChangeTracker extends ZooKeeperNodeTracker {
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void start() {
|
public boolean start(boolean allowAbort) {
|
||||||
try {
|
try {
|
||||||
watcher.registerListener(this);
|
watcher.registerListener(this);
|
||||||
List<String> tables =
|
List<String> tables =
|
||||||
ZKUtil.listChildrenNoWatch(watcher, watcher.schemaZNode);
|
ZKUtil.listChildrenNoWatch(watcher, watcher.schemaZNode);
|
||||||
processCompletedSchemaChanges(tables);
|
processCompletedSchemaChanges(tables);
|
||||||
|
return true;
|
||||||
} catch (KeeperException e) {
|
} catch (KeeperException e) {
|
||||||
LOG.error("MasterSchemaChangeTracker startup failed.", e);
|
if (allowAbort && abortable != null) {
|
||||||
abortable.abort("MasterSchemaChangeTracker startup failed", e);
|
abortable.abort("MasterSchemaChangeTracker startup failed", e);
|
||||||
|
} else {
|
||||||
|
LOG.error("MasterSchemaChangeTracker startup failed.", e);
|
||||||
|
}
|
||||||
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -63,14 +63,21 @@ public class SchemaChangeTracker extends ZooKeeperNodeTracker {
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void start() {
|
public boolean start(boolean allowAbort) {
|
||||||
try {
|
try {
|
||||||
watcher.registerListener(this);
|
watcher.registerListener(this);
|
||||||
ZKUtil.listChildrenAndWatchThem(watcher, node);
|
ZKUtil.listChildrenAndWatchThem(watcher, node);
|
||||||
// Clean-up old in-process schema changes for this RS now?
|
// Clean-up old in-process schema changes for this RS now?
|
||||||
|
return true;
|
||||||
} catch (KeeperException e) {
|
} catch (KeeperException e) {
|
||||||
LOG.error("RegionServer SchemaChangeTracker startup failed with " +
|
if (allowAbort && (abortable != null)) {
|
||||||
"KeeperException.", e);
|
abortable.abort("RegionServer SchemaChangeTracker startup failed",
|
||||||
|
e);
|
||||||
|
} else {
|
||||||
|
LOG.error("RegionServer SchemaChangeTracker startup failed with " +
|
||||||
|
"KeeperException.", e);
|
||||||
|
}
|
||||||
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -69,8 +69,12 @@ public abstract class ZooKeeperNodeTracker extends ZooKeeperListener {
|
|||||||
*
|
*
|
||||||
* <p>Use {@link #blockUntilAvailable()} to block until the node is available
|
* <p>Use {@link #blockUntilAvailable()} to block until the node is available
|
||||||
* or {@link #getData(boolean)} to get the data of the node if it is available.
|
* or {@link #getData(boolean)} to get the data of the node if it is available.
|
||||||
|
*
|
||||||
|
* @param allowAbort If allowAbort is false, the abortable should not abort when a
|
||||||
|
* KeeperException occur.
|
||||||
|
* @return start result. true if start successfully.
|
||||||
*/
|
*/
|
||||||
public synchronized void start() {
|
public synchronized boolean start(boolean allowAbort) {
|
||||||
this.watcher.registerListener(this);
|
this.watcher.registerListener(this);
|
||||||
try {
|
try {
|
||||||
if(ZKUtil.watchAndCheckExists(watcher, node)) {
|
if(ZKUtil.watchAndCheckExists(watcher, node)) {
|
||||||
@ -80,15 +84,21 @@ public abstract class ZooKeeperNodeTracker extends ZooKeeperListener {
|
|||||||
} else {
|
} else {
|
||||||
// It existed but now does not, try again to ensure a watch is set
|
// It existed but now does not, try again to ensure a watch is set
|
||||||
LOG.debug("Try starting again because there is no data from " + node);
|
LOG.debug("Try starting again because there is no data from " + node);
|
||||||
start();
|
return start(allowAbort);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
return true;
|
||||||
} catch (KeeperException e) {
|
} catch (KeeperException e) {
|
||||||
abortable.abort("Unexpected exception during initialization, aborting", e);
|
if (allowAbort && (abortable != null)) {
|
||||||
|
abortable.abort("Unexpected exception during initialization, aborting",
|
||||||
|
e);
|
||||||
|
}
|
||||||
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public synchronized void stop() {
|
public synchronized void stop() {
|
||||||
|
this.watcher.unregisterListener(this);
|
||||||
this.stopped = true;
|
this.stopped = true;
|
||||||
notifyAll();
|
notifyAll();
|
||||||
}
|
}
|
||||||
@ -171,7 +181,9 @@ public abstract class ZooKeeperNodeTracker extends ZooKeeperListener {
|
|||||||
nodeDeleted(path);
|
nodeDeleted(path);
|
||||||
}
|
}
|
||||||
} catch(KeeperException e) {
|
} catch(KeeperException e) {
|
||||||
abortable.abort("Unexpected exception handling nodeCreated event", e);
|
if (abortable != null) {
|
||||||
|
abortable.abort("Unexpected exception handling nodeCreated event", e);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -185,7 +197,9 @@ public abstract class ZooKeeperNodeTracker extends ZooKeeperListener {
|
|||||||
this.data = null;
|
this.data = null;
|
||||||
}
|
}
|
||||||
} catch(KeeperException e) {
|
} catch(KeeperException e) {
|
||||||
abortable.abort("Unexpected exception handling nodeDeleted event", e);
|
if (abortable != null) {
|
||||||
|
abortable.abort("Unexpected exception handling nodeDeleted event", e);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -226,6 +226,14 @@ public class ZooKeeperWatcher implements Watcher, Abortable {
|
|||||||
listeners.add(listener);
|
listeners.add(listener);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Unregister the specified listener.
|
||||||
|
* @param listener
|
||||||
|
*/
|
||||||
|
public void unregisterListener(ZooKeeperListener listener) {
|
||||||
|
listeners.remove(listener);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Register the specified listener to receive ZooKeeper events and add it as
|
* Register the specified listener to receive ZooKeeper events and add it as
|
||||||
* the first in the list of current listeners.
|
* the first in the list of current listeners.
|
||||||
|
@ -35,7 +35,6 @@ import java.io.FileOutputStream;
|
|||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.util.Collections;
|
|
||||||
import java.util.HashSet;
|
import java.util.HashSet;
|
||||||
import java.util.Iterator;
|
import java.util.Iterator;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
@ -83,6 +82,7 @@ import org.apache.hadoop.hbase.regionserver.HRegionServer;
|
|||||||
import org.apache.hadoop.hbase.regionserver.Store;
|
import org.apache.hadoop.hbase.regionserver.Store;
|
||||||
import org.apache.hadoop.hbase.util.Bytes;
|
import org.apache.hadoop.hbase.util.Bytes;
|
||||||
import org.apache.hadoop.io.DataInputBuffer;
|
import org.apache.hadoop.io.DataInputBuffer;
|
||||||
|
import org.apache.zookeeper.KeeperException;
|
||||||
import org.junit.After;
|
import org.junit.After;
|
||||||
import org.junit.AfterClass;
|
import org.junit.AfterClass;
|
||||||
import org.junit.Before;
|
import org.junit.Before;
|
||||||
@ -4568,6 +4568,37 @@ public class TestFromClientSide {
|
|||||||
assertTrue(addrAfter.getPort() != addrCache.getPort());
|
assertTrue(addrAfter.getPort() != addrCache.getPort());
|
||||||
assertEquals(addrAfter.getPort(), addrNoCache.getPort());
|
assertEquals(addrAfter.getPort(), addrNoCache.getPort());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test HConnection can be recovered after this connection has been
|
||||||
|
* aborted.
|
||||||
|
* @throws IOException
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void testConnectionResetAfterAbort() throws IOException {
|
||||||
|
final byte[] COLUMN_FAMILY = Bytes.toBytes("columnfam");
|
||||||
|
final byte[] COLUMN = Bytes.toBytes("col");
|
||||||
|
HTable table = TEST_UTIL.createTable(
|
||||||
|
Bytes.toBytes("testConnectionRecover"), new byte[][] { COLUMN_FAMILY });
|
||||||
|
Put put01 = new Put(Bytes.toBytes("testrow1"));
|
||||||
|
put01.add(COLUMN_FAMILY, COLUMN, Bytes.toBytes("testValue"));
|
||||||
|
table.put(put01);
|
||||||
|
|
||||||
|
// At this time, abort the connection.
|
||||||
|
HConnection conn = table.getConnection();
|
||||||
|
conn.abort("Test Connection Abort", new KeeperException.ConnectionLossException());
|
||||||
|
boolean putSuccess = true;
|
||||||
|
// This put will success, for the connection has been recovered.
|
||||||
|
try {
|
||||||
|
Put put02 = new Put(Bytes.toBytes("testrow1"));
|
||||||
|
put02.add(COLUMN_FAMILY, COLUMN, Bytes.toBytes("testValue"));
|
||||||
|
table.put(put02);
|
||||||
|
} catch (IOException ioe) {
|
||||||
|
putSuccess = false;
|
||||||
|
}
|
||||||
|
assertTrue(putSuccess);
|
||||||
|
}
|
||||||
|
|
||||||
@org.junit.Rule
|
@org.junit.Rule
|
||||||
public org.apache.hadoop.hbase.ResourceCheckerJUnitRule cu =
|
public org.apache.hadoop.hbase.ResourceCheckerJUnitRule cu =
|
||||||
new org.apache.hadoop.hbase.ResourceCheckerJUnitRule();
|
new org.apache.hadoop.hbase.ResourceCheckerJUnitRule();
|
||||||
|
@ -180,7 +180,7 @@ public class TestMasterCoprocessorExceptionWithAbort {
|
|||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
masterTracker.start();
|
masterTracker.start(true);
|
||||||
zkw.registerListener(masterTracker);
|
zkw.registerListener(masterTracker);
|
||||||
|
|
||||||
// Test (part of the) output that should have be printed by master when it aborts:
|
// Test (part of the) output that should have be printed by master when it aborts:
|
||||||
|
@ -166,7 +166,7 @@ public class TestMasterCoprocessorExceptionWithRemove {
|
|||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
masterTracker.start();
|
masterTracker.start(true);
|
||||||
zkw.registerListener(masterTracker);
|
zkw.registerListener(masterTracker);
|
||||||
|
|
||||||
// Test (part of the) output that should have be printed by master when it aborts:
|
// Test (part of the) output that should have be printed by master when it aborts:
|
||||||
|
@ -122,7 +122,7 @@ public class TestRegionServerCoprocessorExceptionWithAbort {
|
|||||||
|
|
||||||
RSTracker rsTracker = new RSTracker(zkw,
|
RSTracker rsTracker = new RSTracker(zkw,
|
||||||
"/hbase/rs/"+regionServer.getServerName(), Thread.currentThread());
|
"/hbase/rs/"+regionServer.getServerName(), Thread.currentThread());
|
||||||
rsTracker.start();
|
rsTracker.start(true);
|
||||||
zkw.registerListener(rsTracker);
|
zkw.registerListener(rsTracker);
|
||||||
|
|
||||||
boolean caughtInterruption = false;
|
boolean caughtInterruption = false;
|
||||||
|
@ -64,7 +64,7 @@ public class TestMasterAddressManager {
|
|||||||
|
|
||||||
// Should not have a master yet
|
// Should not have a master yet
|
||||||
MasterAddressTracker addressManager = new MasterAddressTracker(zk, null);
|
MasterAddressTracker addressManager = new MasterAddressTracker(zk, null);
|
||||||
addressManager.start();
|
addressManager.start(true);
|
||||||
assertFalse(addressManager.hasMaster());
|
assertFalse(addressManager.hasMaster());
|
||||||
zk.registerListener(addressManager);
|
zk.registerListener(addressManager);
|
||||||
|
|
||||||
|
@ -72,7 +72,7 @@ public class TestZooKeeperNodeTracker {
|
|||||||
ZooKeeperWatcher zk = new ZooKeeperWatcher(TEST_UTIL.getConfiguration(),
|
ZooKeeperWatcher zk = new ZooKeeperWatcher(TEST_UTIL.getConfiguration(),
|
||||||
"testInterruptible", abortable);
|
"testInterruptible", abortable);
|
||||||
final TestTracker tracker = new TestTracker(zk, "/xyz", abortable);
|
final TestTracker tracker = new TestTracker(zk, "/xyz", abortable);
|
||||||
tracker.start();
|
tracker.start(true);
|
||||||
Thread t = new Thread() {
|
Thread t = new Thread() {
|
||||||
@Override
|
@Override
|
||||||
public void run() {
|
public void run() {
|
||||||
@ -105,7 +105,7 @@ public class TestZooKeeperNodeTracker {
|
|||||||
|
|
||||||
// Start a ZKNT with no node currently available
|
// Start a ZKNT with no node currently available
|
||||||
TestTracker localTracker = new TestTracker(zk, node, abortable);
|
TestTracker localTracker = new TestTracker(zk, node, abortable);
|
||||||
localTracker.start();
|
localTracker.start(true);
|
||||||
zk.registerListener(localTracker);
|
zk.registerListener(localTracker);
|
||||||
|
|
||||||
// Make sure we don't have a node
|
// Make sure we don't have a node
|
||||||
@ -120,7 +120,7 @@ public class TestZooKeeperNodeTracker {
|
|||||||
|
|
||||||
// Now, start a new ZKNT with the node already available
|
// Now, start a new ZKNT with the node already available
|
||||||
TestTracker secondTracker = new TestTracker(zk, node, null);
|
TestTracker secondTracker = new TestTracker(zk, node, null);
|
||||||
secondTracker.start();
|
secondTracker.start(true);
|
||||||
zk.registerListener(secondTracker);
|
zk.registerListener(secondTracker);
|
||||||
|
|
||||||
// Put up an additional zk listener so we know when zk event is done
|
// Put up an additional zk listener so we know when zk event is done
|
||||||
@ -213,7 +213,7 @@ public class TestZooKeeperNodeTracker {
|
|||||||
|
|
||||||
public WaitToGetDataThread(ZooKeeperWatcher zk, String node) {
|
public WaitToGetDataThread(ZooKeeperWatcher zk, String node) {
|
||||||
tracker = new TestTracker(zk, node, null);
|
tracker = new TestTracker(zk, node, null);
|
||||||
tracker.start();
|
tracker.start(true);
|
||||||
zk.registerListener(tracker);
|
zk.registerListener(tracker);
|
||||||
hasData = false;
|
hasData = false;
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user