HBASE-20597 Use a lock to serialize access to a shared reference to ZooKeeperWatcher in HBaseReplicationEndpoint

This commit is contained in:
Andrew Purtell 2018-05-17 10:30:28 -07:00
parent 498f3bf953
commit 1b70763b9e

View File

@@ -43,21 +43,22 @@ import org.apache.zookeeper.KeeperException.SessionExpiredException;
* target cluster is an HBase cluster. * target cluster is an HBase cluster.
*/ */
@InterfaceAudience.Private @InterfaceAudience.Private
@edu.umd.cs.findbugs.annotations.SuppressWarnings(value="MT_CORRECTNESS",
justification="Thinks zkw needs to be synchronized access but should be fine as is.")
public abstract class HBaseReplicationEndpoint extends BaseReplicationEndpoint public abstract class HBaseReplicationEndpoint extends BaseReplicationEndpoint
implements Abortable { implements Abortable {
private static final Log LOG = LogFactory.getLog(HBaseReplicationEndpoint.class); private static final Log LOG = LogFactory.getLog(HBaseReplicationEndpoint.class);
private ZooKeeperWatcher zkw = null; // FindBugs: MT_CORRECTNESS private Object zkwLock = new Object();
private ZooKeeperWatcher zkw = null;
private List<ServerName> regionServers = new ArrayList<ServerName>(0); private List<ServerName> regionServers = new ArrayList<ServerName>(0);
private long lastRegionServerUpdate; private long lastRegionServerUpdate;
protected void disconnect() { protected void disconnect() {
if (zkw != null) { synchronized (zkwLock) {
zkw.close(); if (zkw != null) {
zkw.close();
}
} }
} }
@@ -102,7 +103,9 @@ public abstract class HBaseReplicationEndpoint extends BaseReplicationEndpoint
public synchronized UUID getPeerUUID() { public synchronized UUID getPeerUUID() {
UUID peerUUID = null; UUID peerUUID = null;
try { try {
peerUUID = ZKClusterId.getUUIDForCluster(zkw); synchronized (zkwLock) {
peerUUID = ZKClusterId.getUUIDForCluster(zkw);
}
} catch (KeeperException ke) { } catch (KeeperException ke) {
reconnect(ke); reconnect(ke);
} }
@@ -114,7 +117,9 @@ public abstract class HBaseReplicationEndpoint extends BaseReplicationEndpoint
* @return zk connection * @return zk connection
*/ */
protected ZooKeeperWatcher getZkw() { protected ZooKeeperWatcher getZkw() {
return zkw; synchronized (zkwLock) {
return zkw;
}
} }
/** /**
@@ -122,10 +127,14 @@ public abstract class HBaseReplicationEndpoint extends BaseReplicationEndpoint
* @throws IOException If anything goes wrong connecting * @throws IOException If anything goes wrong connecting
*/ */
void reloadZkWatcher() throws IOException { void reloadZkWatcher() throws IOException {
if (zkw != null) zkw.close(); synchronized (zkwLock) {
zkw = new ZooKeeperWatcher(ctx.getConfiguration(), if (zkw != null) {
zkw.close();
}
zkw = new ZooKeeperWatcher(ctx.getConfiguration(),
"connection to cluster: " + ctx.getPeerId(), this); "connection to cluster: " + ctx.getPeerId(), this);
getZkw().registerListener(new PeerRegionServerListener(this)); zkw.registerListener(new PeerRegionServerListener(this));
}
} }
@Override @Override
@@ -163,13 +172,15 @@ public abstract class HBaseReplicationEndpoint extends BaseReplicationEndpoint
* for this peer cluster * for this peer cluster
* @return list of addresses * @return list of addresses
*/ */
// Synchronize peer cluster connection attempts to avoid races and rate public List<ServerName> getRegionServers() {
// limit connections when multiple replication sources try to connect to
// the peer cluster. If the peer cluster is down we can get out of control
// over time.
public synchronized List<ServerName> getRegionServers() {
try { try {
setRegionServers(fetchSlavesAddresses(this.getZkw())); // Synchronize peer cluster connection attempts to avoid races and rate
// limit connections when multiple replication sources try to connect to
// the peer cluster. If the peer cluster is down we can get out of control
// over time.
synchronized (zkwLock) {
setRegionServers(fetchSlavesAddresses(zkw));
}
} catch (KeeperException ke) { } catch (KeeperException ke) {
if (LOG.isDebugEnabled()) { if (LOG.isDebugEnabled()) {
LOG.debug("Fetch slaves addresses failed", ke); LOG.debug("Fetch slaves addresses failed", ke);