HBASE-20855 PeerConfigTracker only supporting one listener will cause problem when there is a recovered replication queue
Signed-off-by: tedyu <yuzhihong@gmail.com>
This commit is contained in:
parent
61288f843c
commit
15ed2e86e1
|
@ -81,4 +81,10 @@ public interface ReplicationPeer {
|
|||
* @param listener Listener for config changes, usually a replication endpoint
|
||||
*/
|
||||
void trackPeerConfigChanges(ReplicationPeerConfigListener listener);
|
||||
|
||||
/**
 * Unregister a listener that was registered through
 * {@link #trackPeerConfigChanges(ReplicationPeerConfigListener)}. Intended to be called when
 * the listener is closed or terminated.
 * @param listener Listener for config changes, usually a replication endpoint
 */
|
||||
void removeListenerOfPeerConfig(ReplicationPeerConfigListener listener);
|
||||
}
|
||||
|
|
|
@ -21,8 +21,10 @@ package org.apache.hadoop.hbase.replication;
|
|||
import java.io.Closeable;
|
||||
import java.io.IOException;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
|
@ -172,10 +174,21 @@ public class ReplicationPeerZKImpl extends ReplicationStateZKBase implements Rep
|
|||
@Override
|
||||
public void trackPeerConfigChanges(ReplicationPeerConfigListener listener) {
|
||||
if (this.peerConfigTracker != null){
|
||||
this.peerConfigTracker.setListener(listener);
|
||||
this.peerConfigTracker.addListener(listener);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void removeListenerOfPeerConfig(ReplicationPeerConfigListener listener) {
|
||||
if (this.peerConfigTracker != null){
|
||||
this.peerConfigTracker.removeListener(listener);
|
||||
}
|
||||
}
|
||||
|
||||
PeerConfigTracker getPeerConfigTracker() {
|
||||
return this.peerConfigTracker;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void abort(String why, Throwable e) {
|
||||
LOG.fatal("The ReplicationPeer corresponding to peer " + peerConfig
|
||||
|
@ -275,15 +288,24 @@ public class ReplicationPeerZKImpl extends ReplicationStateZKBase implements Rep
|
|||
*/
|
||||
public class PeerConfigTracker extends ZooKeeperNodeTracker {
|
||||
|
||||
private ReplicationPeerConfigListener listener;
|
||||
private Set<ReplicationPeerConfigListener> listeners;
|
||||
|
||||
public PeerConfigTracker(String peerConfigNode, ZooKeeperWatcher watcher,
|
||||
Abortable abortable) {
|
||||
super(watcher, peerConfigNode, abortable);
|
||||
listeners = new HashSet<>();
|
||||
}
|
||||
|
||||
public synchronized void setListener(ReplicationPeerConfigListener listener){
|
||||
this.listener = listener;
|
||||
public synchronized void addListener(ReplicationPeerConfigListener listener){
|
||||
listeners.add(listener);
|
||||
}
|
||||
|
||||
Set<ReplicationPeerConfigListener> getListeners(){
|
||||
return this.listeners;
|
||||
}
|
||||
|
||||
public synchronized void removeListener(ReplicationPeerConfigListener listenerToRemove) {
|
||||
listeners.remove(listenerToRemove);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -291,7 +313,7 @@ public class ReplicationPeerZKImpl extends ReplicationStateZKBase implements Rep
|
|||
if (path.equals(node)) {
|
||||
super.nodeCreated(path);
|
||||
ReplicationPeerConfig config = readPeerConfig();
|
||||
if (listener != null){
|
||||
for (ReplicationPeerConfigListener listener : listeners) {
|
||||
listener.peerConfigUpdated(config);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -111,4 +111,10 @@ public abstract class BaseReplicationEndpoint extends AbstractService
|
|||
return false;
|
||||
}
|
||||
|
||||
public void close(){
|
||||
if(this.ctx != null) {
|
||||
ReplicationPeer peer = this.ctx.getReplicationPeer();
|
||||
peer.removeListenerOfPeerConfig(this);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -89,6 +89,7 @@ public abstract class HBaseReplicationEndpoint extends BaseReplicationEndpoint
|
|||
/**
 * Stops this endpoint by running, in this order, {@code disconnect()}, {@code close()} and
 * {@code notifyStopped()}. Unless overridden, {@code close()} is the one defined in
 * BaseReplicationEndpoint, which removes this endpoint from its replication peer's config
 * listeners.
 */
@Override
|
||||
protected void doStop() {
|
||||
disconnect();
|
||||
// Unregister from peer config updates before the stop is reported.
close();
|
||||
// NOTE(review): presumably the AbstractService callback that marks the stop as complete -
// confirm against the service base class.
notifyStopped();
|
||||
}
|
||||
|
||||
|
|
|
@ -446,6 +446,7 @@ public class HBaseInterClusterReplicationEndpoint extends HBaseReplicationEndpoi
|
|||
"Aborting to prevent Replication from deadlocking. See HBASE-16081.";
|
||||
abortable.abort(errMsg, new IOException(errMsg));
|
||||
}
|
||||
close();
|
||||
notifyStopped();
|
||||
}
|
||||
|
||||
|
|
|
@ -579,6 +579,10 @@ public class ReplicationSource extends Thread implements ReplicationSourceInterf
|
|||
}
|
||||
if (allOtherTaskDone) {
|
||||
manager.closeRecoveredQueue(this.source);
|
||||
// stop replication endpoint
|
||||
if (source instanceof ReplicationSource) {
|
||||
((ReplicationSource) source).replicationEndpoint.stop();
|
||||
}
|
||||
LOG.info("Finished recovering queue " + peerClusterZnode
|
||||
+ " with the following stats: " + getStats());
|
||||
}
|
||||
|
|
|
@ -0,0 +1,100 @@
|
|||
/*
|
||||
*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.hbase.replication;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.hadoop.hbase.HBaseTestingUtility;
|
||||
import org.apache.hadoop.hbase.Waiter;
|
||||
import org.apache.hadoop.hbase.regionserver.HRegionServer;
|
||||
import org.apache.hadoop.hbase.replication.regionserver.Replication;
|
||||
import org.apache.hadoop.hbase.replication.regionserver.ReplicationSourceManager;
|
||||
import org.apache.hadoop.hbase.testclassification.MediumTests;
|
||||
import org.apache.hadoop.hbase.util.Threads;
|
||||
import org.junit.Test;
|
||||
import org.junit.experimental.categories.Category;
|
||||
|
||||
@Category(MediumTests.class)
|
||||
public class TestReplicationConfigTracker extends TestReplicationBase {
|
||||
private static final Log LOG = LogFactory.getLog(TestReplicationKillRS.class);
|
||||
|
||||
@Test
|
||||
public void testReplicationConfigTracker() throws Exception {
|
||||
// killing the RS with hbase:meta can result into failed puts until we solve
|
||||
// IO fencing
|
||||
int rsToKill1 = utility1.getHBaseCluster().getServerWithMeta() == 0 ? 1 : 0;
|
||||
int otherRs = rsToKill1 == 0 ? 1 : 0;
|
||||
final HRegionServer regionServer = utility1.getHBaseCluster().getRegionServer(otherRs);
|
||||
final Thread listenerTracker = trackListener(utility1, otherRs);
|
||||
LOG.info("Start loading table");
|
||||
utility1.loadTable(htable1, famName, true);
|
||||
LOG.info("Done loading table");
|
||||
utility1.getHBaseCluster().getRegionServer(rsToKill1).abort("Stopping as part of the test");
|
||||
utility1.getHBaseCluster().waitOnRegionServer(rsToKill1);
|
||||
while (utility1.getHBaseCluster().getMaster().getServerManager().areDeadServersInProgress()) {
|
||||
LOG.info("Waiting on processing of crashed server before proceeding...");
|
||||
Threads.sleep(1000);
|
||||
}
|
||||
Waiter.waitFor(utility1.getConfiguration(), 20000, new Waiter.Predicate<Exception>() {
|
||||
@Override public boolean evaluate() throws Exception {
|
||||
return !listenerTracker.isAlive();
|
||||
}
|
||||
});
|
||||
final ReplicationPeerZKImpl.PeerConfigTracker tracker = getPeerConfigTracker(regionServer);
|
||||
Waiter.waitFor(utility1.getConfiguration(), 20000, new Waiter.Predicate<Exception>() {
|
||||
@Override public boolean evaluate() throws Exception {
|
||||
return tracker.getListeners().size() == 1;
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
private static Thread trackListener(final HBaseTestingUtility utility, final int rs) {
|
||||
Thread trackListener = new Thread() {
|
||||
public void run() {
|
||||
Replication replication = (Replication) utility.getHBaseCluster().getRegionServer(rs)
|
||||
.getReplicationSourceService();
|
||||
ReplicationSourceManager manager = replication.getReplicationManager();
|
||||
ReplicationPeerZKImpl replicationPeerZK =
|
||||
(ReplicationPeerZKImpl) manager.getReplicationPeers().getPeer(PEER_ID);
|
||||
ReplicationPeerZKImpl.PeerConfigTracker peerConfigTracker =
|
||||
replicationPeerZK.getPeerConfigTracker();
|
||||
while (peerConfigTracker.getListeners().size() != 2) {
|
||||
try {
|
||||
Thread.sleep(50);
|
||||
} catch (InterruptedException e) {
|
||||
LOG.error("track config failed", e);
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
trackListener.setDaemon(true);
|
||||
trackListener.start();
|
||||
return trackListener;
|
||||
}
|
||||
|
||||
private ReplicationPeerZKImpl.PeerConfigTracker getPeerConfigTracker(HRegionServer rs) {
|
||||
Replication replication = (Replication) rs.getReplicationSourceService();
|
||||
ReplicationSourceManager manager = replication.getReplicationManager();
|
||||
ReplicationPeerZKImpl replicationPeerZK =
|
||||
(ReplicationPeerZKImpl) manager.getReplicationPeers().getPeer(PEER_ID);
|
||||
ReplicationPeerZKImpl.PeerConfigTracker peerConfigTracker =
|
||||
replicationPeerZK.getPeerConfigTracker();
|
||||
return peerConfigTracker;
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue