From f5a08063b6731505c45102545ab937b130ab4f47 Mon Sep 17 00:00:00 2001 From: Rushabh Shah Date: Sat, 7 May 2022 10:38:17 -0700 Subject: [PATCH] HBASE-26963 ReplicationSource#removePeer hangs if we try to remove bad peer. (#4413) Signed-off-by: Andrew Purtell --- .../hbase/regionserver/HRegionServer.java | 4 +- .../regionserver/ReplicationSource.java | 15 ++- .../replication/TestBadReplicationPeer.java | 93 +++++++++++++++++++ 3 files changed, 108 insertions(+), 4 deletions(-) create mode 100644 hbase-server/src/test/java/org/apache/hadoop/hbase/client/replication/TestBadReplicationPeer.java diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java index 29a0b508096..6071d17be0d 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java @@ -1257,8 +1257,8 @@ public class HRegionServer extends Thread } /** - * This method is called when HMaster and HRegionServer are started. - * Please see to HBASE-26977 for details. + * This method is called when HMaster and HRegionServer are started. Please see to HBASE-26977 for + * details. */ private void installShutdownHook() { ShutdownHook.install(conf, dataFs, this, Thread.currentThread()); diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSource.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSource.java index 3d1746edd8d..64bba81d9ec 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSource.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSource.java @@ -552,8 +552,14 @@ public class ReplicationSource implements ReplicationSourceInterface { } if (!this.isSourceActive()) { - retryStartup.set(!this.abortOnError); setSourceStartupStatus(false); + if (Thread.currentThread().isInterrupted()) { + // If source is not running and thread is interrupted this means someone has tried to + // remove this peer. + return; + } + + retryStartup.set(!this.abortOnError); throw new IllegalStateException("Source should be active."); } @@ -576,8 +582,13 @@ public class ReplicationSource implements ReplicationSourceInterface { } if (!this.isSourceActive()) { - retryStartup.set(!this.abortOnError); setSourceStartupStatus(false); + if (Thread.currentThread().isInterrupted()) { + // If source is not running and thread is interrupted this means someone has tried to + // remove this peer. + return; + } + retryStartup.set(!this.abortOnError); throw new IllegalStateException("Source should be active."); } LOG.info("{} queueId={} (queues={}) is replicating from cluster={} to cluster={}", logPeerId(), diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/replication/TestBadReplicationPeer.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/replication/TestBadReplicationPeer.java new file mode 100644 index 00000000000..f3e0af5432a --- /dev/null +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/replication/TestBadReplicationPeer.java @@ -0,0 +1,93 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.client.replication; + +import java.io.IOException; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hbase.HBaseClassTestRule; +import org.apache.hadoop.hbase.HBaseTestingUtility; +import org.apache.hadoop.hbase.HConstants; +import org.apache.hadoop.hbase.client.Admin; +import org.apache.hadoop.hbase.client.Connection; +import org.apache.hadoop.hbase.client.ConnectionFactory; +import org.apache.hadoop.hbase.replication.ReplicationPeerConfig; +import org.apache.hadoop.hbase.replication.ReplicationPeerConfigBuilder; +import org.apache.hadoop.hbase.testclassification.ClientTests; +import org.apache.hadoop.hbase.testclassification.MediumTests; +import org.junit.AfterClass; +import org.junit.BeforeClass; +import org.junit.ClassRule; +import org.junit.Rule; +import org.junit.Test; +import org.junit.experimental.categories.Category; +import org.junit.rules.TestName; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +@Category({ MediumTests.class, ClientTests.class }) +public class TestBadReplicationPeer { + @ClassRule + public static final HBaseClassTestRule CLASS_RULE = + HBaseClassTestRule.forClass(TestBadReplicationPeer.class); + private static final Logger LOG = LoggerFactory.getLogger(TestBadReplicationPeer.class); + private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(); + private static Configuration conf; + + @Rule + public TestName name = new TestName(); + + @BeforeClass + public static void setUpBeforeClass() throws Exception { + TEST_UTIL.getConfiguration().setInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, 1); + TEST_UTIL.getConfiguration().setBoolean("replication.source.regionserver.abort", false); + TEST_UTIL.startMiniCluster(); + conf = TEST_UTIL.getConfiguration(); + } + + @AfterClass + public static void tearDownAfterClass() throws Exception { + TEST_UTIL.shutdownMiniCluster(); + } + + /* + * Add dummy peer and make sure that we are able to remove that peer. + */ + @Test + public void testRemovePeerSucceeds() throws IOException { + String peerId = "dummypeer_1"; + try (Connection connection = ConnectionFactory.createConnection(conf); + Admin admin = connection.getAdmin()) { + ReplicationPeerConfigBuilder rpcBuilder = ReplicationPeerConfig.newBuilder(); + String quorum = TEST_UTIL.getHBaseCluster().getMaster().getZooKeeper().getQuorum(); + rpcBuilder.setClusterKey(quorum + ":/1"); + ReplicationPeerConfig rpc = rpcBuilder.build(); + admin.addReplicationPeer(peerId, rpc); + LOG.info("Added replication peer with peer id: {}", peerId); + } finally { + LOG.info("Removing replication peer with peer id: {}", peerId); + cleanPeer(peerId); + } + } + + private static void cleanPeer(String peerId) throws IOException { + try (Connection connection = ConnectionFactory.createConnection(conf); + Admin admin = connection.getAdmin()) { + admin.removeReplicationPeer(peerId); + } + } +}