From 973b4ddcfa174f06470ebfe02977f41fb8a02a6e Mon Sep 17 00:00:00 2001
From: Sergey Soldatov
Date: Wed, 25 Jul 2018 23:32:36 -0700
Subject: [PATCH] HBASE-20927 RSGroupAdminEndpoint doesn't handle clearing dead servers if they are not processed yet.

Signed-off-by: tedyu
---
Review note (ignored by git-am): line breaks and indentation in this patch were
reconstructed from a copy that had been collapsed onto two lines, assuming the
usual 2-space indent with 4-space continuation. The type arguments on
Waiter.Predicate and List in the new test were restored as <Exception> and
<ServerName>, inferred from `evaluate() throws Exception` and the ServerName
argument passed to clearDeadServers. TODO: confirm context-line whitespace
against the target files before applying.

 .../hbase/rsgroup/RSGroupAdminEndpoint.java   |  4 ++-
 .../hbase/rsgroup/TestRSGroupsBase.java       | 28 +++++++++++++++++++
 2 files changed, 31 insertions(+), 1 deletion(-)

diff --git a/hbase-rsgroup/src/main/java/org/apache/hadoop/hbase/rsgroup/RSGroupAdminEndpoint.java b/hbase-rsgroup/src/main/java/org/apache/hadoop/hbase/rsgroup/RSGroupAdminEndpoint.java
index b67e335a334..3d1f7806280 100644
--- a/hbase-rsgroup/src/main/java/org/apache/hadoop/hbase/rsgroup/RSGroupAdminEndpoint.java
+++ b/hbase-rsgroup/src/main/java/org/apache/hadoop/hbase/rsgroup/RSGroupAdminEndpoint.java
@@ -539,7 +539,9 @@ public class RSGroupAdminEndpoint implements MasterCoprocessor, MasterObserver {
         filter(server -> !notClearedServers.contains(server)).
         map(ServerName::getAddress).
         collect(Collectors.toSet());
-    groupAdminServer.removeServers(clearedServer);
+    if(!clearedServer.isEmpty()) {
+      groupAdminServer.removeServers(clearedServer);
+    }
   }
 
   public void checkPermission(String request) throws IOException {
diff --git a/hbase-rsgroup/src/test/java/org/apache/hadoop/hbase/rsgroup/TestRSGroupsBase.java b/hbase-rsgroup/src/test/java/org/apache/hadoop/hbase/rsgroup/TestRSGroupsBase.java
index 199dd9864c5..43099db1377 100644
--- a/hbase-rsgroup/src/test/java/org/apache/hadoop/hbase/rsgroup/TestRSGroupsBase.java
+++ b/hbase-rsgroup/src/test/java/org/apache/hadoop/hbase/rsgroup/TestRSGroupsBase.java
@@ -1121,4 +1121,32 @@ public abstract class TestRSGroupsBase {
       }
     });
   }
+  @Test
+  public void testClearNotProcessedDeadServer() throws Exception {
+    LOG.info("testClearNotProcessedDeadServer");
+    NUM_DEAD_SERVERS = cluster.getClusterMetrics().getDeadServerNames().size();
+    RSGroupInfo appInfo = addGroup("deadServerGroup", 1);
+    ServerName targetServer =
+        ServerName.parseServerName(appInfo.getServers().iterator().next().toString());
+    AdminProtos.AdminService.BlockingInterface targetRS =
+        ((ClusterConnection) admin.getConnection()).getAdmin(targetServer);
+    try {
+      targetServer = ProtobufUtil.toServerName(targetRS.getServerInfo(null,
+          AdminProtos.GetServerInfoRequest.newBuilder().build()).getServerInfo().getServerName());
+      //stopping may cause an exception
+      //due to the connection loss
+      targetRS.stopServer(null,
+          AdminProtos.StopServerRequest.newBuilder().setReason("Die").build());
+      NUM_DEAD_SERVERS ++;
+    } catch(Exception e) {
+    }
+    TEST_UTIL.waitFor(WAIT_TIMEOUT, new Waiter.Predicate<Exception>() {
+      @Override
+      public boolean evaluate() throws Exception {
+        return cluster.getClusterMetrics().getDeadServerNames().size() == NUM_DEAD_SERVERS;
+      }
+    });
+    List<ServerName> notClearedServers = admin.clearDeadServers(Lists.newArrayList(targetServer));
+    assertEquals(1, notClearedServers.size());
+  }
 }