From 2a95fffb3b42b9cf0f6c3cabd934a47c7d675556 Mon Sep 17 00:00:00 2001 From: Viraj Jasani Date: Fri, 3 Apr 2020 18:28:49 +0530 Subject: [PATCH] =?UTF-8?q?HBASE-24102=20:=20Remove=20decommissioned=20RS?= =?UTF-8?q?=20from=20target=20servers=20while=20unlo=E2=80=A6=20(#1417)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: binlijin Signed-off-by: Pankaj Signed-off-by: ramkrish86 Signed-off-by: stack Signed-off-by: Xu Cang Signed-off-by: Reid Chan --- .../apache/hadoop/hbase/util/RegionMover.java | 28 +++- .../hadoop/hbase/util/TestRegionMover.java | 144 +++++++++++++++++- 2 files changed, 163 insertions(+), 9 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/RegionMover.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/RegionMover.java index 373321303bf..d6ca2af6451 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/RegionMover.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/RegionMover.java @@ -34,9 +34,11 @@ import java.util.ArrayList; import java.util.Collection; import java.util.Collections; import java.util.EnumSet; +import java.util.HashSet; import java.util.Iterator; import java.util.List; import java.util.Locale; +import java.util.Set; import java.util.concurrent.Callable; import java.util.concurrent.CancellationException; import java.util.concurrent.ExecutionException; @@ -69,6 +71,7 @@ import org.slf4j.LoggerFactory; import org.apache.hbase.thirdparty.com.google.common.annotations.VisibleForTesting; import org.apache.hbase.thirdparty.org.apache.commons.cli.CommandLine; +import org.apache.hbase.thirdparty.org.apache.commons.collections4.CollectionUtils; /** * Tool for loading/unloading regions to/from given regionserver This tool can be run from Command @@ -81,13 +84,15 @@ import org.apache.hbase.thirdparty.org.apache.commons.cli.CommandLine; */ @InterfaceAudience.Public public class RegionMover extends AbstractHBaseTool implements Closeable { - public static final String MOVE_RETRIES_MAX_KEY = "hbase.move.retries.max"; - public static final String MOVE_WAIT_MAX_KEY = "hbase.move.wait.max"; - public static final String SERVERSTART_WAIT_MAX_KEY = "hbase.serverstart.wait.max"; - public static final int DEFAULT_MOVE_RETRIES_MAX = 5; - public static final int DEFAULT_MOVE_WAIT_MAX = 60; - public static final int DEFAULT_SERVERSTART_WAIT_MAX = 180; - static final Logger LOG = LoggerFactory.getLogger(RegionMover.class); + private static final String MOVE_RETRIES_MAX_KEY = "hbase.move.retries.max"; + private static final String MOVE_WAIT_MAX_KEY = "hbase.move.wait.max"; + static final String SERVERSTART_WAIT_MAX_KEY = "hbase.serverstart.wait.max"; + private static final int DEFAULT_MOVE_RETRIES_MAX = 5; + private static final int DEFAULT_MOVE_WAIT_MAX = 60; + private static final int DEFAULT_SERVERSTART_WAIT_MAX = 180; + + private static final Logger LOG = LoggerFactory.getLogger(RegionMover.class); + private RegionMoverBuilder rmbuilder; private boolean ack = true; private int maxthreads = 1; @@ -432,6 +437,15 @@ public class RegionMover extends AbstractHBaseTool implements Closeable { } // Remove RS present in the exclude file stripExcludes(regionServers); + + // Remove decommissioned RS + Set decommissionedRS = new HashSet<>(admin.listDecommissionedRegionServers()); + if (CollectionUtils.isNotEmpty(decommissionedRS)) { + regionServers.removeIf(decommissionedRS::contains); + LOG.debug("Excluded RegionServers from unloading regions to because they " + + "are marked as decommissioned. Servers: {}", decommissionedRS); + } + stripMaster(regionServers); if (regionServers.isEmpty()) { LOG.warn("No Regions were moved - no servers available"); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestRegionMover.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestRegionMover.java index 0d768e4075b..4d18f4bb6b7 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestRegionMover.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestRegionMover.java @@ -22,23 +22,30 @@ import static org.junit.Assert.assertFalse; import java.io.File; import java.io.FileWriter; +import java.io.IOException; +import java.util.Collections; +import java.util.List; + import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hbase.HBaseClassTestRule; import org.apache.hadoop.hbase.HBaseTestingUtility; import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.MiniHBaseCluster; +import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.Waiter.Predicate; import org.apache.hadoop.hbase.client.Admin; import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder; import org.apache.hadoop.hbase.client.TableDescriptor; import org.apache.hadoop.hbase.client.TableDescriptorBuilder; +import org.apache.hadoop.hbase.regionserver.HRegion; import org.apache.hadoop.hbase.regionserver.HRegionServer; -import org.apache.hadoop.hbase.testclassification.MediumTests; +import org.apache.hadoop.hbase.testclassification.LargeTests; import org.apache.hadoop.hbase.testclassification.MiscTests; import org.apache.hadoop.hbase.util.RegionMover.RegionMoverBuilder; import org.junit.AfterClass; +import org.junit.Assert; import org.junit.Before; import org.junit.BeforeClass; import org.junit.ClassRule; @@ -51,7 +58,7 @@ import org.slf4j.LoggerFactory; * Tests for Region Mover Load/Unload functionality with and without ack mode and also to test * exclude functionality useful for rack decommissioning */ -@Category({ MiscTests.class, MediumTests.class }) +@Category({MiscTests.class, LargeTests.class}) public class TestRegionMover { @ClassRule @@ -238,4 +245,137 @@ public class TestRegionMover { assertFalse(rm.load()); } } + + @Test + public void testDecomServerExclusionWithAck() throws Exception { + MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster(); + HRegionServer excludeServer = cluster.getRegionServer(1); + List regions = excludeServer.getRegions(); + int regionsExcludeServer = excludeServer.getNumberOfOnlineRegions(); + TEST_UTIL.getAdmin().decommissionRegionServers( + Collections.singletonList(excludeServer.getServerName()), false); + + waitForServerDecom(excludeServer); + + HRegionServer regionServer = cluster.getRegionServer(0); + String rsName = regionServer.getServerName().getHostname(); + int port = regionServer.getServerName().getPort(); + String hostname = rsName + ":" + Integer.toString(port); + RegionMoverBuilder rmBuilder = + new RegionMoverBuilder(hostname, TEST_UTIL.getConfiguration()) + .ack(true); + + int targetServerRegions = cluster.getRegionServer(2).getRegions().size(); + int sourceServerRegions = regionServer.getRegions().size(); + + try (RegionMover regionMover = rmBuilder.build()) { + Assert.assertTrue(regionMover.unload()); + LOG.info("Unloading {}", hostname); + assertEquals(0, regionServer.getNumberOfOnlineRegions()); + assertEquals(regionsExcludeServer, cluster.getRegionServer(1).getNumberOfOnlineRegions()); + LOG.info("Before:" + regionsExcludeServer + " After:" + + cluster.getRegionServer(1).getNumberOfOnlineRegions()); + List regionList = cluster.getRegionServer(1).getRegions(); + int index = 0; + for (HRegion hRegion : regionList) { + Assert.assertEquals(hRegion, regions.get(index++)); + } + Assert.assertEquals(targetServerRegions + sourceServerRegions, + cluster.getRegionServer(2).getNumberOfOnlineRegions()); + Assert.assertTrue(regionMover.load()); + } + + TEST_UTIL.getAdmin().recommissionRegionServer(excludeServer.getServerName(), + Collections.emptyList()); + } + + private void waitForServerDecom(HRegionServer excludeServer) { + + TEST_UTIL.waitFor(3000, () -> { + try { + List decomServers = TEST_UTIL.getAdmin().listDecommissionedRegionServers(); + return decomServers.size() == 1 + && decomServers.get(0).equals(excludeServer.getServerName()); + } catch (IOException e) { + throw new RuntimeException(e); + } + }); + } + + @Test + public void testDecomServerExclusion() throws Exception { + MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster(); + HRegionServer excludeServer = cluster.getRegionServer(0); + List regions = excludeServer.getRegions(); + int regionsExcludeServer = excludeServer.getNumberOfOnlineRegions(); + TEST_UTIL.getAdmin().decommissionRegionServers( + Collections.singletonList(excludeServer.getServerName()), false); + + waitForServerDecom(excludeServer); + + HRegionServer sourceRegionServer = cluster.getRegionServer(1); + String rsName = sourceRegionServer.getServerName().getHostname(); + int port = sourceRegionServer.getServerName().getPort(); + String hostname = rsName + ":" + Integer.toString(port); + RegionMoverBuilder rmBuilder = + new RegionMoverBuilder(hostname, TEST_UTIL.getConfiguration()).ack(false); + + int targetServerRegions = cluster.getRegionServer(2).getRegions().size(); + int sourceServerRegions = sourceRegionServer.getRegions().size(); + + try (RegionMover regionMover = rmBuilder.build()) { + Assert.assertTrue(regionMover.unload()); + LOG.info("Unloading {}", hostname); + assertEquals(0, sourceRegionServer.getNumberOfOnlineRegions()); + assertEquals(regionsExcludeServer, cluster.getRegionServer(0).getNumberOfOnlineRegions()); + LOG.info("Before:" + regionsExcludeServer + " After:" + + cluster.getRegionServer(1).getNumberOfOnlineRegions()); + List regionList = cluster.getRegionServer(0).getRegions(); + int index = 0; + for (HRegion hRegion : regionList) { + Assert.assertEquals(hRegion, regions.get(index++)); + } + Assert.assertEquals(targetServerRegions + sourceServerRegions, + cluster.getRegionServer(2).getNumberOfOnlineRegions()); + Assert.assertTrue(regionMover.load()); + } + + TEST_UTIL.getAdmin().recommissionRegionServer(excludeServer.getServerName(), + Collections.emptyList()); + } + + @Test + public void testExcludeAndDecomServers() throws Exception { + MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster(); + File excludeFile = new File(TEST_UTIL.getDataTestDir().toUri().getPath(), "exclude_file"); + FileWriter fos = new FileWriter(excludeFile); + HRegionServer excludeServer = cluster.getRegionServer(1); + String excludeHostname = excludeServer.getServerName().getHostname(); + int excludeServerPort = excludeServer.getServerName().getPort(); + String excludeServerName = excludeHostname + ":" + Integer.toString(excludeServerPort); + fos.write(excludeServerName); + fos.close(); + + HRegionServer decomServer = cluster.getRegionServer(2); + TEST_UTIL.getAdmin().decommissionRegionServers( + Collections.singletonList(decomServer.getServerName()), false); + + waitForServerDecom(decomServer); + + HRegionServer regionServer = cluster.getRegionServer(0); + String rsName = regionServer.getServerName().getHostname(); + int port = regionServer.getServerName().getPort(); + String sourceServer = rsName + ":" + Integer.toString(port); + RegionMoverBuilder rmBuilder = + new RegionMoverBuilder(sourceServer, TEST_UTIL.getConfiguration()) + .ack(true) + .excludeFile(excludeFile.getCanonicalPath()); + try (RegionMover regionMover = rmBuilder.build()) { + Assert.assertFalse(regionMover.unload()); + } + + TEST_UTIL.getAdmin().recommissionRegionServer(decomServer.getServerName(), + Collections.emptyList()); + } + }