From b817a156c913eecd73e7858a54878021aba14e70 Mon Sep 17 00:00:00 2001 From: Viraj Jasani Date: Wed, 13 Jan 2021 05:01:26 +0530 Subject: [PATCH] HBASE-25211 : Rack awareness in RegionMover (#2795) Signed-off-by: Andrew Purtell --- .../hadoop/hbase/master/RackManager.java | 3 - .../apache/hadoop/hbase/util/RegionMover.java | 68 ++++++- .../hadoop/hbase/util/TestRegionMover3.java | 188 ++++++++++++++++++ 3 files changed, 254 insertions(+), 5 deletions(-) create mode 100644 hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestRegionMover3.java diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RackManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RackManager.java index 3ed20065a67..54ccac0cb62 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RackManager.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RackManager.java @@ -22,8 +22,6 @@ import java.util.Collections; import java.util.List; import org.apache.yetus.audience.InterfaceAudience; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.util.ReflectionUtils; @@ -36,7 +34,6 @@ import org.apache.hadoop.net.ScriptBasedMapping; */ @InterfaceAudience.Private public class RackManager { - private static final Logger LOG = LoggerFactory.getLogger(RackManager.class); public static final String UNKNOWN_RACK = "Unknown Rack"; private DNSToSwitchMapping switchMapping; diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/RegionMover.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/RegionMover.java index d49fa366ee3..57290deef30 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/RegionMover.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/RegionMover.java @@ -58,6 +58,7 @@ import org.apache.hadoop.hbase.client.Connection; import org.apache.hadoop.hbase.client.ConnectionFactory; import org.apache.hadoop.hbase.client.DoNotRetryRegionException; import org.apache.hadoop.hbase.client.RegionInfo; +import org.apache.hadoop.hbase.master.RackManager; import org.apache.hadoop.hbase.master.assignment.AssignmentManager; import org.apache.yetus.audience.InterfaceAudience; import org.slf4j.Logger; @@ -83,6 +84,7 @@ public class RegionMover extends AbstractHBaseTool implements Closeable { public static final int DEFAULT_MOVE_RETRIES_MAX = 5; public static final int DEFAULT_MOVE_WAIT_MAX = 60; public static final int DEFAULT_SERVERSTART_WAIT_MAX = 180; + private final RackManager rackManager; private static final Logger LOG = LoggerFactory.getLogger(RegionMover.class); @@ -111,9 +113,16 @@ public class RegionMover extends AbstractHBaseTool implements Closeable { setConf(builder.conf); this.conn = ConnectionFactory.createConnection(conf); this.admin = conn.getAdmin(); + // Only while running unit tests, builder.rackManager will not be null for the convenience of + // providing custom rackManager. Otherwise for regular workflow/user triggered action, + // builder.rackManager is supposed to be null. Hence, setter of builder.rackManager is + // provided as @InterfaceAudience.Private and it is commented that this is just + // to be used by unit test. + rackManager = builder.rackManager == null ? new RackManager(conf) : builder.rackManager; } private RegionMover() { + rackManager = new RackManager(conf); } @Override @@ -140,6 +149,7 @@ public class RegionMover extends AbstractHBaseTool implements Closeable { @InterfaceAudience.Private final int port; private final Configuration conf; + private RackManager rackManager; public RegionMoverBuilder(String hostname) { this(hostname, createConf()); @@ -242,6 +252,19 @@ public class RegionMover extends AbstractHBaseTool implements Closeable { return this; } + /** + * Set specific rackManager implementation. + * This setter method is for testing purpose only. + * + * @param rackManager rackManager impl + * @return RegionMoverBuilder object + */ + @InterfaceAudience.Private + public RegionMoverBuilder rackManager(RackManager rackManager) { + this.rackManager = rackManager; + return this; + } + /** * This method builds the appropriate RegionMover object which can then be used to load/unload * using load and unload methods @@ -325,9 +348,31 @@ public class RegionMover extends AbstractHBaseTool implements Closeable { * server,hence it is best effort.We do not unload regions to hostnames given in * {@link #excludeFile}. If designatedFile is present with some contents, we will unload regions * to hostnames provided in {@link #designatedFile} + * * @return true if unloading succeeded, false otherwise */ public boolean unload() throws InterruptedException, ExecutionException, TimeoutException { + return unloadRegions(false); + } + + /** + * Unload regions from given {@link #hostname} using ack/noAck mode and {@link #maxthreads}.In + * noAck mode we do not make sure that region is successfully online on the target region + * server,hence it is best effort.We do not unload regions to hostnames given in + * {@link #excludeFile}. If designatedFile is present with some contents, we will unload regions + * to hostnames provided in {@link #designatedFile}. + * While unloading regions, destination RegionServers are selected from different rack i.e + * regions should not move to any RegionServers that belong to same rack as source RegionServer. + * + * @return true if unloading succeeded, false otherwise + */ + public boolean unloadFromRack() + throws InterruptedException, ExecutionException, TimeoutException { + return unloadRegions(true); + } + + private boolean unloadRegions(boolean unloadFromRack) throws InterruptedException, + ExecutionException, TimeoutException { deleteFile(this.filename); ExecutorService unloadPool = Executors.newFixedThreadPool(1); Future unloadTask = unloadPool.submit(() -> { @@ -350,6 +395,23 @@ public class RegionMover extends AbstractHBaseTool implements Closeable { // Remove RS present in the exclude file includeExcludeRegionServers(excludeFile, regionServers, false); + if (unloadFromRack) { + // remove regionServers that belong to same rack (as source host) since the goal is to + // unload regions from source regionServer to destination regionServers + // that belong to different rack only. + String sourceRack = rackManager.getRack(server); + List racks = rackManager.getRack(regionServers); + Iterator iterator = regionServers.iterator(); + int i = 0; + while (iterator.hasNext()) { + iterator.next(); + if (racks.size() > i && racks.get(i) != null && racks.get(i).equals(sourceRack)) { + iterator.remove(); + } + i++; + } + } + // Remove decommissioned RS Set decommissionedRS = new HashSet<>(admin.listDecommissionedRegionServers()); if (CollectionUtils.isNotEmpty(decommissionedRS)) { @@ -628,7 +690,7 @@ public class RegionMover extends AbstractHBaseTool implements Closeable { private ServerName stripServer(List regionServers, String hostname, int port) { for (Iterator iter = regionServers.iterator(); iter.hasNext();) { ServerName server = iter.next(); - if (server.getAddress().getHostname().equalsIgnoreCase(hostname) && + if (server.getAddress().getHostName().equalsIgnoreCase(hostname) && server.getAddress().getPort() == port) { iter.remove(); return server; @@ -640,7 +702,7 @@ public class RegionMover extends AbstractHBaseTool implements Closeable { @Override protected void addOptions() { this.addRequiredOptWithArg("r", "regionserverhost", "region server |"); - this.addRequiredOptWithArg("o", "operation", "Expected: load/unload"); + this.addRequiredOptWithArg("o", "operation", "Expected: load/unload/unload_from_rack"); this.addOptWithArg("m", "maxthreads", "Define the maximum number of threads to use to unload and reload the regions"); this.addOptWithArg("x", "excludefile", @@ -693,6 +755,8 @@ public class RegionMover extends AbstractHBaseTool implements Closeable { success = rm.load(); } else if (loadUnload.equalsIgnoreCase("unload")) { success = rm.unload(); + } else if (loadUnload.equalsIgnoreCase("unload_from_rack")) { + success = rm.unloadFromRack(); } else { printUsage(); success = false; diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestRegionMover3.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestRegionMover3.java new file mode 100644 index 00000000000..1903fa6bf5b --- /dev/null +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestRegionMover3.java @@ -0,0 +1,188 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.util; + +import org.apache.hadoop.hbase.HBaseClassTestRule; +import org.apache.hadoop.hbase.HBaseTestingUtility; +import org.apache.hadoop.hbase.MiniHBaseCluster; +import org.apache.hadoop.hbase.ServerName; +import org.apache.hadoop.hbase.TableName; +import org.apache.hadoop.hbase.client.Admin; +import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder; +import org.apache.hadoop.hbase.client.Put; +import org.apache.hadoop.hbase.client.Table; +import org.apache.hadoop.hbase.client.TableDescriptor; +import org.apache.hadoop.hbase.client.TableDescriptorBuilder; +import org.apache.hadoop.hbase.master.RackManager; +import org.apache.hadoop.hbase.regionserver.HRegion; +import org.apache.hadoop.hbase.regionserver.HRegionServer; +import org.apache.hadoop.hbase.testclassification.LargeTests; +import org.apache.hadoop.hbase.testclassification.MiscTests; +import org.junit.AfterClass; +import org.junit.Assert; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.ClassRule; +import org.junit.Rule; +import org.junit.Test; +import org.junit.experimental.categories.Category; +import org.junit.rules.TestName; +import java.util.ArrayList; +import java.util.List; +import java.util.stream.Collectors; +import java.util.stream.IntStream; + + +@Category({ MiscTests.class, LargeTests.class}) +public class TestRegionMover3 { + + @ClassRule + public static final HBaseClassTestRule CLASS_RULE = + HBaseClassTestRule.forClass(TestRegionMover3.class); + + @Rule + public TestName name = new TestName(); + + private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(); + private static ServerName rs0; + private static ServerName rs1; + private static ServerName rs2; + + @BeforeClass + public static void setUpBeforeClass() throws Exception { + TEST_UTIL.startMiniCluster(3); + MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster(); + rs0 = cluster.getRegionServer(0).getServerName(); + rs1 = cluster.getRegionServer(1).getServerName(); + rs2 = cluster.getRegionServer(2).getServerName(); + TEST_UTIL.getAdmin().balancerSwitch(false, true); + } + + @AfterClass + public static void tearDownAfterClass() throws Exception { + TEST_UTIL.shutdownMiniCluster(); + } + + @Before + public void setUp() throws Exception { + final TableName tableName = TableName.valueOf(name.getMethodName()); + TableDescriptor tableDesc = TableDescriptorBuilder.newBuilder(tableName) + .setColumnFamily(ColumnFamilyDescriptorBuilder.of("fam1")).build(); + int startKey = 0; + int endKey = 80000; + TEST_UTIL.getAdmin().createTable(tableDesc, Bytes.toBytes(startKey), Bytes.toBytes(endKey), 9); + } + + @Test + public void testRegionUnloadWithRack() throws Exception { + final TableName tableName = TableName.valueOf(name.getMethodName()); + MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster(); + Admin admin = TEST_UTIL.getAdmin(); + Table table = TEST_UTIL.getConnection().getTable(tableName); + List puts = IntStream.range(10, 50000) + .mapToObj(i -> new Put(Bytes.toBytes(i)) + .addColumn(Bytes.toBytes("fam1"), Bytes.toBytes("q1"), Bytes.toBytes("val_" + i))) + .collect(Collectors.toList()); + table.put(puts); + admin.flush(tableName); + admin.compact(tableName); + Thread.sleep(3000); + HRegionServer hRegionServer0 = cluster.getRegionServer(0); + HRegionServer hRegionServer1 = cluster.getRegionServer(1); + HRegionServer hRegionServer2 = cluster.getRegionServer(2); + int numRegions0 = hRegionServer0.getNumberOfOnlineRegions(); + int numRegions1 = hRegionServer1.getNumberOfOnlineRegions(); + int numRegions2 = hRegionServer2.getNumberOfOnlineRegions(); + + Assert.assertTrue(numRegions0 >= 3); + Assert.assertTrue(numRegions1 >= 3); + Assert.assertTrue(numRegions2 >= 3); + int totalRegions = numRegions0 + numRegions1 + numRegions2; + + // source RS: rs0 + String sourceRSName = rs0.getAddress().toString(); + + // move all regions from rs1 to rs0 + for (HRegion region : hRegionServer1.getRegions()) { + TEST_UTIL.getAdmin().move(region.getRegionInfo().getEncodedNameAsBytes(), rs0); + } + TEST_UTIL.waitFor(5000, () -> { + int newNumRegions0 = hRegionServer0.getNumberOfOnlineRegions(); + int newNumRegions1 = hRegionServer1.getNumberOfOnlineRegions(); + return newNumRegions1 == 0 && newNumRegions0 == (numRegions0 + numRegions1); + }); + + // regionMover obj on rs0. While unloading regions from rs0 + // with default rackManager, which resolves "/default-rack" for each server, no region + // is moved while using unloadFromRack() as all rs belong to same rack. + RegionMover.RegionMoverBuilder rmBuilder = + new RegionMover.RegionMoverBuilder(sourceRSName, TEST_UTIL.getConfiguration()) + .ack(true) + .maxthreads(8); + try (RegionMover regionMover = rmBuilder.build()) { + regionMover.unloadFromRack(); + int newNumRegions0 = hRegionServer0.getNumberOfOnlineRegions(); + int newNumRegions1 = hRegionServer1.getNumberOfOnlineRegions(); + int newNumRegions2 = hRegionServer2.getNumberOfOnlineRegions(); + Assert.assertEquals(0, newNumRegions1); + Assert.assertEquals(totalRegions, newNumRegions0 + newNumRegions2); + } + + // use custom rackManager, which resolves "rack-1" for rs0 and rs1, + // while "rack-2" for rs2. Hence, unloadFromRack() from rs0 should move all + // regions that belong to rs0 to rs2 only, and nothing should be moved to rs1 + // as rs0 and rs1 belong to same rack. + rmBuilder.rackManager(new MockRackManager()); + try (RegionMover regionMover = rmBuilder.build()) { + regionMover.unloadFromRack(); + int newNumRegions0 = hRegionServer0.getNumberOfOnlineRegions(); + int newNumRegions1 = hRegionServer1.getNumberOfOnlineRegions(); + int newNumRegions2 = hRegionServer2.getNumberOfOnlineRegions(); + Assert.assertEquals(0, newNumRegions0); + Assert.assertEquals(0, newNumRegions1); + Assert.assertEquals(totalRegions, newNumRegions2); + } + + } + + private static class MockRackManager extends RackManager { + + private static final String RACK_2 = "rack-2"; + private static final String RACK_1 = "rack-1"; + + @Override + public String getRack(ServerName server) { + return rs2.equals(server) ? RACK_2 : RACK_1; + } + + @Override + public List getRack(List servers) { + List racks = new ArrayList<>(); + servers.forEach(serverName -> { + if (rs2.equals(serverName)) { + racks.add(RACK_2); + } else { + racks.add(RACK_1); + } + }); + return racks; + } + } + +}