From a49850e5c3a648a93ecca848d5d58ca7da9b2d4d Mon Sep 17 00:00:00 2001 From: Jerry He Date: Tue, 24 Oct 2017 07:53:17 -0700 Subject: [PATCH] HBASE-19021 Restore a few important missing logics for balancer in 2.0 Signed-off-by: Jerry He --- .../apache/hadoop/hbase/master/HMaster.java | 9 ++++--- .../hbase/master/assignment/RegionStates.java | 8 +++++++ .../procedure/ServerCrashProcedure.java | 1 + .../hadoop/hbase/TestRegionRebalancing.java | 24 +++++++++++++++---- 4 files changed, 35 insertions(+), 7 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java index 8f2ae6b2097..bb36520da5e 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java @@ -1421,16 +1421,19 @@ public class HMaster extends HRegionServer implements MasterServices { } } + boolean isByTable = getConfiguration().getBoolean("hbase.master.loadbalance.bytable", false); Map>> assignmentsByTable = - this.assignmentManager.getRegionStates().getAssignmentsByTable(); + this.assignmentManager.getRegionStates().getAssignmentsByTable(!isByTable); List plans = new ArrayList<>(); //Give the balancer the current cluster state. this.balancer.setClusterStatus(getClusterStatus()); - this.balancer.setClusterLoad( - this.assignmentManager.getRegionStates().getAssignmentsByTable()); + this.balancer.setClusterLoad(assignmentsByTable); + for (Map> serverMap : assignmentsByTable.values()) { + serverMap.keySet().removeAll(this.serverManager.getDrainingServersList()); + } for (Entry>> e : assignmentsByTable.entrySet()) { List partialPlans = this.balancer.balanceCluster(e.getKey(), e.getValue()); if (partialPlans != null) plans.addAll(partialPlans); diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStates.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStates.java index c13a49d723d..3b58fe26ce8 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStates.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStates.java @@ -756,6 +756,14 @@ public class RegionStates { serverResult.add(node.getRegionInfo()); } + // Add online servers with no assignment for the table. + for (Map> table: result.values()) { + for (ServerName svr : serverMap.keySet()) { + if (!table.containsKey(svr)) { + table.put(svr, new ArrayList()); + } + } + } return result; } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ServerCrashProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ServerCrashProcedure.java index a0ee6282574..56efaeb0668 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ServerCrashProcedure.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ServerCrashProcedure.java @@ -172,6 +172,7 @@ implements ServerProcedureInterface { break; case SERVER_CRASH_FINISH: + services.getAssignmentManager().getRegionStates().removeServer(serverName); services.getServerManager().getDeadServers().finish(serverName); return Flow.NO_MORE_STATE; diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/TestRegionRebalancing.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/TestRegionRebalancing.java index 3f7ea3b2ade..cb9f7686bbf 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/TestRegionRebalancing.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/TestRegionRebalancing.java @@ -22,6 +22,7 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.fail; import java.io.IOException; +import java.io.InterruptedIOException; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; @@ -53,9 +54,9 @@ import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil; /** * Test whether region re-balancing works. (HBASE-71) + * The test only works for cluster wide balancing, not per table wide. + * Increase the margin a little to make StochasticLoadBalancer result acceptable. */ -@Ignore // This is broken since new RegionServers does proper average of regions -// and because Master is treated as a regionserver though it hosts two regions only. @Category({FlakeyTests.class, LargeTests.class}) @RunWith(value = Parameterized.class) public class TestRegionRebalancing { @@ -131,6 +132,7 @@ public class TestRegionRebalancing { // add a region server - total of 3 LOG.info("Started third server=" + UTIL.getHBaseCluster().startRegionServer().getRegionServer().getServerName()); + waitForAllRegionsAssigned(); assert(UTIL.getHBaseCluster().getMaster().balance() == true); assertRegionsAreBalanced(); @@ -147,12 +149,14 @@ public class TestRegionRebalancing { LOG.info("Added fourth server=" + UTIL.getHBaseCluster().startRegionServer().getRegionServer().getServerName()); waitOnCrashProcessing(); + waitForAllRegionsAssigned(); assert(UTIL.getHBaseCluster().getMaster().balance() == true); assertRegionsAreBalanced(); for (int i = 0; i < 6; i++){ LOG.info("Adding " + (i + 5) + "th region server"); UTIL.getHBaseCluster().startRegionServer(); } + waitForAllRegionsAssigned(); assert(UTIL.getHBaseCluster().getMaster().balance() == true); assertRegionsAreBalanced(); regionLocator.close(); @@ -188,9 +192,14 @@ public class TestRegionRebalancing { long regionCount = UTIL.getMiniHBaseCluster().countServedRegions(); List servers = getOnlineRegionServers(); - double avg = UTIL.getHBaseCluster().getMaster().getAverageLoad(); + double avg = (double)regionCount / (double)servers.size(); int avgLoadPlusSlop = (int)Math.ceil(avg * (1 + slop)); int avgLoadMinusSlop = (int)Math.floor(avg * (1 - slop)) - 1; + // Increase the margin a little to accommodate StochasticLoadBalancer + if (this.balancerName.contains("StochasticLoadBalancer")) { + avgLoadPlusSlop++; + avgLoadMinusSlop--; + } LOG.debug("There are " + servers.size() + " servers and " + regionCount + " regions. Load Average: " + avg + " low border: " + avgLoadMinusSlop + ", up border: " + avgLoadPlusSlop + "; attempt: " + i); @@ -250,13 +259,20 @@ public class TestRegionRebalancing { */ private void waitForAllRegionsAssigned() throws IOException { int totalRegions = HBaseTestingUtility.KEYS.length; + try { + Thread.sleep(200); + } catch (InterruptedException e) { + throw new InterruptedIOException(); + } while (UTIL.getMiniHBaseCluster().countServedRegions() < totalRegions) { // while (!cluster.getMaster().allRegionsAssigned()) { LOG.debug("Waiting for there to be "+ totalRegions +" regions, but there are " + UTIL.getMiniHBaseCluster().countServedRegions() + " right now."); try { Thread.sleep(200); - } catch (InterruptedException e) {} + } catch (InterruptedException e) { + throw new InterruptedIOException(); + } } UTIL.waitUntilNoRegionsInTransition(); }