diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java index 446405583ff..226ab552bf2 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java @@ -998,7 +998,7 @@ public class HMaster extends HRegionServer implements MasterServices, Server { long cutoffTime = System.currentTimeMillis() + maximumBalanceTime; int rpCount = 0; // number of RegionPlans balanced so far long totalRegPlanExecTime = 0; - balancerRan = plans != null; + balancerRan = plans.size() != 0; if (plans != null && !plans.isEmpty()) { for (RegionPlan plan: plans) { LOG.info("balance " + plan); diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BaseLoadBalancer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BaseLoadBalancer.java index 4053d6af1fa..394f8dd31bc 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BaseLoadBalancer.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BaseLoadBalancer.java @@ -143,6 +143,8 @@ public abstract class BaseLoadBalancer implements LoadBalancer { int numMovedRegions = 0; //num moved regions from the initial configuration // num of moved regions away from master that should be on the master int numMovedMasterHostedRegions = 0; + int numMovedMetaRegions = 0; //num of moved regions that are META + Map> clusterState; protected final RackManager rackManager; @@ -188,6 +190,7 @@ public abstract class BaseLoadBalancer implements LoadBalancer { List> serversPerHostList = new ArrayList>(); List> serversPerRackList = new ArrayList>(); + this.clusterState = clusterState; // Use servername and port as there can be dead servers in this list. We want everything with // a matching hostname and port to have the same index. @@ -1031,7 +1034,9 @@ public abstract class BaseLoadBalancer implements LoadBalancer { return clusterStatus == null ? null : clusterStatus.getBackupMasters(); } - protected boolean needsBalance(ClusterLoadState cs) { + protected boolean needsBalance(Cluster c) { + ClusterLoadState cs = new ClusterLoadState( + masterServerName, getBackupMasters(), backupMasterWeight, c.clusterState); if (cs.getNumServers() < MIN_SERVER_BALANCE) { if (LOG.isDebugEnabled()) { LOG.debug("Not running balancer because only " + cs.getNumServers() @@ -1039,8 +1044,7 @@ public abstract class BaseLoadBalancer implements LoadBalancer { } return false; } - // TODO: check for co-located region replicas as well - + if(areSomeRegionReplicasColocated(c)) return true; // Check if we even need to do any load balancing // HBASE-3681 check sloppiness first float average = cs.getLoadAverage(); // for logging @@ -1061,6 +1065,17 @@ public abstract class BaseLoadBalancer implements LoadBalancer { return true; } + /** + * Subclasses should implement this to return true if the cluster has nodes that hosts + * multiple replicas for the same region, or, if there are multiple racks and the same + * rack hosts replicas of the same region + * @param c + * @return + */ + protected boolean areSomeRegionReplicasColocated(Cluster c) { + return false; + } + /** * Generates a bulk assignment plan to be used on cluster startup using a * simple round-robin assignment. diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/SimpleLoadBalancer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/SimpleLoadBalancer.java index a3b85ced083..db69d6eda69 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/SimpleLoadBalancer.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/SimpleLoadBalancer.java @@ -104,21 +104,21 @@ public class SimpleLoadBalancer extends BaseLoadBalancer { * have either floor(average) or ceiling(average) regions. * * HBASE-3609 Modeled regionsToMove using Guava's MinMaxPriorityQueue so that - * we can fetch from both ends of the queue. - * At the beginning, we check whether there was empty region server + * we can fetch from both ends of the queue. + * At the beginning, we check whether there was empty region server * just discovered by Master. If so, we alternately choose new / old * regions from head / tail of regionsToMove, respectively. This alternation * avoids clustering young regions on the newly discovered region server. * Otherwise, we choose new regions from head of regionsToMove. - * + * * Another improvement from HBASE-3609 is that we assign regions from * regionsToMove to underloaded servers in round-robin fashion. * Previously one underloaded server would be filled before we move onto * the next underloaded server, leading to clustering of young regions. - * + * * Finally, we randomly shuffle underloaded servers so that they receive * offloaded regions relatively evenly across calls to balanceCluster(). - * + * * The algorithm is currently implemented as such: * *
    @@ -179,6 +179,7 @@ public class SimpleLoadBalancer extends BaseLoadBalancer { * @return a list of regions to be moved, including source and destination, * or null if cluster is already balanced */ + @Override public List balanceCluster( Map> clusterMap) { List regionsToReturn = balanceMasterRegions(clusterMap); @@ -192,9 +193,12 @@ public class SimpleLoadBalancer extends BaseLoadBalancer { Collection backupMasters = getBackupMasters(); ClusterLoadState cs = new ClusterLoadState(masterServerName, backupMasters, backupMasterWeight, clusterMap); + // construct a Cluster object with clusterMap and rest of the + // argument as defaults + Cluster c = new Cluster(masterServerName, clusterMap, null, this.regionFinder, + getBackupMasters(), tablesOnMaster, this.rackManager); + if (!this.needsBalance(c)) return null; - if (!this.needsBalance(cs)) return null; - int numServers = cs.getNumServers(); NavigableMap> serversByLoad = cs.getServersByLoad(); int numRegions = cs.getNumRegions(); @@ -239,7 +243,7 @@ public class SimpleLoadBalancer extends BaseLoadBalancer { } int numToOffload = Math.min((load - max) / w, regions.size()); // account for the out-of-band regions which were assigned to this server - // after some other region server crashed + // after some other region server crashed Collections.sort(regions, riComparator); int numTaken = 0; for (int i = 0; i <= numToOffload; ) { diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java index 1d98cdd01a8..b0e9461964f 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java @@ -122,6 +122,8 @@ public class StochasticLoadBalancer extends BaseLoadBalancer { // when new services are offered private LocalityBasedCandidateGenerator localityCandidateGenerator; private LocalityCostFunction localityCost; + private RegionReplicaHostCostFunction regionReplicaHostCostFunction; + private RegionReplicaRackCostFunction regionReplicaRackCostFunction; @Override public void setConf(Configuration conf) { @@ -151,13 +153,16 @@ public class StochasticLoadBalancer extends BaseLoadBalancer { new StoreFileCostFunction(conf) }; + regionReplicaHostCostFunction = new RegionReplicaHostCostFunction(conf); + regionReplicaRackCostFunction = new RegionReplicaRackCostFunction(conf); + costFunctions = new CostFunction[]{ new RegionCountSkewCostFunction(conf, activeMasterWeight, backupMasterWeight), new MoveCostFunction(conf), localityCost, new TableSkewCostFunction(conf), - new RegionReplicaHostCostFunction(conf), - new RegionReplicaRackCostFunction(conf), + regionReplicaHostCostFunction, + regionReplicaRackCostFunction, regionLoadFunctions[0], regionLoadFunctions[1], regionLoadFunctions[2], @@ -187,6 +192,15 @@ public class StochasticLoadBalancer extends BaseLoadBalancer { } + @Override + protected boolean areSomeRegionReplicasColocated(Cluster c) { + regionReplicaHostCostFunction.init(c); + if (regionReplicaHostCostFunction.cost() > 0) return true; + regionReplicaRackCostFunction.init(c); + if (regionReplicaRackCostFunction.cost() > 0) return true; + return false; + } + /** * Given the cluster state this will try and approach an optimal balance. This * should always approach the optimal state given enough steps. @@ -198,16 +212,17 @@ public class StochasticLoadBalancer extends BaseLoadBalancer { return plans; } filterExcludedServers(clusterState); - if (!needsBalance(new ClusterLoadState(masterServerName, - getBackupMasters(), backupMasterWeight, clusterState))) { + //The clusterState that is given to this method contains the state + //of all the regions in the table(s) (that's true today) + // Keep track of servers to iterate through them. + Cluster cluster = new Cluster(masterServerName, + clusterState, loads, regionFinder, getBackupMasters(), tablesOnMaster, rackManager); + if (!needsBalance(cluster)) { return null; } long startTime = EnvironmentEdgeManager.currentTimeMillis(); - // Keep track of servers to iterate through them. - Cluster cluster = new Cluster(masterServerName, - clusterState, loads, regionFinder, getBackupMasters(), tablesOnMaster, rackManager); initCosts(cluster); double currentCost = computeCost(cluster, Double.MAX_VALUE); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/TestRegionRebalancing.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/TestRegionRebalancing.java index 0b0e2901dc5..71973426449 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/TestRegionRebalancing.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/TestRegionRebalancing.java @@ -118,10 +118,14 @@ public class TestRegionRebalancing { UTIL.getHBaseCluster().getMaster().balance(); assertRegionsAreBalanced(); + // On a balanced cluster, calling balance() should return false + assert(UTIL.getHBaseCluster().getMaster().balance() == false); + + // However if we add a server, then the balance() call should return true // add a region server - total of 3 LOG.info("Started third server=" + UTIL.getHBaseCluster().startRegionServer().getRegionServer().getServerName()); - UTIL.getHBaseCluster().getMaster().balance(); + assert(UTIL.getHBaseCluster().getMaster().balance() == true); assertRegionsAreBalanced(); // kill a region server - total of 2 @@ -135,14 +139,14 @@ public class TestRegionRebalancing { UTIL.getHBaseCluster().startRegionServer().getRegionServer().getServerName()); LOG.info("Added fourth server=" + UTIL.getHBaseCluster().startRegionServer().getRegionServer().getServerName()); - UTIL.getHBaseCluster().getMaster().balance(); + assert(UTIL.getHBaseCluster().getMaster().balance() == true); assertRegionsAreBalanced(); for (int i = 0; i < 6; i++){ LOG.info("Adding " + (i + 5) + "th region server"); UTIL.getHBaseCluster().startRegionServer(); } - UTIL.getHBaseCluster().getMaster().balance(); + assert(UTIL.getHBaseCluster().getMaster().balance() == true); assertRegionsAreBalanced(); table.close(); } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestStochasticLoadBalancer.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestStochasticLoadBalancer.java index 6aee367891e..1f50774919d 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestStochasticLoadBalancer.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestStochasticLoadBalancer.java @@ -26,6 +26,7 @@ import static org.mockito.Mockito.when; import java.util.ArrayList; import java.util.Arrays; +import java.util.HashMap; import java.util.Iterator; import java.util.List; import java.util.Map; @@ -46,6 +47,7 @@ import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.client.RegionReplicaUtil; import org.apache.hadoop.hbase.master.RackManager; import org.apache.hadoop.hbase.master.RegionPlan; +import org.apache.hadoop.hbase.master.balancer.BaseLoadBalancer.Cluster; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.net.DNSToSwitchMapping; import org.apache.hadoop.net.NetworkTopology; @@ -383,6 +385,34 @@ public class TestStochasticLoadBalancer extends BalancerTestBase { assertTrue(costWith2ReplicasOnTwoServers < costWith3ReplicasSameServer); } + @Test + public void testNeedsBalanceForColocatedReplicas() { + // check for the case where there are two hosts and with one rack, and where + // both the replicas are hosted on the same server + List regions = randomRegions(1); + ServerName s1 = ServerName.valueOf("host1", 1000, 11111); + ServerName s2 = ServerName.valueOf("host11", 1000, 11111); + Map> map = new HashMap>(); + map.put(s1, regions); + regions.add(RegionReplicaUtil.getRegionInfoForReplica(regions.get(0), 1)); + // until the step above s1 holds two replicas of a region + regions = randomRegions(1); + map.put(s2, regions); + assertTrue(loadBalancer.needsBalance(new Cluster(master, map, null, null, null, null, null))); + // check for the case where there are two hosts on the same rack and there are two racks + // and both the replicas are on the same rack + map.clear(); + regions = randomRegions(1); + List regionsOnS2 = new ArrayList(1); + regionsOnS2.add(RegionReplicaUtil.getRegionInfoForReplica(regions.get(0), 1)); + map.put(s1, regions); + map.put(s2, regionsOnS2); + // add another server so that the cluster has some host on another rack + map.put(ServerName.valueOf("host2", 1000, 11111), randomRegions(1)); + assertTrue(loadBalancer.needsBalance(new Cluster(master, map, null, null, null, null, + new ForTestRackManagerOne()))); + } + @Test (timeout = 60000) public void testSmallCluster() { int numNodes = 10; @@ -547,6 +577,13 @@ public class TestStochasticLoadBalancer extends BalancerTestBase { } } + private static class ForTestRackManagerOne extends RackManager { + @Override + public String getRack(ServerName server) { + return server.getHostname().endsWith("1") ? "rack1" : "rack2"; + } + } + @Test (timeout = 120000) public void testRegionReplicationOnMidClusterWithRacks() { conf.setLong(StochasticLoadBalancer.MAX_STEPS_KEY, 4000000L); @@ -650,7 +687,6 @@ public class TestStochasticLoadBalancer extends BalancerTestBase { public MyRackResolver(Configuration conf) {} - @Override public List resolve(List names) { List racks = new ArrayList(names.size()); for (int i = 0; i < names.size(); i++) { @@ -659,10 +695,8 @@ public class TestStochasticLoadBalancer extends BalancerTestBase { return racks; } - @Override public void reloadCachedMappings() {} - @Override public void reloadCachedMappings(List names) { } }