From 9ad69f3bd048c833b6307582dd3bb4810ab545f0 Mon Sep 17 00:00:00 2001 From: Michael Stack Date: Sun, 20 Mar 2011 02:41:12 +0000 Subject: [PATCH] HBASE-3422 Balancer will try to rebalance thousands of regions in one go; needs an upper bound added git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1083362 13f79535-47bb-0310-9956-ffa450edef68 --- CHANGES.txt | 2 + .../apache/hadoop/hbase/master/HMaster.java | 37 ++++++++++++++++++- 2 files changed, 37 insertions(+), 2 deletions(-) diff --git a/CHANGES.txt b/CHANGES.txt index 5e03d708678..56e8aff9fef 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -80,6 +80,8 @@ Release 0.91.0 - Unreleased completebulkload tool (Vidhyashankar Venkataraman via Stack) HBASE-3653 Parallelize Server Requests on HBase Client HBASE-3657 reduce copying of HRegionInfo's (Ted Yu via Stack) + HBASE-3422 Balancer will try to rebalance thousands of regions in one go; + needs an upper bound added (Ted Yu via Stack) TASK HBASE-3559 Move report of split to master OFF the heartbeat channel diff --git a/src/main/java/org/apache/hadoop/hbase/master/HMaster.java b/src/main/java/org/apache/hadoop/hbase/master/HMaster.java index 5e0e0bcca31..72d1d4ce685 100644 --- a/src/main/java/org/apache/hadoop/hbase/master/HMaster.java +++ b/src/main/java/org/apache/hadoop/hbase/master/HMaster.java @@ -593,9 +593,10 @@ implements HMasterInterface, HMasterRegionInterface, MasterServices, Server { private static Thread getAndStartBalancerChore(final HMaster master) { String name = master.getServerName() + "-BalancerChore"; - int period = master.getConfiguration().getInt("hbase.balancer.period", 300000); + int balancerPeriod = + master.getConfiguration().getInt("hbase.balancer.period", 300000); // Start up the load balancer chore - Chore chore = new Chore(name, period, master) { + Chore chore = new Chore(name, balancerPeriod, master) { @Override protected void chore() { master.balance(); @@ -674,10 +675,30 @@ implements HMasterInterface, HMasterRegionInterface, 
MasterServices, Server { return !isStopped(); } + /** + * @return Maximum time, in milliseconds, that one balancer run may take: hbase.balancer.max.balancing if set, else half of hbase.balancer.period + */ + private int getBalancerCutoffTime() { + int balancerCutoffTime = + getConfiguration().getInt("hbase.balancer.max.balancing", -1); + if (balancerCutoffTime == -1) { + // No time period set so create one -- do half of balancer period. + int balancerPeriod = + getConfiguration().getInt("hbase.balancer.period", 300000); + balancerCutoffTime = balancerPeriod / 2; + // If half the period is not positive (nonsense period), fall back to the full balancerPeriod + if (balancerCutoffTime <= 0) balancerCutoffTime = balancerPeriod; + } + return balancerCutoffTime; + } + @Override public boolean balance() { // If balance not true, don't run balancer. if (!this.balanceSwitch) return false; + // Do this call outside of synchronized block. + int maximumBalanceTime = getBalancerCutoffTime(); + long cutoffTime = System.currentTimeMillis() + maximumBalanceTime; synchronized (this.balancer) { // Only allow one balance run at at time. if (this.assignmentManager.isRegionsInTransition()) { @@ -717,10 +738,22 @@ implements HMasterInterface, HMasterRegionInterface, MasterServices, Server { } } List plans = this.balancer.balanceCluster(assignments); + int rpCount = 0; // number of RegionPlans balanced so far + long totalRegPlanExecTime = 0; if (plans != null && !plans.isEmpty()) { for (RegionPlan plan: plans) { LOG.info("balance " + plan); + long balStartTime = System.currentTimeMillis(); this.assignmentManager.balance(plan); + totalRegPlanExecTime += System.currentTimeMillis()-balStartTime; + rpCount++; + if (rpCount < plans.size() && + // if performing next balance exceeds cutoff time, exit the loop + (System.currentTimeMillis() + (totalRegPlanExecTime / rpCount)) > cutoffTime) { + LOG.debug("No more balancing till next balance run; maximumBalanceTime=" + + maximumBalanceTime); + break; + } } } if (this.cpHost != null) {