diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java index e358895f929..ad68fb1802e 100644 --- a/hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java @@ -128,6 +128,22 @@ public final class HConstants { /** Config for balancing the cluster by table */ public static final String HBASE_MASTER_LOADBALANCE_BYTABLE = "hbase.master.loadbalance.bytable"; + /** Config for the max percent of regions in transition */ + public static final String HBASE_MASTER_BALANCER_MAX_RIT_PERCENT = + "hbase.master.balancer.maxRitPercent"; + + /** Default value for the max percent of regions in transition */ + public static final double DEFAULT_HBASE_MASTER_BALANCER_MAX_RIT_PERCENT = 1.0; + + /** Config for the max balancing time */ + public static final String HBASE_BALANCER_MAX_BALANCING = "hbase.balancer.max.balancing"; + + /** Config for the balancer period */ + public static final String HBASE_BALANCER_PERIOD = "hbase.balancer.period"; + + /** Default value for the balancer period */ + public static final int DEFAULT_HBASE_BALANCER_PERIOD = 300000; + /** The name of the ensemble table */ public static final String ENSEMBLE_TABLE_NAME = "hbase:ensemble"; diff --git a/hbase-common/src/main/resources/hbase-default.xml b/hbase-common/src/main/resources/hbase-default.xml index 16c88492e5c..46a4050b3e1 100644 --- a/hbase-common/src/main/resources/hbase-default.xml +++ b/hbase-common/src/main/resources/hbase-default.xml @@ -564,6 +564,14 @@ possible configurations would overwhelm and obscure the important. to atomic bulk loads are attempted in the face of splitting operations 0 means never give up. + + hbase.master.balancer.maxRitPercent + 1.0 + The max percent of regions in transition when balancing. + The default value is 1.0. So there are no balancer throttling. If set this config to 0.01, + It means that there are at most 1% regions in transition when balancing. + Then the cluster's availability is at least 99% when balancing. + hbase.balancer.period diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java index 1845c08dad7..cf54397c916 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java @@ -309,6 +309,11 @@ public class HMaster extends HRegionServer implements MasterServices, Server { private final ProcedureEvent serverCrashProcessingEnabled = new ProcedureEvent("server crash processing"); + // Maximum time we should run balancer for + private final int maxBlancingTime; + // Maximum percent of regions in transition when balancing + private final double maxRitPercent; + LoadBalancer balancer; private RegionNormalizer normalizer; private BalancerChore balancerChore; @@ -432,6 +437,10 @@ public class HMaster extends HRegionServer implements MasterServices, Server { // preload table descriptor at startup this.preLoadTableDescriptors = conf.getBoolean("hbase.master.preload.tabledescriptors", true); + this.maxBlancingTime = getMaxBalancingTime(); + this.maxRitPercent = conf.getDouble(HConstants.HBASE_MASTER_BALANCER_MAX_RIT_PERCENT, + HConstants.DEFAULT_HBASE_MASTER_BALANCER_MAX_RIT_PERCENT); + // Do we publish the status? boolean shouldPublish = conf.getBoolean(HConstants.STATUS_PUBLISHED, @@ -1298,18 +1307,61 @@ public class HMaster extends HRegionServer implements MasterServices, Server { /** * @return Maximum time we should run balancer for */ - private int getBalancerCutoffTime() { - int balancerCutoffTime = - getConfiguration().getInt("hbase.balancer.max.balancing", -1); - if (balancerCutoffTime == -1) { + private int getMaxBalancingTime() { + int maxBalancingTime = getConfiguration(). + getInt(HConstants.HBASE_BALANCER_MAX_BALANCING, -1); + if (maxBalancingTime == -1) { // No time period set so create one - int balancerPeriod = - getConfiguration().getInt("hbase.balancer.period", 300000); - balancerCutoffTime = balancerPeriod; - // If nonsense period, set it to balancerPeriod - if (balancerCutoffTime <= 0) balancerCutoffTime = balancerPeriod; + maxBalancingTime = getConfiguration().getInt(HConstants.HBASE_BALANCER_PERIOD, + HConstants.DEFAULT_HBASE_BALANCER_PERIOD); } - return balancerCutoffTime; + return maxBalancingTime; + } + + /** + * @return Maximum number of regions in transition + */ + private int getMaxRegionsInTransition() { + int numRegions = this.assignmentManager.getRegionStates().getRegionAssignments().size(); + return Math.max((int) Math.floor(numRegions * this.maxRitPercent), 1); + } + + /** + * It first sleep to the next balance plan start time. Meanwhile, throttling by the max + * number regions in transition to protect availability. + * @param nextBalanceStartTime The next balance plan start time + * @param maxRegionsInTransition max number of regions in transition + * @param cutoffTime when to exit balancer + */ + private void balanceThrottling(long nextBalanceStartTime, int maxRegionsInTransition, + long cutoffTime) { + boolean interrupted = false; + + // Sleep to next balance plan start time + // But if there are zero regions in transition, it can skip sleep to speed up. + while (!interrupted && System.currentTimeMillis() < nextBalanceStartTime + && this.assignmentManager.getRegionStates().getRegionsInTransitionCount() != 0) { + try { + Thread.sleep(100); + } catch (InterruptedException ie) { + interrupted = true; + } + } + + // Throttling by max number regions in transition + while (!interrupted + && maxRegionsInTransition > 0 + && this.assignmentManager.getRegionStates().getRegionsInTransitionCount() + >= maxRegionsInTransition && System.currentTimeMillis() <= cutoffTime) { + try { + // sleep if the number of regions in transition exceeds the limit + Thread.sleep(100); + } catch (InterruptedException ie) { + interrupted = true; + } + } + + if (interrupted) Thread.currentThread().interrupt(); } public boolean balance() throws IOException { @@ -1328,8 +1380,7 @@ public class HMaster extends HRegionServer implements MasterServices, Server { return false; } - // Do this call outside of synchronized block. - int maximumBalanceTime = getBalancerCutoffTime(); + int maxRegionsInTransition = getMaxRegionsInTransition(); synchronized (this.balancer) { // If balance not true, don't run balancer. if (!this.loadBalancerTracker.isBalancerOn()) return false; @@ -1376,27 +1427,35 @@ public class HMaster extends HRegionServer implements MasterServices, Server { if (partialPlans != null) plans.addAll(partialPlans); } - long cutoffTime = System.currentTimeMillis() + maximumBalanceTime; + long balanceStartTime = System.currentTimeMillis(); + long cutoffTime = balanceStartTime + this.maxBlancingTime; int rpCount = 0; // number of RegionPlans balanced so far - long totalRegPlanExecTime = 0; if (plans != null && !plans.isEmpty()) { + int balanceInterval = this.maxBlancingTime / plans.size(); + LOG.info("Balancer plans size is " + plans.size() + ", the balance interval is " + + balanceInterval + " ms, and the max number regions in transition is " + + maxRegionsInTransition); + for (RegionPlan plan: plans) { LOG.info("balance " + plan); - long balStartTime = System.currentTimeMillis(); //TODO: bulk assign this.assignmentManager.balance(plan); - totalRegPlanExecTime += System.currentTimeMillis()-balStartTime; rpCount++; - if (rpCount < plans.size() && - // if performing next balance exceeds cutoff time, exit the loop - (System.currentTimeMillis() + (totalRegPlanExecTime / rpCount)) > cutoffTime) { - //TODO: After balance, there should not be a cutoff time (keeping it as a security net for now) - LOG.debug("No more balancing till next balance run; maximumBalanceTime=" + - maximumBalanceTime); + + balanceThrottling(balanceStartTime + rpCount * balanceInterval, maxRegionsInTransition, + cutoffTime); + + // if performing next balance exceeds cutoff time, exit the loop + if (rpCount < plans.size() && System.currentTimeMillis() > cutoffTime) { + // TODO: After balance, there should not be a cutoff time (keeping it as a security net + // for now) + LOG.debug("No more balancing till next balance run; maxBalanceTime=" + + this.maxBlancingTime); break; } } } + if (this.cpHost != null) { try { this.cpHost.postBalance(rpCount < plans.size() ? plans.subList(0, rpCount) : plans); diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStates.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStates.java index a71c7b790f0..88de126361e 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStates.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStates.java @@ -240,6 +240,13 @@ public class RegionStates { return rit; } + /** + * Get the number of regions in transition. + */ + public synchronized int getRegionsInTransitionCount() { + return regionsInTransition.size(); + } + /** * @return True if specified region in transition. */ diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerChore.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerChore.java index bbbfdf2c6a9..37004edf929 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerChore.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerChore.java @@ -22,6 +22,7 @@ import java.io.IOException; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.ScheduledChore; import org.apache.hadoop.hbase.classification.InterfaceAudience; import org.apache.hadoop.hbase.master.HMaster; @@ -38,7 +39,7 @@ public class BalancerChore extends ScheduledChore { public BalancerChore(HMaster master) { super(master.getServerName() + "-BalancerChore", master, master.getConfiguration().getInt( - "hbase.balancer.period", 300000)); + HConstants.HBASE_BALANCER_PERIOD, HConstants.DEFAULT_HBASE_BALANCER_PERIOD)); this.master = master; } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterBalanceThrottling.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterBalanceThrottling.java new file mode 100644 index 00000000000..3d394d4572f --- /dev/null +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterBalanceThrottling.java @@ -0,0 +1,151 @@ +/** + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.master; + +import static org.junit.Assert.assertTrue; + +import java.io.IOException; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicInteger; + +import org.apache.hadoop.hbase.HBaseTestingUtility; +import org.apache.hadoop.hbase.HConstants; +import org.apache.hadoop.hbase.HRegionInfo; +import org.apache.hadoop.hbase.TableName; +import org.apache.hadoop.hbase.regionserver.HRegionServer; +import org.apache.hadoop.hbase.testclassification.MasterTests; +import org.apache.hadoop.hbase.testclassification.MediumTests; +import org.apache.hadoop.hbase.util.Bytes; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; +import org.junit.experimental.categories.Category; + +@Category({MasterTests.class, MediumTests.class}) +public class TestMasterBalanceThrottling { + private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(); + private static final byte[] FAMILYNAME = Bytes.toBytes("fam"); + + @Before + public void setupConfiguration() { + TEST_UTIL.getConfiguration().set(HConstants.HBASE_MASTER_LOADBALANCER_CLASS, + "org.apache.hadoop.hbase.master.balancer.SimpleLoadBalancer"); + } + + @After + public void shutdown() throws Exception { + TEST_UTIL.getConfiguration().setInt(HConstants.HBASE_BALANCER_MAX_BALANCING, + HConstants.DEFAULT_HBASE_BALANCER_PERIOD); + TEST_UTIL.getConfiguration().setDouble(HConstants.HBASE_MASTER_BALANCER_MAX_RIT_PERCENT, + HConstants.DEFAULT_HBASE_MASTER_BALANCER_MAX_RIT_PERCENT); + TEST_UTIL.shutdownMiniCluster(); + } + + @Test(timeout = 60000) + public void testThrottlingByBalanceInterval() throws Exception { + // Use default config and start a cluster of two regionservers. + TEST_UTIL.startMiniCluster(2); + + TableName tableName = createTable("testNoThrottling"); + final HMaster master = TEST_UTIL.getHBaseCluster().getMaster(); + + // Default max balancing time is 300000 ms and there are 50 regions to balance + // The balance interval is 6000 ms, much longger than the normal region in transition duration + // So the master can balance the region one by one + unbalance(master, tableName); + AtomicInteger maxCount = new AtomicInteger(0); + AtomicBoolean stop = new AtomicBoolean(false); + Thread checker = startBalancerChecker(master, maxCount, stop); + master.balance(); + stop.set(true); + checker.interrupt(); + checker.join(); + assertTrue("max regions in transition: " + maxCount.get(), maxCount.get() == 1); + + TEST_UTIL.deleteTable(tableName); + } + + @Test(timeout = 60000) + public void testThrottlingByMaxRitPercent() throws Exception { + // Set max balancing time to 500 ms and max percent of regions in transition to 0.05 + TEST_UTIL.getConfiguration().setInt(HConstants.HBASE_BALANCER_MAX_BALANCING, 500); + TEST_UTIL.getConfiguration().setDouble(HConstants.HBASE_MASTER_BALANCER_MAX_RIT_PERCENT, 0.05); + TEST_UTIL.startMiniCluster(2); + + TableName tableName = createTable("testThrottlingByMaxRitPercent"); + final HMaster master = TEST_UTIL.getHBaseCluster().getMaster(); + + unbalance(master, tableName); + AtomicInteger maxCount = new AtomicInteger(0); + AtomicBoolean stop = new AtomicBoolean(false); + Thread checker = startBalancerChecker(master, maxCount, stop); + master.balance(); + stop.set(true); + checker.interrupt(); + checker.join(); + // The max number of regions in transition is 100 * 0.05 = 5 + assertTrue("max regions in transition: " + maxCount.get(), maxCount.get() == 5); + + TEST_UTIL.deleteTable(tableName); + } + + private TableName createTable(String table) throws IOException { + TableName tableName = TableName.valueOf(table); + byte[] startKey = new byte[] { 0x00 }; + byte[] stopKey = new byte[] { 0x7f }; + TEST_UTIL.createTable(tableName, new byte[][] { FAMILYNAME }, 1, startKey, stopKey, + 100); + return tableName; + } + + private Thread startBalancerChecker(final HMaster master, final AtomicInteger maxCount, + final AtomicBoolean stop) { + Runnable checker = new Runnable() { + @Override + public void run() { + while (!stop.get()) { + maxCount.set(Math.max(maxCount.get(), master.getAssignmentManager().getRegionStates() + .getRegionsInTransitionCount())); + try { + Thread.sleep(10); + } catch (InterruptedException e) { + e.printStackTrace(); + } + } + } + }; + Thread thread = new Thread(checker); + thread.start(); + return thread; + } + + private void unbalance(HMaster master, TableName tableName) throws Exception { + while (master.getAssignmentManager().getRegionStates().getRegionsInTransitionCount() > 0) { + Thread.sleep(100); + } + HRegionServer biasedServer = TEST_UTIL.getMiniHBaseCluster().getRegionServer(0); + for (HRegionInfo regionInfo : TEST_UTIL.getHBaseAdmin().getTableRegions(tableName)) { + master.move(regionInfo.getEncodedNameAsBytes(), + Bytes.toBytes(biasedServer.getServerName().getServerName())); + } + while (master.getAssignmentManager().getRegionStates().getRegionsInTransitionCount() > 0) { + Thread.sleep(100); + } + } +} diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java index 817dc9a18c4..9c80f7bf8e8 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java @@ -137,7 +137,7 @@ public class TestSplitTransactionOnCluster { new HBaseTestingUtility(); static void setupOnce() throws Exception { - TESTING_UTIL.getConfiguration().setInt("hbase.balancer.period", 60000); + TESTING_UTIL.getConfiguration().setInt(HConstants.HBASE_BALANCER_PERIOD, 60000); useZKForAssignment = TESTING_UTIL.getConfiguration().getBoolean( "hbase.assignment.usezk", true); TESTING_UTIL.startMiniCluster(NB_SERVERS);