From 08800e19d612f816c912e5e29ce438bf110ec523 Mon Sep 17 00:00:00 2001 From: GeorryHuang <215175212@qq.com> Date: Wed, 29 Sep 2021 21:25:39 +0800 Subject: [PATCH] HBASE-26251 StochasticLoadBalancer metrics should update even if balancer doesn't run (#3706) Signed-off-by: Duo Zhang Reviewed-by: Bryan Beaudreault --- .../rsgroup/RSGroupBasedLoadBalancer.java | 5 + .../apache/hadoop/hbase/master/HMaster.java | 24 +++++ .../hadoop/hbase/master/LoadBalancer.java | 9 ++ .../hbase/master/balancer/BalancerChore.java | 2 +- .../master/balancer/BaseLoadBalancer.java | 2 +- .../balancer/StochasticLoadBalancer.java | 51 ++++++++-- .../master/balancer/BalancerTestBase.java | 5 +- .../DummyMetricsStochasticBalancer.java | 75 +++++++++++++++ .../balancer/TestStochasticLoadBalancer.java | 96 +++++++++++++++++++ 9 files changed, 259 insertions(+), 10 deletions(-) create mode 100644 hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/DummyMetricsStochasticBalancer.java diff --git a/hbase-rsgroup/src/main/java/org/apache/hadoop/hbase/rsgroup/RSGroupBasedLoadBalancer.java b/hbase-rsgroup/src/main/java/org/apache/hadoop/hbase/rsgroup/RSGroupBasedLoadBalancer.java index e934dc08520..5db124e27ee 100644 --- a/hbase-rsgroup/src/main/java/org/apache/hadoop/hbase/rsgroup/RSGroupBasedLoadBalancer.java +++ b/hbase-rsgroup/src/main/java/org/apache/hadoop/hbase/rsgroup/RSGroupBasedLoadBalancer.java @@ -100,6 +100,11 @@ public class RSGroupBasedLoadBalancer implements RSGroupableBalancer { } @Override + public synchronized void updateBalancerLoadInfo(Map>> + loadOfAllTable){ + internalBalancer.updateBalancerLoadInfo(loadOfAllTable); + } + public void setMasterServices(MasterServices masterServices) { this.masterServices = masterServices; } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java index 9c9c8d80c12..e14006c9420 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java @@ -1776,6 +1776,30 @@ public class HMaster extends HRegionServer implements MasterServices { return balance(BalanceRequest.defaultInstance()); } + /** + * Trigger a normal balance, see {@link HMaster#balance()} . If the balance is not executed + * this time, the metrics related to the balance will be updated. + * + * When balance is running, related metrics will be updated at the same time. But if some + * checking logic failed and cause the balancer exit early, we lost the chance to update + * balancer metrics. This will lead to user missing the latest balancer info. + * */ + public BalanceResponse balanceOrUpdateMetrics() throws IOException{ + synchronized (this.balancer) { + BalanceResponse response = balance(); + if (!response.isBalancerRan()) { + Map>> assignments = + this.assignmentManager.getRegionStates().getAssignmentsForBalancer(this.tableStateManager, + this.serverManager.getOnlineServersList()); + for (Map> serverMap : assignments.values()) { + serverMap.keySet().removeAll(this.serverManager.getDrainingServersList()); + } + this.balancer.updateBalancerLoadInfo(assignments); + } + return response; + } + } + /** * Checks master state before initiating action over region topology. * @param action the name of the action under consideration, for logging. diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/LoadBalancer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/LoadBalancer.java index a0624782bb6..a0aa9d6a68b 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/LoadBalancer.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/LoadBalancer.java @@ -170,6 +170,15 @@ public interface LoadBalancer extends Stoppable, ConfigurationObserver { /*Updates balancer status tag reported to JMX*/ void updateBalancerStatus(boolean status); + /** + * In some scenarios, Balancer needs to update internal status or information according to the + * current tables load + * + * @param loadOfAllTable region load of servers for all table + */ + default void updateBalancerLoadInfo(Map>> + loadOfAllTable){} + /** * @return true if Master carries regions * @deprecated since 2.4.0, will be removed in 3.0.0. diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerChore.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerChore.java index 712567cb3d1..1f5a801b610 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerChore.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerChore.java @@ -46,7 +46,7 @@ public class BalancerChore extends ScheduledChore { @Override protected void chore() { try { - master.balance(); + master.balanceOrUpdateMetrics(); } catch (IOException e) { LOG.error("Failed to balance.", e); } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BaseLoadBalancer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BaseLoadBalancer.java index 19742762d50..5d89b2c831c 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BaseLoadBalancer.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BaseLoadBalancer.java @@ -710,7 +710,7 @@ public abstract class BaseLoadBalancer implements LoadBalancer { } } - private Map> toEnsumbleTableLoad( + protected final Map> toEnsumbleTableLoad( Map>> LoadOfAllTable) { Map> returnMap = new TreeMap<>(); for (Map> serverNameListMap : LoadOfAllTable.values()) { diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java index 4d04282b1ba..8c01dea91ae 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java @@ -30,6 +30,7 @@ import java.util.concurrent.ThreadLocalRandom; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.ClusterMetrics; import org.apache.hadoop.hbase.HBaseInterfaceAudience; +import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.RegionMetrics; import org.apache.hadoop.hbase.ServerMetrics; import org.apache.hadoop.hbase.ServerName; @@ -118,6 +119,7 @@ public class StochasticLoadBalancer extends BaseLoadBalancer { "hbase.master.balancer.stochastic.minCostNeedBalance"; protected static final String COST_FUNCTIONS_COST_FUNCTIONS_KEY = "hbase.master.balancer.stochastic.additionalCostFunctions"; + public static final String OVERALL_COST_FUNCTION_NAME = "Overall"; Map> loads = new HashMap<>(); @@ -161,6 +163,12 @@ public class StochasticLoadBalancer extends BaseLoadBalancer { super(new MetricsStochasticBalancer()); } + @RestrictedApi(explanation = "Should only be called in tests", link = "", + allowedOnPath = ".*/src/test/.*") + public StochasticLoadBalancer(MetricsStochasticBalancer metricsStochasticBalancer) { + super(metricsStochasticBalancer); + } + private static CostFunction createCostFunction(Class clazz, Configuration conf) { try { @@ -282,6 +290,35 @@ public class StochasticLoadBalancer extends BaseLoadBalancer { } } + private void updateBalancerTableLoadInfo(TableName tableName, + Map> loadOfOneTable) { + RegionLocationFinder finder = null; + if ((this.localityCost != null && this.localityCost.getMultiplier() > 0) + || (this.rackLocalityCost != null && this.rackLocalityCost.getMultiplier() > 0)) { + finder = this.regionFinder; + } + BalancerClusterState cluster = + new BalancerClusterState(loadOfOneTable, loads, finder, rackManager); + + initCosts(cluster); + curOverallCost = computeCost(cluster, Double.MAX_VALUE); + System.arraycopy(tempFunctionCosts, 0, curFunctionCosts, 0, curFunctionCosts.length); + updateStochasticCosts(tableName, curOverallCost, curFunctionCosts); + } + + @Override + public void updateBalancerLoadInfo( + Map>> loadOfAllTable) { + if (isByTable) { + loadOfAllTable.forEach((tableName, loadOfOneTable) -> { + updateBalancerTableLoadInfo(tableName, loadOfOneTable); + }); + } else { + updateBalancerTableLoadInfo(HConstants.ENSEMBLE_TABLE_NAME, + toEnsumbleTableLoad(loadOfAllTable)); + } + } + /** * Update the number of metrics that are reported to JMX */ @@ -431,16 +468,17 @@ public class StochasticLoadBalancer extends BaseLoadBalancer { initCosts(cluster); - if (!needsBalance(tableName, cluster)) { - return null; - } - double currentCost = computeCost(cluster, Double.MAX_VALUE); curOverallCost = currentCost; System.arraycopy(tempFunctionCosts, 0, curFunctionCosts, 0, curFunctionCosts.length); + updateStochasticCosts(tableName, curOverallCost, curFunctionCosts); double initCost = currentCost; double newCost; + if (!needsBalance(tableName, cluster)) { + return null; + } + long computedMaxSteps; if (runMaxSteps) { computedMaxSteps = Math.max(this.maxSteps, calculateMaxSteps(cluster)); @@ -499,9 +537,8 @@ public class StochasticLoadBalancer extends BaseLoadBalancer { metricsBalancer.balanceCluster(endTime - startTime); - // update costs metrics - updateStochasticCosts(tableName, curOverallCost, curFunctionCosts); if (initCost > currentCost) { + updateStochasticCosts(tableName, curOverallCost, curFunctionCosts); plans = createRegionPlans(cluster); LOG.info("Finished computing new moving plan. Computation took {} ms" + " to try {} different iterations. Found a solution that moves " + @@ -570,7 +607,7 @@ public class StochasticLoadBalancer extends BaseLoadBalancer { MetricsStochasticBalancer balancer = (MetricsStochasticBalancer) metricsBalancer; // overall cost balancer.updateStochasticCost(tableName.getNameAsString(), - "Overall", "Overall cost", overall); + OVERALL_COST_FUNCTION_NAME, "Overall cost", overall); // each cost function for (int i = 0; i < costFunctions.size(); i++) { diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/BalancerTestBase.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/BalancerTestBase.java index 435a59132c2..603ccb3ed40 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/BalancerTestBase.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/BalancerTestBase.java @@ -68,6 +68,9 @@ public class BalancerTestBase { protected static Configuration conf; protected static StochasticLoadBalancer loadBalancer; + protected static DummyMetricsStochasticBalancer dummyMetricsStochasticBalancer = + new DummyMetricsStochasticBalancer(); + @BeforeClass public static void beforeAllTests() throws Exception { conf = HBaseConfiguration.create(); @@ -75,7 +78,7 @@ public class BalancerTestBase { conf.setFloat("hbase.master.balancer.stochastic.maxMovePercent", 0.75f); conf.setFloat("hbase.regions.slop", 0.0f); conf.setFloat("hbase.master.balancer.stochastic.localityCost", 0); - loadBalancer = new StochasticLoadBalancer(); + loadBalancer = new StochasticLoadBalancer(dummyMetricsStochasticBalancer); MasterServices services = mock(MasterServices.class); when(services.getConfiguration()).thenReturn(conf); loadBalancer.setMasterServices(services); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/DummyMetricsStochasticBalancer.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/DummyMetricsStochasticBalancer.java new file mode 100644 index 00000000000..fcb8f64b0ec --- /dev/null +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/DummyMetricsStochasticBalancer.java @@ -0,0 +1,75 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.master.balancer; + +import java.util.HashMap; +import java.util.Map; + +public class DummyMetricsStochasticBalancer extends MetricsStochasticBalancer { + //We use a map to record those metrics that were updated to MetricsStochasticBalancer when running + // unit tests. + private Map costsMap; + + public DummyMetricsStochasticBalancer() { + //noop + } + + @Override + protected void initSource() { + costsMap = new HashMap<>(); + } + + @Override + public void balanceCluster(long time) { + //noop + } + + @Override + public void incrMiscInvocations() { + //noop + } + + @Override + public void balancerStatus(boolean status) { + //noop + } + + @Override + public void updateMetricsSize(int size) { + //noop + } + + @Override + public void updateStochasticCost(String tableName, String costFunctionName, + String costFunctionDesc, Double value) { + String key = tableName + "#" + costFunctionName; + costsMap.put(key, value); + } + + public Map getDummyCostsMap(){ + return this.costsMap; + } + + /** + * Clear all metrics in the cache map then prepare to run the next test + * */ + public void clearDummyMetrics(){ + this.costsMap.clear(); + } + +} diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestStochasticLoadBalancer.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestStochasticLoadBalancer.java index b0beac78a7d..ae57c805a2f 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestStochasticLoadBalancer.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestStochasticLoadBalancer.java @@ -159,6 +159,102 @@ public class TestStochasticLoadBalancer extends BalancerTestBase { } } + @Test + public void testUpdateBalancerLoadInfo(){ + int[] cluster = new int[] { 10, 0 }; + Map> servers = mockClusterServers(cluster); + BalancerClusterState clusterState = mockCluster(cluster); + Map>> LoadOfAllTable = + (Map) mockClusterServersWithTables(servers); + try { + boolean[] perTableBalancerConfigs = { true, false }; + for (boolean isByTable : perTableBalancerConfigs) { + conf.setBoolean(HConstants.HBASE_MASTER_LOADBALANCE_BYTABLE, isByTable); + loadBalancer.onConfigurationChange(conf); + dummyMetricsStochasticBalancer.clearDummyMetrics(); + loadBalancer.updateBalancerLoadInfo(LoadOfAllTable); + assertTrue("Metrics should be recorded!", + dummyMetricsStochasticBalancer.getDummyCostsMap() != null + && !dummyMetricsStochasticBalancer.getDummyCostsMap().isEmpty()); + + String metricRecordKey; + if (isByTable) { + metricRecordKey = "table1#" + StochasticLoadBalancer.OVERALL_COST_FUNCTION_NAME; + } else { + metricRecordKey = HConstants.ENSEMBLE_TABLE_NAME + "#" + + StochasticLoadBalancer.OVERALL_COST_FUNCTION_NAME; + } + double curOverallCost = loadBalancer.computeCost(clusterState, Double.MAX_VALUE); + double curOverallCostInMetrics = + dummyMetricsStochasticBalancer.getDummyCostsMap().get(metricRecordKey); + assertEquals(curOverallCost, curOverallCostInMetrics, 0.001); + } + }finally { + conf.unset(HConstants.HBASE_MASTER_LOADBALANCE_BYTABLE); + loadBalancer.onConfigurationChange(conf); + } + } + + @Test + public void testUpdateStochasticCosts() { + float minCost = conf.getFloat("hbase.master.balancer.stochastic.minCostNeedBalance", 0.05f); + try { + int[] cluster = new int[] { 10, 0 }; + Map> servers = mockClusterServers(cluster); + BalancerClusterState clusterState = mockCluster(cluster); + conf.setFloat("hbase.master.balancer.stochastic.minCostNeedBalance", 1.0f); + conf.setBoolean(HConstants.HBASE_MASTER_LOADBALANCE_BYTABLE, false); + loadBalancer.onConfigurationChange(conf); + dummyMetricsStochasticBalancer.clearDummyMetrics(); + List plans = + loadBalancer.balanceCluster((Map) mockClusterServersWithTables(servers)); + + assertTrue("Balance plan should not be empty!", plans != null && !plans.isEmpty()); + assertTrue("There should be metrics record in MetricsStochasticBalancer", + !dummyMetricsStochasticBalancer.getDummyCostsMap().isEmpty()); + + double overallCostOfCluster = loadBalancer.computeCost(clusterState, Double.MAX_VALUE); + double overallCostInMetrics = dummyMetricsStochasticBalancer.getDummyCostsMap().get( + HConstants.ENSEMBLE_TABLE_NAME + "#" + StochasticLoadBalancer.OVERALL_COST_FUNCTION_NAME); + assertEquals(overallCostOfCluster, overallCostInMetrics, 0.001); + } finally { + //reset config + conf.setFloat("hbase.master.balancer.stochastic.minCostNeedBalance", minCost); + conf.unset(HConstants.HBASE_MASTER_LOADBALANCE_BYTABLE); + loadBalancer.onConfigurationChange(conf); + } + } + + @Test + public void testUpdateStochasticCostsIfBalanceNotRan() { + float minCost = conf.getFloat("hbase.master.balancer.stochastic.minCostNeedBalance", 0.05f); + try { + int[] cluster = new int[] { 10, 10 }; + Map> servers = mockClusterServers(cluster); + BalancerClusterState clusterState = mockCluster(cluster); + conf.setFloat("hbase.master.balancer.stochastic.minCostNeedBalance", Float.MAX_VALUE); + conf.setBoolean(HConstants.HBASE_MASTER_LOADBALANCE_BYTABLE, false); + loadBalancer.onConfigurationChange(conf); + dummyMetricsStochasticBalancer.clearDummyMetrics(); + List plans = + loadBalancer.balanceCluster((Map) mockClusterServersWithTables(servers)); + + assertTrue("Balance plan should be empty!", plans == null || plans.isEmpty()); + assertTrue("There should be metrics record in MetricsStochasticBalancer!", + !dummyMetricsStochasticBalancer.getDummyCostsMap().isEmpty()); + + double overallCostOfCluster = loadBalancer.computeCost(clusterState, Double.MAX_VALUE); + double overallCostInMetrics = dummyMetricsStochasticBalancer.getDummyCostsMap().get( + HConstants.ENSEMBLE_TABLE_NAME + "#" + StochasticLoadBalancer.OVERALL_COST_FUNCTION_NAME); + assertEquals(overallCostOfCluster, overallCostInMetrics, 0.001); + } finally { + //reset config + conf.setFloat("hbase.master.balancer.stochastic.minCostNeedBalance", minCost); + conf.unset(HConstants.HBASE_MASTER_LOADBALANCE_BYTABLE); + loadBalancer.onConfigurationChange(conf); + } + } + @Test public void testNeedBalance() { float minCost = conf.getFloat("hbase.master.balancer.stochastic.minCostNeedBalance", 0.05f);