HubSpot Backport: HBASE-26251 StochasticLoadBalancer metrics should update even if balancer doesn't run (#3706)

Signed-off-by: Duo Zhang <zhangduo@apache.org>
Reviewed-by: Bryan Beaudreault <bbeaudreault@hubspot.com>
This commit is contained in:
GeorryHuang 2021-09-29 21:25:39 +08:00 committed by Bryan Beaudreault
parent 36adc2e6ac
commit b988fd3bb2
9 changed files with 260 additions and 10 deletions

View File

@ -106,6 +106,12 @@ public class RSGroupBasedLoadBalancer implements RSGroupableBalancer {
} }
} }
/**
 * Hands the per-table load to {@code internalBalancer} by calling its
 * {@code updateBalancerLoadInfo} with the same argument.
 *
 * @param loadOfAllTable for every table, the regions hosted by each server
 */
@Override
public synchronized void updateBalancerLoadInfo(
    Map<TableName, Map<ServerName, List<RegionInfo>>> loadOfAllTable) {
  internalBalancer.updateBalancerLoadInfo(loadOfAllTable);
}
@Override @Override
public void setClusterMetrics(ClusterMetrics sm) { public void setClusterMetrics(ClusterMetrics sm) {
this.clusterStatus = sm; this.clusterStatus = sm;

View File

@ -1710,6 +1710,30 @@ public class HMaster extends HRegionServer implements MasterServices {
return balance(BalanceRequest.defaultInstance()); return balance(BalanceRequest.defaultInstance());
} }
/**
 * Triggers a normal balance (see {@link HMaster#balance()}) and, if the response says the
 * balancer did not run, pushes the current assignments to the balancer so that it can update
 * its load information.
 * <p>
 * While a balance is running, the related metrics are updated along the way. If some check
 * fails and the balancer exits early, that chance to update the balancer metrics is lost and
 * users would miss the latest balancer info; this method covers that case.
 *
 * @return the response returned by {@link HMaster#balance()}
 * @throws IOException presumably propagated from {@link HMaster#balance()} or the assignment
 *           lookup -- confirm against those methods
 */
public BalanceResponse balanceOrUpdateMetrics() throws IOException {
  synchronized (this.balancer) {
    BalanceResponse response = balance();
    if (response.isBalancerRan()) {
      // Nothing more to do: a balancer run updates its metrics itself.
      return response;
    }

    Map<TableName, Map<ServerName, List<RegionInfo>>> currentLoad =
      this.assignmentManager.getRegionStates().getAssignmentsForBalancer(
        this.tableStateManager, this.serverManager.getOnlineServersList());
    // Drop draining servers from every table's view before handing the load over.
    for (Map<ServerName, List<RegionInfo>> loadPerServer : currentLoad.values()) {
      loadPerServer.keySet().removeAll(this.serverManager.getDrainingServersList());
    }
    this.balancer.updateBalancerLoadInfo(currentLoad);
    return response;
  }
}
/** /**
* Checks master state before initiating action over region topology. * Checks master state before initiating action over region topology.
* @param action the name of the action under consideration, for logging. * @param action the name of the action under consideration, for logging.

View File

@ -169,6 +169,15 @@ public interface LoadBalancer extends Configurable, Stoppable, ConfigurationObse
/*Updates balancer status tag reported to JMX*/ /*Updates balancer status tag reported to JMX*/
void updateBalancerStatus(boolean status); void updateBalancerStatus(boolean status);
/**
 * Lets a balancer update its internal status or information according to the current load of
 * all tables, for scenarios where that is needed.
 * <p>
 * This default implementation has an empty body, so implementations that do not override it
 * ignore the call.
 *
 * @param loadOfAllTable region load of servers, for all tables
 */
default void updateBalancerLoadInfo(Map<TableName, Map<ServerName, List<RegionInfo>>>
loadOfAllTable){}
/** /**
* @return true if Master carries regions * @return true if Master carries regions
* @deprecated since 2.4.0, will be removed in 3.0.0. * @deprecated since 2.4.0, will be removed in 3.0.0.

View File

@ -46,7 +46,7 @@ public class BalancerChore extends ScheduledChore {
@Override @Override
protected void chore() { protected void chore() {
try { try {
master.balance(); master.balanceOrUpdateMetrics();
} catch (IOException e) { } catch (IOException e) {
LOG.error("Failed to balance.", e); LOG.error("Failed to balance.", e);
} }

View File

@ -1706,7 +1706,7 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
} }
} }
private Map<ServerName, List<RegionInfo>> toEnsumbleTableLoad( protected final Map<ServerName, List<RegionInfo>> toEnsumbleTableLoad(
Map<TableName, Map<ServerName, List<RegionInfo>>> LoadOfAllTable) { Map<TableName, Map<ServerName, List<RegionInfo>>> LoadOfAllTable) {
Map<ServerName, List<RegionInfo>> returnMap = new TreeMap<>(); Map<ServerName, List<RegionInfo>> returnMap = new TreeMap<>();
for (Map<ServerName, List<RegionInfo>> serverNameListMap : LoadOfAllTable.values()) { for (Map<ServerName, List<RegionInfo>> serverNameListMap : LoadOfAllTable.values()) {

View File

@ -34,6 +34,7 @@ import java.util.stream.Collectors;
import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.ClusterMetrics; import org.apache.hadoop.hbase.ClusterMetrics;
import org.apache.hadoop.hbase.HBaseInterfaceAudience; import org.apache.hadoop.hbase.HBaseInterfaceAudience;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.RegionMetrics; import org.apache.hadoop.hbase.RegionMetrics;
import org.apache.hadoop.hbase.ServerMetrics; import org.apache.hadoop.hbase.ServerMetrics;
import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.ServerName;
@ -130,6 +131,7 @@ public class StochasticLoadBalancer extends BaseLoadBalancer {
"hbase.master.balancer.stochastic.minCostNeedBalance"; "hbase.master.balancer.stochastic.minCostNeedBalance";
protected static final String COST_FUNCTIONS_COST_FUNCTIONS_KEY = protected static final String COST_FUNCTIONS_COST_FUNCTIONS_KEY =
"hbase.master.balancer.stochastic.additionalCostFunctions"; "hbase.master.balancer.stochastic.additionalCostFunctions";
public static final String OVERALL_COST_FUNCTION_NAME = "Overall";
protected static final Random RANDOM = new Random(System.currentTimeMillis()); protected static final Random RANDOM = new Random(System.currentTimeMillis());
private static final Logger LOG = LoggerFactory.getLogger(StochasticLoadBalancer.class); private static final Logger LOG = LoggerFactory.getLogger(StochasticLoadBalancer.class);
@ -177,6 +179,12 @@ public class StochasticLoadBalancer extends BaseLoadBalancer {
super(new MetricsStochasticBalancer()); super(new MetricsStochasticBalancer());
} }
/**
 * Test-only constructor (see the {@code RestrictedApi} annotation below) that passes the given
 * metrics object to the superclass constructor instead of creating a new one.
 *
 * @param metricsStochasticBalancer metrics object handed to the superclass constructor
 */
@RestrictedApi(explanation = "Should only be called in tests", link = "",
allowedOnPath = ".*/src/test/.*")
public StochasticLoadBalancer(MetricsStochasticBalancer metricsStochasticBalancer) {
super(metricsStochasticBalancer);
}
@Override @Override
public void onConfigurationChange(Configuration conf) { public void onConfigurationChange(Configuration conf) {
setConf(conf); setConf(conf);
@ -301,6 +309,35 @@ public class StochasticLoadBalancer extends BaseLoadBalancer {
} }
} }
/**
 * Recomputes the current overall cost and the per-function costs for the load of one table
 * and reports them through {@code updateStochasticCosts}.
 *
 * @param tableName name under which the costs are reported
 * @param loadOfOneTable regions hosted by each server for that table
 */
private void updateBalancerTableLoadInfo(TableName tableName,
    Map<ServerName, List<RegionInfo>> loadOfOneTable) {
  // The region finder is only passed on when a locality-based cost function has a positive
  // multiplier; the rack-locality check is skipped once plain locality already qualifies.
  boolean localityInUse = this.localityCost != null && this.localityCost.getMultiplier() > 0;
  if (!localityInUse) {
    localityInUse =
      this.rackLocalityCost != null && this.rackLocalityCost.getMultiplier() > 0;
  }
  RegionLocationFinder finder = localityInUse ? this.regionFinder : null;

  Cluster tableCluster = new Cluster(loadOfOneTable, loads, finder, rackManager);
  initCosts(tableCluster);
  curOverallCost = computeCost(tableCluster, Double.MAX_VALUE);
  // Copy the per-function costs from tempFunctionCosts into curFunctionCosts before reporting.
  System.arraycopy(tempFunctionCosts, 0, curFunctionCosts, 0, curFunctionCosts.length);
  updateStochasticCosts(tableName, curOverallCost, curFunctionCosts);
}
/**
 * Updates the balancer's cost information from the given load. When {@code isByTable} is set,
 * every table is processed on its own; otherwise all tables are merged with
 * {@code toEnsumbleTableLoad} and processed once under
 * {@link HConstants#ENSEMBLE_TABLE_NAME}.
 *
 * @param loadOfAllTable for every table, the regions hosted by each server
 */
@Override
public void updateBalancerLoadInfo(
    Map<TableName, Map<ServerName, List<RegionInfo>>> loadOfAllTable) {
  if (!isByTable) {
    updateBalancerTableLoadInfo(HConstants.ENSEMBLE_TABLE_NAME,
      toEnsumbleTableLoad(loadOfAllTable));
    return;
  }
  loadOfAllTable.forEach(this::updateBalancerTableLoadInfo);
}
/** /**
* Update the number of metrics that are reported to JMX * Update the number of metrics that are reported to JMX
*/ */
@ -435,16 +472,17 @@ public class StochasticLoadBalancer extends BaseLoadBalancer {
initCosts(cluster); initCosts(cluster);
if (!needsBalance(tableName, cluster)) {
return null;
}
double currentCost = computeCost(cluster, Double.MAX_VALUE); double currentCost = computeCost(cluster, Double.MAX_VALUE);
curOverallCost = currentCost; curOverallCost = currentCost;
System.arraycopy(tempFunctionCosts, 0, curFunctionCosts, 0, curFunctionCosts.length); System.arraycopy(tempFunctionCosts, 0, curFunctionCosts, 0, curFunctionCosts.length);
updateStochasticCosts(tableName, curOverallCost, curFunctionCosts);
double initCost = currentCost; double initCost = currentCost;
double newCost; double newCost;
if (!needsBalance(tableName, cluster)) {
return null;
}
long computedMaxSteps; long computedMaxSteps;
if (runMaxSteps) { if (runMaxSteps) {
computedMaxSteps = Math.max(this.maxSteps, computedMaxSteps = Math.max(this.maxSteps,
@ -504,9 +542,8 @@ public class StochasticLoadBalancer extends BaseLoadBalancer {
metricsBalancer.balanceCluster(endTime - startTime); metricsBalancer.balanceCluster(endTime - startTime);
// update costs metrics
updateStochasticCosts(tableName, curOverallCost, curFunctionCosts);
if (initCost > currentCost) { if (initCost > currentCost) {
updateStochasticCosts(tableName, curOverallCost, curFunctionCosts);
plans = createRegionPlans(cluster); plans = createRegionPlans(cluster);
LOG.info("Finished computing new moving plan. Computation took {} ms" + LOG.info("Finished computing new moving plan. Computation took {} ms" +
" to try {} different iterations. Found a solution that moves " + " to try {} different iterations. Found a solution that moves " +
@ -574,7 +611,7 @@ public class StochasticLoadBalancer extends BaseLoadBalancer {
MetricsStochasticBalancer balancer = (MetricsStochasticBalancer) metricsBalancer; MetricsStochasticBalancer balancer = (MetricsStochasticBalancer) metricsBalancer;
// overall cost // overall cost
balancer.updateStochasticCost(tableName.getNameAsString(), balancer.updateStochasticCost(tableName.getNameAsString(),
"Overall", "Overall cost", overall); OVERALL_COST_FUNCTION_NAME, "Overall cost", overall);
// each cost function // each cost function
for (int i = 0; i < costFunctions.size(); i++) { for (int i = 0; i < costFunctions.size(); i++) {

View File

@ -65,6 +65,9 @@ public class BalancerTestBase {
protected static Configuration conf; protected static Configuration conf;
protected static StochasticLoadBalancer loadBalancer; protected static StochasticLoadBalancer loadBalancer;
protected static DummyMetricsStochasticBalancer dummyMetricsStochasticBalancer =
new DummyMetricsStochasticBalancer();
@BeforeClass @BeforeClass
public static void beforeAllTests() throws Exception { public static void beforeAllTests() throws Exception {
conf = HBaseConfiguration.create(); conf = HBaseConfiguration.create();
@ -72,7 +75,7 @@ public class BalancerTestBase {
conf.setFloat("hbase.master.balancer.stochastic.maxMovePercent", 0.75f); conf.setFloat("hbase.master.balancer.stochastic.maxMovePercent", 0.75f);
conf.setFloat("hbase.regions.slop", 0.0f); conf.setFloat("hbase.regions.slop", 0.0f);
conf.setFloat("hbase.master.balancer.stochastic.localityCost", 0); conf.setFloat("hbase.master.balancer.stochastic.localityCost", 0);
loadBalancer = new StochasticLoadBalancer(); loadBalancer = new StochasticLoadBalancer(dummyMetricsStochasticBalancer);
loadBalancer.setConf(conf); loadBalancer.setConf(conf);
} }

View File

@ -0,0 +1,75 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.master.balancer;
import java.util.HashMap;
import java.util.Map;
/**
 * Metrics stand-in for unit tests: cost updates are kept in an in-memory map instead of being
 * reported, and the other reporting methods overridden here do nothing.
 */
public class DummyMetricsStochasticBalancer extends MetricsStochasticBalancer {
  // Costs passed to updateStochasticCost, keyed by "<tableName>#<costFunctionName>".
  // Created in initSource() rather than by a field initializer.
  // NOTE(review): presumably because the superclass constructor calls initSource() -- confirm.
  private Map<String, Double> costsMap;

  public DummyMetricsStochasticBalancer() {
    // noop
  }

  /** Creates the empty map that records reported costs. */
  @Override
  protected void initSource() {
    costsMap = new HashMap<>();
  }

  @Override
  public void balanceCluster(long time) {
    // noop
  }

  @Override
  public void incrMiscInvocations() {
    // noop
  }

  @Override
  public void balancerStatus(boolean status) {
    // noop
  }

  @Override
  public void updateMetricsSize(int size) {
    // noop
  }

  /**
   * Stores {@code value} under the key {@code tableName + "#" + costFunctionName}, replacing
   * any value stored earlier for the same key. The description is not stored.
   */
  @Override
  public void updateStochasticCost(String tableName, String costFunctionName,
      String costFunctionDesc, Double value) {
    costsMap.put(tableName + "#" + costFunctionName, value);
  }

  /**
   * @return the map of recorded costs itself, not a copy
   */
  public Map<String, Double> getDummyCostsMap() {
    return this.costsMap;
  }

  /**
   * Removes every recorded cost so the next test starts from an empty map.
   */
  public void clearDummyMetrics() {
    this.costsMap.clear();
  }
}

View File

@ -164,6 +164,102 @@ public class TestStochasticLoadBalancer extends BalancerTestBase {
} }
} }
/**
 * Checks that {@code updateBalancerLoadInfo} records an overall-cost metric, first in by-table
 * mode and then in ensemble mode, and that the recorded value matches {@code computeCost} on
 * a mock cluster built from the same region counts (tolerance 0.001).
 */
@Test
public void testUpdateBalancerLoadInfo() {
  int[] regionsPerServer = new int[] { 10, 0 };
  Map<ServerName, List<RegionInfo>> servers = mockClusterServers(regionsPerServer);
  Cluster clusterState = mockCluster(regionsPerServer);
  Map<TableName, Map<ServerName, List<RegionInfo>>> loadOfAllTable =
    (Map) mockClusterServersWithTables(servers);
  try {
    for (boolean byTable : new boolean[] { true, false }) {
      conf.setBoolean(HConstants.HBASE_MASTER_LOADBALANCE_BYTABLE, byTable);
      loadBalancer.onConfigurationChange(conf);
      dummyMetricsStochasticBalancer.clearDummyMetrics();
      loadBalancer.updateBalancerLoadInfo(loadOfAllTable);

      Map<String, Double> recorded = dummyMetricsStochasticBalancer.getDummyCostsMap();
      assertTrue("Metrics should be recorded!", recorded != null && !recorded.isEmpty());

      // By-table mode reports under the table's own name, ensemble mode under the shared one.
      String metricRecordKey = byTable
        ? "table1#" + StochasticLoadBalancer.OVERALL_COST_FUNCTION_NAME
        : HConstants.ENSEMBLE_TABLE_NAME + "#"
          + StochasticLoadBalancer.OVERALL_COST_FUNCTION_NAME;

      double expectedCost = loadBalancer.computeCost(clusterState, Double.MAX_VALUE);
      double costInMetrics =
        dummyMetricsStochasticBalancer.getDummyCostsMap().get(metricRecordKey);
      assertEquals(expectedCost, costInMetrics, 0.001);
    }
  } finally {
    // Put the shared configuration back so later tests are not affected.
    conf.unset(HConstants.HBASE_MASTER_LOADBALANCE_BYTABLE);
    loadBalancer.onConfigurationChange(conf);
  }
}
/**
 * Checks that a {@code balanceCluster} call which yields a non-empty plan also records an
 * overall-cost metric for the ensemble table, and that the recorded value matches
 * {@code computeCost} on a mock cluster built from the same region counts (tolerance 0.001).
 */
@Test
public void testUpdateStochasticCosts() {
  final String minCostKey = "hbase.master.balancer.stochastic.minCostNeedBalance";
  float savedMinCost = conf.getFloat(minCostKey, 0.05f);
  try {
    int[] regionsPerServer = new int[] { 10, 0 };
    Map<ServerName, List<RegionInfo>> servers = mockClusterServers(regionsPerServer);
    Cluster clusterState = mockCluster(regionsPerServer);
    conf.setFloat(minCostKey, 1.0f);
    conf.setBoolean(HConstants.HBASE_MASTER_LOADBALANCE_BYTABLE, false);
    loadBalancer.onConfigurationChange(conf);
    dummyMetricsStochasticBalancer.clearDummyMetrics();

    List<RegionPlan> plans =
      loadBalancer.balanceCluster((Map) mockClusterServersWithTables(servers));
    assertTrue("Balance plan should not be empty!", plans != null && !plans.isEmpty());
    assertTrue("There should be metrics record in MetricsStochasticBalancer",
      !dummyMetricsStochasticBalancer.getDummyCostsMap().isEmpty());

    double expectedCost = loadBalancer.computeCost(clusterState, Double.MAX_VALUE);
    String overallKey =
      HConstants.ENSEMBLE_TABLE_NAME + "#" + StochasticLoadBalancer.OVERALL_COST_FUNCTION_NAME;
    double costInMetrics = dummyMetricsStochasticBalancer.getDummyCostsMap().get(overallKey);
    assertEquals(expectedCost, costInMetrics, 0.001);
  } finally {
    // reset config
    conf.setFloat(minCostKey, savedMinCost);
    conf.unset(HConstants.HBASE_MASTER_LOADBALANCE_BYTABLE);
    loadBalancer.onConfigurationChange(conf);
  }
}
/**
 * Checks that a {@code balanceCluster} call which yields no plan (the minimum cost needed to
 * balance is set to {@code Float.MAX_VALUE}) still records an overall-cost metric for the
 * ensemble table, matching {@code computeCost} on a mock cluster built from the same region
 * counts (tolerance 0.001).
 */
@Test
public void testUpdateStochasticCostsIfBalanceNotRan() {
  final String minCostKey = "hbase.master.balancer.stochastic.minCostNeedBalance";
  float savedMinCost = conf.getFloat(minCostKey, 0.05f);
  try {
    int[] regionsPerServer = new int[] { 10, 10 };
    Map<ServerName, List<RegionInfo>> servers = mockClusterServers(regionsPerServer);
    Cluster clusterState = mockCluster(regionsPerServer);
    conf.setFloat(minCostKey, Float.MAX_VALUE);
    conf.setBoolean(HConstants.HBASE_MASTER_LOADBALANCE_BYTABLE, false);
    loadBalancer.onConfigurationChange(conf);
    dummyMetricsStochasticBalancer.clearDummyMetrics();

    List<RegionPlan> plans =
      loadBalancer.balanceCluster((Map) mockClusterServersWithTables(servers));
    assertTrue("Balance plan should be empty!", plans == null || plans.isEmpty());
    assertTrue("There should be metrics record in MetricsStochasticBalancer!",
      !dummyMetricsStochasticBalancer.getDummyCostsMap().isEmpty());

    double expectedCost = loadBalancer.computeCost(clusterState, Double.MAX_VALUE);
    String overallKey =
      HConstants.ENSEMBLE_TABLE_NAME + "#" + StochasticLoadBalancer.OVERALL_COST_FUNCTION_NAME;
    double costInMetrics = dummyMetricsStochasticBalancer.getDummyCostsMap().get(overallKey);
    assertEquals(expectedCost, costInMetrics, 0.001);
  } finally {
    // reset config
    conf.setFloat(minCostKey, savedMinCost);
    conf.unset(HConstants.HBASE_MASTER_LOADBALANCE_BYTABLE);
    loadBalancer.onConfigurationChange(conf);
  }
}
@Test @Test
public void testNeedBalance() { public void testNeedBalance() {
float minCost = conf.getFloat("hbase.master.balancer.stochastic.minCostNeedBalance", 0.05f); float minCost = conf.getFloat("hbase.master.balancer.stochastic.minCostNeedBalance", 0.05f);