HBASE-25973 Balancer should explain progress in a better way in log (#3356)

Signed-off-by: stack <stack@apache.org>
Signed-off-by: Sean Busbey <busbey@apache.org>
This commit is contained in:
clarax 2021-07-12 10:10:24 -07:00 committed by GitHub
parent 875b63cf0b
commit 1e763d521f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 54 additions and 45 deletions

View File

@ -129,7 +129,8 @@ public class StochasticLoadBalancer extends BaseLoadBalancer {
private List<CandidateGenerator> candidateGenerators;
private List<CostFunction> costFunctions; // FindBugs: Wants this protected; IS2_INCONSISTENT_SYNC
// To save currently configed sum of multiplier. Defaulted at 1 for cases that carry high cost
private float sumMultiplier = 1.0f;
// to save and report costs to JMX
private double curOverallCost = 0d;
private double[] tempFunctionCosts;
@ -220,7 +221,6 @@ public class StochasticLoadBalancer extends BaseLoadBalancer {
regionReplicaHostCostFunction = new RegionReplicaHostCostFunction(conf);
regionReplicaRackCostFunction = new RegionReplicaRackCostFunction(conf);
costFunctions = new ArrayList<>();
addCostFunction(new RegionCountSkewCostFunction(conf));
addCostFunction(new PrimaryRegionCountSkewCostFunction(conf));
@ -303,54 +303,55 @@ public class StochasticLoadBalancer extends BaseLoadBalancer {
boolean needsBalance(TableName tableName, BalancerClusterState cluster) {
ClusterLoadState cs = new ClusterLoadState(cluster.clusterState);
if (cs.getNumServers() < MIN_SERVER_BALANCE) {
if (LOG.isDebugEnabled()) {
LOG.debug("Not running balancer because only " + cs.getNumServers()
LOG.info("Not running balancer because only " + cs.getNumServers()
+ " active regionserver(s)");
}
sendRejectionReasonToRingBuffer(() -> "The number of RegionServers " + cs.getNumServers() +
" < MIN_SERVER_BALANCE(" + MIN_SERVER_BALANCE + ")", null);
return false;
}
if (areSomeRegionReplicasColocated(cluster)) {
LOG.info("Running balancer because at least one server hosts replicas of the same region.");
return true;
}
if (idleRegionServerExist(cluster)){
LOG.info("Running balancer because cluster has idle server(s).");
return true;
}
sumMultiplier = 0.0f;
double total = 0.0;
float sumMultiplier = 0.0f;
for (CostFunction c : costFunctions) {
float multiplier = c.getMultiplier();
if (multiplier <= 0) {
LOG.trace("{} not needed because multiplier is <= 0", c.getClass().getSimpleName());
continue;
}
double cost = c.cost();
if (!c.isNeeded()) {
LOG.trace("{} not needed", c.getClass().getSimpleName());
continue;
}
total += cost * multiplier;
sumMultiplier += multiplier;
total += c.cost() * multiplier;
}
if (sumMultiplier <= 0) {
LOG.error("At least one cost function needs a multiplier > 0. For example, set "
+ "hbase.master.balancer.stochastic.regionCountCost to a positive value or default");
return false;
}
boolean balanced = total <= 0 || sumMultiplier <= 0 ||
(sumMultiplier > 0 && (total / sumMultiplier) < minCostNeedBalance);
boolean balanced = (total / sumMultiplier < minCostNeedBalance);
if (balanced) {
final double calculatedTotal = total;
final double calculatedMultiplier = sumMultiplier;
sendRejectionReasonToRingBuffer(() -> getBalanceReason(calculatedTotal, calculatedMultiplier),
costFunctions);
}
if (LOG.isDebugEnabled()) {
LOG.debug("{} {}; total cost={}, sum multiplier={}; cost/multiplier to need a balance is {}",
balanced ? "Skipping load balancing because balanced" : "We need to load balance",
isByTable ? String.format("table (%s)", tableName) : "cluster",
total, sumMultiplier, minCostNeedBalance);
if (LOG.isTraceEnabled()) {
LOG.trace("Balance decision detailed function costs={}", functionCost());
}
sendRejectionReasonToRingBuffer(() ->
getBalanceReason(calculatedTotal, sumMultiplier), costFunctions);
LOG.info("{} - skipping load balancing because weighted average imbalance={} <= "
+ "threshold({}). If you want more aggressive balancing, either lower "
+ "hbase.master.balancer.stochastic.minCostNeedBalance from {} or increase the relative "
+ "multiplier(s) of the specific cost function(s). functionCost={}",
isByTable ? "Table specific ("+tableName+")" : "Cluster wide", total / sumMultiplier,
minCostNeedBalance, minCostNeedBalance, functionCost());
} else {
LOG.info("{} - Calculating plan. may take up to {}ms to complete.",
isByTable ? "Table specific ("+tableName+")" : "Cluster wide", maxRunningTime);
}
return !balanced;
}
@ -384,8 +385,8 @@ public class StochasticLoadBalancer extends BaseLoadBalancer {
// Allow turning this feature off if the locality cost is not going to
// be used in any computations.
RegionHDFSBlockLocationFinder finder = null;
if ((this.localityCost != null && this.localityCost.getMultiplier() > 0)
|| (this.rackLocalityCost != null && this.rackLocalityCost.getMultiplier() > 0)) {
if ((this.localityCost != null)
|| (this.rackLocalityCost != null)) {
finder = this.regionFinder;
}
@ -423,8 +424,9 @@ public class StochasticLoadBalancer extends BaseLoadBalancer {
maxSteps);
}
}
LOG.info("start StochasticLoadBalancer.balancer, initCost=" + currentCost + ", functionCost="
+ functionCost() + " computedMaxSteps: " + computedMaxSteps);
LOG.info("Start StochasticLoadBalancer.balancer, initial weighted average imbalance={}, "
+ "functionCost={} computedMaxSteps={}",
currentCost / sumMultiplier, functionCost(), computedMaxSteps);
final String initFunctionTotalCosts = totalCostsPerFunc();
// Perform a stochastic walk to see if we can get a good fit.
@ -470,17 +472,18 @@ public class StochasticLoadBalancer extends BaseLoadBalancer {
updateStochasticCosts(tableName, curOverallCost, curFunctionCosts);
if (initCost > currentCost) {
List<RegionPlan> plans = createRegionPlans(cluster);
LOG.info("Finished computing new load balance plan. Computation took {}" +
" to try {} different iterations. Found a solution that moves " +
"{} regions; Going from a computed cost of {}" +
" to a new cost of {}", java.time.Duration.ofMillis(endTime - startTime),
step, plans.size(), initCost, currentCost);
LOG.info("Finished computing new moving plan. Computation took {} ms" +
" to try {} different iterations. Found a solution that moves " +
"{} regions; Going from a computed imbalance of {}" +
" to a new imbalance of {}. ",
endTime - startTime, step, plans.size(),
initCost / sumMultiplier, currentCost / sumMultiplier);
sendRegionPlansToRingBuffer(plans, currentCost, initCost, initFunctionTotalCosts, step);
return plans;
}
LOG.info("Could not find a better load balance plan. Tried {} different configurations in " +
"{}, and did not find anything with a computed cost less than {}", step,
java.time.Duration.ofMillis(endTime - startTime), initCost);
LOG.info("Could not find a better moving plan. Tried {} different configurations in " +
"{} ms, and did not find anything with an imbalance score less than {}", step,
endTime - startTime, initCost / sumMultiplier);
return null;
}
@ -490,8 +493,7 @@ public class StochasticLoadBalancer extends BaseLoadBalancer {
BalancerRejection.Builder builder = new BalancerRejection.Builder().setReason(reason.get());
if (costFunctions != null) {
for (CostFunction c : costFunctions) {
float multiplier = c.getMultiplier();
if (multiplier <= 0 || !c.isNeeded()) {
if (!c.isNeeded()) {
continue;
}
builder.addCostFuncInfo(c.getClass().getName(), c.cost(), c.getMultiplier());
@ -545,7 +547,8 @@ public class StochasticLoadBalancer extends BaseLoadBalancer {
}
private void addCostFunction(CostFunction costFunction) {
if (costFunction.getMultiplier() > 0) {
float multiplier = costFunction.getMultiplier();
if (multiplier > 0) {
costFunctions.add(costFunction);
}
}
@ -556,9 +559,13 @@ public class StochasticLoadBalancer extends BaseLoadBalancer {
builder.append(c.getClass().getSimpleName());
builder.append(" : (");
if (c.isNeeded()) {
builder.append(c.getMultiplier());
builder.append("multiplier=" + c.getMultiplier());
builder.append(", ");
builder.append(c.cost());
double cost = c.cost();
builder.append("imbalance=" + cost);
if (cost < minCostNeedBalance) {
builder.append(", balanced");
}
} else {
builder.append("not needed");
}
@ -570,7 +577,7 @@ public class StochasticLoadBalancer extends BaseLoadBalancer {
private String totalCostsPerFunc() {
StringBuilder builder = new StringBuilder();
for (CostFunction c : costFunctions) {
if (c.getMultiplier() <= 0 || !c.isNeeded()) {
if (!c.isNeeded()) {
continue;
}
double cost = c.getMultiplier() * c.cost();
@ -654,7 +661,7 @@ public class StochasticLoadBalancer extends BaseLoadBalancer {
allowedOnPath = ".*(/src/test/.*|StochasticLoadBalancer).java")
void updateCostsWithAction(BalancerClusterState cluster, BalanceAction action) {
for (CostFunction c : costFunctions) {
if (c.getMultiplier() > 0 && c.isNeeded()) {
if (c.isNeeded()) {
c.postAction(action);
}
}
@ -693,7 +700,7 @@ public class StochasticLoadBalancer extends BaseLoadBalancer {
CostFunction c = costFunctions.get(i);
this.tempFunctionCosts[i] = 0.0;
if (c.getMultiplier() <= 0 || !c.isNeeded()) {
if (!c.isNeeded()) {
continue;
}

View File

@ -1874,6 +1874,8 @@ public class HMaster extends HRegionServer implements MasterServices {
}
}
}
LOG.info("Balancer is going into sleep until next period in {}ms", getConfiguration()
.getInt(HConstants.HBASE_BALANCER_PERIOD, HConstants.DEFAULT_HBASE_BALANCER_PERIOD));
return successRegionPlans;
}