HBASE-25973 Balancer should explain progress in a better way in log (#3356)
Signed-off-by: stack <stack@apache.org> Signed-off-by: Sean Busbey <busbey@apache.org>
This commit is contained in:
parent
875b63cf0b
commit
1e763d521f
|
@ -129,7 +129,8 @@ public class StochasticLoadBalancer extends BaseLoadBalancer {
|
|||
|
||||
private List<CandidateGenerator> candidateGenerators;
|
||||
private List<CostFunction> costFunctions; // FindBugs: Wants this protected; IS2_INCONSISTENT_SYNC
|
||||
|
||||
// To save currently configed sum of multiplier. Defaulted at 1 for cases that carry high cost
|
||||
private float sumMultiplier = 1.0f;
|
||||
// to save and report costs to JMX
|
||||
private double curOverallCost = 0d;
|
||||
private double[] tempFunctionCosts;
|
||||
|
@ -220,7 +221,6 @@ public class StochasticLoadBalancer extends BaseLoadBalancer {
|
|||
|
||||
regionReplicaHostCostFunction = new RegionReplicaHostCostFunction(conf);
|
||||
regionReplicaRackCostFunction = new RegionReplicaRackCostFunction(conf);
|
||||
|
||||
costFunctions = new ArrayList<>();
|
||||
addCostFunction(new RegionCountSkewCostFunction(conf));
|
||||
addCostFunction(new PrimaryRegionCountSkewCostFunction(conf));
|
||||
|
@ -303,54 +303,55 @@ public class StochasticLoadBalancer extends BaseLoadBalancer {
|
|||
boolean needsBalance(TableName tableName, BalancerClusterState cluster) {
|
||||
ClusterLoadState cs = new ClusterLoadState(cluster.clusterState);
|
||||
if (cs.getNumServers() < MIN_SERVER_BALANCE) {
|
||||
if (LOG.isDebugEnabled()) {
|
||||
LOG.debug("Not running balancer because only " + cs.getNumServers()
|
||||
LOG.info("Not running balancer because only " + cs.getNumServers()
|
||||
+ " active regionserver(s)");
|
||||
}
|
||||
sendRejectionReasonToRingBuffer(() -> "The number of RegionServers " + cs.getNumServers() +
|
||||
" < MIN_SERVER_BALANCE(" + MIN_SERVER_BALANCE + ")", null);
|
||||
return false;
|
||||
}
|
||||
if (areSomeRegionReplicasColocated(cluster)) {
|
||||
LOG.info("Running balancer because at least one server hosts replicas of the same region.");
|
||||
return true;
|
||||
}
|
||||
|
||||
if (idleRegionServerExist(cluster)){
|
||||
LOG.info("Running balancer because cluster has idle server(s).");
|
||||
return true;
|
||||
}
|
||||
|
||||
sumMultiplier = 0.0f;
|
||||
double total = 0.0;
|
||||
float sumMultiplier = 0.0f;
|
||||
for (CostFunction c : costFunctions) {
|
||||
float multiplier = c.getMultiplier();
|
||||
if (multiplier <= 0) {
|
||||
LOG.trace("{} not needed because multiplier is <= 0", c.getClass().getSimpleName());
|
||||
continue;
|
||||
}
|
||||
double cost = c.cost();
|
||||
if (!c.isNeeded()) {
|
||||
LOG.trace("{} not needed", c.getClass().getSimpleName());
|
||||
continue;
|
||||
}
|
||||
total += cost * multiplier;
|
||||
sumMultiplier += multiplier;
|
||||
total += c.cost() * multiplier;
|
||||
}
|
||||
if (sumMultiplier <= 0) {
|
||||
LOG.error("At least one cost function needs a multiplier > 0. For example, set "
|
||||
+ "hbase.master.balancer.stochastic.regionCountCost to a positive value or default");
|
||||
return false;
|
||||
}
|
||||
|
||||
boolean balanced = total <= 0 || sumMultiplier <= 0 ||
|
||||
(sumMultiplier > 0 && (total / sumMultiplier) < minCostNeedBalance);
|
||||
boolean balanced = (total / sumMultiplier < minCostNeedBalance);
|
||||
|
||||
if (balanced) {
|
||||
final double calculatedTotal = total;
|
||||
final double calculatedMultiplier = sumMultiplier;
|
||||
sendRejectionReasonToRingBuffer(() -> getBalanceReason(calculatedTotal, calculatedMultiplier),
|
||||
costFunctions);
|
||||
}
|
||||
if (LOG.isDebugEnabled()) {
|
||||
LOG.debug("{} {}; total cost={}, sum multiplier={}; cost/multiplier to need a balance is {}",
|
||||
balanced ? "Skipping load balancing because balanced" : "We need to load balance",
|
||||
isByTable ? String.format("table (%s)", tableName) : "cluster",
|
||||
total, sumMultiplier, minCostNeedBalance);
|
||||
if (LOG.isTraceEnabled()) {
|
||||
LOG.trace("Balance decision detailed function costs={}", functionCost());
|
||||
}
|
||||
sendRejectionReasonToRingBuffer(() ->
|
||||
getBalanceReason(calculatedTotal, sumMultiplier), costFunctions);
|
||||
LOG.info("{} - skipping load balancing because weighted average imbalance={} <= "
|
||||
+ "threshold({}). If you want more aggressive balancing, either lower "
|
||||
+ "hbase.master.balancer.stochastic.minCostNeedBalance from {} or increase the relative "
|
||||
+ "multiplier(s) of the specific cost function(s). functionCost={}",
|
||||
isByTable ? "Table specific ("+tableName+")" : "Cluster wide", total / sumMultiplier,
|
||||
minCostNeedBalance, minCostNeedBalance, functionCost());
|
||||
} else {
|
||||
LOG.info("{} - Calculating plan. may take up to {}ms to complete.",
|
||||
isByTable ? "Table specific ("+tableName+")" : "Cluster wide", maxRunningTime);
|
||||
}
|
||||
return !balanced;
|
||||
}
|
||||
|
@ -384,8 +385,8 @@ public class StochasticLoadBalancer extends BaseLoadBalancer {
|
|||
// Allow turning this feature off if the locality cost is not going to
|
||||
// be used in any computations.
|
||||
RegionHDFSBlockLocationFinder finder = null;
|
||||
if ((this.localityCost != null && this.localityCost.getMultiplier() > 0)
|
||||
|| (this.rackLocalityCost != null && this.rackLocalityCost.getMultiplier() > 0)) {
|
||||
if ((this.localityCost != null)
|
||||
|| (this.rackLocalityCost != null)) {
|
||||
finder = this.regionFinder;
|
||||
}
|
||||
|
||||
|
@ -423,8 +424,9 @@ public class StochasticLoadBalancer extends BaseLoadBalancer {
|
|||
maxSteps);
|
||||
}
|
||||
}
|
||||
LOG.info("start StochasticLoadBalancer.balancer, initCost=" + currentCost + ", functionCost="
|
||||
+ functionCost() + " computedMaxSteps: " + computedMaxSteps);
|
||||
LOG.info("Start StochasticLoadBalancer.balancer, initial weighted average imbalance={}, "
|
||||
+ "functionCost={} computedMaxSteps={}",
|
||||
currentCost / sumMultiplier, functionCost(), computedMaxSteps);
|
||||
|
||||
final String initFunctionTotalCosts = totalCostsPerFunc();
|
||||
// Perform a stochastic walk to see if we can get a good fit.
|
||||
|
@ -470,17 +472,18 @@ public class StochasticLoadBalancer extends BaseLoadBalancer {
|
|||
updateStochasticCosts(tableName, curOverallCost, curFunctionCosts);
|
||||
if (initCost > currentCost) {
|
||||
List<RegionPlan> plans = createRegionPlans(cluster);
|
||||
LOG.info("Finished computing new load balance plan. Computation took {}" +
|
||||
" to try {} different iterations. Found a solution that moves " +
|
||||
"{} regions; Going from a computed cost of {}" +
|
||||
" to a new cost of {}", java.time.Duration.ofMillis(endTime - startTime),
|
||||
step, plans.size(), initCost, currentCost);
|
||||
LOG.info("Finished computing new moving plan. Computation took {} ms" +
|
||||
" to try {} different iterations. Found a solution that moves " +
|
||||
"{} regions; Going from a computed imbalance of {}" +
|
||||
" to a new imbalance of {}. ",
|
||||
endTime - startTime, step, plans.size(),
|
||||
initCost / sumMultiplier, currentCost / sumMultiplier);
|
||||
sendRegionPlansToRingBuffer(plans, currentCost, initCost, initFunctionTotalCosts, step);
|
||||
return plans;
|
||||
}
|
||||
LOG.info("Could not find a better load balance plan. Tried {} different configurations in " +
|
||||
"{}, and did not find anything with a computed cost less than {}", step,
|
||||
java.time.Duration.ofMillis(endTime - startTime), initCost);
|
||||
LOG.info("Could not find a better moving plan. Tried {} different configurations in " +
|
||||
"{} ms, and did not find anything with an imbalance score less than {}", step,
|
||||
endTime - startTime, initCost / sumMultiplier);
|
||||
return null;
|
||||
}
|
||||
|
||||
|
@ -490,8 +493,7 @@ public class StochasticLoadBalancer extends BaseLoadBalancer {
|
|||
BalancerRejection.Builder builder = new BalancerRejection.Builder().setReason(reason.get());
|
||||
if (costFunctions != null) {
|
||||
for (CostFunction c : costFunctions) {
|
||||
float multiplier = c.getMultiplier();
|
||||
if (multiplier <= 0 || !c.isNeeded()) {
|
||||
if (!c.isNeeded()) {
|
||||
continue;
|
||||
}
|
||||
builder.addCostFuncInfo(c.getClass().getName(), c.cost(), c.getMultiplier());
|
||||
|
@ -545,7 +547,8 @@ public class StochasticLoadBalancer extends BaseLoadBalancer {
|
|||
}
|
||||
|
||||
private void addCostFunction(CostFunction costFunction) {
|
||||
if (costFunction.getMultiplier() > 0) {
|
||||
float multiplier = costFunction.getMultiplier();
|
||||
if (multiplier > 0) {
|
||||
costFunctions.add(costFunction);
|
||||
}
|
||||
}
|
||||
|
@ -556,9 +559,13 @@ public class StochasticLoadBalancer extends BaseLoadBalancer {
|
|||
builder.append(c.getClass().getSimpleName());
|
||||
builder.append(" : (");
|
||||
if (c.isNeeded()) {
|
||||
builder.append(c.getMultiplier());
|
||||
builder.append("multiplier=" + c.getMultiplier());
|
||||
builder.append(", ");
|
||||
builder.append(c.cost());
|
||||
double cost = c.cost();
|
||||
builder.append("imbalance=" + cost);
|
||||
if (cost < minCostNeedBalance) {
|
||||
builder.append(", balanced");
|
||||
}
|
||||
} else {
|
||||
builder.append("not needed");
|
||||
}
|
||||
|
@ -570,7 +577,7 @@ public class StochasticLoadBalancer extends BaseLoadBalancer {
|
|||
private String totalCostsPerFunc() {
|
||||
StringBuilder builder = new StringBuilder();
|
||||
for (CostFunction c : costFunctions) {
|
||||
if (c.getMultiplier() <= 0 || !c.isNeeded()) {
|
||||
if (!c.isNeeded()) {
|
||||
continue;
|
||||
}
|
||||
double cost = c.getMultiplier() * c.cost();
|
||||
|
@ -654,7 +661,7 @@ public class StochasticLoadBalancer extends BaseLoadBalancer {
|
|||
allowedOnPath = ".*(/src/test/.*|StochasticLoadBalancer).java")
|
||||
void updateCostsWithAction(BalancerClusterState cluster, BalanceAction action) {
|
||||
for (CostFunction c : costFunctions) {
|
||||
if (c.getMultiplier() > 0 && c.isNeeded()) {
|
||||
if (c.isNeeded()) {
|
||||
c.postAction(action);
|
||||
}
|
||||
}
|
||||
|
@ -693,7 +700,7 @@ public class StochasticLoadBalancer extends BaseLoadBalancer {
|
|||
CostFunction c = costFunctions.get(i);
|
||||
this.tempFunctionCosts[i] = 0.0;
|
||||
|
||||
if (c.getMultiplier() <= 0 || !c.isNeeded()) {
|
||||
if (!c.isNeeded()) {
|
||||
continue;
|
||||
}
|
||||
|
||||
|
|
|
@ -1874,6 +1874,8 @@ public class HMaster extends HRegionServer implements MasterServices {
|
|||
}
|
||||
}
|
||||
}
|
||||
LOG.info("Balancer is going into sleep until next period in {}ms", getConfiguration()
|
||||
.getInt(HConstants.HBASE_BALANCER_PERIOD, HConstants.DEFAULT_HBASE_BALANCER_PERIOD));
|
||||
return successRegionPlans;
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue