HBASE-25973 Balancer should explain progress in a better way in log - backport branch-2 (#3485)
Signed-off-by: stack <stack@apache.org>
This commit is contained in:
parent
dd2ae3605d
commit
6ab6d6f231
@ -1858,6 +1858,8 @@ public class HMaster extends HRegionServer implements MasterServices {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
LOG.info("Balancer is going into sleep until next period in {}ms", getConfiguration()
|
||||||
|
.getInt(HConstants.HBASE_BALANCER_PERIOD, HConstants.DEFAULT_HBASE_BALANCER_PERIOD));
|
||||||
return successRegionPlans;
|
return successRegionPlans;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -74,6 +74,8 @@ import org.apache.hbase.thirdparty.com.google.common.collect.Sets;
|
|||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
@InterfaceAudience.Private
|
@InterfaceAudience.Private
|
||||||
|
@edu.umd.cs.findbugs.annotations.SuppressWarnings(value="IS2_INCONSISTENT_SYNC",
|
||||||
|
justification="Complaint is about isByTable not being synchronized; we don't modify often")
|
||||||
public abstract class BaseLoadBalancer implements LoadBalancer {
|
public abstract class BaseLoadBalancer implements LoadBalancer {
|
||||||
|
|
||||||
public static final String BALANCER_DECISION_BUFFER_ENABLED =
|
public static final String BALANCER_DECISION_BUFFER_ENABLED =
|
||||||
|
@ -149,7 +149,8 @@ public class StochasticLoadBalancer extends BaseLoadBalancer {
|
|||||||
|
|
||||||
private List<CandidateGenerator> candidateGenerators;
|
private List<CandidateGenerator> candidateGenerators;
|
||||||
private List<CostFunction> costFunctions; // FindBugs: Wants this protected; IS2_INCONSISTENT_SYNC
|
private List<CostFunction> costFunctions; // FindBugs: Wants this protected; IS2_INCONSISTENT_SYNC
|
||||||
|
// To save currently configed sum of multiplier. Defaulted at 1 for cases that carry high cost
|
||||||
|
private float sumMultiplier = 1.0f;
|
||||||
// to save and report costs to JMX
|
// to save and report costs to JMX
|
||||||
private double curOverallCost = 0d;
|
private double curOverallCost = 0d;
|
||||||
private double[] tempFunctionCosts;
|
private double[] tempFunctionCosts;
|
||||||
@ -206,7 +207,6 @@ public class StochasticLoadBalancer extends BaseLoadBalancer {
|
|||||||
}
|
}
|
||||||
regionReplicaHostCostFunction = new RegionReplicaHostCostFunction(conf);
|
regionReplicaHostCostFunction = new RegionReplicaHostCostFunction(conf);
|
||||||
regionReplicaRackCostFunction = new RegionReplicaRackCostFunction(conf);
|
regionReplicaRackCostFunction = new RegionReplicaRackCostFunction(conf);
|
||||||
|
|
||||||
costFunctions = new ArrayList<>();
|
costFunctions = new ArrayList<>();
|
||||||
addCostFunction(new RegionCountSkewCostFunction(conf));
|
addCostFunction(new RegionCountSkewCostFunction(conf));
|
||||||
addCostFunction(new PrimaryRegionCountSkewCostFunction(conf));
|
addCostFunction(new PrimaryRegionCountSkewCostFunction(conf));
|
||||||
@ -327,63 +327,65 @@ public class StochasticLoadBalancer extends BaseLoadBalancer {
|
|||||||
protected boolean needsBalance(TableName tableName, Cluster cluster) {
|
protected boolean needsBalance(TableName tableName, Cluster cluster) {
|
||||||
ClusterLoadState cs = new ClusterLoadState(cluster.clusterState);
|
ClusterLoadState cs = new ClusterLoadState(cluster.clusterState);
|
||||||
if (cs.getNumServers() < MIN_SERVER_BALANCE) {
|
if (cs.getNumServers() < MIN_SERVER_BALANCE) {
|
||||||
if (LOG.isDebugEnabled()) {
|
LOG.info("Not running balancer because only " + cs.getNumServers() +
|
||||||
LOG.debug("Not running balancer because only " + cs.getNumServers()
|
" active regionserver(s)");
|
||||||
+ " active regionserver(s)");
|
sendRejectionReasonToRingBuffer("The number of RegionServers " + cs.getNumServers() +
|
||||||
}
|
" < MIN_SERVER_BALANCE(" + MIN_SERVER_BALANCE + ")", null);
|
||||||
if (this.isBalancerRejectionRecording) {
|
|
||||||
sendRejectionReasonToRingBuffer("The number of RegionServers " +
|
|
||||||
cs.getNumServers() + " < MIN_SERVER_BALANCE(" + MIN_SERVER_BALANCE + ")", null);
|
|
||||||
}
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if (areSomeRegionReplicasColocated(cluster)) {
|
if (areSomeRegionReplicasColocated(cluster)) {
|
||||||
|
LOG.info("Running balancer because at least one server hosts replicas of the same region.");
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (idleRegionServerExist(cluster)){
|
if (idleRegionServerExist(cluster)){
|
||||||
|
LOG.info("Running balancer because cluster has idle server(s).");
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
sumMultiplier = 0.0f;
|
||||||
double total = 0.0;
|
double total = 0.0;
|
||||||
float sumMultiplier = 0.0f;
|
|
||||||
for (CostFunction c : costFunctions) {
|
for (CostFunction c : costFunctions) {
|
||||||
float multiplier = c.getMultiplier();
|
float multiplier = c.getMultiplier();
|
||||||
if (multiplier <= 0) {
|
double cost = c.cost();
|
||||||
LOG.trace("{} not needed because multiplier is <= 0", c.getClass().getSimpleName());
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
if (!c.isNeeded()) {
|
if (!c.isNeeded()) {
|
||||||
LOG.trace("{} not needed", c.getClass().getSimpleName());
|
LOG.trace("{} not needed", c.getClass().getSimpleName());
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
total += cost * multiplier;
|
||||||
sumMultiplier += multiplier;
|
sumMultiplier += multiplier;
|
||||||
total += c.cost() * multiplier;
|
}
|
||||||
|
if (sumMultiplier <= 0) {
|
||||||
|
LOG.error("At least one cost function needs a multiplier > 0. For example, set "
|
||||||
|
+ "hbase.master.balancer.stochastic.regionCountCost to a positive value or default");
|
||||||
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
boolean balanced = total <= 0 || sumMultiplier <= 0 ||
|
boolean balanced = (total / sumMultiplier < minCostNeedBalance);
|
||||||
(sumMultiplier > 0 && (total / sumMultiplier) < minCostNeedBalance);
|
if (balanced) {
|
||||||
if(balanced && isBalancerRejectionRecording){
|
if (isBalancerRejectionRecording) {
|
||||||
String reason = "";
|
String reason = "";
|
||||||
if (total <= 0) {
|
if (total <= 0) {
|
||||||
reason = "(cost1*multiplier1)+(cost2*multiplier2)+...+(costn*multipliern) = " + total + " <= 0";
|
reason =
|
||||||
} else if (sumMultiplier <= 0) {
|
"(cost1*multiplier1)+(cost2*multiplier2)+...+(costn*multipliern) = " + total + " <= 0";
|
||||||
reason = "sumMultiplier = " + sumMultiplier + " <= 0";
|
} else if (sumMultiplier <= 0) {
|
||||||
} else if ((total / sumMultiplier) < minCostNeedBalance) {
|
reason = "sumMultiplier = " + sumMultiplier + " <= 0";
|
||||||
reason =
|
} else if ((total / sumMultiplier) < minCostNeedBalance) {
|
||||||
"[(cost1*multiplier1)+(cost2*multiplier2)+...+(costn*multipliern)]/sumMultiplier = " + (total
|
reason =
|
||||||
/ sumMultiplier) + " <= minCostNeedBalance(" + minCostNeedBalance + ")";
|
"[(cost1*multiplier1)+(cost2*multiplier2)+...+(costn*multipliern)]/sumMultiplier = " + (
|
||||||
}
|
total / sumMultiplier) + " <= minCostNeedBalance(" + minCostNeedBalance + ")";
|
||||||
sendRejectionReasonToRingBuffer(reason, costFunctions);
|
}
|
||||||
}
|
sendRejectionReasonToRingBuffer(reason, costFunctions);
|
||||||
if (LOG.isDebugEnabled()) {
|
|
||||||
LOG.debug("{} {}; total cost={}, sum multiplier={}; cost/multiplier to need a balance is {}",
|
|
||||||
balanced ? "Skipping load balancing because balanced" : "We need to load balance",
|
|
||||||
isByTable ? String.format("table (%s)", tableName) : "cluster",
|
|
||||||
total, sumMultiplier, minCostNeedBalance);
|
|
||||||
if (LOG.isTraceEnabled()) {
|
|
||||||
LOG.trace("Balance decision detailed function costs={}", functionCost());
|
|
||||||
}
|
}
|
||||||
|
LOG.info("{} - skipping load balancing because weighted average imbalance={} <= " +
|
||||||
|
"threshold({}). If you want more aggressive balancing, either lower "
|
||||||
|
+ "hbase.master.balancer.stochastic.minCostNeedBalance from {} or increase the relative"
|
||||||
|
+ " multiplier(s) of the specific cost function(s). functionCost={}",
|
||||||
|
isByTable ? "Table specific (" + tableName + ")" : "Cluster wide", total / sumMultiplier,
|
||||||
|
minCostNeedBalance, minCostNeedBalance, functionCost());
|
||||||
|
} else {
|
||||||
|
LOG.info("{} - Calculating plan. may take up to {}ms to complete.",
|
||||||
|
isByTable ? "Table specific (" + tableName + ")" : "Cluster wide", maxRunningTime);
|
||||||
}
|
}
|
||||||
return !balanced;
|
return !balanced;
|
||||||
}
|
}
|
||||||
@ -419,8 +421,8 @@ public class StochasticLoadBalancer extends BaseLoadBalancer {
|
|||||||
// Allow turning this feature off if the locality cost is not going to
|
// Allow turning this feature off if the locality cost is not going to
|
||||||
// be used in any computations.
|
// be used in any computations.
|
||||||
RegionLocationFinder finder = null;
|
RegionLocationFinder finder = null;
|
||||||
if ((this.localityCost != null && this.localityCost.getMultiplier() > 0)
|
if ((this.localityCost != null && this.localityCost.getMultiplier() > 0) || (
|
||||||
|| (this.rackLocalityCost != null && this.rackLocalityCost.getMultiplier() > 0)) {
|
this.rackLocalityCost != null && this.rackLocalityCost.getMultiplier() > 0)) {
|
||||||
finder = this.regionFinder;
|
finder = this.regionFinder;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -446,21 +448,22 @@ public class StochasticLoadBalancer extends BaseLoadBalancer {
|
|||||||
long computedMaxSteps;
|
long computedMaxSteps;
|
||||||
if (runMaxSteps) {
|
if (runMaxSteps) {
|
||||||
computedMaxSteps = Math.max(this.maxSteps,
|
computedMaxSteps = Math.max(this.maxSteps,
|
||||||
((long)cluster.numRegions * (long)this.stepsPerRegion * (long)cluster.numServers));
|
((long) cluster.numRegions * (long) this.stepsPerRegion * (long) cluster.numServers));
|
||||||
} else {
|
} else {
|
||||||
long calculatedMaxSteps = (long)cluster.numRegions * (long)this.stepsPerRegion *
|
long calculatedMaxSteps =
|
||||||
(long)cluster.numServers;
|
(long) cluster.numRegions * (long) this.stepsPerRegion * (long) cluster.numServers;
|
||||||
computedMaxSteps = Math.min(this.maxSteps, calculatedMaxSteps);
|
computedMaxSteps = Math.min(this.maxSteps, calculatedMaxSteps);
|
||||||
if (calculatedMaxSteps > maxSteps) {
|
if (calculatedMaxSteps > maxSteps) {
|
||||||
LOG.warn("calculatedMaxSteps:{} for loadbalancer's stochastic walk is larger than "
|
LOG.warn("calculatedMaxSteps:{} for loadbalancer's stochastic walk is larger than " +
|
||||||
+ "maxSteps:{}. Hence load balancing may not work well. Setting parameter "
|
"maxSteps:{}. Hence load balancing may not work well. Setting parameter " +
|
||||||
+ "\"hbase.master.balancer.stochastic.runMaxSteps\" to true can overcome this issue."
|
"\"hbase.master.balancer.stochastic.runMaxSteps\" to true can overcome this issue." +
|
||||||
+ "(This config change does not require service restart)", calculatedMaxSteps,
|
"(This config change does not require service restart)", calculatedMaxSteps,
|
||||||
maxSteps);
|
maxSteps);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
LOG.info("start StochasticLoadBalancer.balancer, initCost=" + currentCost + ", functionCost="
|
LOG.info("Start StochasticLoadBalancer.balancer, initial weighted average imbalance={}," +
|
||||||
+ functionCost() + " computedMaxSteps: " + computedMaxSteps);
|
" functionCost={} computedMaxSteps={}",
|
||||||
|
currentCost / sumMultiplier, functionCost(), computedMaxSteps);
|
||||||
|
|
||||||
final String initFunctionTotalCosts = totalCostsPerFunc();
|
final String initFunctionTotalCosts = totalCostsPerFunc();
|
||||||
// Perform a stochastic walk to see if we can get a good fit.
|
// Perform a stochastic walk to see if we can get a good fit.
|
||||||
@ -493,8 +496,7 @@ public class StochasticLoadBalancer extends BaseLoadBalancer {
|
|||||||
updateCostsWithAction(cluster, undoAction);
|
updateCostsWithAction(cluster, undoAction);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (EnvironmentEdgeManager.currentTime() - startTime >
|
if (EnvironmentEdgeManager.currentTime() - startTime > maxRunningTime) {
|
||||||
maxRunningTime) {
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -506,17 +508,18 @@ public class StochasticLoadBalancer extends BaseLoadBalancer {
|
|||||||
updateStochasticCosts(tableName, curOverallCost, curFunctionCosts);
|
updateStochasticCosts(tableName, curOverallCost, curFunctionCosts);
|
||||||
if (initCost > currentCost) {
|
if (initCost > currentCost) {
|
||||||
plans = createRegionPlans(cluster);
|
plans = createRegionPlans(cluster);
|
||||||
LOG.info("Finished computing new load balance plan. Computation took {}" +
|
LOG.info("Finished computing new moving plan. Computation took {} ms" +
|
||||||
" to try {} different iterations. Found a solution that moves " +
|
" to try {} different iterations. Found a solution that moves " +
|
||||||
"{} regions; Going from a computed cost of {}" +
|
"{} regions; Going from a computed imbalance of {}" + " to a new imbalance of {}. ",
|
||||||
" to a new cost of {}", java.time.Duration.ofMillis(endTime - startTime),
|
endTime - startTime, step, plans.size(), initCost / sumMultiplier,
|
||||||
step, plans.size(), initCost, currentCost);
|
currentCost / sumMultiplier);
|
||||||
|
|
||||||
sendRegionPlansToRingBuffer(plans, currentCost, initCost, initFunctionTotalCosts, step);
|
sendRegionPlansToRingBuffer(plans, currentCost, initCost, initFunctionTotalCosts, step);
|
||||||
return plans;
|
return plans;
|
||||||
}
|
}
|
||||||
LOG.info("Could not find a better load balance plan. Tried {} different configurations in " +
|
LOG.info("Could not find a better moving plan. Tried {} different configurations in "
|
||||||
"{}, and did not find anything with a computed cost less than {}", step,
|
+ "{} ms, and did not find anything with an imbalance score less than {}", step,
|
||||||
java.time.Duration.ofMillis(endTime - startTime), initCost);
|
endTime - startTime, initCost / sumMultiplier);
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -527,8 +530,7 @@ public class StochasticLoadBalancer extends BaseLoadBalancer {
|
|||||||
.setReason(reason);
|
.setReason(reason);
|
||||||
if (costFunctions != null) {
|
if (costFunctions != null) {
|
||||||
for (CostFunction c : costFunctions) {
|
for (CostFunction c : costFunctions) {
|
||||||
float multiplier = c.getMultiplier();
|
if (!c.isNeeded()) {
|
||||||
if (multiplier <= 0 || !c.isNeeded()) {
|
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
builder.addCostFuncInfo(c.getClass().getName(), c.cost(), c.getMultiplier());
|
builder.addCostFuncInfo(c.getClass().getName(), c.cost(), c.getMultiplier());
|
||||||
@ -587,7 +589,8 @@ public class StochasticLoadBalancer extends BaseLoadBalancer {
|
|||||||
}
|
}
|
||||||
|
|
||||||
private void addCostFunction(CostFunction costFunction) {
|
private void addCostFunction(CostFunction costFunction) {
|
||||||
if (costFunction.getMultiplier() > 0) {
|
float multiplier = costFunction.getMultiplier();
|
||||||
|
if (multiplier > 0) {
|
||||||
costFunctions.add(costFunction);
|
costFunctions.add(costFunction);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -598,9 +601,13 @@ public class StochasticLoadBalancer extends BaseLoadBalancer {
|
|||||||
builder.append(c.getClass().getSimpleName());
|
builder.append(c.getClass().getSimpleName());
|
||||||
builder.append(" : (");
|
builder.append(" : (");
|
||||||
if (c.isNeeded()) {
|
if (c.isNeeded()) {
|
||||||
builder.append(c.getMultiplier());
|
builder.append("multiplier=" + c.getMultiplier());
|
||||||
builder.append(", ");
|
builder.append(", ");
|
||||||
builder.append(c.cost());
|
double cost = c.cost();
|
||||||
|
builder.append("imbalance=" + cost);
|
||||||
|
if (cost < minCostNeedBalance) {
|
||||||
|
builder.append(", balanced");
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
builder.append("not needed");
|
builder.append("not needed");
|
||||||
}
|
}
|
||||||
@ -612,7 +619,7 @@ public class StochasticLoadBalancer extends BaseLoadBalancer {
|
|||||||
private String totalCostsPerFunc() {
|
private String totalCostsPerFunc() {
|
||||||
StringBuilder builder = new StringBuilder();
|
StringBuilder builder = new StringBuilder();
|
||||||
for (CostFunction c : costFunctions) {
|
for (CostFunction c : costFunctions) {
|
||||||
if (c.getMultiplier() <= 0 || !c.isNeeded()) {
|
if (!c.isNeeded()) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
double cost = c.getMultiplier() * c.cost();
|
double cost = c.getMultiplier() * c.cost();
|
||||||
@ -696,7 +703,7 @@ public class StochasticLoadBalancer extends BaseLoadBalancer {
|
|||||||
allowedOnPath = ".*(/src/test/.*|StochasticLoadBalancer).java")
|
allowedOnPath = ".*(/src/test/.*|StochasticLoadBalancer).java")
|
||||||
void updateCostsWithAction(Cluster cluster, Action action) {
|
void updateCostsWithAction(Cluster cluster, Action action) {
|
||||||
for (CostFunction c : costFunctions) {
|
for (CostFunction c : costFunctions) {
|
||||||
if (c.getMultiplier() > 0 && c.isNeeded()) {
|
if (c.isNeeded()) {
|
||||||
c.postAction(action);
|
c.postAction(action);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -735,7 +742,7 @@ public class StochasticLoadBalancer extends BaseLoadBalancer {
|
|||||||
CostFunction c = costFunctions.get(i);
|
CostFunction c = costFunctions.get(i);
|
||||||
this.tempFunctionCosts[i] = 0.0;
|
this.tempFunctionCosts[i] = 0.0;
|
||||||
|
|
||||||
if (c.getMultiplier() <= 0 || !c.isNeeded()) {
|
if (!c.isNeeded()) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user