HBASE-25767 CandidateGenerator.getRandomIterationOrder is too slow on large cluster (#3149)

Signed-off-by: XinSun <ddupgs@gmail.com>
Signed-off-by: Yulin Niu <niuyulin@apache.org>
This commit is contained in:
Duo Zhang 2021-04-13 23:00:54 +08:00
parent 082ad7cf21
commit 69e93c8137
3 changed files with 37 additions and 70 deletions

View File

@ -18,13 +18,8 @@
package org.apache.hadoop.hbase.master.balancer; package org.apache.hadoop.hbase.master.balancer;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map; import java.util.Map;
import org.apache.hadoop.hbase.client.RegionInfo; import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.yetus.audience.InterfaceAudience; import org.apache.yetus.audience.InterfaceAudience;
/** /**
@ -135,17 +130,4 @@ abstract class CandidateGenerator {
return BaseLoadBalancer.Cluster.NullAction; return BaseLoadBalancer.Cluster.NullAction;
} }
} }
/**
* Returns a random iteration order of indexes of an array with size length
*/
List<Integer> getRandomIterationOrder(int length) {
ArrayList<Integer> order = new ArrayList<>(length);
for (int i = 0; i < length; i++) {
order.add(i);
}
Collections.shuffle(order);
return order;
}
} }

View File

@ -18,42 +18,30 @@
package org.apache.hadoop.hbase.master.balancer; package org.apache.hadoop.hbase.master.balancer;
import org.apache.hadoop.hbase.master.MasterServices; import java.util.Optional;
import java.util.concurrent.ThreadLocalRandom;
import org.apache.hbase.thirdparty.com.google.common.base.Optional;
import org.apache.yetus.audience.InterfaceAudience; import org.apache.yetus.audience.InterfaceAudience;
@InterfaceAudience.Private @InterfaceAudience.Private
class LocalityBasedCandidateGenerator extends CandidateGenerator { class LocalityBasedCandidateGenerator extends CandidateGenerator {
private MasterServices masterServices;
LocalityBasedCandidateGenerator(MasterServices masterServices) {
this.masterServices = masterServices;
}
@Override @Override
BaseLoadBalancer.Cluster.Action generate(BaseLoadBalancer.Cluster cluster) { BaseLoadBalancer.Cluster.Action generate(BaseLoadBalancer.Cluster cluster) {
if (this.masterServices == null) { // iterate through regions until you find one that is not on ideal host
int thisServer = pickRandomServer(cluster); // start from a random point to avoid always balance the regions in front
// Pick the other server if (cluster.numRegions > 0) {
int otherServer = pickOtherRandomServer(cluster, thisServer); int startIndex = ThreadLocalRandom.current().nextInt(cluster.numRegions);
return pickRandomRegions(cluster, thisServer, otherServer); for (int i = 0; i < cluster.numRegions; i++) {
} int region = (startIndex + i) % cluster.numRegions;
int currentServer = cluster.regionIndexToServerIndex[region];
// Randomly iterate through regions until you find one that is not on ideal host if (currentServer != cluster.getOrComputeRegionsToMostLocalEntities(
for (int region : getRandomIterationOrder(cluster.numRegions)) { BaseLoadBalancer.Cluster.LocalityType.SERVER)[region]) {
int currentServer = cluster.regionIndexToServerIndex[region]; Optional<BaseLoadBalancer.Cluster.Action> potential = tryMoveOrSwap(cluster,
if (currentServer != cluster.getOrComputeRegionsToMostLocalEntities( currentServer, region, cluster.getOrComputeRegionsToMostLocalEntities(
BaseLoadBalancer.Cluster.LocalityType.SERVER)[region]) { BaseLoadBalancer.Cluster.LocalityType.SERVER)[region]);
Optional<BaseLoadBalancer.Cluster.Action> potential = tryMoveOrSwap(cluster, if (potential.isPresent()) {
currentServer, region, return potential.get();
cluster.getOrComputeRegionsToMostLocalEntities( }
BaseLoadBalancer.Cluster.LocalityType.SERVER)[region]
);
if (potential.isPresent()) {
return potential.get();
} }
} }
} }
@ -69,25 +57,25 @@ class LocalityBasedCandidateGenerator extends CandidateGenerator {
// Compare locality gain/loss from swapping fromRegion with regions on toServer // Compare locality gain/loss from swapping fromRegion with regions on toServer
double fromRegionLocalityDelta = getWeightedLocality(cluster, fromRegion, toServer) double fromRegionLocalityDelta = getWeightedLocality(cluster, fromRegion, toServer)
- getWeightedLocality(cluster, fromRegion, fromServer); - getWeightedLocality(cluster, fromRegion, fromServer);
for (int toRegionIndex : getRandomIterationOrder(cluster.regionsPerServer[toServer].length)) { int toServertotalRegions = cluster.regionsPerServer[toServer].length;
int toRegion = cluster.regionsPerServer[toServer][toRegionIndex]; if (toServertotalRegions > 0) {
double toRegionLocalityDelta = getWeightedLocality(cluster, toRegion, fromServer) int startIndex = ThreadLocalRandom.current().nextInt(toServertotalRegions);
- getWeightedLocality(cluster, toRegion, toServer); for (int i = 0; i < toServertotalRegions; i++) {
// If locality would remain neutral or improve, attempt the swap int toRegionIndex = (startIndex + i) % toServertotalRegions;
if (fromRegionLocalityDelta + toRegionLocalityDelta >= 0) { int toRegion = cluster.regionsPerServer[toServer][toRegionIndex];
return Optional.of(getAction(fromServer, fromRegion, toServer, toRegion)); double toRegionLocalityDelta = getWeightedLocality(cluster, toRegion, fromServer) -
getWeightedLocality(cluster, toRegion, toServer);
// If locality would remain neutral or improve, attempt the swap
if (fromRegionLocalityDelta + toRegionLocalityDelta >= 0) {
return Optional.of(getAction(fromServer, fromRegion, toServer, toRegion));
}
} }
} }
return Optional.absent(); return Optional.empty();
} }
private double getWeightedLocality(BaseLoadBalancer.Cluster cluster, int region, int server) { private double getWeightedLocality(BaseLoadBalancer.Cluster cluster, int region, int server) {
return cluster.getOrComputeWeightedLocality(region, server, return cluster.getOrComputeWeightedLocality(region, server,
BaseLoadBalancer.Cluster.LocalityType.SERVER); BaseLoadBalancer.Cluster.LocalityType.SERVER);
} }
void setServices(MasterServices services) {
this.masterServices = services;
}
} }

View File

@ -38,7 +38,6 @@ import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.BalancerDecision; import org.apache.hadoop.hbase.client.BalancerDecision;
import org.apache.hadoop.hbase.client.RegionInfo; import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.master.MasterServices;
import org.apache.hadoop.hbase.master.RegionPlan; import org.apache.hadoop.hbase.master.RegionPlan;
import org.apache.hadoop.hbase.master.balancer.BaseLoadBalancer.Cluster.Action; import org.apache.hadoop.hbase.master.balancer.BaseLoadBalancer.Cluster.Action;
import org.apache.hadoop.hbase.master.balancer.BaseLoadBalancer.Cluster.Action.Type; import org.apache.hadoop.hbase.master.balancer.BaseLoadBalancer.Cluster.Action.Type;
@ -187,7 +186,7 @@ public class StochasticLoadBalancer extends BaseLoadBalancer {
numRegionLoadsToRemember = conf.getInt(KEEP_REGION_LOADS, numRegionLoadsToRemember); numRegionLoadsToRemember = conf.getInt(KEEP_REGION_LOADS, numRegionLoadsToRemember);
minCostNeedBalance = conf.getFloat(MIN_COST_NEED_BALANCE_KEY, minCostNeedBalance); minCostNeedBalance = conf.getFloat(MIN_COST_NEED_BALANCE_KEY, minCostNeedBalance);
if (localityCandidateGenerator == null) { if (localityCandidateGenerator == null) {
localityCandidateGenerator = new LocalityBasedCandidateGenerator(services); localityCandidateGenerator = new LocalityBasedCandidateGenerator();
} }
localityCost = new ServerLocalityCostFunction(conf); localityCost = new ServerLocalityCostFunction(conf);
rackLocalityCost = new RackLocalityCostFunction(conf); rackLocalityCost = new RackLocalityCostFunction(conf);
@ -307,18 +306,16 @@ public class StochasticLoadBalancer extends BaseLoadBalancer {
} }
} }
@Override
public synchronized void setMasterServices(MasterServices masterServices) {
super.setMasterServices(masterServices);
this.localityCandidateGenerator.setServices(masterServices);
}
@Override @Override
protected synchronized boolean areSomeRegionReplicasColocated(Cluster c) { protected synchronized boolean areSomeRegionReplicasColocated(Cluster c) {
regionReplicaHostCostFunction.init(c); regionReplicaHostCostFunction.init(c);
if (regionReplicaHostCostFunction.cost() > 0) return true; if (regionReplicaHostCostFunction.cost() > 0) {
return true;
}
regionReplicaRackCostFunction.init(c); regionReplicaRackCostFunction.init(c);
if (regionReplicaRackCostFunction.cost() > 0) return true; if (regionReplicaRackCostFunction.cost() > 0) {
return true;
}
return false; return false;
} }