HBASE-26311 Balancer gets stuck in cohosted replica distribution (#3805)
Signed-off-by: Huaxiang Sun <huaxiangsun@apache.org>
This commit is contained in:
parent
0b7630bc1f
commit
8ea548441c
|
@ -167,12 +167,8 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
|
|||
int[] regionIndexToServerIndex; //regionIndex -> serverIndex
|
||||
int[] initialRegionIndexToServerIndex; //regionIndex -> serverIndex (initial cluster state)
|
||||
int[] regionIndexToTableIndex; //regionIndex -> tableIndex
|
||||
int[][] numRegionsPerServerPerTable; // serverIndex -> tableIndex -> # regions
|
||||
int[][] numRegionsPerServerPerTable; // tableIndex -> serverIndex -> # regions
|
||||
int[] numRegionsPerTable; // tableIndex -> region count
|
||||
double[] meanRegionsPerTable; // mean region count per table
|
||||
double[] regionSkewByTable; // per-table region skew across region servers
|
||||
double[] minRegionSkewByTable; // per-table minimum possible region skew across region servers
|
||||
double[] maxRegionSkewByTable; // per-table maximum possible region skew across region servers
|
||||
int[] regionIndexToPrimaryIndex; //regionIndex -> regionIndex of the primary
|
||||
boolean hasRegionReplicas = false; //whether there are regions with replicas
|
||||
|
||||
|
@ -382,42 +378,24 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
|
|||
}
|
||||
|
||||
numTables = tables.size();
|
||||
LOG.debug("Number of tables={}", numTables);
|
||||
numRegionsPerServerPerTable = new int[numServers][numTables];
|
||||
LOG.debug("Number of tables={}, number of hosts={}, number of racks={}", numTables,
|
||||
numHosts, numRacks);
|
||||
numRegionsPerServerPerTable = new int[numTables][numServers];
|
||||
numRegionsPerTable = new int[numTables];
|
||||
|
||||
for (int i = 0; i < numServers; i++) {
|
||||
for (int j = 0; j < numTables; j++) {
|
||||
for (int i = 0; i < numTables; i++) {
|
||||
for (int j = 0; j < numServers; j++) {
|
||||
numRegionsPerServerPerTable[i][j] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
for (int i=0; i < regionIndexToServerIndex.length; i++) {
|
||||
if (regionIndexToServerIndex[i] >= 0) {
|
||||
numRegionsPerServerPerTable[regionIndexToServerIndex[i]][regionIndexToTableIndex[i]]++;
|
||||
numRegionsPerServerPerTable[regionIndexToTableIndex[i]][regionIndexToServerIndex[i]]++;
|
||||
numRegionsPerTable[regionIndexToTableIndex[i]]++;
|
||||
}
|
||||
}
|
||||
|
||||
// Avoid repeated computation for planning
|
||||
meanRegionsPerTable = new double[numTables];
|
||||
regionSkewByTable = new double[numTables];
|
||||
maxRegionSkewByTable = new double[numTables];
|
||||
minRegionSkewByTable = new double[numTables];
|
||||
|
||||
for (int i = 0; i < numTables; i++) {
|
||||
meanRegionsPerTable[i] = Double.valueOf(numRegionsPerTable[i]) / numServers;
|
||||
minRegionSkewByTable[i] += DoubleArrayCost.getMinSkew(numRegionsPerTable[i], numServers);
|
||||
maxRegionSkewByTable[i] += DoubleArrayCost.getMaxSkew(numRegionsPerTable[i], numServers);
|
||||
}
|
||||
|
||||
for (int[] aNumRegionsPerServerPerTable : numRegionsPerServerPerTable) {
|
||||
for (int tableIdx = 0; tableIdx < aNumRegionsPerServerPerTable.length; tableIdx++) {
|
||||
regionSkewByTable[tableIdx] +=
|
||||
Math.abs(aNumRegionsPerServerPerTable[tableIdx] - meanRegionsPerTable[tableIdx]);
|
||||
}
|
||||
}
|
||||
|
||||
for (int i = 0; i < regions.length; i ++) {
|
||||
RegionInfo info = regions[i];
|
||||
if (RegionReplicaUtil.isDefaultReplica(info)) {
|
||||
|
@ -853,15 +831,9 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
|
|||
}
|
||||
int tableIndex = regionIndexToTableIndex[region];
|
||||
if (oldServer >= 0) {
|
||||
numRegionsPerServerPerTable[oldServer][tableIndex]--;
|
||||
// update regionSkewPerTable for the move from old server
|
||||
regionSkewByTable[tableIndex] += getSkewChangeFor(oldServer, tableIndex, -1);
|
||||
numRegionsPerServerPerTable[tableIndex][oldServer]--;
|
||||
}
|
||||
numRegionsPerServerPerTable[newServer][tableIndex]++;
|
||||
|
||||
// update regionSkewPerTable for the move to new server
|
||||
regionSkewByTable[tableIndex] += getSkewChangeFor(newServer, tableIndex, 1);
|
||||
|
||||
numRegionsPerServerPerTable[tableIndex][newServer]++;
|
||||
// update for servers
|
||||
int primary = regionIndexToPrimaryIndex[region];
|
||||
if (oldServer >= 0) {
|
||||
|
@ -1034,20 +1006,11 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
|
|||
.append(Arrays.toString(serverIndicesSortedByRegionCount))
|
||||
.append(", regionsPerServer=").append(Arrays.deepToString(regionsPerServer));
|
||||
|
||||
desc.append(", regionSkewByTable=").append(Arrays.toString(regionSkewByTable))
|
||||
.append(", numRegions=").append(numRegions).append(", numServers=").append(numServers)
|
||||
.append(", numTables=").append(numTables).append(", numMovedRegions=")
|
||||
desc.append(", numRegions=").append(numRegions).append(", numServers=").append(numServers)
|
||||
.append(", numTables=").append(numTables).append(", numMovedRegions=")
|
||||
.append(numMovedRegions).append('}');
|
||||
return desc.toString();
|
||||
}
|
||||
|
||||
private double getSkewChangeFor(int serverIndex, int tableIndex, double regionCountChange) {
|
||||
double curSkew = Math.abs(numRegionsPerServerPerTable[serverIndex][tableIndex] -
|
||||
meanRegionsPerTable[tableIndex]);
|
||||
double oldSkew = Math.abs(numRegionsPerServerPerTable[serverIndex][tableIndex] -
|
||||
regionCountChange - meanRegionsPerTable[tableIndex]);
|
||||
return curSkew - oldSkew;
|
||||
}
|
||||
}
|
||||
|
||||
// slop for regions
|
||||
|
|
|
@ -66,6 +66,9 @@ final class DoubleArrayCost {
|
|||
}
|
||||
|
||||
private static double computeCost(double[] stats) {
|
||||
if (stats == null || stats.length == 0) {
|
||||
return 0;
|
||||
}
|
||||
double totalCost = 0;
|
||||
double total = getSum(stats);
|
||||
|
||||
|
@ -74,10 +77,11 @@ final class DoubleArrayCost {
|
|||
|
||||
for (int i = 0; i < stats.length; i++) {
|
||||
double n = stats[i];
|
||||
double diff = Math.abs(mean - n);
|
||||
double diff = (mean - n) * (mean - n);
|
||||
totalCost += diff;
|
||||
}
|
||||
|
||||
// No need to compute standard deviation with division by cluster size when scaling.
|
||||
totalCost = Math.sqrt(totalCost);
|
||||
return StochasticLoadBalancer.scale(getMinSkew(total, count),
|
||||
getMaxSkew(total, count), totalCost);
|
||||
}
|
||||
|
@ -95,18 +99,22 @@ final class DoubleArrayCost {
|
|||
* @param total is total number of regions
|
||||
*/
|
||||
public static double getMinSkew(double total, double numServers) {
|
||||
if (numServers == 0) {
|
||||
return 0;
|
||||
}
|
||||
double mean = total / numServers;
|
||||
// It's possible that there aren't enough regions to go around
|
||||
double min;
|
||||
if (numServers > total) {
|
||||
min = ((numServers - total) * mean + (1 - mean) * total) ;
|
||||
min = ((numServers - total) * mean * mean + (1 - mean) * (1 - mean) * total);
|
||||
} else {
|
||||
// Some will have 1 more than everything else.
|
||||
int numHigh = (int) (total - (Math.floor(mean) * numServers));
|
||||
int numLow = (int) (numServers - numHigh);
|
||||
min = numHigh * (Math.ceil(mean) - mean) + numLow * (mean - Math.floor(mean));
|
||||
min = numHigh * (Math.ceil(mean) - mean) * (Math.ceil(mean) - mean) +
|
||||
numLow * (mean - Math.floor(mean)) * (mean - Math.floor(mean));
|
||||
}
|
||||
return min;
|
||||
return Math.sqrt(min);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -116,7 +124,10 @@ final class DoubleArrayCost {
|
|||
* @param total is total number of regions
|
||||
*/
|
||||
public static double getMaxSkew(double total, double numServers) {
|
||||
if (numServers == 0) {
|
||||
return 0;
|
||||
}
|
||||
double mean = total / numServers;
|
||||
return (total - mean) + (numServers - 1) * mean;
|
||||
return Math.sqrt((total - mean) * (total - mean) + (numServers - 1) * mean * mean);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -884,7 +884,7 @@ public class StochasticLoadBalancer extends BaseLoadBalancer {
|
|||
static final float DEFAULT_MOVE_COST = 7;
|
||||
static final float DEFAULT_MOVE_COST_OFFPEAK = 3;
|
||||
private static final int DEFAULT_MAX_MOVES = 600;
|
||||
private static final float DEFAULT_MAX_MOVE_PERCENT = 0.25f;
|
||||
private static final float DEFAULT_MAX_MOVE_PERCENT = 1.0f;
|
||||
|
||||
private final float maxMovesPercent;
|
||||
private final OffPeakHours offPeakHours;
|
||||
|
@ -959,14 +959,6 @@ public class StochasticLoadBalancer extends BaseLoadBalancer {
|
|||
costs[i] = cluster.regionsPerServer[i].length;
|
||||
}
|
||||
});
|
||||
LOG.debug("{} sees a total of {} servers and {} regions.", getClass().getSimpleName(),
|
||||
cluster.numServers, cluster.numRegions);
|
||||
if (LOG.isTraceEnabled()) {
|
||||
for (int i =0; i < cluster.numServers; i++) {
|
||||
LOG.trace("{} sees server '{}' has {} regions", getClass().getSimpleName(),
|
||||
cluster.servers[i], cluster.regionsPerServer[i].length);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -1052,18 +1044,43 @@ public class StochasticLoadBalancer extends BaseLoadBalancer {
|
|||
private static final String TABLE_SKEW_COST_KEY =
|
||||
"hbase.master.balancer.stochastic.tableSkewCost";
|
||||
private static final float DEFAULT_TABLE_SKEW_COST = 35;
|
||||
DoubleArrayCost[] costsPerTable;
|
||||
|
||||
TableSkewCostFunction(Configuration conf) {
|
||||
super(conf);
|
||||
this.setMultiplier(conf.getFloat(TABLE_SKEW_COST_KEY, DEFAULT_TABLE_SKEW_COST));
|
||||
}
|
||||
@Override
|
||||
void init(Cluster cluster) {
|
||||
super.init(cluster);
|
||||
costsPerTable = new DoubleArrayCost[cluster.numTables];
|
||||
for (int tableIdx = 0; tableIdx < cluster.numTables; tableIdx++) {
|
||||
costsPerTable[tableIdx] = new DoubleArrayCost();
|
||||
costsPerTable[tableIdx].prepare(cluster.numServers);
|
||||
final int tableIndex = tableIdx;
|
||||
costsPerTable[tableIdx].applyCostsChange(costs -> {
|
||||
// Keep a cached deep copy for change-only recomputation
|
||||
for (int i = 0; i < cluster.numServers; i++) {
|
||||
costs[i] = cluster.numRegionsPerServerPerTable[tableIndex][i];
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void regionMoved(int region, int oldServer, int newServer) {
|
||||
int tableIdx = cluster.regionIndexToTableIndex[region];
|
||||
costsPerTable[tableIdx].applyCostsChange(costs -> {
|
||||
costs[oldServer] = cluster.numRegionsPerServerPerTable[tableIdx][oldServer];
|
||||
costs[newServer] = cluster.numRegionsPerServerPerTable[tableIdx][newServer];
|
||||
});
|
||||
}
|
||||
|
||||
@Override
|
||||
protected double cost() {
|
||||
double cost = 0;
|
||||
for (int tableIdx = 0; tableIdx < cluster.numTables; tableIdx++) {
|
||||
cost += scale(cluster.minRegionSkewByTable[tableIdx],
|
||||
cluster.maxRegionSkewByTable[tableIdx], cluster.regionSkewByTable[tableIdx]);
|
||||
cost += costsPerTable[tableIdx].cost();
|
||||
}
|
||||
return cost;
|
||||
}
|
||||
|
|
|
@ -69,7 +69,6 @@ public class BalancerTestBase {
|
|||
public static void beforeAllTests() throws Exception {
|
||||
conf = HBaseConfiguration.create();
|
||||
conf.setClass("hbase.util.ip.to.rack.determiner", MockMapping.class, DNSToSwitchMapping.class);
|
||||
conf.setFloat("hbase.master.balancer.stochastic.maxMovePercent", 0.75f);
|
||||
conf.setFloat("hbase.regions.slop", 0.0f);
|
||||
conf.setFloat("hbase.master.balancer.stochastic.localityCost", 0);
|
||||
loadBalancer = new StochasticLoadBalancer();
|
||||
|
|
|
@ -389,8 +389,6 @@ public class TestBaseLoadBalancer extends BalancerTestBase {
|
|||
|
||||
// now move region1 from servers[0] to servers[2]
|
||||
cluster.doAction(new MoveRegionAction(0, 0, 2));
|
||||
// check that the regionSkewByTable for "table" has increased to 2
|
||||
assertEquals(2, cluster.regionSkewByTable[0], 0.01);
|
||||
// now repeat check whether moving region1 from servers[1] to servers[2]
|
||||
// would lower availability
|
||||
assertTrue(cluster.wouldLowerAvailability(hri1, servers[2]));
|
||||
|
|
|
@ -62,6 +62,6 @@ public class TestDoubleArrayCost {
|
|||
}
|
||||
costs[100] = 100;
|
||||
});
|
||||
assertEquals(0.5, cost.cost(), 0.01);
|
||||
assertEquals(0.0708, cost.cost(), 0.01);
|
||||
}
|
||||
}
|
|
@ -94,7 +94,6 @@ public class TestStochasticBalancerJmxMetrics extends BalancerTestBase {
|
|||
conf = UTIL.getConfiguration();
|
||||
|
||||
conf.setClass("hbase.util.ip.to.rack.determiner", MockMapping.class, DNSToSwitchMapping.class);
|
||||
conf.setFloat("hbase.master.balancer.stochastic.maxMovePercent", 0.75f);
|
||||
conf.setFloat("hbase.regions.slop", 0.0f);
|
||||
conf.set(CoprocessorHost.REGIONSERVER_COPROCESSOR_CONF_KEY, JMXListener.class.getName());
|
||||
Random rand = new Random();
|
||||
|
|
|
@ -263,13 +263,13 @@ public class TestStochasticLoadBalancer extends BalancerTestBase {
|
|||
cluster.setNumRegions(10000);
|
||||
cluster.setNumMovedRegions(250);
|
||||
cost = costFunction.cost();
|
||||
assertEquals(0.1f, cost, 0.001);
|
||||
assertEquals(0.025f, cost, 0.001);
|
||||
cluster.setNumMovedRegions(1250);
|
||||
cost = costFunction.cost();
|
||||
assertEquals(0.5f, cost, 0.001);
|
||||
assertEquals(0.125f, cost, 0.001);
|
||||
cluster.setNumMovedRegions(2500);
|
||||
cost = costFunction.cost();
|
||||
assertEquals(1.0f, cost, 0.01);
|
||||
assertEquals(0.25f, cost, 0.01);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -52,7 +52,6 @@ public class TestStochasticLoadBalancerBalanceCluster extends BalancerTestBase {
|
|||
@Test
|
||||
public void testBalanceCluster() throws Exception {
|
||||
conf.setLong("hbase.master.balancer.stochastic.maxRunningTime", 3 * 60 * 1000); // 180 sec
|
||||
conf.setFloat("hbase.master.balancer.stochastic.maxMovePercent", 1.0f);
|
||||
conf.setLong(StochasticLoadBalancer.MAX_STEPS_KEY, 20000000L);
|
||||
loadBalancer.setConf(conf);
|
||||
for (int[] mockCluster : clusterStateMocks) {
|
||||
|
|
|
@ -41,7 +41,6 @@ public class TestStochasticLoadBalancerRegionReplicaSameHosts extends BalancerTe
|
|||
public void testRegionReplicationOnMidClusterSameHosts() {
|
||||
conf.setLong(StochasticLoadBalancer.MAX_STEPS_KEY, 2000000L);
|
||||
conf.setLong("hbase.master.balancer.stochastic.maxRunningTime", 90 * 1000); // 90 sec
|
||||
conf.setFloat("hbase.master.balancer.stochastic.maxMovePercent", 1.0f);
|
||||
loadBalancer.setConf(conf);
|
||||
int numHosts = 30;
|
||||
int numRegions = 30 * 30;
|
||||
|
|
|
@ -58,7 +58,6 @@ public class TestStochasticLoadBalancerRegionReplicaWithRacks extends BalancerTe
|
|||
|
||||
@Test
|
||||
public void testRegionReplicationOnMidClusterWithRacks() {
|
||||
conf.setLong(StochasticLoadBalancer.MAX_STEPS_KEY, 100000000L);
|
||||
conf.setBoolean("hbase.master.balancer.stochastic.runMaxSteps", true);
|
||||
conf.setFloat("hbase.master.balancer.stochastic.maxMovePercent", 1.0f);
|
||||
conf.setLong("hbase.master.balancer.stochastic.maxRunningTime", 120 * 1000); // 120 sec
|
||||
|
|
Loading…
Reference in New Issue