HBASE-26311 Balancer gets stuck in cohosted replica distribution (#3804)

Signed-off-by: Huaxiang Sun <huaxiangsun@apache.org>
This commit is contained in:
clarax 2021-11-03 11:58:38 -07:00 committed by GitHub
parent 9eb3706021
commit e06208f3d6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
14 changed files with 61 additions and 81 deletions

View File

@ -84,12 +84,8 @@ class BalancerClusterState {
int[] regionIndexToServerIndex; // regionIndex -> serverIndex
int[] initialRegionIndexToServerIndex; // regionIndex -> serverIndex (initial cluster state)
int[] regionIndexToTableIndex; // regionIndex -> tableIndex
int[][] numRegionsPerServerPerTable; // serverIndex -> tableIndex -> # regions
int[][] numRegionsPerServerPerTable; // tableIndex -> serverIndex -> # regions
int[] numRegionsPerTable; // tableIndex -> region count
double[] meanRegionsPerTable; // mean region count per table
double[] regionSkewByTable; // skew on RS per by table
double[] minRegionSkewByTable; // min skew on RS per by table
double[] maxRegionSkewByTable; // max skew on RS per by table
int[] numMaxRegionsPerTable; // tableIndex -> max number of regions in a single RS
int[] regionIndexToPrimaryIndex; // regionIndex -> regionIndex of the primary
boolean hasRegionReplicas = false; // whether there is regions with replicas
@ -302,42 +298,24 @@ class BalancerClusterState {
}
numTables = tables.size();
LOG.debug("Number of tables={}", numTables);
numRegionsPerServerPerTable = new int[numServers][numTables];
LOG.debug("Number of tables={}, number of hosts={}, number of racks={}", numTables,
numHosts, numRacks);
numRegionsPerServerPerTable = new int[numTables][numServers];
numRegionsPerTable = new int[numTables];
for (int i = 0; i < numServers; i++) {
for (int j = 0; j < numTables; j++) {
for (int i = 0; i < numTables; i++) {
for (int j = 0; j < numServers; j++) {
numRegionsPerServerPerTable[i][j] = 0;
}
}
for (int i = 0; i < regionIndexToServerIndex.length; i++) {
if (regionIndexToServerIndex[i] >= 0) {
numRegionsPerServerPerTable[regionIndexToServerIndex[i]][regionIndexToTableIndex[i]]++;
numRegionsPerServerPerTable[regionIndexToTableIndex[i]][regionIndexToServerIndex[i]]++;
numRegionsPerTable[regionIndexToTableIndex[i]]++;
}
}
// Avoid repeated computation for planning
meanRegionsPerTable = new double[numTables];
regionSkewByTable = new double[numTables];
maxRegionSkewByTable = new double[numTables];
minRegionSkewByTable = new double[numTables];
for (int i = 0; i < numTables; i++) {
meanRegionsPerTable[i] = Double.valueOf(numRegionsPerTable[i]) / numServers;
minRegionSkewByTable[i] += DoubleArrayCost.getMinSkew(numRegionsPerTable[i], numServers);
maxRegionSkewByTable[i] += DoubleArrayCost.getMaxSkew(numRegionsPerTable[i], numServers);
}
for (int[] aNumRegionsPerServerPerTable : numRegionsPerServerPerTable) {
for (int tableIdx = 0; tableIdx < aNumRegionsPerServerPerTable.length; tableIdx++) {
regionSkewByTable[tableIdx] +=
Math.abs(aNumRegionsPerServerPerTable[tableIdx] - meanRegionsPerTable[tableIdx]);
}
}
for (int i = 0; i < regions.length; i++) {
RegionInfo info = regions[i];
if (RegionReplicaUtil.isDefaultReplica(info)) {
@ -683,14 +661,9 @@ class BalancerClusterState {
}
int tableIndex = regionIndexToTableIndex[region];
if (oldServer >= 0) {
numRegionsPerServerPerTable[oldServer][tableIndex]--;
// update regionSkewPerTable for the move from old server
regionSkewByTable[tableIndex] += getSkewChangeFor(oldServer, tableIndex, -1);
numRegionsPerServerPerTable[tableIndex][oldServer]--;
}
numRegionsPerServerPerTable[newServer][tableIndex]++;
// update regionSkewPerTable for the move to new server
regionSkewByTable[tableIndex] += getSkewChangeFor(newServer, tableIndex, 1);
numRegionsPerServerPerTable[tableIndex][newServer]++;
// update for servers
int primary = regionIndexToPrimaryIndex[region];
@ -864,18 +837,9 @@ class BalancerClusterState {
.append(Arrays.toString(serverIndicesSortedByRegionCount)).append(", regionsPerServer=")
.append(Arrays.deepToString(regionsPerServer));
desc.append(", regionSkewByTable=").append(Arrays.toString(regionSkewByTable))
.append(", numRegions=").append(numRegions).append(", numServers=").append(numServers)
desc.append(", numRegions=").append(numRegions).append(", numServers=").append(numServers)
.append(", numTables=").append(numTables).append(", numMovedRegions=").append(numMovedRegions)
.append('}');
return desc.toString();
}
private double getSkewChangeFor(int serverIndex, int tableIndex, double regionCountChange) {
double curSkew = Math.abs(numRegionsPerServerPerTable[serverIndex][tableIndex] -
meanRegionsPerTable[tableIndex]);
double oldSkew = Math.abs(numRegionsPerServerPerTable[serverIndex][tableIndex] -
regionCountChange - meanRegionsPerTable[tableIndex]);
return curSkew - oldSkew;
}
}

View File

@ -66,6 +66,9 @@ final class DoubleArrayCost {
}
private static double computeCost(double[] stats) {
if (stats == null || stats.length == 0) {
return 0;
}
double totalCost = 0;
double total = getSum(stats);
@ -74,10 +77,11 @@ final class DoubleArrayCost {
for (int i = 0; i < stats.length; i++) {
double n = stats[i];
double diff = Math.abs(mean - n);
double diff = (mean - n) * (mean - n);
totalCost += diff;
}
// No need to compute standard deviation with division by cluster size when scaling.
totalCost = Math.sqrt(totalCost);
return CostFunction.scale(getMinSkew(total, count),
getMaxSkew(total, count), totalCost);
}
@ -95,18 +99,22 @@ final class DoubleArrayCost {
* @param total is total number of regions
*/
public static double getMinSkew(double total, double numServers) {
if (numServers == 0) {
return 0;
}
double mean = total / numServers;
// It's possible that there aren't enough regions to go around
double min;
if (numServers > total) {
min = ((numServers - total) * mean + (1 - mean) * total) ;
min = ((numServers - total) * mean * mean + (1 - mean) * (1 - mean) * total);
} else {
// Some will have 1 more than everything else.
int numHigh = (int) (total - (Math.floor(mean) * numServers));
int numLow = (int) (numServers - numHigh);
min = numHigh * (Math.ceil(mean) - mean) + numLow * (mean - Math.floor(mean));
min = numHigh * (Math.ceil(mean) - mean) * (Math.ceil(mean) - mean) +
numLow * (mean - Math.floor(mean)) * (mean - Math.floor(mean));
}
return min;
return Math.sqrt(min);
}
/**
@ -116,7 +124,10 @@ final class DoubleArrayCost {
* @param total is total number of regions
*/
public static double getMaxSkew(double total, double numServers) {
if (numServers == 0) {
return 0;
}
double mean = total / numServers;
return (total - mean) + (numServers - 1) * mean;
return Math.sqrt((total - mean) * (total - mean) + (numServers - 1) * mean * mean);
}
}

View File

@ -35,7 +35,7 @@ class MoveCostFunction extends CostFunction {
static final float DEFAULT_MOVE_COST = 7;
static final float DEFAULT_MOVE_COST_OFFPEAK = 3;
private static final int DEFAULT_MAX_MOVES = 600;
private static final float DEFAULT_MAX_MOVE_PERCENT = 0.25f;
private static final float DEFAULT_MAX_MOVE_PERCENT = 1.0f;
private final float maxMovesPercent;
private final OffPeakHours offPeakHours;
@ -80,4 +80,4 @@ class MoveCostFunction extends CostFunction {
return scale(0, Math.min(cluster.numRegions, maxMoves), moveCost);
}
}
}

View File

@ -19,8 +19,6 @@ package org.apache.hadoop.hbase.master.balancer;
import org.apache.hadoop.conf.Configuration;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* Compute the cost of a potential cluster state from skew in number of regions on a cluster.
@ -28,8 +26,6 @@ import org.slf4j.LoggerFactory;
@InterfaceAudience.Private
class RegionCountSkewCostFunction extends CostFunction {
private static final Logger LOG = LoggerFactory.getLogger(RegionCountSkewCostFunction.class);
static final String REGION_COUNT_SKEW_COST_KEY =
"hbase.master.balancer.stochastic.regionCountCost";
static final float DEFAULT_REGION_COUNT_SKEW_COST = 500;
@ -50,14 +46,6 @@ class RegionCountSkewCostFunction extends CostFunction {
costs[i] = cluster.regionsPerServer[i].length;
}
});
LOG.debug("{} sees a total of {} servers and {} regions.", getClass().getSimpleName(),
cluster.numServers, cluster.numRegions);
if (LOG.isTraceEnabled()) {
for (int i = 0; i < cluster.numServers; i++) {
LOG.trace("{} sees server '{}' has {} regions", getClass().getSimpleName(),
cluster.servers[i], cluster.regionsPerServer[i].length);
}
}
}
@Override
@ -72,4 +60,4 @@ class RegionCountSkewCostFunction extends CostFunction {
costs[newServer] = cluster.regionsPerServer[newServer].length;
});
}
}
}

View File

@ -30,17 +30,42 @@ class TableSkewCostFunction extends CostFunction {
private static final String TABLE_SKEW_COST_KEY =
"hbase.master.balancer.stochastic.tableSkewCost";
private static final float DEFAULT_TABLE_SKEW_COST = 35;
DoubleArrayCost[] costsPerTable;
TableSkewCostFunction(Configuration conf) {
this.setMultiplier(conf.getFloat(TABLE_SKEW_COST_KEY, DEFAULT_TABLE_SKEW_COST));
}
@Override
void prepare(BalancerClusterState cluster) {
super.prepare(cluster);
costsPerTable = new DoubleArrayCost[cluster.numTables];
for (int tableIdx = 0; tableIdx < cluster.numTables; tableIdx++) {
costsPerTable[tableIdx] = new DoubleArrayCost();
costsPerTable[tableIdx].prepare(cluster.numServers);
final int tableIndex = tableIdx;
costsPerTable[tableIdx].applyCostsChange(costs -> {
// Keep a cached deep copy for change-only recomputation
for (int i = 0; i < cluster.numServers; i++) {
costs[i] = cluster.numRegionsPerServerPerTable[tableIndex][i];
}
});
}
}
@Override
protected void regionMoved(int region, int oldServer, int newServer) {
int tableIdx = cluster.regionIndexToTableIndex[region];
costsPerTable[tableIdx].applyCostsChange(costs -> {
costs[oldServer] = cluster.numRegionsPerServerPerTable[tableIdx][oldServer];
costs[newServer] = cluster.numRegionsPerServerPerTable[tableIdx][newServer];
});
}
@Override
protected double cost() {
double cost = 0;
for (int tableIdx = 0; tableIdx < cluster.numTables; tableIdx++) {
cost += scale(cluster.minRegionSkewByTable[tableIdx],
cluster.maxRegionSkewByTable[tableIdx], cluster.regionSkewByTable[tableIdx]);
cost += costsPerTable[tableIdx].cost();
}
return cost;
}

View File

@ -75,7 +75,6 @@ public class BalancerTestBase {
public static void beforeAllTests() throws Exception {
conf = HBaseConfiguration.create();
conf.setClass("hbase.util.ip.to.rack.determiner", MockMapping.class, DNSToSwitchMapping.class);
conf.setFloat("hbase.master.balancer.stochastic.maxMovePercent", 0.75f);
conf.setFloat("hbase.regions.slop", 0.0f);
conf.setFloat("hbase.master.balancer.stochastic.localityCost", 0);
loadBalancer = new StochasticLoadBalancer(dummyMetricsStochasticBalancer);

View File

@ -24,7 +24,6 @@ public class BalancerTestBase2 extends BalancerTestBase {
@Before
public void before() {
conf.setFloat("hbase.master.balancer.stochastic.maxMovePercent", 1.0f);
conf.setLong(StochasticLoadBalancer.MAX_STEPS_KEY, 2000000L);
conf.setFloat("hbase.master.balancer.stochastic.localityCost", 0);
conf.setLong("hbase.master.balancer.stochastic.maxRunningTime", 3 * 60 * 1000L);

View File

@ -369,8 +369,6 @@ public class TestBaseLoadBalancer extends BalancerTestBase {
// now move region1 from servers[0] to servers[2]
cluster.doAction(new MoveRegionAction(0, 0, 2));
// check that the regionSkewByTable for "table" has increased to 2
assertEquals(2, cluster.regionSkewByTable[0], 0.01);
// now repeat check whether moving region1 from servers[1] to servers[2]
// would lower availability
assertTrue(cluster.wouldLowerAvailability(hri1, servers[2]));

View File

@ -62,6 +62,6 @@ public class TestDoubleArrayCost {
}
costs[100] = 100;
});
assertEquals(0.5, cost.cost(), 0.01);
assertEquals(0.0708, cost.cost(), 0.01);
}
}

View File

@ -94,7 +94,6 @@ public class TestStochasticBalancerJmxMetrics extends BalancerTestBase {
conf = UTIL.getConfiguration();
conf.setClass("hbase.util.ip.to.rack.determiner", MockMapping.class, DNSToSwitchMapping.class);
conf.setFloat("hbase.master.balancer.stochastic.maxMovePercent", 0.75f);
conf.setFloat("hbase.regions.slop", 0.0f);
conf.set(CoprocessorHost.REGIONSERVER_COPROCESSOR_CONF_KEY, JMXListener.class.getName());
Random rand = new Random();

View File

@ -354,13 +354,13 @@ public class TestStochasticLoadBalancer extends BalancerTestBase {
cluster.setNumRegions(10000);
cluster.setNumMovedRegions(250);
cost = costFunction.cost();
assertEquals(0.1f, cost, 0.001);
assertEquals(0.025f, cost, 0.001);
cluster.setNumMovedRegions(1250);
cost = costFunction.cost();
assertEquals(0.5f, cost, 0.001);
assertEquals(0.125f, cost, 0.001);
cluster.setNumMovedRegions(2500);
cost = costFunction.cost();
assertEquals(1.0f, cost, 0.01);
assertEquals(0.25f, cost, 0.01);
}
}

View File

@ -52,7 +52,6 @@ public class TestStochasticLoadBalancerBalanceCluster extends BalancerTestBase {
@Test
public void testBalanceCluster() throws Exception {
conf.setLong("hbase.master.balancer.stochastic.maxRunningTime", 3 * 60 * 1000); // 3 min
conf.setFloat("hbase.master.balancer.stochastic.maxMovePercent", 1.0f);
conf.setLong(StochasticLoadBalancer.MAX_STEPS_KEY, 20000000L);
loadBalancer.onConfigurationChange(conf);

View File

@ -41,7 +41,6 @@ public class TestStochasticLoadBalancerRegionReplicaSameHosts extends BalancerTe
public void testRegionReplicationOnMidClusterSameHosts() {
conf.setLong(StochasticLoadBalancer.MAX_STEPS_KEY, 2000000L);
conf.setLong("hbase.master.balancer.stochastic.maxRunningTime", 90 * 1000); // 90 sec
conf.setFloat("hbase.master.balancer.stochastic.maxMovePercent", 1.0f);
loadBalancer.onConfigurationChange(conf);
int numHosts = 30;
int numRegions = 30 * 30;

View File

@ -60,7 +60,6 @@ public class TestStochasticLoadBalancerRegionReplicaWithRacks extends BalancerTe
public void testRegionReplicationOnMidClusterWithRacks() {
conf.setLong(StochasticLoadBalancer.MAX_STEPS_KEY, 100000000L);
conf.setBoolean("hbase.master.balancer.stochastic.runMaxSteps", true);
conf.setFloat("hbase.master.balancer.stochastic.maxMovePercent", 1.0f);
conf.setLong("hbase.master.balancer.stochastic.maxRunningTime", 120 * 1000); // 120 sec
loadBalancer.onConfigurationChange(conf);
int numNodes = 4;