HBASE-26311 Balancer gets stuck in cohosted replica distribution (#3724)

Signed-off-by: Huaxiang Sun <huaxiangsun@apache.org>
This commit is contained in:
clarax 2021-11-03 11:57:37 -07:00 committed by GitHub
parent 72a88468a8
commit cd7a5099d4
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
16 changed files with 68 additions and 81 deletions

View File

@ -84,12 +84,9 @@ class BalancerClusterState {
int[] regionIndexToServerIndex; // regionIndex -> serverIndex
int[] initialRegionIndexToServerIndex; // regionIndex -> serverIndex (initial cluster state)
int[] regionIndexToTableIndex; // regionIndex -> tableIndex
int[][] numRegionsPerServerPerTable; // serverIndex -> tableIndex -> # regions
int[][] numRegionsPerServerPerTable; // tableIndex -> serverIndex -> tableIndex -> # regions
int[] numRegionsPerTable; // tableIndex -> region count
double[] meanRegionsPerTable; // mean region count per table
double[] regionSkewByTable; // skew on RS per by table
double[] minRegionSkewByTable; // min skew on RS per by table
double[] maxRegionSkewByTable; // max skew on RS per by table
int[] regionIndexToPrimaryIndex; // regionIndex -> regionIndex of the primary
boolean hasRegionReplicas = false; // whether there is regions with replicas
@ -283,6 +280,11 @@ class BalancerClusterState {
regionIndex++;
}
if (LOG.isDebugEnabled()) {
for (int i = 0; i < numServers; i++) {
LOG.debug("server {} has {} regions", i, regionsPerServer[i].length);
}
}
for (int i = 0; i < serversPerHostList.size(); i++) {
serversPerHost[i] = new int[serversPerHostList.get(i).size()];
for (int j = 0; j < serversPerHost[i].length; j++) {
@ -303,40 +305,29 @@ class BalancerClusterState {
}
numTables = tables.size();
LOG.debug("Number of tables={}", numTables);
numRegionsPerServerPerTable = new int[numServers][numTables];
LOG.debug("Number of tables={}, number of hosts={}, number of racks={}", numTables,
numHosts, numRacks);
numRegionsPerServerPerTable = new int[numTables][numServers];
numRegionsPerTable = new int[numTables];
for (int i = 0; i < numServers; i++) {
for (int j = 0; j < numTables; j++) {
for (int i = 0; i < numTables; i++) {
for (int j = 0; j < numServers; j++) {
numRegionsPerServerPerTable[i][j] = 0;
}
}
for (int i = 0; i < regionIndexToServerIndex.length; i++) {
if (regionIndexToServerIndex[i] >= 0) {
numRegionsPerServerPerTable[regionIndexToServerIndex[i]][regionIndexToTableIndex[i]]++;
numRegionsPerServerPerTable[regionIndexToTableIndex[i]][regionIndexToServerIndex[i]]++;
numRegionsPerTable[regionIndexToTableIndex[i]]++;
}
}
// Avoid repeated computation for planning
meanRegionsPerTable = new double[numTables];
regionSkewByTable = new double[numTables];
maxRegionSkewByTable = new double[numTables];
minRegionSkewByTable = new double[numTables];
for (int i = 0; i < numTables; i++) {
meanRegionsPerTable[i] = Double.valueOf(numRegionsPerTable[i]) / numServers;
minRegionSkewByTable[i] += DoubleArrayCost.getMinSkew(numRegionsPerTable[i], numServers);
maxRegionSkewByTable[i] += DoubleArrayCost.getMaxSkew(numRegionsPerTable[i], numServers);
}
for (int[] aNumRegionsPerServerPerTable : numRegionsPerServerPerTable) {
for (int tableIdx = 0; tableIdx < aNumRegionsPerServerPerTable.length; tableIdx++) {
regionSkewByTable[tableIdx] += Math.abs(aNumRegionsPerServerPerTable[tableIdx] -
meanRegionsPerTable[tableIdx]);
}
}
for (int i = 0; i < regions.length; i++) {
@ -684,14 +675,9 @@ class BalancerClusterState {
}
int tableIndex = regionIndexToTableIndex[region];
if (oldServer >= 0) {
numRegionsPerServerPerTable[oldServer][tableIndex]--;
// update regionSkewPerTable for the move from old server
regionSkewByTable[tableIndex] += getSkewChangeFor(oldServer, tableIndex, -1);
numRegionsPerServerPerTable[tableIndex][oldServer]--;
}
numRegionsPerServerPerTable[newServer][tableIndex]++;
// update regionSkewPerTable for the move to new server
regionSkewByTable[tableIndex] += getSkewChangeFor(newServer, tableIndex, 1);
numRegionsPerServerPerTable[tableIndex][newServer]++;
// update for servers
int primary = regionIndexToPrimaryIndex[region];
@ -865,18 +851,9 @@ class BalancerClusterState {
.append(Arrays.toString(serverIndicesSortedByRegionCount)).append(", regionsPerServer=")
.append(Arrays.deepToString(regionsPerServer));
desc.append(", regionSkewByTable=").append(Arrays.toString(regionSkewByTable))
.append(", numRegions=").append(numRegions).append(", numServers=").append(numServers)
desc.append(", numRegions=").append(numRegions).append(", numServers=").append(numServers)
.append(", numTables=").append(numTables).append(", numMovedRegions=").append(numMovedRegions)
.append('}');
return desc.toString();
}
private double getSkewChangeFor(int serverIndex, int tableIndex, int regionCountChange) {
double curSkew = Math.abs(numRegionsPerServerPerTable[serverIndex][tableIndex] -
meanRegionsPerTable[tableIndex]);
double oldSkew = Math.abs(numRegionsPerServerPerTable[serverIndex][tableIndex] -
regionCountChange - meanRegionsPerTable[tableIndex]);
return curSkew - oldSkew;
}
}

View File

@ -66,17 +66,21 @@ final class DoubleArrayCost {
}
private static double computeCost(double[] stats) {
if (stats == null || stats.length == 0) {
return 0;
}
double totalCost = 0;
double total = getSum(stats);
double count = stats.length;
double mean = total / count;
for (int i = 0; i < stats.length; i++) {
double n = stats[i];
double diff = Math.abs(mean - n);
double diff = (mean - n) * (mean - n);
totalCost += diff;
}
// No need to compute standard deviation with division by cluster size when scaling.
totalCost = Math.sqrt(totalCost);
return CostFunction.scale(getMinSkew(total, count),
getMaxSkew(total, count), totalCost);
}
@ -94,18 +98,22 @@ final class DoubleArrayCost {
* @param total is total number of regions
*/
public static double getMinSkew(double total, double numServers) {
if (numServers == 0) {
return 0;
}
double mean = total / numServers;
// It's possible that there aren't enough regions to go around
double min;
if (numServers > total) {
min = ((numServers - total) * mean + (1 - mean) * total) ;
min = ((numServers - total) * mean * mean + (1 - mean) * (1 - mean) * total) ;
} else {
// Some will have 1 more than everything else.
int numHigh = (int) (total - (Math.floor(mean) * numServers));
int numLow = (int) (numServers - numHigh);
min = numHigh * (Math.ceil(mean) - mean) + numLow * (mean - Math.floor(mean));
min = numHigh * (Math.ceil(mean) - mean) * (Math.ceil(mean) - mean) +
numLow * (mean - Math.floor(mean)) * (mean - Math.floor(mean));
}
return min;
return Math.sqrt(min);
}
/**
@ -114,7 +122,10 @@ final class DoubleArrayCost {
* a zero sum cost for this to make sense.
*/
public static double getMaxSkew(double total, double numServers) {
if (numServers == 0) {
return 0;
}
double mean = total / numServers;
return (total - mean) + (numServers - 1) * mean;
return Math.sqrt((total - mean) * (total - mean) + (numServers - 1) * mean * mean);
}
}

View File

@ -34,7 +34,7 @@ class MoveCostFunction extends CostFunction {
static final float DEFAULT_MOVE_COST = 7;
static final float DEFAULT_MOVE_COST_OFFPEAK = 3;
private static final int DEFAULT_MAX_MOVES = 600;
private static final float DEFAULT_MAX_MOVE_PERCENT = 0.25f;
private static final float DEFAULT_MAX_MOVE_PERCENT = 1.0f;
private final float maxMovesPercent;
private final ClusterInfoProvider provider;

View File

@ -19,17 +19,12 @@ package org.apache.hadoop.hbase.master.balancer;
import org.apache.hadoop.conf.Configuration;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* Compute the cost of a potential cluster state from skew in number of regions on a cluster.
*/
@InterfaceAudience.Private
class RegionCountSkewCostFunction extends CostFunction {
private static final Logger LOG = LoggerFactory.getLogger(RegionCountSkewCostFunction.class);
static final String REGION_COUNT_SKEW_COST_KEY =
"hbase.master.balancer.stochastic.regionCountCost";
static final float DEFAULT_REGION_COUNT_SKEW_COST = 500;
@ -50,14 +45,6 @@ class RegionCountSkewCostFunction extends CostFunction {
costs[i] = cluster.regionsPerServer[i].length;
}
});
LOG.debug("{} sees a total of {} servers and {} regions.", getClass().getSimpleName(),
cluster.numServers, cluster.numRegions);
if (LOG.isTraceEnabled()) {
for (int i = 0; i < cluster.numServers; i++) {
LOG.trace("{} sees server '{}' has {} regions", getClass().getSimpleName(),
cluster.servers[i], cluster.regionsPerServer[i].length);
}
}
}
@Override

View File

@ -29,17 +29,43 @@ class TableSkewCostFunction extends CostFunction {
private static final String TABLE_SKEW_COST_KEY =
"hbase.master.balancer.stochastic.tableSkewCost";
private static final float DEFAULT_TABLE_SKEW_COST = 35;
DoubleArrayCost[] costsPerTable;
TableSkewCostFunction(Configuration conf) {
this.setMultiplier(conf.getFloat(TABLE_SKEW_COST_KEY, DEFAULT_TABLE_SKEW_COST));
}
@Override
void prepare(BalancerClusterState cluster) {
super.prepare(cluster);
costsPerTable = new DoubleArrayCost[cluster.numTables];
for (int tableIdx = 0; tableIdx < cluster.numTables; tableIdx++) {
costsPerTable[tableIdx] = new DoubleArrayCost();
costsPerTable[tableIdx].prepare(cluster.numServers);
final int tableIndex = tableIdx;
costsPerTable[tableIdx].applyCostsChange(costs -> {
// Keep a cached deep copy for change-only recomputation
for (int i = 0; i < cluster.numServers; i++) {
costs[i] = cluster.numRegionsPerServerPerTable[tableIndex][i];
}
});
}
}
@Override
protected void regionMoved(int region, int oldServer, int newServer) {
int tableIdx = cluster.regionIndexToTableIndex[region];
costsPerTable[tableIdx].applyCostsChange(costs -> {
costs[oldServer] = cluster.numRegionsPerServerPerTable[tableIdx][oldServer];
costs[newServer] = cluster.numRegionsPerServerPerTable[tableIdx][newServer];
});
}
@Override
protected double cost() {
double cost = 0;
for (int tableIdx = 0; tableIdx < cluster.numTables; tableIdx++) {
cost += scale(cluster.minRegionSkewByTable[tableIdx],
cluster.maxRegionSkewByTable[tableIdx], cluster.regionSkewByTable[tableIdx]);
cost += costsPerTable[tableIdx].cost();
}
return cost;
}

View File

@ -45,7 +45,6 @@ public class StochasticBalancerTestBase extends BalancerTestBase {
public static void beforeAllTests() throws Exception {
conf = HBaseConfiguration.create();
conf.setClass("hbase.util.ip.to.rack.determiner", MockMapping.class, DNSToSwitchMapping.class);
conf.setFloat("hbase.master.balancer.stochastic.maxMovePercent", 0.75f);
conf.setFloat("hbase.regions.slop", 0.0f);
conf.setFloat("hbase.master.balancer.stochastic.localityCost", 0);
conf.setBoolean("hbase.master.balancer.stochastic.runMaxSteps", true);

View File

@ -24,7 +24,6 @@ public class StochasticBalancerTestBase2 extends StochasticBalancerTestBase {
@Before
public void before() {
conf.setFloat("hbase.master.balancer.stochastic.maxMovePercent", 1.0f);
conf.setLong(StochasticLoadBalancer.MAX_STEPS_KEY, 2000000L);
conf.setFloat("hbase.master.balancer.stochastic.localityCost", 0);
conf.setLong("hbase.master.balancer.stochastic.maxRunningTime", 3 * 60 * 1000L);

View File

@ -355,8 +355,6 @@ public class TestBaseLoadBalancer extends BalancerTestBase {
// now move region1 from servers[0] to servers[2]
cluster.doAction(new MoveRegionAction(0, 0, 2));
// check that the regionSkewByTable for "table" has increased to 2
assertEquals(2, cluster.regionSkewByTable[0], 0.01);
// now repeat check whether moving region1 from servers[1] to servers[2]
// would lower availability
assertTrue(cluster.wouldLowerAvailability(hri1, servers[2]));

View File

@ -62,6 +62,6 @@ public class TestDoubleArrayCost {
}
costs[100] = 100;
});
assertEquals(0.5, cost.cost(), 0.01);
assertEquals(0.0708, cost.cost(), 0.01);
}
}

View File

@ -417,13 +417,13 @@ public class TestStochasticLoadBalancer extends StochasticBalancerTestBase {
cluster.setNumRegions(10000);
cluster.setNumMovedRegions(250);
cost = costFunction.cost();
assertEquals(0.1f, cost, 0.001);
assertEquals(0.025f, cost, 0.001);
cluster.setNumMovedRegions(1250);
cost = costFunction.cost();
assertEquals(0.5f, cost, 0.001);
assertEquals(0.125f, cost, 0.001);
cluster.setNumMovedRegions(2500);
cost = costFunction.cost();
assertEquals(1.0f, cost, 0.01);
assertEquals(0.25f, cost, 0.01);
}
}

View File

@ -51,7 +51,6 @@ public class TestStochasticLoadBalancerBalanceCluster extends StochasticBalancer
*/
@Test
public void testBalanceCluster() throws Exception {
conf.setFloat("hbase.master.balancer.stochastic.maxMovePercent", 1.0f);
loadBalancer.onConfigurationChange(conf);
for (int[] mockCluster : clusterStateMocks) {
Map<ServerName, List<RegionInfo>> servers = mockClusterServers(mockCluster);

View File

@ -39,7 +39,6 @@ public class TestStochasticLoadBalancerLargeCluster extends StochasticBalancerTe
int numTables = 100;
int replication = 1;
conf.setLong("hbase.master.balancer.stochastic.maxRunningTime", 6 * 60 * 1000);
conf.setFloat("hbase.master.balancer.stochastic.maxMovePercent", 1.0f);
loadBalancer.onConfigurationChange(conf);
testWithCluster(numNodes, numRegions, numRegionsPerServer, replication, numTables, true, true);
}

View File

@ -41,7 +41,6 @@ public class TestStochasticLoadBalancerRegionReplicaSameHosts extends Stochastic
public void testRegionReplicationOnMidClusterSameHosts() {
conf.setLong(StochasticLoadBalancer.MAX_STEPS_KEY, 2000000L);
conf.setLong("hbase.master.balancer.stochastic.maxRunningTime", 90 * 1000); // 90 sec
conf.setFloat("hbase.master.balancer.stochastic.maxMovePercent", 1.0f);
loadBalancer.onConfigurationChange(conf);
int numHosts = 30;
int numRegions = 30 * 30;

View File

@ -47,7 +47,6 @@ public class TestStochasticLoadBalancerRegionReplicaWithRacks extends Stochastic
this.numRacks = numRacks;
}
@Override
public String getRack(ServerName server) {
String key = server.getServerName();
@ -62,10 +61,7 @@ public class TestStochasticLoadBalancerRegionReplicaWithRacks extends Stochastic
public void testRegionReplicationOnMidClusterWithRacks() {
conf.setLong(StochasticLoadBalancer.MAX_STEPS_KEY, 100000000L);
conf.setBoolean("hbase.master.balancer.stochastic.runMaxSteps", true);
conf.setFloat("hbase.master.balancer.stochastic.maxMovePercent", 1.0f);
conf.setLong("hbase.master.balancer.stochastic.maxRunningTime", 120 * 1000); // 120 sec
// for full balance
// conf.setFloat("hbase.master.balancer.stochastic.minCostNeedBalance", 0.001f);
loadBalancer.onConfigurationChange(conf);
int numNodes = 5;
int numRegions = numNodes * 1;
@ -76,7 +72,6 @@ public class TestStochasticLoadBalancerRegionReplicaWithRacks extends Stochastic
Map<ServerName, List<RegionInfo>> serverMap =
createServerMap(numNodes, numRegions, numRegionsPerServer, replication, numTables);
RackManager rm = new ForTestRackManager(numRacks);
testWithClusterWithIteration(serverMap, rm, true, true);
}
@ -84,7 +79,6 @@ public class TestStochasticLoadBalancerRegionReplicaWithRacks extends Stochastic
public void testRegionReplicationOnLargeClusterWithRacks() {
conf.setBoolean("hbase.master.balancer.stochastic.runMaxSteps", false);
conf.setLong(StochasticLoadBalancer.MAX_STEPS_KEY, 5000L);
conf.setFloat("hbase.master.balancer.stochastic.maxMovePercent", 1.0f);
conf.setLong("hbase.master.balancer.stochastic.maxRunningTime", 10 * 1000); // 10 sec
loadBalancer.onConfigurationChange(conf);
int numNodes = 100;

View File

@ -58,7 +58,7 @@ public class TestStochasticLoadBalancerSmallCluster extends StochasticBalancerTe
int numRegionsPerServer = 1; // all servers except one
int replication = 1;
int numTables = 10;
/* fails because of max moves */
// fails because of max moves
testWithCluster(numNodes, numRegions, numRegionsPerServer, replication, numTables, false,
false);
}

View File

@ -94,7 +94,6 @@ public class TestStochasticBalancerJmxMetrics extends BalancerTestBase {
conf = UTIL.getConfiguration();
conf.setClass("hbase.util.ip.to.rack.determiner", MockMapping.class, DNSToSwitchMapping.class);
conf.setFloat("hbase.master.balancer.stochastic.maxMovePercent", 0.75f);
conf.setFloat("hbase.regions.slop", 0.0f);
conf.set(CoprocessorHost.REGIONSERVER_COPROCESSOR_CONF_KEY, JMXListener.class.getName());
Random rand = new Random();