HBASE-22349 slop in StochasticLoadBalancer (#4371)

Signed-off-by: Andrew Purtell <apurtell@apache.org>
This commit is contained in:
d-c-manning 2022-04-28 12:28:15 -07:00 committed by GitHub
parent ec78c2677b
commit f848c61e3d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 254 additions and 136 deletions

View File

@ -25,6 +25,7 @@ import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.NavigableMap;
import java.util.Random;
import java.util.Set;
import java.util.TreeMap;
@ -139,6 +140,28 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
return isServerExistsWithMoreRegions && isServerExistsWithZeroRegions;
}
protected final boolean sloppyRegionServerExist(ClusterLoadState cs) {
if (slop < 0) {
LOG.debug("Slop is less than zero, not checking for sloppiness.");
return false;
}
float average = cs.getLoadAverage(); // for logging
int floor = (int) Math.floor(average * (1 - slop));
int ceiling = (int) Math.ceil(average * (1 + slop));
if (!(cs.getMaxLoad() > ceiling || cs.getMinLoad() < floor)) {
NavigableMap<ServerAndLoad, List<RegionInfo>> serversByLoad = cs.getServersByLoad();
if (LOG.isTraceEnabled()) {
// If nothing to balance, then don't say anything unless trace-level logging.
LOG.trace("Skipping load balancing because balanced cluster; " + "servers=" +
cs.getNumServers() + " regions=" + cs.getNumRegions() + " average=" + average +
" mostloaded=" + serversByLoad.lastKey().getLoad() + " leastloaded=" +
serversByLoad.firstKey().getLoad());
}
return false;
}
return true;
}
/**
* Generates a bulk assignment plan to be used on cluster startup using a
* simple round-robin assignment.
@ -372,16 +395,6 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
return Collections.unmodifiableMap(assignments);
}
protected final float normalizeSlop(float slop) {
if (slop < 0) {
return 0;
}
if (slop > 1) {
return 1;
}
return slop;
}
protected float getDefaultSlop() {
return 0.2f;
}
@ -394,7 +407,7 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
}
protected void loadConf(Configuration conf) {
this.slop = normalizeSlop(conf.getFloat("hbase.regions.slop", getDefaultSlop()));
this.slop = conf.getFloat("hbase.regions.slop", getDefaultSlop());
this.rackManager = new RackManager(conf);
useRegionFinder = conf.getBoolean("hbase.master.balancer.uselocality", true);
if (useRegionFinder) {

View File

@ -191,24 +191,9 @@ public class SimpleLoadBalancer extends BaseLoadBalancer {
if (idleRegionServerExist(c)) {
return true;
}
// Check if we even need to do any load balancing
// HBASE-3681 check sloppiness first
float average = cs.getLoadAverage(); // for logging
int floor = (int) Math.floor(average * (1 - slop));
int ceiling = (int) Math.ceil(average * (1 + slop));
if (!(cs.getMaxLoad() > ceiling || cs.getMinLoad() < floor)) {
NavigableMap<ServerAndLoad, List<RegionInfo>> serversByLoad = cs.getServersByLoad();
if (LOG.isTraceEnabled()) {
// If nothing to balance, then don't say anything unless trace-level logging.
LOG.trace("Skipping load balancing because balanced cluster; " + "servers=" +
cs.getNumServers() + " regions=" + cs.getNumRegions() + " average=" + average +
" mostloaded=" + serversByLoad.lastKey().getLoad() + " leastloaded=" +
serversByLoad.firstKey().getLoad());
}
return false;
}
return true;
return sloppyRegionServerExist(cs);
}
/**

View File

@ -203,11 +203,6 @@ public class StochasticLoadBalancer extends BaseLoadBalancer {
return this.candidateGenerators;
}
@Override
protected float getDefaultSlop() {
return 0.001f;
}
protected List<CandidateGenerator> createCandidateGenerators() {
List<CandidateGenerator> candidateGenerators = new ArrayList<CandidateGenerator>(4);
candidateGenerators.add(GeneratorType.RANDOM.ordinal(), new RandomCandidateGenerator());
@ -357,6 +352,12 @@ public class StochasticLoadBalancer extends BaseLoadBalancer {
return true;
}
if (sloppyRegionServerExist(cs)) {
LOG.info("Running balancer because cluster has sloppy server(s)."+
" function cost={}", functionCost());
return true;
}
double total = 0.0;
for (CostFunction c : costFunctions) {
if (!c.isNeeded()) {

View File

@ -137,6 +137,45 @@ public class BalancerTestBase {
};
// int[testnum][servernumber] -> numregions
protected int[][] clusterStateMocksWithNoSlop = new int[][] {
// 1 node
new int[]{0},
new int[]{1},
new int[]{10},
// 2 node
new int[]{0, 0},
new int[]{2, 1},
new int[]{2, 2},
new int[]{2, 3},
new int[]{1, 1},
new int[]{80, 120},
new int[]{1428, 1432},
// more nodes
new int[]{100, 90, 120, 90, 110, 100, 90, 120},
};
// int[testnum][servernumber] -> numregions
protected int[][] clusterStateMocksWithSlop = new int[][] {
// 2 node
new int[]{1, 4},
new int[]{10, 20},
new int[]{80, 123},
// more nodes
new int[]{100, 100, 100, 100, 100, 100, 100, 100, 100, 200},
new int[] {
10, 5, 5, 5, 5, 5, 5, 5, 5, 5
, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5
, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5
, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5
, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5
, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5
, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5
, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5
, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5
, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5
},
};
// This class is introduced because IP to rack resolution can be lengthy.
public static class MockMapping implements DNSToSwitchMapping {
@ -351,6 +390,23 @@ public class BalancerTestBase {
map.put(sn, sal);
}
protected TreeMap<ServerName, List<RegionInfo>> mockClusterServers(int[][] mockCluster) {
// dimension1: table, dimension2: regions per server
int numTables = mockCluster.length;
TreeMap<ServerName, List<RegionInfo>> servers = new TreeMap<>();
for (int i = 0; i < numTables; i++) {
TableName tableName = TableName.valueOf("table" + i);
for (int j = 0; j < mockCluster[i].length; j++) {
ServerName serverName = ServerName.valueOf("server" + j, 1000, -1);
int numRegions = mockCluster[i][j];
List<RegionInfo> regions = createRegions(numRegions, tableName);
servers.putIfAbsent(serverName, new ArrayList<>());
servers.get(serverName).addAll(regions);
}
}
return servers;
}
protected TreeMap<ServerName, List<RegionInfo>> mockClusterServers(int[] mockCluster) {
return mockClusterServers(mockCluster, -1);
}

View File

@ -46,7 +46,6 @@ public class StochasticBalancerTestBase extends BalancerTestBase {
public static void beforeAllTests() throws Exception {
conf = HBaseConfiguration.create();
conf.setClass("hbase.util.ip.to.rack.determiner", MockMapping.class, DNSToSwitchMapping.class);
conf.setFloat("hbase.regions.slop", 0.0f);
conf.setFloat("hbase.master.balancer.stochastic.localityCost", 0);
conf.setBoolean("hbase.master.balancer.stochastic.runMaxSteps", true);
loadBalancer = new StochasticLoadBalancer(dummyMetricsStochasticBalancer);

View File

@ -165,28 +165,19 @@ public class TestSimpleLoadBalancer extends BalancerTestBase {
}
@Test
public void testBalanceClusterOverallStrictly() throws Exception {
int[] regionNumOfTable1PerServer = { 3, 3, 4, 4, 4, 4, 5, 5, 5 };
int[] regionNumOfTable2PerServer = { 2, 2, 2, 2, 2, 2, 2, 2, 1 };
TreeMap<ServerName, List<RegionInfo>> serverRegionInfo = new TreeMap<>();
List<ServerAndLoad> serverAndLoads = new ArrayList<>();
for (int i = 0; i < regionNumOfTable1PerServer.length; i++) {
ServerName serverName = ServerName.valueOf("server" + i, 1000, -1);
List<RegionInfo> regions1 =
createRegions(regionNumOfTable1PerServer[i], TableName.valueOf("table1"));
List<RegionInfo> regions2 =
createRegions(regionNumOfTable2PerServer[i], TableName.valueOf("table2"));
regions1.addAll(regions2);
serverRegionInfo.put(serverName, regions1);
ServerAndLoad serverAndLoad = new ServerAndLoad(serverName,
regionNumOfTable1PerServer[i] + regionNumOfTable2PerServer[i]);
serverAndLoads.add(serverAndLoad);
}
HashMap<TableName, TreeMap<ServerName, List<RegionInfo>>> LoadOfAllTable =
public void testBalanceClusterOverallStrictly() {
int[][] regionsPerServerPerTable = new int[][]{
new int[]{ 3, 3, 4, 4, 4, 4, 5, 5, 5 },
new int[]{ 2, 2, 2, 2, 2, 2, 2, 2, 1 },
};
TreeMap<ServerName, List<RegionInfo>> serverRegionInfo =
mockClusterServers(regionsPerServerPerTable);
List<ServerAndLoad> serverAndLoads = convertToList(serverRegionInfo);
Map<TableName, TreeMap<ServerName, List<RegionInfo>>> loadOfAllTable =
mockClusterServersWithTables(serverRegionInfo);
loadBalancer.setClusterLoad((Map) LoadOfAllTable);
List<RegionPlan> partialplans = loadBalancer.balanceTable(TableName.valueOf("table1"),
LoadOfAllTable.get(TableName.valueOf("table1")));
loadBalancer.setClusterLoad((Map) loadOfAllTable);
List<RegionPlan> partialplans = loadBalancer.balanceTable(TableName.valueOf("table0"),
loadOfAllTable.get(TableName.valueOf("table0")));
List<ServerAndLoad> balancedServerLoads =
reconcile(serverAndLoads, partialplans, serverRegionInfo);
for (ServerAndLoad serverAndLoad : balancedServerLoads) {

View File

@ -18,6 +18,7 @@
package org.apache.hadoop.hbase.master.balancer;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;
@ -327,7 +328,9 @@ public class TestStochasticLoadBalancer extends StochasticBalancerTestBase {
@Test
public void testNeedBalance() {
float minCost = conf.getFloat("hbase.master.balancer.stochastic.minCostNeedBalance", 0.05f);
float slop = conf.getFloat(HConstants.LOAD_BALANCER_SLOP_KEY, 0.2f);
conf.setFloat("hbase.master.balancer.stochastic.minCostNeedBalance", 1.0f);
conf.setFloat(HConstants.LOAD_BALANCER_SLOP_KEY, -1f);
try {
// Test with/without per table balancer.
boolean[] perTableBalancerConfigs = {true, false};
@ -335,22 +338,80 @@ public class TestStochasticLoadBalancer extends StochasticBalancerTestBase {
conf.setBoolean(HConstants.HBASE_MASTER_LOADBALANCE_BYTABLE, isByTable);
loadBalancer.onConfigurationChange(conf);
for (int[] mockCluster : clusterStateMocks) {
Map<ServerName, List<RegionInfo>> servers = mockClusterServers(mockCluster);
Map<TableName, Map<ServerName, List<RegionInfo>>> LoadOfAllTable =
(Map) mockClusterServersWithTables(servers);
List<RegionPlan> plans = loadBalancer.balanceCluster(LoadOfAllTable);
boolean emptyPlans = plans == null || plans.isEmpty();
assertTrue(emptyPlans || needsBalanceIdleRegion(mockCluster));
assertTrue(hasEmptyBalancerPlans(mockCluster) || needsBalanceIdleRegion(mockCluster));
}
}
} finally {
// reset config
conf.unset(HConstants.HBASE_MASTER_LOADBALANCE_BYTABLE);
conf.setFloat("hbase.master.balancer.stochastic.minCostNeedBalance", minCost);
conf.setFloat(HConstants.LOAD_BALANCER_SLOP_KEY, slop);
loadBalancer.onConfigurationChange(conf);
}
}
@Test
public void testBalanceOfSloppyServers() {
float minCost = conf.getFloat("hbase.master.balancer.stochastic.minCostNeedBalance", 0.025f);
try {
// We are testing slop checks, so don't "accidentally" balance due to a minCost calculation.
// During development, imbalance of a 100 server cluster, with one node having 10 regions
// and the rest having 5, is 0.0048. With minCostNeedBalance default of 0.025, test should
// validate slop checks without this override. We override just to ensure we will always
// validate slop check here, and for small clusters as well.
conf.setFloat("hbase.master.balancer.stochastic.minCostNeedBalance", 1.0f);
loadBalancer.onConfigurationChange(conf);
for (int[] mockCluster : clusterStateMocksWithNoSlop) {
assertTrue(hasEmptyBalancerPlans(mockCluster));
}
for (int[] mockCluster : clusterStateMocksWithSlop) {
assertFalse(hasEmptyBalancerPlans(mockCluster));
}
} finally {
// reset config
conf.setFloat("hbase.master.balancer.stochastic.minCostNeedBalance", minCost);
loadBalancer.onConfigurationChange(conf);
}
}
@Test
public void testSloppyTablesLoadBalanceByTable() {
int[][] regionsPerServerPerTable = new int[][] {
new int[] { 8, 2, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5
, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5},
new int[] { 2, 8, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5
, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5},
};
float minCost = conf.getFloat("hbase.master.balancer.stochastic.minCostNeedBalance", 0.025f);
try {
conf.setFloat("hbase.master.balancer.stochastic.minCostNeedBalance", 1.0f);
conf.setBoolean(HConstants.HBASE_MASTER_LOADBALANCE_BYTABLE, true);
loadBalancer.onConfigurationChange(conf);
assertFalse(hasEmptyBalancerPlans(regionsPerServerPerTable));
} finally {
conf.setFloat("hbase.master.balancer.stochastic.minCostNeedBalance", minCost);
conf.unset(HConstants.HBASE_MASTER_LOADBALANCE_BYTABLE);
loadBalancer.onConfigurationChange(conf);
}
}
private boolean hasEmptyBalancerPlans(int[] mockCluster) {
Map<ServerName, List<RegionInfo>> servers = mockClusterServers(mockCluster);
return hasEmptyBalancerPlans(servers);
}
private boolean hasEmptyBalancerPlans(int[][] mockCluster) {
Map<ServerName, List<RegionInfo>> servers = mockClusterServers(mockCluster);
return hasEmptyBalancerPlans(servers);
}
private boolean hasEmptyBalancerPlans(Map<ServerName, List<RegionInfo>> servers) {
Map<TableName, Map<ServerName, List<RegionInfo>>> loadOfAllTable =
(Map) mockClusterServersWithTables(servers);
List<RegionPlan> plans = loadBalancer.balanceCluster(loadOfAllTable);
return plans == null || plans.isEmpty();
}
@Test
public void testLocalityCost() throws Exception {
Configuration conf = HBaseConfiguration.create();

View File

@ -618,11 +618,20 @@ possible configurations would overwhelm and obscure the important.
</property>
<property>
<name>hbase.regions.slop</name>
<value>0.001</value>
<description>Rebalance if any regionserver has average + (average * slop) regions.
The default value of this parameter is 0.001 in StochasticLoadBalancer (the default load
balancer), while the default is 0.2 in other load balancers (i.e.,
SimpleLoadBalancer).</description>
<value>0.2</value>
<description>The load balancer can trigger for several reasons. This value controls one of
those reasons. Run the balancer if any regionserver has a region count outside the range of
average +/- (average * slop) regions.
If the value of slop is negative, disable sloppiness checks. The balancer can still run for
other reasons, but sloppiness will not be one of them.
If the value of slop is 0, run the balancer if any server has a region count more than 1
from the average.
If the value of slop is 100, run the balancer if any server has a region count greater than
101 times the average.
The default value of this parameter is 0.2, which runs the balancer if any server has a region
count less than 80% of the average, or greater than 120% of the average.
Note that for the default StochasticLoadBalancer, this does not guarantee any balancing
actions will be taken, but only that the balancer will attempt to run.</description>
</property>
<property>
<name>hbase.normalizer.period</name>

View File

@ -65,7 +65,9 @@ public class TestBalancerDecision extends StochasticBalancerTestBase {
loadBalancer.setClusterInfoProvider(provider);
loadBalancer.onConfigurationChange(conf);
float minCost = conf.getFloat("hbase.master.balancer.stochastic.minCostNeedBalance", 0.05f);
float slop = conf.getFloat(HConstants.LOAD_BALANCER_SLOP_KEY, 0.2f);
conf.setFloat("hbase.master.balancer.stochastic.minCostNeedBalance", 1.0f);
conf.setFloat(HConstants.LOAD_BALANCER_SLOP_KEY, -1f);
try {
// Test with/without per table balancer.
boolean[] perTableBalancerConfigs = {true, false};
@ -100,6 +102,7 @@ public class TestBalancerDecision extends StochasticBalancerTestBase {
// reset config
conf.unset(HConstants.HBASE_MASTER_LOADBALANCE_BYTABLE);
conf.setFloat("hbase.master.balancer.stochastic.minCostNeedBalance", minCost);
conf.setFloat(HConstants.LOAD_BALANCER_SLOP_KEY, slop);
loadBalancer.onConfigurationChange(conf);
}
}