HBASE-12139 StochasticLoadBalancer doesn't work on large lightly loaded clusters

Summary:
Currently the move cost overshadows the skew cost on a large, lightly loaded cluster. This can render the split policy worse than useless,
and it can trap meta on the most loaded server in the cluster.

Test Plan:
Unit tests everywhere
likeaboss

Reviewers: stack, manukranthk

Subscribers: tedyu

Differential Revision: https://reviews.facebook.net/D24285
This commit is contained in:
Elliott Clark 2014-10-01 00:12:23 -07:00
parent 1587068a2c
commit eb385abfae
4 changed files with 58 additions and 26 deletions

View File

@ -1,5 +1,5 @@
{ {
"project_id" : "hbase", "project_id" : "HBaseOnGithub",
"conduit_uri" : "https://reviews.facebook.net/", "conduit_uri" : "https://reviews.facebook.net/",
"copyright_holder" : "Apache Software Foundation", "copyright_holder" : "Apache Software Foundation",
"max_line_length" : 100 "max_line_length" : 100

View File

@ -808,9 +808,24 @@ public class StochasticLoadBalancer extends BaseLoadBalancer {
// Compute max as if all region servers had 0 and one had the sum of all costs. This must be // Compute max as if all region servers had 0 and one had the sum of all costs. This must be
// a zero sum cost for this to make sense. // a zero sum cost for this to make sense.
// TODO: Should we make this sum of square errors?
double max = ((count - 1) * mean) + (total - mean); double max = ((count - 1) * mean) + (total - mean);
// It's possible that there aren't enough regions to go around
double min;
if (count > total) {
min = ((count - total) * mean) + ((1 - mean) * total);
} else {
// Some will have 1 more than everything else.
int numHigh = (int) (total - (Math.floor(mean) * count));
int numLow = (int) (count - numHigh);
min = (numHigh * (Math.ceil(mean) - mean)) + (numLow * (mean - Math.floor(mean)));
}
min = Math.max(0, min);
for (int i=0; i<stats.length; i++) { for (int i=0; i<stats.length; i++) {
if (stats.length > 1 && cluster.isActiveMaster(i)) { if (stats.length > 1 && cluster.isActiveMaster(i)) {
// Not count the active master load // Not count the active master load
continue; continue;
@ -820,7 +835,7 @@ public class StochasticLoadBalancer extends BaseLoadBalancer {
totalCost += diff; totalCost += diff;
} }
double scaled = scale(0, max, totalCost); double scaled = scale(min, max, totalCost);
return scaled; return scaled;
} }
@ -844,6 +859,7 @@ public class StochasticLoadBalancer extends BaseLoadBalancer {
if (max <= min || value <= min) { if (max <= min || value <= min) {
return 0; return 0;
} }
if ((max - min) == 0) return 0;
return Math.max(0d, Math.min(1d, (value - min) / (max - min))); return Math.max(0d, Math.min(1d, (value - min) / (max - min)));
} }
@ -919,6 +935,7 @@ public class StochasticLoadBalancer extends BaseLoadBalancer {
for (int i =0; i < cluster.numServers; i++) { for (int i =0; i < cluster.numServers; i++) {
stats[i] = cluster.regionsPerServer[i].length; stats[i] = cluster.regionsPerServer[i].length;
} }
return costFromArray(stats); return costFromArray(stats);
} }
} }

View File

@ -180,20 +180,22 @@ public class BalancerTestBase {
List<RegionPlan> plans, List<RegionPlan> plans,
Map<ServerName, List<HRegionInfo>> servers) { Map<ServerName, List<HRegionInfo>> servers) {
List<ServerAndLoad> result = new ArrayList<ServerAndLoad>(list.size()); List<ServerAndLoad> result = new ArrayList<ServerAndLoad>(list.size());
if (plans == null) return result;
Map<ServerName, ServerAndLoad> map = new HashMap<ServerName, ServerAndLoad>(list.size()); Map<ServerName, ServerAndLoad> map = new HashMap<ServerName, ServerAndLoad>(list.size());
for (ServerAndLoad sl : list) { for (ServerAndLoad sl : list) {
map.put(sl.getServerName(), sl); map.put(sl.getServerName(), sl);
} }
for (RegionPlan plan : plans) { if (plans != null) {
ServerName source = plan.getSource(); for (RegionPlan plan : plans) {
ServerName source = plan.getSource();
updateLoad(map, source, -1); updateLoad(map, source, -1);
ServerName destination = plan.getDestination(); ServerName destination = plan.getDestination();
updateLoad(map, destination, +1); updateLoad(map, destination, +1);
servers.get(source).remove(plan.getRegionInfo()); servers.get(source).remove(plan.getRegionInfo());
servers.get(destination).add(plan.getRegionInfo()); servers.get(destination).add(plan.getRegionInfo());
}
} }
result.clear(); result.clear();
result.addAll(map.values()); result.addAll(map.values());

View File

@ -66,10 +66,26 @@ public class TestStochasticLoadBalancer extends BalancerTestBase {
public static void beforeAllTests() throws Exception { public static void beforeAllTests() throws Exception {
conf = HBaseConfiguration.create(); conf = HBaseConfiguration.create();
conf.setFloat("hbase.master.balancer.stochastic.maxMovePercent", 0.75f); conf.setFloat("hbase.master.balancer.stochastic.maxMovePercent", 0.75f);
conf.setFloat("hbase.regions.slop", 0.0f);
loadBalancer = new StochasticLoadBalancer(); loadBalancer = new StochasticLoadBalancer();
loadBalancer.setConf(conf); loadBalancer.setConf(conf);
} }
int[] largeCluster = new int[] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 56 };
// int[testnum][servernumber] -> numregions // int[testnum][servernumber] -> numregions
int[][] clusterStateMocks = new int[][]{ int[][] clusterStateMocks = new int[][]{
// 1 node // 1 node
@ -87,7 +103,7 @@ public class TestStochasticLoadBalancer extends BalancerTestBase {
new int[]{0, 1}, new int[]{0, 1},
new int[]{10, 1}, new int[]{10, 1},
new int[]{514, 1432}, new int[]{514, 1432},
new int[]{47, 53}, new int[]{48, 53},
// 3 node // 3 node
new int[]{0, 1, 2}, new int[]{0, 1, 2},
new int[]{1, 2, 3}, new int[]{1, 2, 3},
@ -124,7 +140,9 @@ public class TestStochasticLoadBalancer extends BalancerTestBase {
new int[]{10, 7, 12, 8, 11, 10, 9, 14}, new int[]{10, 7, 12, 8, 11, 10, 9, 14},
new int[]{13, 14, 6, 10, 10, 10, 8, 10}, new int[]{13, 14, 6, 10, 10, 10, 8, 10},
new int[]{130, 14, 60, 10, 100, 10, 80, 10}, new int[]{130, 14, 60, 10, 100, 10, 80, 10},
new int[]{130, 140, 60, 100, 100, 100, 80, 100} new int[]{130, 140, 60, 100, 100, 100, 80, 100},
largeCluster,
}; };
@Test @Test
@ -201,24 +219,19 @@ public class TestStochasticLoadBalancer extends BalancerTestBase {
assertTrue(cost >= 0); assertTrue(cost >= 0);
assertTrue(cost <= 1.01); assertTrue(cost <= 1.01);
} }
costFunction.init(mockCluster(new int[]{0, 0, 0, 0, 1})); costFunction.init(mockCluster(new int[]{0, 0, 0, 0, 1}));
assertEquals(1, assertEquals(0,costFunction.cost(), 0.01);
costFunction.cost(), 0.01);
costFunction.init(mockCluster(new int[]{0, 0, 0, 1, 1})); costFunction.init(mockCluster(new int[]{0, 0, 0, 1, 1}));
assertEquals(.75, assertEquals(0, costFunction.cost(), 0.01);
costFunction.cost(), 0.01);
costFunction.init(mockCluster(new int[]{0, 0, 1, 1, 1})); costFunction.init(mockCluster(new int[]{0, 0, 1, 1, 1}));
assertEquals(.5, assertEquals(0, costFunction.cost(), 0.01);
costFunction.cost(), 0.01);
costFunction.init(mockCluster(new int[]{0, 1, 1, 1, 1})); costFunction.init(mockCluster(new int[]{0, 1, 1, 1, 1}));
assertEquals(.25, assertEquals(0, costFunction.cost(), 0.01);
costFunction.cost(), 0.01);
costFunction.init(mockCluster(new int[]{1, 1, 1, 1, 1})); costFunction.init(mockCluster(new int[]{1, 1, 1, 1, 1}));
assertEquals(0, assertEquals(0, costFunction.cost(), 0.01);
costFunction.cost(), 0.01); costFunction.init(mockCluster(new int[]{10000, 0, 0, 0, 0}));
costFunction.init(mockCluster(new int[]{10, 10, 10, 10, 10})); assertEquals(1, costFunction.cost(), 0.01);
assertEquals(0,
costFunction.cost(), 0.01);
} }
@Test @Test