HBASE-12139 StochasticLoadBalancer doesn't work on large lightly loaded clusters
Summary: Currently the move cost overshadows the skew cost on a large cluster. This can render the split policy worse than useless, and it can trap meta on the most loaded server in the cluster. Test Plan: Unit tests everywhere, like a boss. Reviewers: stack, manukranthk. Subscribers: tedyu. Differential Revision: https://reviews.facebook.net/D24285
This commit is contained in:
parent
1587068a2c
commit
eb385abfae
|
@ -1,5 +1,5 @@
|
|||
{
|
||||
"project_id" : "hbase",
|
||||
"project_id" : "HBaseOnGithub",
|
||||
"conduit_uri" : "https://reviews.facebook.net/",
|
||||
"copyright_holder" : "Apache Software Foundation",
|
||||
"max_line_length" : 100
|
||||
|
|
|
@ -808,9 +808,24 @@ public class StochasticLoadBalancer extends BaseLoadBalancer {
|
|||
|
||||
// Compute max as if all region servers had 0 and one had the sum of all costs. This must be
|
||||
// a zero sum cost for this to make sense.
|
||||
// TODO: Should we make this sum of square errors?
|
||||
double max = ((count - 1) * mean) + (total - mean);
|
||||
|
||||
// It's possible that there aren't enough regions to go around
|
||||
double min;
|
||||
if (count > total) {
|
||||
min = ((count - total) * mean) + ((1 - mean) * total);
|
||||
} else {
|
||||
// Some will have 1 more than everything else.
|
||||
int numHigh = (int) (total - (Math.floor(mean) * count));
|
||||
int numLow = (int) (count - numHigh);
|
||||
|
||||
min = (numHigh * (Math.ceil(mean) - mean)) + (numLow * (mean - Math.floor(mean)));
|
||||
|
||||
}
|
||||
min = Math.max(0, min);
|
||||
for (int i=0; i<stats.length; i++) {
|
||||
|
||||
|
||||
if (stats.length > 1 && cluster.isActiveMaster(i)) {
|
||||
// Do not count the active master's load
|
||||
continue;
|
||||
|
@ -820,7 +835,7 @@ public class StochasticLoadBalancer extends BaseLoadBalancer {
|
|||
totalCost += diff;
|
||||
}
|
||||
|
||||
double scaled = scale(0, max, totalCost);
|
||||
double scaled = scale(min, max, totalCost);
|
||||
return scaled;
|
||||
}
|
||||
|
||||
|
@ -844,6 +859,7 @@ public class StochasticLoadBalancer extends BaseLoadBalancer {
|
|||
if (max <= min || value <= min) {
|
||||
return 0;
|
||||
}
|
||||
if ((max - min) == 0) return 0;
|
||||
|
||||
return Math.max(0d, Math.min(1d, (value - min) / (max - min)));
|
||||
}
|
||||
|
@ -919,6 +935,7 @@ public class StochasticLoadBalancer extends BaseLoadBalancer {
|
|||
for (int i =0; i < cluster.numServers; i++) {
|
||||
stats[i] = cluster.regionsPerServer[i].length;
|
||||
}
|
||||
|
||||
return costFromArray(stats);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -180,20 +180,22 @@ public class BalancerTestBase {
|
|||
List<RegionPlan> plans,
|
||||
Map<ServerName, List<HRegionInfo>> servers) {
|
||||
List<ServerAndLoad> result = new ArrayList<ServerAndLoad>(list.size());
|
||||
if (plans == null) return result;
|
||||
|
||||
Map<ServerName, ServerAndLoad> map = new HashMap<ServerName, ServerAndLoad>(list.size());
|
||||
for (ServerAndLoad sl : list) {
|
||||
map.put(sl.getServerName(), sl);
|
||||
}
|
||||
for (RegionPlan plan : plans) {
|
||||
ServerName source = plan.getSource();
|
||||
if (plans != null) {
|
||||
for (RegionPlan plan : plans) {
|
||||
ServerName source = plan.getSource();
|
||||
|
||||
updateLoad(map, source, -1);
|
||||
ServerName destination = plan.getDestination();
|
||||
updateLoad(map, destination, +1);
|
||||
updateLoad(map, source, -1);
|
||||
ServerName destination = plan.getDestination();
|
||||
updateLoad(map, destination, +1);
|
||||
|
||||
servers.get(source).remove(plan.getRegionInfo());
|
||||
servers.get(destination).add(plan.getRegionInfo());
|
||||
servers.get(source).remove(plan.getRegionInfo());
|
||||
servers.get(destination).add(plan.getRegionInfo());
|
||||
}
|
||||
}
|
||||
result.clear();
|
||||
result.addAll(map.values());
|
||||
|
|
|
@ -66,10 +66,26 @@ public class TestStochasticLoadBalancer extends BalancerTestBase {
|
|||
public static void beforeAllTests() throws Exception {
|
||||
conf = HBaseConfiguration.create();
|
||||
conf.setFloat("hbase.master.balancer.stochastic.maxMovePercent", 0.75f);
|
||||
conf.setFloat("hbase.regions.slop", 0.0f);
|
||||
loadBalancer = new StochasticLoadBalancer();
|
||||
loadBalancer.setConf(conf);
|
||||
}
|
||||
|
||||
int[] largeCluster = new int[] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 56 };
|
||||
|
||||
// int[testnum][servernumber] -> numregions
|
||||
int[][] clusterStateMocks = new int[][]{
|
||||
// 1 node
|
||||
|
@ -87,7 +103,7 @@ public class TestStochasticLoadBalancer extends BalancerTestBase {
|
|||
new int[]{0, 1},
|
||||
new int[]{10, 1},
|
||||
new int[]{514, 1432},
|
||||
new int[]{47, 53},
|
||||
new int[]{48, 53},
|
||||
// 3 node
|
||||
new int[]{0, 1, 2},
|
||||
new int[]{1, 2, 3},
|
||||
|
@ -124,7 +140,9 @@ public class TestStochasticLoadBalancer extends BalancerTestBase {
|
|||
new int[]{10, 7, 12, 8, 11, 10, 9, 14},
|
||||
new int[]{13, 14, 6, 10, 10, 10, 8, 10},
|
||||
new int[]{130, 14, 60, 10, 100, 10, 80, 10},
|
||||
new int[]{130, 140, 60, 100, 100, 100, 80, 100}
|
||||
new int[]{130, 140, 60, 100, 100, 100, 80, 100},
|
||||
largeCluster,
|
||||
|
||||
};
|
||||
|
||||
@Test
|
||||
|
@ -201,24 +219,19 @@ public class TestStochasticLoadBalancer extends BalancerTestBase {
|
|||
assertTrue(cost >= 0);
|
||||
assertTrue(cost <= 1.01);
|
||||
}
|
||||
|
||||
costFunction.init(mockCluster(new int[]{0, 0, 0, 0, 1}));
|
||||
assertEquals(1,
|
||||
costFunction.cost(), 0.01);
|
||||
assertEquals(0,costFunction.cost(), 0.01);
|
||||
costFunction.init(mockCluster(new int[]{0, 0, 0, 1, 1}));
|
||||
assertEquals(.75,
|
||||
costFunction.cost(), 0.01);
|
||||
assertEquals(0, costFunction.cost(), 0.01);
|
||||
costFunction.init(mockCluster(new int[]{0, 0, 1, 1, 1}));
|
||||
assertEquals(.5,
|
||||
costFunction.cost(), 0.01);
|
||||
assertEquals(0, costFunction.cost(), 0.01);
|
||||
costFunction.init(mockCluster(new int[]{0, 1, 1, 1, 1}));
|
||||
assertEquals(.25,
|
||||
costFunction.cost(), 0.01);
|
||||
assertEquals(0, costFunction.cost(), 0.01);
|
||||
costFunction.init(mockCluster(new int[]{1, 1, 1, 1, 1}));
|
||||
assertEquals(0,
|
||||
costFunction.cost(), 0.01);
|
||||
costFunction.init(mockCluster(new int[]{10, 10, 10, 10, 10}));
|
||||
assertEquals(0,
|
||||
costFunction.cost(), 0.01);
|
||||
assertEquals(0, costFunction.cost(), 0.01);
|
||||
costFunction.init(mockCluster(new int[]{10000, 0, 0, 0, 0}));
|
||||
assertEquals(1, costFunction.cost(), 0.01);
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
Loading…
Reference in New Issue