HBASE-8517 Stochastic Loadbalancer isn't finding steady state on real clusters
git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1486258 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
c19b7ba9d9
commit
4ff4038b79
|
@ -18,6 +18,8 @@
|
||||||
package org.apache.hadoop.hbase.master.balancer;
|
package org.apache.hadoop.hbase.master.balancer;
|
||||||
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.Comparator;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
@ -58,7 +60,7 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
|
||||||
*/
|
*/
|
||||||
protected static class Cluster {
|
protected static class Cluster {
|
||||||
ServerName[] servers;
|
ServerName[] servers;
|
||||||
ArrayList<byte[]> tables;
|
ArrayList<String> tables;
|
||||||
HRegionInfo[] regions;
|
HRegionInfo[] regions;
|
||||||
List<RegionLoad>[] regionLoads;
|
List<RegionLoad>[] regionLoads;
|
||||||
int[][] regionLocations; //regionIndex -> list of serverIndex sorted by locality
|
int[][] regionLocations; //regionIndex -> list of serverIndex sorted by locality
|
||||||
|
@ -70,8 +72,10 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
|
||||||
int[][] numRegionsPerServerPerTable; //serverIndex -> tableIndex -> # regions
|
int[][] numRegionsPerServerPerTable; //serverIndex -> tableIndex -> # regions
|
||||||
int[] numMaxRegionsPerTable; //tableIndex -> max number of regions in a single RS
|
int[] numMaxRegionsPerTable; //tableIndex -> max number of regions in a single RS
|
||||||
|
|
||||||
Map<ServerName, Integer> serversToIndex;
|
Integer[] serverIndicesSortedByRegionCount;
|
||||||
Map<Integer, Integer> tablesToIndex;
|
|
||||||
|
Map<String, Integer> serversToIndex;
|
||||||
|
Map<String, Integer> tablesToIndex;
|
||||||
|
|
||||||
int numRegions;
|
int numRegions;
|
||||||
int numServers;
|
int numServers;
|
||||||
|
@ -82,21 +86,35 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
|
||||||
|
|
||||||
protected Cluster(Map<ServerName, List<HRegionInfo>> clusterState, Map<String, List<RegionLoad>> loads,
|
protected Cluster(Map<ServerName, List<HRegionInfo>> clusterState, Map<String, List<RegionLoad>> loads,
|
||||||
RegionLocationFinder regionFinder) {
|
RegionLocationFinder regionFinder) {
|
||||||
serversToIndex = new HashMap<ServerName, Integer>(clusterState.size());
|
|
||||||
tablesToIndex = new HashMap<Integer, Integer>();
|
serversToIndex = new HashMap<String, Integer>();
|
||||||
|
tablesToIndex = new HashMap<String, Integer>();
|
||||||
//regionsToIndex = new HashMap<HRegionInfo, Integer>();
|
//regionsToIndex = new HashMap<HRegionInfo, Integer>();
|
||||||
|
|
||||||
//TODO: We should get the list of tables from master
|
//TODO: We should get the list of tables from master
|
||||||
tables = new ArrayList<byte[]>();
|
tables = new ArrayList<String>();
|
||||||
|
|
||||||
|
|
||||||
numServers = clusterState.size();
|
|
||||||
numRegions = 0;
|
numRegions = 0;
|
||||||
|
|
||||||
|
int serverIndex = 0;
|
||||||
|
|
||||||
|
// Use servername and port as there can be dead servers in this list. We want everything with
|
||||||
|
// a matching hostname and port to have the same index.
|
||||||
|
for (ServerName sn:clusterState.keySet()) {
|
||||||
|
if (serversToIndex.get(sn.getHostAndPort()) == null) {
|
||||||
|
serversToIndex.put(sn.getHostAndPort(), serverIndex++);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Count how many regions there are.
|
||||||
for (Entry<ServerName, List<HRegionInfo>> entry : clusterState.entrySet()) {
|
for (Entry<ServerName, List<HRegionInfo>> entry : clusterState.entrySet()) {
|
||||||
numRegions += entry.getValue().size();
|
numRegions += entry.getValue().size();
|
||||||
}
|
}
|
||||||
|
|
||||||
regionsPerServer = new int[clusterState.size()][];
|
numServers = serversToIndex.size();
|
||||||
|
regionsPerServer = new int[serversToIndex.size()][];
|
||||||
|
|
||||||
servers = new ServerName[numServers];
|
servers = new ServerName[numServers];
|
||||||
regions = new HRegionInfo[numRegions];
|
regions = new HRegionInfo[numRegions];
|
||||||
regionIndexToServerIndex = new int[numRegions];
|
regionIndexToServerIndex = new int[numRegions];
|
||||||
|
@ -104,26 +122,35 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
|
||||||
regionIndexToTableIndex = new int[numRegions];
|
regionIndexToTableIndex = new int[numRegions];
|
||||||
regionLoads = new List[numRegions];
|
regionLoads = new List[numRegions];
|
||||||
regionLocations = new int[numRegions][];
|
regionLocations = new int[numRegions][];
|
||||||
|
serverIndicesSortedByRegionCount = new Integer[numServers];
|
||||||
|
|
||||||
|
int tableIndex = 0, regionIndex = 0, regionPerServerIndex = 0;
|
||||||
|
|
||||||
int tableIndex = 0, serverIndex = 0, regionIndex = 0, regionPerServerIndex = 0;
|
|
||||||
// populate serversToIndex first
|
|
||||||
for (Entry<ServerName, List<HRegionInfo>> entry : clusterState.entrySet()) {
|
for (Entry<ServerName, List<HRegionInfo>> entry : clusterState.entrySet()) {
|
||||||
|
serverIndex = serversToIndex.get(entry.getKey().getHostAndPort());
|
||||||
|
|
||||||
|
// keep the servername if this is the first server name for this hostname
|
||||||
|
// or this servername has the newest startcode.
|
||||||
|
if (servers[serverIndex] == null ||
|
||||||
|
servers[serverIndex].getStartcode() < entry.getKey().getStartcode()) {
|
||||||
servers[serverIndex] = entry.getKey();
|
servers[serverIndex] = entry.getKey();
|
||||||
regionsPerServer[serverIndex] = new int[entry.getValue().size()];
|
|
||||||
serversToIndex.put(servers[serverIndex], Integer.valueOf(serverIndex));
|
|
||||||
serverIndex++;
|
|
||||||
}
|
}
|
||||||
serverIndex = 0;
|
|
||||||
|
regionsPerServer[serverIndex] = new int[entry.getValue().size()];
|
||||||
|
serverIndicesSortedByRegionCount[serverIndex] = serverIndex;
|
||||||
|
}
|
||||||
|
|
||||||
for (Entry<ServerName, List<HRegionInfo>> entry : clusterState.entrySet()) {
|
for (Entry<ServerName, List<HRegionInfo>> entry : clusterState.entrySet()) {
|
||||||
|
serverIndex = serversToIndex.get(entry.getKey().getHostAndPort());
|
||||||
regionPerServerIndex = 0;
|
regionPerServerIndex = 0;
|
||||||
|
|
||||||
for (HRegionInfo region : entry.getValue()) {
|
for (HRegionInfo region : entry.getValue()) {
|
||||||
byte[] tableName = region.getTableName();
|
String tableName = region.getTableNameAsString();
|
||||||
int tableHash = Bytes.mapKey(tableName);
|
Integer idx = tablesToIndex.get(tableName);
|
||||||
Integer idx = tablesToIndex.get(tableHash);
|
|
||||||
if (idx == null) {
|
if (idx == null) {
|
||||||
tables.add(tableName);
|
tables.add(tableName);
|
||||||
idx = tableIndex;
|
idx = tableIndex;
|
||||||
tablesToIndex.put(tableHash, tableIndex++);
|
tablesToIndex.put(tableName, tableIndex++);
|
||||||
}
|
}
|
||||||
|
|
||||||
regions[regionIndex] = region;
|
regions[regionIndex] = region;
|
||||||
|
@ -132,7 +159,7 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
|
||||||
regionIndexToTableIndex[regionIndex] = idx;
|
regionIndexToTableIndex[regionIndex] = idx;
|
||||||
regionsPerServer[serverIndex][regionPerServerIndex++] = regionIndex;
|
regionsPerServer[serverIndex][regionPerServerIndex++] = regionIndex;
|
||||||
|
|
||||||
//region load
|
// region load
|
||||||
if (loads != null) {
|
if (loads != null) {
|
||||||
List<RegionLoad> rl = loads.get(region.getRegionNameAsString());
|
List<RegionLoad> rl = loads.get(region.getRegionNameAsString());
|
||||||
// That could have failed if the RegionLoad is using the other regionName
|
// That could have failed if the RegionLoad is using the other regionName
|
||||||
|
@ -156,7 +183,6 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
|
||||||
|
|
||||||
regionIndex++;
|
regionIndex++;
|
||||||
}
|
}
|
||||||
serverIndex++;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
numTables = tables.size();
|
numTables = tables.size();
|
||||||
|
@ -263,6 +289,53 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
|
||||||
}
|
}
|
||||||
return regions;
|
return regions;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void sortServersByRegionCount() {
|
||||||
|
Arrays.sort(serverIndicesSortedByRegionCount, numRegionsComparator);
|
||||||
|
}
|
||||||
|
|
||||||
|
int getNumRegions(int server) {
|
||||||
|
return regionsPerServer[server].length;
|
||||||
|
}
|
||||||
|
|
||||||
|
private Comparator<Integer> numRegionsComparator = new Comparator<Integer>() {
|
||||||
|
@Override
|
||||||
|
public int compare(Integer integer, Integer integer2) {
|
||||||
|
return Integer.valueOf(getNumRegions(integer)).compareTo(getNumRegions(integer2));
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
String desc = "Cluster{" +
|
||||||
|
"servers=[";
|
||||||
|
for(ServerName sn:servers) {
|
||||||
|
desc += sn.getHostAndPort() + ", ";
|
||||||
|
}
|
||||||
|
desc +=
|
||||||
|
", serverIndicesSortedByRegionCount="+
|
||||||
|
Arrays.toString(serverIndicesSortedByRegionCount) +
|
||||||
|
", regionsPerServer=[";
|
||||||
|
|
||||||
|
for (int[]r:regionsPerServer) {
|
||||||
|
desc += Arrays.toString(r);
|
||||||
|
}
|
||||||
|
desc += "]" +
|
||||||
|
", numMaxRegionsPerTable=" +
|
||||||
|
Arrays.toString(numMaxRegionsPerTable) +
|
||||||
|
", numRegions=" +
|
||||||
|
numRegions +
|
||||||
|
", numServers=" +
|
||||||
|
numServers +
|
||||||
|
", numTables=" +
|
||||||
|
numTables +
|
||||||
|
", numMovedRegions=" +
|
||||||
|
numMovedRegions +
|
||||||
|
", numMovedMetaRegions=" +
|
||||||
|
numMovedMetaRegions +
|
||||||
|
'}';
|
||||||
|
return desc;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// slop for regions
|
// slop for regions
|
||||||
|
@ -270,7 +343,6 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
|
||||||
private Configuration config;
|
private Configuration config;
|
||||||
private static final Random RANDOM = new Random(System.currentTimeMillis());
|
private static final Random RANDOM = new Random(System.currentTimeMillis());
|
||||||
private static final Log LOG = LogFactory.getLog(BaseLoadBalancer.class);
|
private static final Log LOG = LogFactory.getLog(BaseLoadBalancer.class);
|
||||||
|
|
||||||
protected MasterServices services;
|
protected MasterServices services;
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -43,7 +43,7 @@ import org.apache.hadoop.hbase.util.Bytes;
|
||||||
*/
|
*/
|
||||||
public class BalancerTestBase {
|
public class BalancerTestBase {
|
||||||
|
|
||||||
private static Random rand = new Random();
|
protected static Random rand = new Random();
|
||||||
static int regionId = 0;
|
static int regionId = 0;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -125,7 +125,9 @@ public class BalancerTestBase {
|
||||||
* @param plans
|
* @param plans
|
||||||
* @return
|
* @return
|
||||||
*/
|
*/
|
||||||
protected List<ServerAndLoad> reconcile(List<ServerAndLoad> list, List<RegionPlan> plans) {
|
protected List<ServerAndLoad> reconcile(List<ServerAndLoad> list,
|
||||||
|
List<RegionPlan> plans,
|
||||||
|
Map<ServerName, List<HRegionInfo>> servers) {
|
||||||
List<ServerAndLoad> result = new ArrayList<ServerAndLoad>(list.size());
|
List<ServerAndLoad> result = new ArrayList<ServerAndLoad>(list.size());
|
||||||
if (plans == null) return result;
|
if (plans == null) return result;
|
||||||
Map<ServerName, ServerAndLoad> map = new HashMap<ServerName, ServerAndLoad>(list.size());
|
Map<ServerName, ServerAndLoad> map = new HashMap<ServerName, ServerAndLoad>(list.size());
|
||||||
|
@ -134,9 +136,13 @@ public class BalancerTestBase {
|
||||||
}
|
}
|
||||||
for (RegionPlan plan : plans) {
|
for (RegionPlan plan : plans) {
|
||||||
ServerName source = plan.getSource();
|
ServerName source = plan.getSource();
|
||||||
|
|
||||||
updateLoad(map, source, -1);
|
updateLoad(map, source, -1);
|
||||||
ServerName destination = plan.getDestination();
|
ServerName destination = plan.getDestination();
|
||||||
updateLoad(map, destination, +1);
|
updateLoad(map, destination, +1);
|
||||||
|
|
||||||
|
servers.get(source).remove(plan.getRegionInfo());
|
||||||
|
servers.get(destination).add(plan.getRegionInfo());
|
||||||
}
|
}
|
||||||
result.clear();
|
result.clear();
|
||||||
result.addAll(map.values());
|
result.addAll(map.values());
|
||||||
|
|
|
@ -116,7 +116,7 @@ public class TestDefaultLoadBalancer extends BalancerTestBase {
|
||||||
List<ServerAndLoad> list = convertToList(servers);
|
List<ServerAndLoad> list = convertToList(servers);
|
||||||
LOG.info("Mock Cluster : " + printMock(list) + " " + printStats(list));
|
LOG.info("Mock Cluster : " + printMock(list) + " " + printStats(list));
|
||||||
List<RegionPlan> plans = loadBalancer.balanceCluster(servers);
|
List<RegionPlan> plans = loadBalancer.balanceCluster(servers);
|
||||||
List<ServerAndLoad> balancedCluster = reconcile(list, plans);
|
List<ServerAndLoad> balancedCluster = reconcile(list, plans, servers);
|
||||||
LOG.info("Mock Balance : " + printMock(balancedCluster));
|
LOG.info("Mock Balance : " + printMock(balancedCluster));
|
||||||
assertClusterAsBalanced(balancedCluster);
|
assertClusterAsBalanced(balancedCluster);
|
||||||
for (Map.Entry<ServerName, List<HRegionInfo>> entry : servers.entrySet()) {
|
for (Map.Entry<ServerName, List<HRegionInfo>> entry : servers.entrySet()) {
|
||||||
|
|
|
@ -19,10 +19,14 @@ package org.apache.hadoop.hbase.master.balancer;
|
||||||
|
|
||||||
import static org.junit.Assert.assertEquals;
|
import static org.junit.Assert.assertEquals;
|
||||||
import static org.junit.Assert.assertNotNull;
|
import static org.junit.Assert.assertNotNull;
|
||||||
|
import static org.junit.Assert.assertNull;
|
||||||
import static org.junit.Assert.assertTrue;
|
import static org.junit.Assert.assertTrue;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.HashMap;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
import java.util.Set;
|
||||||
|
|
||||||
import org.apache.commons.logging.Log;
|
import org.apache.commons.logging.Log;
|
||||||
import org.apache.commons.logging.LogFactory;
|
import org.apache.commons.logging.LogFactory;
|
||||||
|
@ -34,7 +38,6 @@ import org.apache.hadoop.hbase.MediumTests;
|
||||||
import org.apache.hadoop.hbase.ServerName;
|
import org.apache.hadoop.hbase.ServerName;
|
||||||
import org.apache.hadoop.hbase.master.RegionPlan;
|
import org.apache.hadoop.hbase.master.RegionPlan;
|
||||||
import org.junit.BeforeClass;
|
import org.junit.BeforeClass;
|
||||||
import org.junit.Ignore;
|
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
import org.junit.experimental.categories.Category;
|
import org.junit.experimental.categories.Category;
|
||||||
|
|
||||||
|
@ -46,6 +49,7 @@ public class TestStochasticLoadBalancer extends BalancerTestBase {
|
||||||
@BeforeClass
|
@BeforeClass
|
||||||
public static void beforeAllTests() throws Exception {
|
public static void beforeAllTests() throws Exception {
|
||||||
Configuration conf = HBaseConfiguration.create();
|
Configuration conf = HBaseConfiguration.create();
|
||||||
|
conf.setFloat("hbase.master.balancer.stochastic.maxMovePercent", 0.75f);
|
||||||
loadBalancer = new StochasticLoadBalancer();
|
loadBalancer = new StochasticLoadBalancer();
|
||||||
loadBalancer.setConf(conf);
|
loadBalancer.setConf(conf);
|
||||||
}
|
}
|
||||||
|
@ -101,6 +105,10 @@ public class TestStochasticLoadBalancer extends BalancerTestBase {
|
||||||
new int[]{1, 1, 1, 1, 1, 1, 1, 1, 1, 10},
|
new int[]{1, 1, 1, 1, 1, 1, 1, 1, 1, 10},
|
||||||
new int[]{1, 1, 1, 1, 1, 1, 1, 1, 1, 123},
|
new int[]{1, 1, 1, 1, 1, 1, 1, 1, 1, 123},
|
||||||
new int[]{1, 1, 1, 1, 1, 1, 1, 1, 1, 155},
|
new int[]{1, 1, 1, 1, 1, 1, 1, 1, 1, 155},
|
||||||
|
new int[]{10, 7, 12, 8, 11, 10, 9, 14},
|
||||||
|
new int[]{13, 14, 6, 10, 10, 10, 8, 10},
|
||||||
|
new int[]{130, 14, 60, 10, 100, 10, 80, 10},
|
||||||
|
new int[]{130, 140, 60, 100, 100, 100, 80, 100}
|
||||||
};
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -119,9 +127,11 @@ public class TestStochasticLoadBalancer extends BalancerTestBase {
|
||||||
List<ServerAndLoad> list = convertToList(servers);
|
List<ServerAndLoad> list = convertToList(servers);
|
||||||
LOG.info("Mock Cluster : " + printMock(list) + " " + printStats(list));
|
LOG.info("Mock Cluster : " + printMock(list) + " " + printStats(list));
|
||||||
List<RegionPlan> plans = loadBalancer.balanceCluster(servers);
|
List<RegionPlan> plans = loadBalancer.balanceCluster(servers);
|
||||||
List<ServerAndLoad> balancedCluster = reconcile(list, plans);
|
List<ServerAndLoad> balancedCluster = reconcile(list, plans, servers);
|
||||||
LOG.info("Mock Balance : " + printMock(balancedCluster));
|
LOG.info("Mock Balance : " + printMock(balancedCluster));
|
||||||
assertClusterAsBalanced(balancedCluster);
|
assertClusterAsBalanced(balancedCluster);
|
||||||
|
List<RegionPlan> secondPlans = loadBalancer.balanceCluster(servers);
|
||||||
|
assertNull(secondPlans);
|
||||||
for (Map.Entry<ServerName, List<HRegionInfo>> entry : servers.entrySet()) {
|
for (Map.Entry<ServerName, List<HRegionInfo>> entry : servers.entrySet()) {
|
||||||
returnRegions(entry.getValue());
|
returnRegions(entry.getValue());
|
||||||
returnServer(entry.getKey());
|
returnServer(entry.getKey());
|
||||||
|
@ -132,56 +142,96 @@ public class TestStochasticLoadBalancer extends BalancerTestBase {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testSkewCost() {
|
public void testSkewCost() {
|
||||||
|
Configuration conf = HBaseConfiguration.create();
|
||||||
|
StochasticLoadBalancer.CostFunction
|
||||||
|
costFunction = new StochasticLoadBalancer.RegionCountSkewCostFunction(conf);
|
||||||
for (int[] mockCluster : clusterStateMocks) {
|
for (int[] mockCluster : clusterStateMocks) {
|
||||||
double cost = loadBalancer.computeSkewLoadCost(mockCluster(mockCluster));
|
double cost = costFunction.cost(mockCluster(mockCluster));
|
||||||
assertTrue(cost >= 0);
|
assertTrue(cost >= 0);
|
||||||
assertTrue(cost <= 1.01);
|
assertTrue(cost <= 1.01);
|
||||||
}
|
}
|
||||||
assertEquals(1,
|
assertEquals(1,
|
||||||
loadBalancer.computeSkewLoadCost(mockCluster(new int[] { 0, 0, 0, 0, 1 })), 0.01);
|
costFunction.cost(mockCluster(new int[]{0, 0, 0, 0, 1})), 0.01);
|
||||||
assertEquals(.75,
|
assertEquals(.75,
|
||||||
loadBalancer.computeSkewLoadCost(mockCluster(new int[] { 0, 0, 0, 1, 1 })), 0.01);
|
costFunction.cost(mockCluster(new int[]{0, 0, 0, 1, 1})), 0.01);
|
||||||
assertEquals(.5,
|
assertEquals(.5,
|
||||||
loadBalancer.computeSkewLoadCost(mockCluster(new int[] { 0, 0, 1, 1, 1 })), 0.01);
|
costFunction.cost(mockCluster(new int[]{0, 0, 1, 1, 1})), 0.01);
|
||||||
assertEquals(.25,
|
assertEquals(.25,
|
||||||
loadBalancer.computeSkewLoadCost(mockCluster(new int[] { 0, 1, 1, 1, 1 })), 0.01);
|
costFunction.cost(mockCluster(new int[]{0, 1, 1, 1, 1})), 0.01);
|
||||||
assertEquals(0,
|
assertEquals(0,
|
||||||
loadBalancer.computeSkewLoadCost(mockCluster(new int[] { 1, 1, 1, 1, 1 })), 0.01);
|
costFunction.cost(mockCluster(new int[]{1, 1, 1, 1, 1})), 0.01);
|
||||||
assertEquals(0,
|
assertEquals(0,
|
||||||
loadBalancer.computeSkewLoadCost(mockCluster(new int[] { 10, 10, 10, 10, 10 })), 0.01);
|
costFunction.cost(mockCluster(new int[]{10, 10, 10, 10, 10})), 0.01);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testTableSkewCost() {
|
public void testTableSkewCost() {
|
||||||
|
Configuration conf = HBaseConfiguration.create();
|
||||||
|
StochasticLoadBalancer.CostFunction
|
||||||
|
costFunction = new StochasticLoadBalancer.TableSkewCostFunction(conf);
|
||||||
for (int[] mockCluster : clusterStateMocks) {
|
for (int[] mockCluster : clusterStateMocks) {
|
||||||
BaseLoadBalancer.Cluster cluster = mockCluster(mockCluster);
|
BaseLoadBalancer.Cluster cluster = mockCluster(mockCluster);
|
||||||
double cost = loadBalancer.computeTableSkewLoadCost(cluster);
|
double cost = costFunction.cost(cluster);
|
||||||
assertTrue(cost >= 0);
|
assertTrue(cost >= 0);
|
||||||
assertTrue(cost <= 1.01);
|
assertTrue(cost <= 1.01);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testCostFromStats() {
|
public void testCostFromArray() {
|
||||||
DescriptiveStatistics statOne = new DescriptiveStatistics();
|
Configuration conf = HBaseConfiguration.create();
|
||||||
for (int i =0; i < 100; i++) {
|
StochasticLoadBalancer.CostFromRegionLoadFunction
|
||||||
statOne.addValue(10);
|
costFunction = new StochasticLoadBalancer.MemstoreSizeCostFunction(conf);
|
||||||
}
|
|
||||||
assertEquals(0, loadBalancer.costFromStats(statOne), 0.01);
|
|
||||||
|
|
||||||
DescriptiveStatistics statTwo = new DescriptiveStatistics();
|
double[] statOne = new double[100];
|
||||||
for (int i =0; i < 100; i++) {
|
for (int i =0; i < 100; i++) {
|
||||||
statTwo.addValue(0);
|
statOne[i] = 10;
|
||||||
}
|
}
|
||||||
statTwo.addValue(100);
|
assertEquals(0, costFunction.costFromArray(statOne), 0.01);
|
||||||
assertEquals(1, loadBalancer.costFromStats(statTwo), 0.01);
|
|
||||||
|
|
||||||
DescriptiveStatistics statThree = new DescriptiveStatistics();
|
double[] statTwo= new double[101];
|
||||||
for (int i =0; i < 100; i++) {
|
for (int i =0; i < 100; i++) {
|
||||||
statThree.addValue(0);
|
statTwo[i] = 0;
|
||||||
statThree.addValue(100);
|
|
||||||
}
|
}
|
||||||
assertEquals(0.5, loadBalancer.costFromStats(statThree), 0.01);
|
statTwo[100] = 100;
|
||||||
|
assertEquals(1, costFunction.costFromArray(statTwo), 0.01);
|
||||||
|
|
||||||
|
double[] statThree = new double[200];
|
||||||
|
for (int i =0; i < 100; i++) {
|
||||||
|
statThree[i] = (0);
|
||||||
|
statThree[i+100] = 100;
|
||||||
|
}
|
||||||
|
assertEquals(0.5, costFunction.costFromArray(statThree), 0.01);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test(timeout = 30000)
|
||||||
|
public void testLosingRs() throws Exception {
|
||||||
|
int numNodes = 3;
|
||||||
|
int numRegions = 20;
|
||||||
|
int numRegionsPerServer = 3; //all servers except one
|
||||||
|
int numTables = 2;
|
||||||
|
|
||||||
|
Map<ServerName, List<HRegionInfo>> serverMap =
|
||||||
|
createServerMap(numNodes, numRegions, numRegionsPerServer, numTables);
|
||||||
|
List<ServerAndLoad> list = convertToList(serverMap);
|
||||||
|
|
||||||
|
|
||||||
|
List<RegionPlan> plans = loadBalancer.balanceCluster(serverMap);
|
||||||
|
assertNotNull(plans);
|
||||||
|
|
||||||
|
// Apply the plan to the mock cluster.
|
||||||
|
List<ServerAndLoad> balancedCluster = reconcile(list, plans, serverMap);
|
||||||
|
|
||||||
|
assertClusterAsBalanced(balancedCluster);
|
||||||
|
|
||||||
|
ServerName sn = serverMap.keySet().toArray(new ServerName[serverMap.size()])[0];
|
||||||
|
|
||||||
|
ServerName deadSn = new ServerName(sn.getHostname(), sn.getPort(), sn.getStartcode() -100);
|
||||||
|
|
||||||
|
serverMap.put(deadSn, new ArrayList<HRegionInfo>(0));
|
||||||
|
|
||||||
|
plans = loadBalancer.balanceCluster(serverMap);
|
||||||
|
assertNull(plans);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test (timeout = 20000)
|
@Test (timeout = 20000)
|
||||||
|
@ -190,7 +240,7 @@ public class TestStochasticLoadBalancer extends BalancerTestBase {
|
||||||
int numRegions = 1000;
|
int numRegions = 1000;
|
||||||
int numRegionsPerServer = 40; //all servers except one
|
int numRegionsPerServer = 40; //all servers except one
|
||||||
int numTables = 10;
|
int numTables = 10;
|
||||||
testWithCluster(numNodes, numRegions, numRegionsPerServer, numTables);
|
testWithCluster(numNodes, numRegions, numRegionsPerServer, numTables, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test (timeout = 20000)
|
@Test (timeout = 20000)
|
||||||
|
@ -199,45 +249,92 @@ public class TestStochasticLoadBalancer extends BalancerTestBase {
|
||||||
int numRegions = 2000;
|
int numRegions = 2000;
|
||||||
int numRegionsPerServer = 40; //all servers except one
|
int numRegionsPerServer = 40; //all servers except one
|
||||||
int numTables = 10;
|
int numTables = 10;
|
||||||
testWithCluster(numNodes, numRegions, numRegionsPerServer, numTables);
|
testWithCluster(numNodes, numRegions, numRegionsPerServer, numTables, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test (timeout = 40000)
|
@Test (timeout = 20000)
|
||||||
|
public void testSmallCluster3() {
|
||||||
|
int numNodes = 20;
|
||||||
|
int numRegions = 2000;
|
||||||
|
int numRegionsPerServer = 1; // all servers except one
|
||||||
|
int numTables = 10;
|
||||||
|
testWithCluster(numNodes, numRegions, numRegionsPerServer, numTables, false /* max moves */);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test (timeout = 800000)
|
||||||
public void testMidCluster() {
|
public void testMidCluster() {
|
||||||
int numNodes = 100;
|
int numNodes = 100;
|
||||||
int numRegions = 10000;
|
int numRegions = 10000;
|
||||||
int numRegionsPerServer = 60; //all servers except one
|
int numRegionsPerServer = 60; // all servers except one
|
||||||
int numTables = 40;
|
int numTables = 40;
|
||||||
testWithCluster(numNodes, numRegions, numRegionsPerServer, numTables);
|
testWithCluster(numNodes, numRegions, numRegionsPerServer, numTables, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test (timeout = 1200000)
|
@Test (timeout = 800000)
|
||||||
public void testMidCluster2() {
|
public void testMidCluster2() {
|
||||||
int numNodes = 200;
|
int numNodes = 200;
|
||||||
int numRegions = 100000;
|
int numRegions = 100000;
|
||||||
int numRegionsPerServer = 40; //all servers except one
|
int numRegionsPerServer = 40; // all servers except one
|
||||||
int numTables = 400;
|
int numTables = 400;
|
||||||
testWithCluster(numNodes, numRegions, numRegionsPerServer, numTables);
|
testWithCluster(numNodes,
|
||||||
|
numRegions,
|
||||||
|
numRegionsPerServer,
|
||||||
|
numTables,
|
||||||
|
false /* num large num regions means may not always get to best balance with one run */);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@Test (timeout = 800000)
|
||||||
|
public void testMidCluster3() {
|
||||||
|
int numNodes = 100;
|
||||||
|
int numRegions = 2000;
|
||||||
|
int numRegionsPerServer = 9; // all servers except one
|
||||||
|
int numTables = 110;
|
||||||
|
testWithCluster(numNodes, numRegions, numRegionsPerServer, numTables, true);
|
||||||
|
// TODO(eclark): Make sure that the tables are well distributed.
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
@Ignore
|
|
||||||
//TODO: This still does not finish, making the LoadBalancer unusable at this scale. We should solve this.
|
|
||||||
//There are two reasons so far;
|
|
||||||
// - It takes too long for iterating for all servers
|
|
||||||
// - Moving one region out of the loaded server only costs a slight decrease in the cost of regionCountSkewCost
|
|
||||||
// but also a slight increase on the moveCost. loadMultiplier / moveCostMultiplier is not high enough to bring down
|
|
||||||
// the total cost, so that the eager selection cannot continue. This can be solved by smt like
|
|
||||||
// http://en.wikipedia.org/wiki/Simulated_annealing instead of random walk with eager selection
|
|
||||||
public void testLargeCluster() {
|
public void testLargeCluster() {
|
||||||
int numNodes = 1000;
|
int numNodes = 1000;
|
||||||
int numRegions = 100000; //100 regions per RS
|
int numRegions = 100000; //100 regions per RS
|
||||||
int numRegionsPerServer = 80; //all servers except one
|
int numRegionsPerServer = 80; //all servers except one
|
||||||
int numTables = 100;
|
int numTables = 100;
|
||||||
testWithCluster(numNodes, numRegions, numRegionsPerServer, numTables);
|
testWithCluster(numNodes, numRegions, numRegionsPerServer, numTables, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
protected void testWithCluster(int numNodes, int numRegions, int numRegionsPerServer, int numTables) {
|
protected void testWithCluster(int numNodes,
|
||||||
|
int numRegions,
|
||||||
|
int numRegionsPerServer,
|
||||||
|
int numTables,
|
||||||
|
boolean assertFullyBalanced) {
|
||||||
|
Map<ServerName, List<HRegionInfo>> serverMap =
|
||||||
|
createServerMap(numNodes, numRegions, numRegionsPerServer, numTables);
|
||||||
|
|
||||||
|
List<ServerAndLoad> list = convertToList(serverMap);
|
||||||
|
LOG.info("Mock Cluster : " + printMock(list) + " " + printStats(list));
|
||||||
|
|
||||||
|
// Run the balancer.
|
||||||
|
List<RegionPlan> plans = loadBalancer.balanceCluster(serverMap);
|
||||||
|
assertNotNull(plans);
|
||||||
|
|
||||||
|
// Check to see that this actually got to a stable place.
|
||||||
|
if (assertFullyBalanced) {
|
||||||
|
// Apply the plan to the mock cluster.
|
||||||
|
List<ServerAndLoad> balancedCluster = reconcile(list, plans, serverMap);
|
||||||
|
|
||||||
|
// Print out the cluster loads to make debugging easier.
|
||||||
|
LOG.info("Mock Balance : " + printMock(balancedCluster));
|
||||||
|
assertClusterAsBalanced(balancedCluster);
|
||||||
|
List<RegionPlan> secondPlans = loadBalancer.balanceCluster(serverMap);
|
||||||
|
assertNull(secondPlans);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private Map<ServerName, List<HRegionInfo>> createServerMap(int numNodes,
|
||||||
|
int numRegions,
|
||||||
|
int numRegionsPerServer,
|
||||||
|
int numTables) {
|
||||||
//construct a cluster of numNodes, having a total of numRegions. Each RS will hold
|
//construct a cluster of numNodes, having a total of numRegions. Each RS will hold
|
||||||
//numRegionsPerServer many regions except for the last one, which will host all the
|
//numRegionsPerServer many regions except for the last one, which will host all the
|
||||||
//remaining regions
|
//remaining regions
|
||||||
|
@ -246,8 +343,6 @@ public class TestStochasticLoadBalancer extends BalancerTestBase {
|
||||||
cluster[i] = numRegionsPerServer;
|
cluster[i] = numRegionsPerServer;
|
||||||
}
|
}
|
||||||
cluster[cluster.length - 1] = numRegions - ((cluster.length - 1) * numRegionsPerServer);
|
cluster[cluster.length - 1] = numRegions - ((cluster.length - 1) * numRegionsPerServer);
|
||||||
|
return mockClusterServers(cluster, numTables);
|
||||||
assertNotNull(loadBalancer.balanceCluster(mockClusterServers(cluster, numTables)));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue