HBASE-10661 TestStochasticLoadBalancer.testRegionReplicationOnMidClusterWithRacks() is flaky

git-svn-id: https://svn.apache.org/repos/asf/hbase/branches/hbase-10070@1593587 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Enis Soztutar 2014-05-09 18:13:30 +00:00
parent e86b13f75a
commit d84c863525
2 changed files with 150 additions and 98 deletions

View File

@ -38,7 +38,6 @@ import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.RegionLoad; import org.apache.hadoop.hbase.RegionLoad;
import org.apache.hadoop.hbase.ServerLoad; import org.apache.hadoop.hbase.ServerLoad;
import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.client.RegionReplicaUtil;
import org.apache.hadoop.hbase.master.MasterServices; import org.apache.hadoop.hbase.master.MasterServices;
import org.apache.hadoop.hbase.master.RegionPlan; import org.apache.hadoop.hbase.master.RegionPlan;
import org.apache.hadoop.hbase.master.balancer.BaseLoadBalancer.Cluster.Action; import org.apache.hadoop.hbase.master.balancer.BaseLoadBalancer.Cluster.Action;
@ -143,7 +142,7 @@ public class StochasticLoadBalancer extends BaseLoadBalancer {
new RandomCandidateGenerator(), new RandomCandidateGenerator(),
new LoadCandidateGenerator(), new LoadCandidateGenerator(),
localityCandidateGenerator, localityCandidateGenerator,
new RegionReplicaCandidateGenerator(), new RegionReplicaRackCandidateGenerator(),
}; };
regionLoadFunctions = new CostFromRegionLoadFunction[] { regionLoadFunctions = new CostFromRegionLoadFunction[] {
@ -424,6 +423,14 @@ public class StochasticLoadBalancer extends BaseLoadBalancer {
return RANDOM.nextInt(cluster.numServers); return RANDOM.nextInt(cluster.numServers);
} }
/**
 * Picks a random rack index from the cluster.
 *
 * @param cluster cluster state supplying {@code numRacks}
 * @return a value drawn from {@code RANDOM.nextInt(cluster.numRacks)}, or -1 when the
 *         cluster reports fewer than one rack
 */
protected int pickRandomRack(Cluster cluster) {
  final int rackCount = cluster.numRacks;
  // No racks to choose from: signal that with -1 instead of asking RANDOM for a value.
  return rackCount < 1 ? -1 : RANDOM.nextInt(rackCount);
}
protected int pickOtherRandomServer(Cluster cluster, int serverIndex) { protected int pickOtherRandomServer(Cluster cluster, int serverIndex) {
if (cluster.numServers < 2) { if (cluster.numServers < 2) {
return -1; return -1;
@ -436,6 +443,18 @@ public class StochasticLoadBalancer extends BaseLoadBalancer {
} }
} }
/**
 * Picks a random rack index that differs from the given one.
 *
 * @param cluster cluster state supplying {@code numRacks}
 * @param rackIndex the rack index that must not be returned
 * @return a rack index other than {@code rackIndex}, or -1 when the cluster has fewer than
 *         two racks (so no other rack exists)
 */
protected int pickOtherRandomRack(Cluster cluster, int rackIndex) {
  if (cluster.numRacks < 2) {
    return -1;
  }
  // Redraw until the pick is different from the rack we want to avoid.
  int candidate;
  do {
    candidate = pickRandomRack(cluster);
  } while (candidate == rackIndex);
  return candidate;
}
protected Cluster.Action pickRandomRegions(Cluster cluster, protected Cluster.Action pickRandomRegions(Cluster cluster,
int thisServer, int thisServer,
int otherServer) { int otherServer) {
@ -489,7 +508,7 @@ public class StochasticLoadBalancer extends BaseLoadBalancer {
} }
} }
public static class LoadCandidateGenerator extends CandidateGenerator { static class LoadCandidateGenerator extends CandidateGenerator {
@Override @Override
Cluster.Action generate(Cluster cluster) { Cluster.Action generate(Cluster cluster) {
@ -585,39 +604,40 @@ public class StochasticLoadBalancer extends BaseLoadBalancer {
* Generates candidates which move the replicas out of the region server for * Generates candidates which move the replicas out of the region server for
* co-hosted region replicas * co-hosted region replicas
*/ */
public static class RegionReplicaCandidateGenerator extends CandidateGenerator { static class RegionReplicaCandidateGenerator extends CandidateGenerator {
RandomCandidateGenerator randomGenerator = new RandomCandidateGenerator(); RandomCandidateGenerator randomGenerator = new RandomCandidateGenerator();
@Override /**
Cluster.Action generate(Cluster cluster) { * Randomly select one regionIndex out of all region replicas co-hosted in the same group
* (a group is a server, host or rack)
int serverIndex = pickRandomServer(cluster); * @param primariesOfRegionsPerGroup either Cluster.primariesOfRegionsPerServer,
* primariesOfRegionsPerHost or primariesOfRegionsPerRack
if (cluster.numServers <= 1 || serverIndex == -1) { * @param regionsPerGroup either Cluster.regionsPerServer, regionsPerHost or regionsPerRack
return Cluster.NullAction; * @param regionIndexToPrimaryIndex Cluster.regionsIndexToPrimaryIndex
} * @return a regionIndex for the selected primary or -1 if there is no co-locating
*/
// randomly select one primaryIndex out of all region replicas in the same server int selectCoHostedRegionPerGroup(int[] primariesOfRegionsPerGroup, int[] regionsPerGroup
// we don't know how many region replicas are co-hosted, we will randomly select one , int[] regionIndexToPrimaryIndex) {
// using reservoir sampling (http://gregable.com/2007/10/reservoir-sampling.html)
int currentPrimary = -1; int currentPrimary = -1;
int currentPrimaryIndex = -1; int currentPrimaryIndex = -1;
int primaryIndex = -1; int selectedPrimaryIndex = -1;
double currentLargestRandom = -1; double currentLargestRandom = -1;
// regionsByPrimaryPerServer is a sorted array. Since it contains the primary region // primariesOfRegionsPerGroup is a sorted array. Since it contains the primary region
// ids for the regions hosted in server, a consecutive repetition means that replicas // ids for the regions hosted in server, a consecutive repetition means that replicas
// are co-hosted // are co-hosted
for (int j = 0; j <= cluster.primariesOfRegionsPerServer[serverIndex].length; j++) { for (int j = 0; j <= primariesOfRegionsPerGroup.length; j++) {
int primary = j < cluster.primariesOfRegionsPerServer[serverIndex].length int primary = j < primariesOfRegionsPerGroup.length
? cluster.primariesOfRegionsPerServer[serverIndex][j] : -1; ? primariesOfRegionsPerGroup[j] : -1;
if (primary != currentPrimary) { // check for whether we see a new primary if (primary != currentPrimary) { // check for whether we see a new primary
int numReplicas = j - currentPrimaryIndex; int numReplicas = j - currentPrimaryIndex;
if (numReplicas > 1) { // means consecutive primaries, indicating co-location if (numReplicas > 1) { // means consecutive primaries, indicating co-location
// decide to select this primary region id or not // decide to select this primary region id or not
double currentRandom = RANDOM.nextDouble(); double currentRandom = RANDOM.nextDouble();
// we don't know how many region replicas are co-hosted, we will randomly select one
// using reservoir sampling (http://gregable.com/2007/10/reservoir-sampling.html)
if (currentRandom > currentLargestRandom) { if (currentRandom > currentLargestRandom) {
primaryIndex = currentPrimary; // select this primary selectedPrimaryIndex = currentPrimary;
currentLargestRandom = currentRandom; currentLargestRandom = currentRandom;
} }
} }
@ -626,30 +646,73 @@ public class StochasticLoadBalancer extends BaseLoadBalancer {
} }
} }
// we have found the primary id for the region to move. Now find the actual regionIndex
// with the given primary, prefer to move the secondary region.
for (int j = 0; j < regionsPerGroup.length; j++) {
int regionIndex = regionsPerGroup[j];
if (selectedPrimaryIndex == regionIndexToPrimaryIndex[regionIndex]) {
// always move the secondary, not the primary
if (selectedPrimaryIndex != regionIndex) {
return regionIndex;
}
}
}
return -1;
}
@Override
Cluster.Action generate(Cluster cluster) {
int serverIndex = pickRandomServer(cluster);
if (cluster.numServers <= 1 || serverIndex == -1) {
return Cluster.NullAction;
}
int regionIndex = selectCoHostedRegionPerGroup(
cluster.primariesOfRegionsPerServer[serverIndex],
cluster.regionsPerServer[serverIndex],
cluster.regionIndexToPrimaryIndex);
// if there are no pairs of region replicas co-hosted, default to random generator // if there are no pairs of region replicas co-hosted, default to random generator
if (primaryIndex == -1) { if (regionIndex == -1) {
// default to randompicker // default to randompicker
return randomGenerator.generate(cluster); return randomGenerator.generate(cluster);
} }
// we have found the primary id for the region to move. Now find the actual regionIndex
// with the given primary, prefer to move the secondary region.
int regionIndex = -1;
for (int k = 0; k < cluster.regionsPerServer[serverIndex].length; k++) {
int region = cluster.regionsPerServer[serverIndex][k];
if (primaryIndex == cluster.regionIndexToPrimaryIndex[region]) {
// always move the secondary, not the primary
if (!RegionReplicaUtil.isDefaultReplica(cluster.regions[region])) {
regionIndex = region;
break;
}
}
}
int toServerIndex = pickOtherRandomServer(cluster, serverIndex); int toServerIndex = pickOtherRandomServer(cluster, serverIndex);
int toRegionIndex = pickRandomRegion(cluster, toServerIndex, 0.9f); int toRegionIndex = pickRandomRegion(cluster, toServerIndex, 0.9f);
return getAction (serverIndex, regionIndex, toServerIndex, toRegionIndex);
}
}
/**
* Generates candidates which move a region replica out of a rack when that rack
* hosts more than one replica of the same region.
* <p>
* A random rack is inspected. If it holds co-hosted replicas, one non-primary replica is
* proposed for a move to a random server in a different random rack. When the cluster has
* at most one rack the server-level logic of the parent generator is used instead, and when
* the inspected rack has no co-hosted replicas the random generator is used.
*/
static class RegionReplicaRackCandidateGenerator extends RegionReplicaCandidateGenerator {
@Override
Cluster.Action generate(Cluster cluster) {
int rackIndex = pickRandomRack(cluster);
// with a single rack (or none) there is no other rack to move to; use the parent's
// per-server generation instead
if (cluster.numRacks <= 1 || rackIndex == -1) {
return super.generate(cluster);
}
// look for a replica in this rack whose primary id repeats within the rack;
// the helper returns -1 when nothing in the rack is co-located
int regionIndex = selectCoHostedRegionPerGroup(
cluster.primariesOfRegionsPerRack[rackIndex],
cluster.regionsPerRack[rackIndex],
cluster.regionIndexToPrimaryIndex);
// if there are no pairs of region replicas co-hosted, default to random generator
if (regionIndex == -1) {
// default to randompicker
return randomGenerator.generate(cluster);
}
// server index recorded for the selected region; this is the source of the move
int serverIndex = cluster.regionIndexToServerIndex[regionIndex];
// numRacks > 1 was checked above, so pickOtherRandomRack does not return -1 here
int toRackIndex = pickOtherRandomRack(cluster, rackIndex);
// NOTE(review): assumes the target rack has at least one server; if RANDOM is a
// java.util.Random, nextInt(0) would throw IllegalArgumentException -- confirm that
// Cluster never produces an empty serversPerRack entry
int rand = RANDOM.nextInt(cluster.serversPerRack[toRackIndex].length);
int toServerIndex = cluster.serversPerRack[toRackIndex][rand];
// NOTE(review): presumably 0.9f is the chance of picking no region on the target server,
// making most candidates plain moves rather than swaps -- verify against pickRandomRegion
int toRegionIndex = pickRandomRegion(cluster, toServerIndex, 0.9f);
return getAction (serverIndex, regionIndex, toServerIndex, toRegionIndex); return getAction (serverIndex, regionIndex, toServerIndex, toRegionIndex);
} }
} }
@ -657,7 +720,7 @@ public class StochasticLoadBalancer extends BaseLoadBalancer {
/** /**
* Base class of StochasticLoadBalancer's Cost Functions. * Base class of StochasticLoadBalancer's Cost Functions.
*/ */
public abstract static class CostFunction { abstract static class CostFunction {
private float multiplier = 0; private float multiplier = 0;
@ -768,7 +831,7 @@ public class StochasticLoadBalancer extends BaseLoadBalancer {
* Given the starting state of the regions and a potential ending state * Given the starting state of the regions and a potential ending state
* compute cost based upon the number of regions that have moved. * compute cost based upon the number of regions that have moved.
*/ */
public static class MoveCostFunction extends CostFunction { static class MoveCostFunction extends CostFunction {
private static final String MOVE_COST_KEY = "hbase.master.balancer.stochastic.moveCost"; private static final String MOVE_COST_KEY = "hbase.master.balancer.stochastic.moveCost";
private static final String MAX_MOVES_PERCENT_KEY = private static final String MAX_MOVES_PERCENT_KEY =
"hbase.master.balancer.stochastic.maxMovePercent"; "hbase.master.balancer.stochastic.maxMovePercent";
@ -812,7 +875,7 @@ public class StochasticLoadBalancer extends BaseLoadBalancer {
* Compute the cost of a potential cluster state from skew in number of * Compute the cost of a potential cluster state from skew in number of
* regions on a cluster. * regions on a cluster.
*/ */
public static class RegionCountSkewCostFunction extends CostFunction { static class RegionCountSkewCostFunction extends CostFunction {
private static final String REGION_COUNT_SKEW_COST_KEY = private static final String REGION_COUNT_SKEW_COST_KEY =
"hbase.master.balancer.stochastic.regionCountCost"; "hbase.master.balancer.stochastic.regionCountCost";
private static final float DEFAULT_REGION_COUNT_SKEW_COST = 500; private static final float DEFAULT_REGION_COUNT_SKEW_COST = 500;
@ -854,7 +917,7 @@ public class StochasticLoadBalancer extends BaseLoadBalancer {
* Compute the cost of a potential cluster configuration based upon how evenly * Compute the cost of a potential cluster configuration based upon how evenly
* distributed tables are. * distributed tables are.
*/ */
public static class TableSkewCostFunction extends CostFunction { static class TableSkewCostFunction extends CostFunction {
private static final String TABLE_SKEW_COST_KEY = private static final String TABLE_SKEW_COST_KEY =
"hbase.master.balancer.stochastic.tableSkewCost"; "hbase.master.balancer.stochastic.tableSkewCost";
@ -883,7 +946,7 @@ public class StochasticLoadBalancer extends BaseLoadBalancer {
* Compute a cost of a potential cluster configuration based upon where * Compute a cost of a potential cluster configuration based upon where
* {@link org.apache.hadoop.hbase.regionserver.StoreFile}s are located. * {@link org.apache.hadoop.hbase.regionserver.StoreFile}s are located.
*/ */
public static class LocalityCostFunction extends CostFunction { static class LocalityCostFunction extends CostFunction {
private static final String LOCALITY_COST_KEY = "hbase.master.balancer.stochastic.localityCost"; private static final String LOCALITY_COST_KEY = "hbase.master.balancer.stochastic.localityCost";
private static final float DEFAULT_LOCALITY_COST = 25; private static final float DEFAULT_LOCALITY_COST = 25;
@ -943,7 +1006,7 @@ public class StochasticLoadBalancer extends BaseLoadBalancer {
* Base class that allows writing cost functions from rolling average of some * Base class that allows writing cost functions from rolling average of some
* number from RegionLoad. * number from RegionLoad.
*/ */
public abstract static class CostFromRegionLoadFunction extends CostFunction { abstract static class CostFromRegionLoadFunction extends CostFunction {
private ClusterStatus clusterStatus = null; private ClusterStatus clusterStatus = null;
private Map<String, Deque<RegionLoad>> loads = null; private Map<String, Deque<RegionLoad>> loads = null;
@ -1016,7 +1079,7 @@ public class StochasticLoadBalancer extends BaseLoadBalancer {
* computed cost will be. This uses a rolling average of regionload. * computed cost will be. This uses a rolling average of regionload.
*/ */
public static class ReadRequestCostFunction extends CostFromRegionLoadFunction { static class ReadRequestCostFunction extends CostFromRegionLoadFunction {
private static final String READ_REQUEST_COST_KEY = private static final String READ_REQUEST_COST_KEY =
"hbase.master.balancer.stochastic.readRequestCost"; "hbase.master.balancer.stochastic.readRequestCost";
@ -1038,7 +1101,7 @@ public class StochasticLoadBalancer extends BaseLoadBalancer {
* Compute the cost of total number of write requests. The more unbalanced the higher the * Compute the cost of total number of write requests. The more unbalanced the higher the
* computed cost will be. This uses a rolling average of regionload. * computed cost will be. This uses a rolling average of regionload.
*/ */
public static class WriteRequestCostFunction extends CostFromRegionLoadFunction { static class WriteRequestCostFunction extends CostFromRegionLoadFunction {
private static final String WRITE_REQUEST_COST_KEY = private static final String WRITE_REQUEST_COST_KEY =
"hbase.master.balancer.stochastic.writeRequestCost"; "hbase.master.balancer.stochastic.writeRequestCost";
@ -1061,7 +1124,7 @@ public class StochasticLoadBalancer extends BaseLoadBalancer {
* though, since if numReplicas > numRegionServers, we still want to keep the * though, since if numReplicas > numRegionServers, we still want to keep the
* replica open. * replica open.
*/ */
public static class RegionReplicaHostCostFunction extends CostFunction { static class RegionReplicaHostCostFunction extends CostFunction {
private static final String REGION_REPLICA_HOST_COST_KEY = private static final String REGION_REPLICA_HOST_COST_KEY =
"hbase.master.balancer.stochastic.regionReplicaHostCostKey"; "hbase.master.balancer.stochastic.regionReplicaHostCostKey";
private static final float DEFAULT_REGION_REPLICA_HOST_COST_KEY = 100000; private static final float DEFAULT_REGION_REPLICA_HOST_COST_KEY = 100000;
@ -1175,7 +1238,7 @@ public class StochasticLoadBalancer extends BaseLoadBalancer {
* cost to hosting replicas of the same region in the same rack. We do not prevent the case * cost to hosting replicas of the same region in the same rack. We do not prevent the case
* though. * though.
*/ */
public static class RegionReplicaRackCostFunction extends RegionReplicaHostCostFunction { static class RegionReplicaRackCostFunction extends RegionReplicaHostCostFunction {
private static final String REGION_REPLICA_RACK_COST_KEY = private static final String REGION_REPLICA_RACK_COST_KEY =
"hbase.master.balancer.stochastic.regionReplicaRackCostKey"; "hbase.master.balancer.stochastic.regionReplicaRackCostKey";
private static final float DEFAULT_REGION_REPLICA_RACK_COST_KEY = 10000; private static final float DEFAULT_REGION_REPLICA_RACK_COST_KEY = 10000;
@ -1218,7 +1281,7 @@ public class StochasticLoadBalancer extends BaseLoadBalancer {
* Compute the cost of total memstore size. The more unbalanced the higher the * Compute the cost of total memstore size. The more unbalanced the higher the
* computed cost will be. This uses a rolling average of regionload. * computed cost will be. This uses a rolling average of regionload.
*/ */
public static class MemstoreSizeCostFunction extends CostFromRegionLoadFunction { static class MemstoreSizeCostFunction extends CostFromRegionLoadFunction {
private static final String MEMSTORE_SIZE_COST_KEY = private static final String MEMSTORE_SIZE_COST_KEY =
"hbase.master.balancer.stochastic.memstoreSizeCost"; "hbase.master.balancer.stochastic.memstoreSizeCost";
@ -1238,7 +1301,7 @@ public class StochasticLoadBalancer extends BaseLoadBalancer {
* Compute the cost of total open storefiles size. The more unbalanced the higher the * Compute the cost of total open storefiles size. The more unbalanced the higher the
* computed cost will be. This uses a rolling average of regionload. * computed cost will be. This uses a rolling average of regionload.
*/ */
public static class StoreFileCostFunction extends CostFromRegionLoadFunction { static class StoreFileCostFunction extends CostFromRegionLoadFunction {
private static final String STOREFILE_SIZE_COST_KEY = private static final String STOREFILE_SIZE_COST_KEY =
"hbase.master.balancer.stochastic.storefileSizeCost"; "hbase.master.balancer.stochastic.storefileSizeCost";

View File

@ -49,8 +49,6 @@ import org.apache.hadoop.hbase.master.RackManager;
import org.apache.hadoop.hbase.master.RegionPlan; import org.apache.hadoop.hbase.master.RegionPlan;
import org.apache.hadoop.hbase.master.balancer.BaseLoadBalancer.Cluster; import org.apache.hadoop.hbase.master.balancer.BaseLoadBalancer.Cluster;
import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.net.DNSToSwitchMapping;
import org.apache.hadoop.net.NetworkTopology;
import org.junit.BeforeClass; import org.junit.BeforeClass;
import org.junit.Test; import org.junit.Test;
import org.junit.experimental.categories.Category; import org.junit.experimental.categories.Category;
@ -67,8 +65,6 @@ public class TestStochasticLoadBalancer extends BalancerTestBase {
public static void beforeAllTests() throws Exception { public static void beforeAllTests() throws Exception {
conf = HBaseConfiguration.create(); conf = HBaseConfiguration.create();
conf.setFloat("hbase.master.balancer.stochastic.maxMovePercent", 0.75f); conf.setFloat("hbase.master.balancer.stochastic.maxMovePercent", 0.75f);
conf.setClass("hbase.util.ip.to.rack.determiner",
MyRackResolver.class, DNSToSwitchMapping.class);
loadBalancer = new StochasticLoadBalancer(); loadBalancer = new StochasticLoadBalancer();
loadBalancer.setConf(conf); loadBalancer.setConf(conf);
} }
@ -492,7 +488,7 @@ public class TestStochasticLoadBalancer extends BalancerTestBase {
testWithCluster(numNodes, numRegions, numRegionsPerServer, replication, numTables, true, true); testWithCluster(numNodes, numRegions, numRegionsPerServer, replication, numTables, true, true);
} }
@Test (timeout = 60000) @Test (timeout = 800000)
public void testRegionReplicasOnSmallCluster() { public void testRegionReplicasOnSmallCluster() {
int numNodes = 10; int numNodes = 10;
int numRegions = 1000; int numRegions = 1000;
@ -502,9 +498,11 @@ public class TestStochasticLoadBalancer extends BalancerTestBase {
testWithCluster(numNodes, numRegions, numRegionsPerServer, replication, numTables, true, true); testWithCluster(numNodes, numRegions, numRegionsPerServer, replication, numTables, true, true);
} }
@Test (timeout = 60000) @Test (timeout = 800000)
public void testRegionReplicasOnMidCluster() { public void testRegionReplicasOnMidCluster() {
conf.setFloat("hbase.master.balancer.stochastic.maxMovePercent", 1.0f); conf.setFloat("hbase.master.balancer.stochastic.maxMovePercent", 1.0f);
conf.setLong(StochasticLoadBalancer.MAX_STEPS_KEY, 2000000L);
conf.setLong("hbase.master.balancer.stochastic.maxRunningTime", 90 * 1000); // 90 sec
loadBalancer.setConf(conf); loadBalancer.setConf(conf);
int numNodes = 200; int numNodes = 200;
int numRegions = 40 * 200; int numRegions = 40 * 200;
@ -514,34 +512,38 @@ public class TestStochasticLoadBalancer extends BalancerTestBase {
testWithCluster(numNodes, numRegions, numRegionsPerServer, replication, numTables, true, true); testWithCluster(numNodes, numRegions, numRegionsPerServer, replication, numTables, true, true);
} }
@Test (timeout = 60000) @Test (timeout = 800000)
public void testRegionReplicasOnLargeCluster() { public void testRegionReplicasOnLargeCluster() {
conf.setFloat("hbase.master.balancer.stochastic.maxMovePercent", 1.0f); conf.setFloat("hbase.master.balancer.stochastic.maxMovePercent", 1.0f);
conf.setLong(StochasticLoadBalancer.MAX_STEPS_KEY, 2000000L);
conf.setLong("hbase.master.balancer.stochastic.maxRunningTime", 90 * 1000); // 90 sec
loadBalancer.setConf(conf); loadBalancer.setConf(conf);
int numNodes = 1000; int numNodes = 1000;
int numRegions = 40 * numNodes; //40 regions per RS int numRegions = 20 * numNodes; // 20 * replication regions per RS
int numRegionsPerServer = 30; //all servers except one int numRegionsPerServer = 19; // all servers except one
int numTables = 100; int numTables = 100;
int replication = 3; int replication = 3;
testWithCluster(numNodes, numRegions, numRegionsPerServer, replication, numTables, true, true); testWithCluster(numNodes, numRegions, numRegionsPerServer, replication, numTables, true, true);
} }
@Test (timeout = 60000) @Test (timeout = 800000)
public void testRegionReplicasOnMidClusterHighReplication() { public void testRegionReplicasOnMidClusterHighReplication() {
conf.setLong(StochasticLoadBalancer.MAX_STEPS_KEY, 2000000L); conf.setLong(StochasticLoadBalancer.MAX_STEPS_KEY, 4000000L);
conf.setLong("hbase.master.balancer.stochastic.maxRunningTime", 120 * 1000); // 120 sec
conf.setFloat("hbase.master.balancer.stochastic.maxMovePercent", 1.0f); conf.setFloat("hbase.master.balancer.stochastic.maxMovePercent", 1.0f);
loadBalancer.setConf(conf); loadBalancer.setConf(conf);
int numNodes = 100; int numNodes = 80;
int numRegions = 6 * 100; int numRegions = 6 * numNodes;
int replication = 100; // 100 replicas per region, one for each server int replication = 80; // 80 replicas per region, one for each server
int numRegionsPerServer = 5; int numRegionsPerServer = 5;
int numTables = 10; int numTables = 10;
testWithCluster(numNodes, numRegions, numRegionsPerServer, replication, numTables, true, true); testWithCluster(numNodes, numRegions, numRegionsPerServer, replication, numTables, false, true);
} }
@Test (timeout = 60000) @Test (timeout = 800000)
public void testRegionReplicationOnMidClusterSameHosts() { public void testRegionReplicationOnMidClusterSameHosts() {
conf.setLong(StochasticLoadBalancer.MAX_STEPS_KEY, 2000000L); conf.setLong(StochasticLoadBalancer.MAX_STEPS_KEY, 2000000L);
conf.setLong("hbase.master.balancer.stochastic.maxRunningTime", 90 * 1000); // 90 sec
conf.setFloat("hbase.master.balancer.stochastic.maxMovePercent", 1.0f); conf.setFloat("hbase.master.balancer.stochastic.maxMovePercent", 1.0f);
loadBalancer.setConf(conf); loadBalancer.setConf(conf);
int numHosts = 100; int numHosts = 100;
@ -584,34 +586,35 @@ public class TestStochasticLoadBalancer extends BalancerTestBase {
} }
} }
@Test (timeout = 120000) @Test (timeout = 800000)
public void testRegionReplicationOnMidClusterWithRacks() { public void testRegionReplicationOnMidClusterWithRacks() {
conf.setLong(StochasticLoadBalancer.MAX_STEPS_KEY, 4000000L); conf.setLong(StochasticLoadBalancer.MAX_STEPS_KEY, 10000000L);
conf.setFloat("hbase.master.balancer.stochastic.maxMovePercent", 1.0f); conf.setFloat("hbase.master.balancer.stochastic.maxMovePercent", 1.0f);
conf.setLong("hbase.master.balancer.stochastic.maxRunningTime", 60 * 1000); // 60 sec conf.setLong("hbase.master.balancer.stochastic.maxRunningTime", 120 * 1000); // 120 sec
loadBalancer.setConf(conf); loadBalancer.setConf(conf);
int numNodes = 50; int numNodes = 30;
int numRegions = numNodes * 30; int numRegions = numNodes * 30;
int replication = 3; // 3 replicas per region int replication = 3; // 3 replicas per region
int numRegionsPerServer = 25; int numRegionsPerServer = 28;
int numTables = 10; int numTables = 10;
int numRacks = 4; // all replicas should be on a different rack int numRacks = 4; // all replicas should be on a different rack
Map<ServerName, List<HRegionInfo>> serverMap = Map<ServerName, List<HRegionInfo>> serverMap =
createServerMap(numNodes, numRegions, numRegionsPerServer, replication, numTables); createServerMap(numNodes, numRegions, numRegionsPerServer, replication, numTables);
RackManager rm = new ForTestRackManager(numRacks); RackManager rm = new ForTestRackManager(numRacks);
testWithCluster(serverMap, rm, true, true); testWithCluster(serverMap, rm, false, true);
} }
@Test (timeout = 60000) @Test (timeout = 800000)
public void testRegionReplicationOnMidClusterReplicationGreaterThanNumNodes() { public void testRegionReplicationOnMidClusterReplicationGreaterThanNumNodes() {
conf.setLong(StochasticLoadBalancer.MAX_STEPS_KEY, 2000000L); conf.setLong(StochasticLoadBalancer.MAX_STEPS_KEY, 2000000L);
conf.setLong("hbase.master.balancer.stochastic.maxRunningTime", 120 * 1000); // 120 sec
conf.setFloat("hbase.master.balancer.stochastic.maxMovePercent", 1.0f); conf.setFloat("hbase.master.balancer.stochastic.maxMovePercent", 1.0f);
loadBalancer.setConf(conf); loadBalancer.setConf(conf);
int numNodes = 80; int numNodes = 40;
int numRegions = 6 * 100; int numRegions = 6 * 50;
int replication = 100; // 100 replicas per region, more than numNodes int replication = 50; // 50 replicas per region, more than numNodes
int numRegionsPerServer = 5; int numRegionsPerServer = 6;
int numTables = 10; int numTables = 10;
testWithCluster(numNodes, numRegions, numRegionsPerServer, replication, numTables, true, false); testWithCluster(numNodes, numRegions, numRegionsPerServer, replication, numTables, true, false);
} }
@ -639,15 +642,19 @@ public class TestStochasticLoadBalancer extends BalancerTestBase {
assertNotNull(plans); assertNotNull(plans);
// Check to see that this actually got to a stable place. // Check to see that this actually got to a stable place.
if (assertFullyBalanced) { if (assertFullyBalanced || assertFullyBalancedForReplicas) {
// Apply the plan to the mock cluster. // Apply the plan to the mock cluster.
List<ServerAndLoad> balancedCluster = reconcile(list, plans, serverMap); List<ServerAndLoad> balancedCluster = reconcile(list, plans, serverMap);
// Print out the cluster loads to make debugging easier. // Print out the cluster loads to make debugging easier.
LOG.info("Mock Balance : " + printMock(balancedCluster)); LOG.info("Mock Balance : " + printMock(balancedCluster));
if (assertFullyBalanced) {
assertClusterAsBalanced(balancedCluster); assertClusterAsBalanced(balancedCluster);
List<RegionPlan> secondPlans = loadBalancer.balanceCluster(serverMap); List<RegionPlan> secondPlans = loadBalancer.balanceCluster(serverMap);
assertNull(secondPlans); assertNull(secondPlans);
}
if (assertFullyBalancedForReplicas) { if (assertFullyBalancedForReplicas) {
assertRegionReplicaPlacement(serverMap, rackManager); assertRegionReplicaPlacement(serverMap, rackManager);
} }
@ -682,22 +689,4 @@ public class TestStochasticLoadBalancer extends BalancerTestBase {
return clusterState; return clusterState;
} }
/**
 * Rack resolver for tests that places every name it is asked about on
 * {@code NetworkTopology.DEFAULT_RACK}.
 */
public static class MyRackResolver implements DNSToSwitchMapping {
  public MyRackResolver(Configuration conf) {
    // the configuration is accepted but not used
  }

  /**
   * @param names the names to resolve
   * @return a new list with one {@code NetworkTopology.DEFAULT_RACK} entry per input name
   */
  public List<String> resolve(List<String> names) {
    final int count = names.size();
    List<String> resolved = new ArrayList<String>(count);
    int filled = 0;
    while (filled < count) {
      resolved.add(NetworkTopology.DEFAULT_RACK);
      filled++;
    }
    return resolved;
  }

  public void reloadCachedMappings() {
    // nothing is cached, so there is nothing to reload
  }

  public void reloadCachedMappings(List<String> names) {
    // nothing is cached, so there is nothing to reload
  }
}
} }