HBASE-15529 Override needBalance in StochasticLoadBalancer (Guanghao Zhang)

This commit is contained in:
tedyu 2016-05-21 14:39:41 -07:00
parent aacc816ac7
commit 01d77bf37b
6 changed files with 96 additions and 20 deletions

View File

@ -66,7 +66,7 @@ import com.google.common.collect.Sets;
* *
*/ */
public abstract class BaseLoadBalancer implements LoadBalancer { public abstract class BaseLoadBalancer implements LoadBalancer {
private static final int MIN_SERVER_BALANCE = 2; protected static final int MIN_SERVER_BALANCE = 2;
private volatile boolean stopped = false; private volatile boolean stopped = false;
private static final List<HRegionInfo> EMPTY_REGION_LIST = new ArrayList<HRegionInfo>(0); private static final List<HRegionInfo> EMPTY_REGION_LIST = new ArrayList<HRegionInfo>(0);

View File

@ -42,6 +42,7 @@ import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.master.MasterServices; import org.apache.hadoop.hbase.master.MasterServices;
import org.apache.hadoop.hbase.master.RegionPlan; import org.apache.hadoop.hbase.master.RegionPlan;
import org.apache.hadoop.hbase.master.balancer.BaseLoadBalancer.Cluster;
import org.apache.hadoop.hbase.master.balancer.BaseLoadBalancer.Cluster.Action; import org.apache.hadoop.hbase.master.balancer.BaseLoadBalancer.Cluster.Action;
import org.apache.hadoop.hbase.master.balancer.BaseLoadBalancer.Cluster.Action.Type; import org.apache.hadoop.hbase.master.balancer.BaseLoadBalancer.Cluster.Action.Type;
import org.apache.hadoop.hbase.master.balancer.BaseLoadBalancer.Cluster.AssignRegionAction; import org.apache.hadoop.hbase.master.balancer.BaseLoadBalancer.Cluster.AssignRegionAction;
@ -107,6 +108,8 @@ public class StochasticLoadBalancer extends BaseLoadBalancer {
protected static final String KEEP_REGION_LOADS = protected static final String KEEP_REGION_LOADS =
"hbase.master.balancer.stochastic.numRegionLoadsToRemember"; "hbase.master.balancer.stochastic.numRegionLoadsToRemember";
private static final String TABLE_FUNCTION_SEP = "_"; private static final String TABLE_FUNCTION_SEP = "_";
protected static final String MIN_COST_NEED_BALANCE_KEY =
"hbase.master.balancer.stochastic.minCostNeedBalance";
private static final Random RANDOM = new Random(System.currentTimeMillis()); private static final Random RANDOM = new Random(System.currentTimeMillis());
private static final Log LOG = LogFactory.getLog(StochasticLoadBalancer.class); private static final Log LOG = LogFactory.getLog(StochasticLoadBalancer.class);
@ -118,6 +121,7 @@ public class StochasticLoadBalancer extends BaseLoadBalancer {
private int stepsPerRegion = 800; private int stepsPerRegion = 800;
private long maxRunningTime = 30 * 1000 * 1; // 30 seconds. private long maxRunningTime = 30 * 1000 * 1; // 30 seconds.
private int numRegionLoadsToRemember = 15; private int numRegionLoadsToRemember = 15;
private float minCostNeedBalance = 0.05f;
private CandidateGenerator[] candidateGenerators; private CandidateGenerator[] candidateGenerators;
private CostFromRegionLoadFunction[] regionLoadFunctions; private CostFromRegionLoadFunction[] regionLoadFunctions;
@ -163,6 +167,8 @@ public class StochasticLoadBalancer extends BaseLoadBalancer {
numRegionLoadsToRemember = conf.getInt(KEEP_REGION_LOADS, numRegionLoadsToRemember); numRegionLoadsToRemember = conf.getInt(KEEP_REGION_LOADS, numRegionLoadsToRemember);
isByTable = conf.getBoolean(HConstants.HBASE_MASTER_LOADBALANCE_BYTABLE, isByTable); isByTable = conf.getBoolean(HConstants.HBASE_MASTER_LOADBALANCE_BYTABLE, isByTable);
minCostNeedBalance = conf.getFloat(MIN_COST_NEED_BALANCE_KEY, minCostNeedBalance);
if (localityCandidateGenerator == null) { if (localityCandidateGenerator == null) {
localityCandidateGenerator = new LocalityBasedCandidateGenerator(services); localityCandidateGenerator = new LocalityBasedCandidateGenerator(services);
} }
@ -257,6 +263,41 @@ public class StochasticLoadBalancer extends BaseLoadBalancer {
return false; return false;
} }
/**
 * Decides whether the stochastic balancer should run for the given cluster.
 *
 * <p>Balancing is skipped when fewer than {@code MIN_SERVER_BALANCE} servers are present. It is
 * always requested when some region replicas are colocated. Otherwise the costs of all cost
 * functions with a positive multiplier are combined into a multiplier-weighted average, and
 * balancing is requested only if that average reaches {@code minCostNeedBalance}.
 *
 * <p>Reads {@code cost()} from every entry of {@code costFunctions}; in the visible caller this
 * method is invoked after {@code initCosts(cluster)}.
 *
 * @param cluster the cluster snapshot to evaluate
 * @return {@code true} if a balancing run should be attempted, {@code false} otherwise
 */
@Override
protected boolean needsBalance(Cluster cluster) {
  ClusterLoadState loadState = new ClusterLoadState(cluster.clusterState);
  int numServers = loadState.getNumServers();
  if (numServers < MIN_SERVER_BALANCE) {
    if (LOG.isDebugEnabled()) {
      LOG.debug("Not running balancer because only " + numServers
          + " active regionserver(s)");
    }
    return false;
  }

  // Colocated replicas always warrant a run, regardless of the aggregate cost.
  if (areSomeRegionReplicasColocated(cluster)) {
    return true;
  }

  // Accumulate the weighted cost over the cost functions that are actually enabled.
  double total = 0.0;
  float sumMultiplier = 0.0f;
  for (CostFunction function : costFunctions) {
    float weight = function.getMultiplier();
    if (weight <= 0) {
      continue;
    }
    total += function.cost() * weight;
    sumMultiplier += weight;
  }

  // The division is only reached when sumMultiplier is strictly positive (short-circuit).
  boolean alreadyBalanced = total <= 0
      || sumMultiplier <= 0
      || (total / sumMultiplier) < minCostNeedBalance;
  if (!alreadyBalanced) {
    return true;
  }

  LOG.info("Skipping load balancing because balanced cluster; " + "total cost is " + total
      + ", sum multiplier is " + sumMultiplier + " min cost which need balance is "
      + minCostNeedBalance);
  return false;
}
@Override @Override
public synchronized List<RegionPlan> balanceCluster(TableName tableName, Map<ServerName, public synchronized List<RegionPlan> balanceCluster(TableName tableName, Map<ServerName,
List<HRegionInfo>> clusterState) { List<HRegionInfo>> clusterState) {
@ -298,19 +339,21 @@ public class StochasticLoadBalancer extends BaseLoadBalancer {
// Keep track of servers to iterate through them. // Keep track of servers to iterate through them.
Cluster cluster = new Cluster(clusterState, loads, finder, rackManager); Cluster cluster = new Cluster(clusterState, loads, finder, rackManager);
if (!needsBalance(cluster)) {
return null;
}
long startTime = EnvironmentEdgeManager.currentTime(); long startTime = EnvironmentEdgeManager.currentTime();
initCosts(cluster); initCosts(cluster);
if (!needsBalance(cluster)) {
return null;
}
double currentCost = computeCost(cluster, Double.MAX_VALUE); double currentCost = computeCost(cluster, Double.MAX_VALUE);
curOverallCost = currentCost; curOverallCost = currentCost;
for (int i = 0; i < this.curFunctionCosts.length; i++) { for (int i = 0; i < this.curFunctionCosts.length; i++) {
curFunctionCosts[i] = tempFunctionCosts[i]; curFunctionCosts[i] = tempFunctionCosts[i];
} }
LOG.info("start StochasticLoadBalancer.balancer, initCost=" + currentCost + ", functionCost="
+ functionCost());
double initCost = currentCost; double initCost = currentCost;
double newCost = currentCost; double newCost = currentCost;
@ -407,6 +450,18 @@ public class StochasticLoadBalancer extends BaseLoadBalancer {
} }
} }
/**
 * Builds a one-line summary of every configured cost function for logging.
 *
 * <p>Each entry is rendered as {@code SimpleClassName : (multiplier, cost); }.
 *
 * @return the concatenated summary; empty when {@code costFunctions} has no entries
 */
private String functionCost() {
  StringBuilder summary = new StringBuilder();
  for (CostFunction function : costFunctions) {
    summary.append(function.getClass().getSimpleName())
        .append(" : (")
        .append(function.getMultiplier())
        .append(", ")
        .append(function.cost())
        .append("); ");
  }
  return summary.toString();
}
/** /**
* Create all of the RegionPlan's needed to move from the initial cluster state to the desired * Create all of the RegionPlan's needed to move from the initial cluster state to the desired

View File

@ -83,6 +83,8 @@ public class TestRegionRebalancing {
@Before @Before
public void before() throws Exception { public void before() throws Exception {
UTIL.getConfiguration().set("hbase.master.loadbalancer.class", this.balancerName); UTIL.getConfiguration().set("hbase.master.loadbalancer.class", this.balancerName);
// Set minCostNeedBalance to 0 so that the balancer always runs.
UTIL.getConfiguration().setFloat("hbase.master.balancer.stochastic.minCostNeedBalance", 0.0f);
UTIL.startMiniCluster(1); UTIL.startMiniCluster(1);
this.desc = new HTableDescriptor(TableName.valueOf("test")); this.desc = new HTableDescriptor(TableName.valueOf("test"));
this.desc.addFamily(new HColumnDescriptor(FAMILY_NAME)); this.desc.addFamily(new HColumnDescriptor(FAMILY_NAME));

View File

@ -70,6 +70,7 @@ public class BalancerTestBase {
conf.setFloat("hbase.master.balancer.stochastic.maxMovePercent", 0.75f); conf.setFloat("hbase.master.balancer.stochastic.maxMovePercent", 0.75f);
conf.setFloat("hbase.regions.slop", 0.0f); conf.setFloat("hbase.regions.slop", 0.0f);
conf.setFloat("hbase.master.balancer.stochastic.localityCost", 0); conf.setFloat("hbase.master.balancer.stochastic.localityCost", 0);
conf.setFloat("hbase.master.balancer.stochastic.minCostNeedBalance", 0.0f);
loadBalancer = new StochasticLoadBalancer(); loadBalancer = new StochasticLoadBalancer();
loadBalancer.setConf(conf); loadBalancer.setConf(conf);
} }

View File

@ -93,6 +93,20 @@ public class TestStochasticLoadBalancer extends BalancerTestBase {
} }
} }
/**
 * Verifies that the balancer produces no plans when the minimum cost threshold is raised to
 * 1.0, i.e. every mock cluster is treated as already balanced.
 *
 * <p>The threshold is restored in a {@code finally} block so that a failing assertion cannot
 * leave the raised threshold on the shared {@code conf}/{@code loadBalancer} and cause later
 * tests to skip balancing.
 */
@Test
public void testNeedBalance() {
  conf.setFloat("hbase.master.balancer.stochastic.minCostNeedBalance", 1.0f);
  loadBalancer.setConf(conf);
  try {
    for (int[] mockCluster : clusterStateMocks) {
      Map<ServerName, List<HRegionInfo>> servers = mockClusterServers(mockCluster);
      List<RegionPlan> plans = loadBalancer.balanceCluster(servers);
      assertNull(plans);
    }
  } finally {
    // reset config even if an assertion above failed
    conf.setFloat("hbase.master.balancer.stochastic.minCostNeedBalance", 0.0f);
    loadBalancer.setConf(conf);
  }
}
/** /**
* Test the load balancing algorithm. * Test the load balancing algorithm.
* *

View File

@ -20,6 +20,8 @@ package org.apache.hadoop.hbase.master.balancer;
import org.apache.commons.logging.Log; import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory; import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.testclassification.MediumTests; import org.apache.hadoop.hbase.testclassification.MediumTests;
import org.junit.After;
import org.junit.Before;
import org.junit.Test; import org.junit.Test;
import org.junit.experimental.categories.Category; import org.junit.experimental.categories.Category;
@ -27,13 +29,25 @@ import org.junit.experimental.categories.Category;
public class TestStochasticLoadBalancer2 extends BalancerTestBase { public class TestStochasticLoadBalancer2 extends BalancerTestBase {
private static final Log LOG = LogFactory.getLog(TestStochasticLoadBalancer2.class); private static final Log LOG = LogFactory.getLog(TestStochasticLoadBalancer2.class);
@Test (timeout = 800000) @Before
public void testRegionReplicasOnMidCluster() { public void before() {
conf.setFloat("hbase.master.balancer.stochastic.maxMovePercent", 1.0f); conf.setFloat("hbase.master.balancer.stochastic.maxMovePercent", 1.0f);
conf.setLong(StochasticLoadBalancer.MAX_STEPS_KEY, 2000000L); conf.setLong(StochasticLoadBalancer.MAX_STEPS_KEY, 2000000L);
conf.setLong("hbase.master.balancer.stochastic.maxRunningTime", 90 * 1000); // 90 sec
conf.setFloat("hbase.master.balancer.stochastic.localityCost", 0); conf.setFloat("hbase.master.balancer.stochastic.localityCost", 0);
TestStochasticLoadBalancer.loadBalancer.setConf(conf); conf.setLong("hbase.master.balancer.stochastic.maxRunningTime", 90 * 1000); // 90 sec
conf.setFloat("hbase.master.balancer.stochastic.minCostNeedBalance", 0.05f);
loadBalancer.setConf(conf);
}
/**
 * Restores the minimum-cost threshold to zero after each test and re-applies the configuration
 * to the balancer, so the balancer is not skipped on account of this threshold afterwards.
 */
@After
public void after() {
  final String minCostKey = "hbase.master.balancer.stochastic.minCostNeedBalance";
  conf.setFloat(minCostKey, 0.0f);
  loadBalancer.setConf(conf);
}
@Test (timeout = 800000)
public void testRegionReplicasOnMidCluster() {
int numNodes = 200; int numNodes = 200;
int numRegions = 40 * 200; int numRegions = 40 * 200;
int replication = 3; // 3 replicas per region int replication = 3; // 3 replicas per region
@ -44,11 +58,6 @@ public class TestStochasticLoadBalancer2 extends BalancerTestBase {
@Test (timeout = 800000) @Test (timeout = 800000)
public void testRegionReplicasOnLargeCluster() { public void testRegionReplicasOnLargeCluster() {
conf.setFloat("hbase.master.balancer.stochastic.maxMovePercent", 1.0f);
conf.setLong(StochasticLoadBalancer.MAX_STEPS_KEY, 2000000L);
conf.setLong("hbase.master.balancer.stochastic.maxRunningTime", 90 * 1000); // 90 sec
conf.setFloat("hbase.master.balancer.stochastic.localityCost", 0);
loadBalancer.setConf(conf);
int numNodes = 1000; int numNodes = 1000;
int numRegions = 20 * numNodes; // 20 * replication regions per RS int numRegions = 20 * numNodes; // 20 * replication regions per RS
int numRegionsPerServer = 19; // all servers except one int numRegionsPerServer = 19; // all servers except one
@ -61,8 +70,6 @@ public class TestStochasticLoadBalancer2 extends BalancerTestBase {
public void testRegionReplicasOnMidClusterHighReplication() { public void testRegionReplicasOnMidClusterHighReplication() {
conf.setLong(StochasticLoadBalancer.MAX_STEPS_KEY, 4000000L); conf.setLong(StochasticLoadBalancer.MAX_STEPS_KEY, 4000000L);
conf.setLong("hbase.master.balancer.stochastic.maxRunningTime", 120 * 1000); // 120 sec conf.setLong("hbase.master.balancer.stochastic.maxRunningTime", 120 * 1000); // 120 sec
conf.setFloat("hbase.master.balancer.stochastic.localityCost", 0);
conf.setFloat("hbase.master.balancer.stochastic.maxMovePercent", 1.0f);
loadBalancer.setConf(conf); loadBalancer.setConf(conf);
int numNodes = 80; int numNodes = 80;
int numRegions = 6 * numNodes; int numRegions = 6 * numNodes;
@ -74,10 +81,7 @@ public class TestStochasticLoadBalancer2 extends BalancerTestBase {
@Test (timeout = 800000) @Test (timeout = 800000)
public void testRegionReplicationOnMidClusterReplicationGreaterThanNumNodes() { public void testRegionReplicationOnMidClusterReplicationGreaterThanNumNodes() {
conf.setLong(StochasticLoadBalancer.MAX_STEPS_KEY, 2000000L);
conf.setLong("hbase.master.balancer.stochastic.maxRunningTime", 120 * 1000); // 120 sec conf.setLong("hbase.master.balancer.stochastic.maxRunningTime", 120 * 1000); // 120 sec
conf.setFloat("hbase.master.balancer.stochastic.localityCost", 0);
conf.setFloat("hbase.master.balancer.stochastic.maxMovePercent", 1.0f);
loadBalancer.setConf(conf); loadBalancer.setConf(conf);
int numNodes = 40; int numNodes = 40;
int numRegions = 6 * 50; int numRegions = 6 * 50;
@ -86,4 +90,4 @@ public class TestStochasticLoadBalancer2 extends BalancerTestBase {
int numTables = 10; int numTables = 10;
testWithCluster(numNodes, numRegions, numRegionsPerServer, replication, numTables, true, false); testWithCluster(numNodes, numRegions, numRegionsPerServer, replication, numTables, true, false);
} }
} }