HBASE-12007 StochasticBalancer should avoid putting user regions on master

This commit is contained in:
Jimmy Xiang 2014-09-17 12:34:43 -07:00
parent 098f8c4401
commit 74c6b33e84
4 changed files with 39 additions and 42 deletions

View File

@ -160,6 +160,7 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
tablesOnMaster, rackManager);
}
@SuppressWarnings("unchecked")
protected Cluster(
ServerName masterServerName,
Collection<HRegionInfo> unassignedRegions,
@ -847,35 +848,25 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
private static final Random RANDOM = new Random(System.currentTimeMillis());
private static final Log LOG = LogFactory.getLog(BaseLoadBalancer.class);
// The weight means that each region on the active/backup master is
// The weight means that each region on the backup master is
// equal to that many regions on a normal regionserver, in calculating
// the region load by the load balancer. So that the active/backup master
// the region load by the load balancer. So that the backup master
// can host fewer (or equal if weight = 1) regions than normal regionservers.
//
// The weight can be used to control the number of regions on backup
// masters, which shouldn't host as many regions as normal regionservers.
// So that we don't need to move around too many regions when a
// backup master becomes the active one.
//
// Currently, the active master weight is used only by StochasticLoadBalancer.
// Generally, we don't put any user regions on the active master, which
// only hosts regions of tables defined in TABLES_ON_MASTER.
// That's why the default activeMasterWeight is high.
public static final String BACKUP_MASTER_WEIGHT_KEY =
"hbase.balancer.backupMasterWeight";
public static final int DEFAULT_BACKUP_MASTER_WEIGHT = 1;
private static final String ACTIVE_MASTER_WEIGHT_KEY =
"hbase.balancer.activeMasterWeight";
private static final int DEFAULT_ACTIVE_MASTER_WEIGHT = 200;
// Regions of these tables are put on the master by default.
private static final String[] DEFAULT_TABLES_ON_MASTER =
new String[] {AccessControlLists.ACL_TABLE_NAME.getNameAsString(),
TableName.NAMESPACE_TABLE_NAME.getNameAsString(),
TableName.META_TABLE_NAME.getNameAsString()};
protected int activeMasterWeight;
protected int backupMasterWeight;
// a flag to indicate if assigning regions to backup masters
@ -896,8 +887,6 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
else if (slop > 1) slop = 1;
this.config = conf;
activeMasterWeight = conf.getInt(
ACTIVE_MASTER_WEIGHT_KEY, DEFAULT_ACTIVE_MASTER_WEIGHT);
backupMasterWeight = conf.getInt(
BACKUP_MASTER_WEIGHT_KEY, DEFAULT_BACKUP_MASTER_WEIGHT);
if (backupMasterWeight < 1) {

View File

@ -157,7 +157,7 @@ public class StochasticLoadBalancer extends BaseLoadBalancer {
regionReplicaRackCostFunction = new RegionReplicaRackCostFunction(conf);
costFunctions = new CostFunction[]{
new RegionCountSkewCostFunction(conf, activeMasterWeight, backupMasterWeight),
new RegionCountSkewCostFunction(conf, backupMasterWeight),
new MoveCostFunction(conf),
localityCost,
new TableSkewCostFunction(conf),
@ -421,7 +421,11 @@ public class StochasticLoadBalancer extends BaseLoadBalancer {
return -1;
}
return RANDOM.nextInt(cluster.numServers);
int n = RANDOM.nextInt(cluster.numServers);
if (cluster.numServers > 1 && cluster.isActiveMaster(n)) {
n = (n + 1) % cluster.numServers;
}
return n;
}
protected int pickRandomRack(Cluster cluster) {
@ -433,7 +437,7 @@ public class StochasticLoadBalancer extends BaseLoadBalancer {
}
protected int pickOtherRandomServer(Cluster cluster, int serverIndex) {
if (cluster.numServers < 2) {
if (cluster.numServers <= 2) {
return -1;
}
while (true) {
@ -523,8 +527,13 @@ public class StochasticLoadBalancer extends BaseLoadBalancer {
private int pickLeastLoadedServer(final Cluster cluster, int thisServer) {
Integer[] servers = cluster.serverIndicesSortedByRegionCount;
if (servers.length <= 2) {
return thisServer -1;
}
int index = 0;
while (servers[index] == null || servers[index] == thisServer) {
while (servers[index] == null || servers[index] == thisServer
|| cluster.isActiveMaster(index)) {
index++;
if (index == servers.length) {
return -1;
@ -537,7 +546,8 @@ public class StochasticLoadBalancer extends BaseLoadBalancer {
Integer[] servers = cluster.serverIndicesSortedByRegionCount;
int index = servers.length - 1;
while (servers[index] == null || servers[index] == thisServer) {
while (servers[index] == null || servers[index] == thisServer
|| cluster.isActiveMaster(index)) {
index--;
if (index < 0) {
return -1;
@ -787,14 +797,23 @@ public class StochasticLoadBalancer extends BaseLoadBalancer {
protected double costFromArray(double[] stats) {
double totalCost = 0;
double total = getSum(stats);
double mean = total/((double)stats.length);
double count = stats.length;
if (stats.length > 1 && cluster.masterServerName != null) {
count--; // Exclude the active master
}
double mean = total/count;
// Compute max as if all region servers had 0 and one had the sum of all costs. This must be
// a zero sum cost for this to make sense.
// TODO: Should we make this sum of square errors?
double max = ((count - 1) * mean) + (total - mean);
for (double n : stats) {
for (int i=0; i<stats.length; i++) {
if (stats.length > 1 && cluster.isActiveMaster(i)) {
// Not count the active master load
continue;
}
double n = stats[i];
double diff = Math.abs(mean - n);
totalCost += diff;
}
@ -881,16 +900,13 @@ public class StochasticLoadBalancer extends BaseLoadBalancer {
"hbase.master.balancer.stochastic.regionCountCost";
private static final float DEFAULT_REGION_COUNT_SKEW_COST = 500;
private double activeMasterWeight;
private double backupMasterWeight;
private double[] stats = null;
RegionCountSkewCostFunction(Configuration conf,
double activeMasterWeight, double backupMasterWeight) {
RegionCountSkewCostFunction(Configuration conf, double backupMasterWeight) {
super(conf);
// Load multiplier should be the greatest as it is the most general way to balance data.
this.setMultiplier(conf.getFloat(REGION_COUNT_SKEW_COST_KEY, DEFAULT_REGION_COUNT_SKEW_COST));
this.activeMasterWeight = activeMasterWeight;
this.backupMasterWeight = backupMasterWeight;
}
@ -904,9 +920,7 @@ public class StochasticLoadBalancer extends BaseLoadBalancer {
stats[i] = cluster.regionsPerServer[i].length;
// Use some weight on regions assigned to active/backup masters,
// so that they won't carry as many regions as normal regionservers.
if (cluster.isActiveMaster(i)) {
stats[i] += cluster.numUserRegionsOnMaster * (activeMasterWeight - 1);
} else if (cluster.isBackupMaster(i)) {
if (cluster.isBackupMaster(i)) {
stats[i] *= backupMasterWeight;
}
}

View File

@ -29,7 +29,6 @@ import java.util.List;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.RegionLocator;
@ -96,6 +95,7 @@ public class TestRegionRebalancing {
* @throws InterruptedException
*/
@Test (timeout=300000)
@SuppressWarnings("deprecation")
public void testRebalanceOnRegionServerNumberChange()
throws IOException, InterruptedException {
HBaseAdmin admin = new HBaseAdmin(UTIL.getConfiguration());
@ -149,15 +149,7 @@ public class TestRegionRebalancing {
assert(UTIL.getHBaseCluster().getMaster().balance() == true);
assertRegionsAreBalanced();
table.close();
}
/** figure out how many regions are currently being served. */
private int getRegionCount() throws IOException {
int total = 0; // Regions on master are ignored since not counted for balancing
for (HRegionServer server : getOnlineRegionServers()) {
total += ProtobufUtil.getOnlineRegions(server.getRSRpcServices()).size();
}
return total;
admin.close();
}
/**
@ -177,7 +169,7 @@ public class TestRegionRebalancing {
// make sure all the regions are reassigned before we test balance
waitForAllRegionsAssigned();
int regionCount = getRegionCount();
long regionCount = UTIL.getMiniHBaseCluster().countServedRegions();
List<HRegionServer> servers = getOnlineRegionServers();
double avg = UTIL.getHBaseCluster().getMaster().getAverageLoad();
int avgLoadPlusSlop = (int)Math.ceil(avg * (1 + slop));
@ -241,9 +233,10 @@ public class TestRegionRebalancing {
*/
private void waitForAllRegionsAssigned() throws IOException {
int totalRegions = HBaseTestingUtility.KEYS.length;
while (getRegionCount() < totalRegions) {
while (UTIL.getMiniHBaseCluster().countServedRegions() < totalRegions) {
// while (!cluster.getMaster().allRegionsAssigned()) {
LOG.debug("Waiting for there to be "+ totalRegions +" regions, but there are " + getRegionCount() + " right now.");
LOG.debug("Waiting for there to be "+ totalRegions +" regions, but there are "
+ UTIL.getMiniHBaseCluster().countServedRegions() + " right now.");
try {
Thread.sleep(200);
} catch (InterruptedException e) {}

View File

@ -194,7 +194,7 @@ public class TestStochasticLoadBalancer extends BalancerTestBase {
public void testSkewCost() {
Configuration conf = HBaseConfiguration.create();
StochasticLoadBalancer.CostFunction
costFunction = new StochasticLoadBalancer.RegionCountSkewCostFunction(conf, 1, 1);
costFunction = new StochasticLoadBalancer.RegionCountSkewCostFunction(conf, 1);
for (int[] mockCluster : clusterStateMocks) {
costFunction.init(mockCluster(mockCluster));
double cost = costFunction.cost();
@ -240,6 +240,7 @@ public class TestStochasticLoadBalancer extends BalancerTestBase {
Configuration conf = HBaseConfiguration.create();
StochasticLoadBalancer.CostFromRegionLoadFunction
costFunction = new StochasticLoadBalancer.MemstoreSizeCostFunction(conf);
costFunction.init(mockCluster(new int[]{0, 0, 0, 0, 1}));
double[] statOne = new double[100];
for (int i =0; i < 100; i++) {