HBASE-12007 StochasticBalancer should avoid putting user regions on master
This commit is contained in:
parent
098f8c4401
commit
74c6b33e84
|
@ -160,6 +160,7 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
|
|||
tablesOnMaster, rackManager);
|
||||
}
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
protected Cluster(
|
||||
ServerName masterServerName,
|
||||
Collection<HRegionInfo> unassignedRegions,
|
||||
|
@ -847,35 +848,25 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
|
|||
private static final Random RANDOM = new Random(System.currentTimeMillis());
|
||||
private static final Log LOG = LogFactory.getLog(BaseLoadBalancer.class);
|
||||
|
||||
// The weight means that each region on the active/backup master is
|
||||
// The weight means that each region on the backup master is
|
||||
// equal to that many regions on a normal regionserver, in calculating
|
||||
// the region load by the load balancer. So that the active/backup master
|
||||
// the region load by the load balancer. So that the backup master
|
||||
// can host fewer (or equal if weight = 1) regions than normal regionservers.
|
||||
//
|
||||
// The weight can be used to control the number of regions on backup
|
||||
// masters, which shouldn't host as many regions as normal regionservers.
|
||||
// So that we don't need to move around too many regions when a
|
||||
// backup master becomes the active one.
|
||||
//
|
||||
// Currently, the active master weight is used only by StochasticLoadBalancer.
|
||||
// Generally, we don't put any user regions on the active master, which
|
||||
// only hosts regions of tables defined in TABLES_ON_MASTER.
|
||||
// That's why the default activeMasterWeight is high.
|
||||
public static final String BACKUP_MASTER_WEIGHT_KEY =
|
||||
"hbase.balancer.backupMasterWeight";
|
||||
public static final int DEFAULT_BACKUP_MASTER_WEIGHT = 1;
|
||||
|
||||
private static final String ACTIVE_MASTER_WEIGHT_KEY =
|
||||
"hbase.balancer.activeMasterWeight";
|
||||
private static final int DEFAULT_ACTIVE_MASTER_WEIGHT = 200;
|
||||
|
||||
// Regions of these tables are put on the master by default.
|
||||
private static final String[] DEFAULT_TABLES_ON_MASTER =
|
||||
new String[] {AccessControlLists.ACL_TABLE_NAME.getNameAsString(),
|
||||
TableName.NAMESPACE_TABLE_NAME.getNameAsString(),
|
||||
TableName.META_TABLE_NAME.getNameAsString()};
|
||||
|
||||
protected int activeMasterWeight;
|
||||
protected int backupMasterWeight;
|
||||
|
||||
// a flag to indicate if assigning regions to backup masters
|
||||
|
@ -896,8 +887,6 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
|
|||
else if (slop > 1) slop = 1;
|
||||
|
||||
this.config = conf;
|
||||
activeMasterWeight = conf.getInt(
|
||||
ACTIVE_MASTER_WEIGHT_KEY, DEFAULT_ACTIVE_MASTER_WEIGHT);
|
||||
backupMasterWeight = conf.getInt(
|
||||
BACKUP_MASTER_WEIGHT_KEY, DEFAULT_BACKUP_MASTER_WEIGHT);
|
||||
if (backupMasterWeight < 1) {
|
||||
|
|
|
@ -157,7 +157,7 @@ public class StochasticLoadBalancer extends BaseLoadBalancer {
|
|||
regionReplicaRackCostFunction = new RegionReplicaRackCostFunction(conf);
|
||||
|
||||
costFunctions = new CostFunction[]{
|
||||
new RegionCountSkewCostFunction(conf, activeMasterWeight, backupMasterWeight),
|
||||
new RegionCountSkewCostFunction(conf, backupMasterWeight),
|
||||
new MoveCostFunction(conf),
|
||||
localityCost,
|
||||
new TableSkewCostFunction(conf),
|
||||
|
@ -421,7 +421,11 @@ public class StochasticLoadBalancer extends BaseLoadBalancer {
|
|||
return -1;
|
||||
}
|
||||
|
||||
return RANDOM.nextInt(cluster.numServers);
|
||||
int n = RANDOM.nextInt(cluster.numServers);
|
||||
if (cluster.numServers > 1 && cluster.isActiveMaster(n)) {
|
||||
n = (n + 1) % cluster.numServers;
|
||||
}
|
||||
return n;
|
||||
}
|
||||
|
||||
protected int pickRandomRack(Cluster cluster) {
|
||||
|
@ -433,7 +437,7 @@ public class StochasticLoadBalancer extends BaseLoadBalancer {
|
|||
}
|
||||
|
||||
protected int pickOtherRandomServer(Cluster cluster, int serverIndex) {
|
||||
if (cluster.numServers < 2) {
|
||||
if (cluster.numServers <= 2) {
|
||||
return -1;
|
||||
}
|
||||
while (true) {
|
||||
|
@ -523,8 +527,13 @@ public class StochasticLoadBalancer extends BaseLoadBalancer {
|
|||
private int pickLeastLoadedServer(final Cluster cluster, int thisServer) {
|
||||
Integer[] servers = cluster.serverIndicesSortedByRegionCount;
|
||||
|
||||
if (servers.length <= 2) {
|
||||
return thisServer -1;
|
||||
}
|
||||
|
||||
int index = 0;
|
||||
while (servers[index] == null || servers[index] == thisServer) {
|
||||
while (servers[index] == null || servers[index] == thisServer
|
||||
|| cluster.isActiveMaster(index)) {
|
||||
index++;
|
||||
if (index == servers.length) {
|
||||
return -1;
|
||||
|
@ -537,7 +546,8 @@ public class StochasticLoadBalancer extends BaseLoadBalancer {
|
|||
Integer[] servers = cluster.serverIndicesSortedByRegionCount;
|
||||
|
||||
int index = servers.length - 1;
|
||||
while (servers[index] == null || servers[index] == thisServer) {
|
||||
while (servers[index] == null || servers[index] == thisServer
|
||||
|| cluster.isActiveMaster(index)) {
|
||||
index--;
|
||||
if (index < 0) {
|
||||
return -1;
|
||||
|
@ -787,14 +797,23 @@ public class StochasticLoadBalancer extends BaseLoadBalancer {
|
|||
protected double costFromArray(double[] stats) {
|
||||
double totalCost = 0;
|
||||
double total = getSum(stats);
|
||||
double mean = total/((double)stats.length);
|
||||
|
||||
double count = stats.length;
|
||||
if (stats.length > 1 && cluster.masterServerName != null) {
|
||||
count--; // Exclude the active master
|
||||
}
|
||||
double mean = total/count;
|
||||
|
||||
// Compute max as if all region servers had 0 and one had the sum of all costs. This must be
|
||||
// a zero sum cost for this to make sense.
|
||||
// TODO: Should we make this sum of square errors?
|
||||
double max = ((count - 1) * mean) + (total - mean);
|
||||
for (double n : stats) {
|
||||
for (int i=0; i<stats.length; i++) {
|
||||
if (stats.length > 1 && cluster.isActiveMaster(i)) {
|
||||
// Do not count the active master's load
|
||||
continue;
|
||||
}
|
||||
double n = stats[i];
|
||||
double diff = Math.abs(mean - n);
|
||||
totalCost += diff;
|
||||
}
|
||||
|
@ -881,16 +900,13 @@ public class StochasticLoadBalancer extends BaseLoadBalancer {
|
|||
"hbase.master.balancer.stochastic.regionCountCost";
|
||||
private static final float DEFAULT_REGION_COUNT_SKEW_COST = 500;
|
||||
|
||||
private double activeMasterWeight;
|
||||
private double backupMasterWeight;
|
||||
private double[] stats = null;
|
||||
|
||||
RegionCountSkewCostFunction(Configuration conf,
|
||||
double activeMasterWeight, double backupMasterWeight) {
|
||||
RegionCountSkewCostFunction(Configuration conf, double backupMasterWeight) {
|
||||
super(conf);
|
||||
// Load multiplier should be the greatest as it is the most general way to balance data.
|
||||
this.setMultiplier(conf.getFloat(REGION_COUNT_SKEW_COST_KEY, DEFAULT_REGION_COUNT_SKEW_COST));
|
||||
this.activeMasterWeight = activeMasterWeight;
|
||||
this.backupMasterWeight = backupMasterWeight;
|
||||
}
|
||||
|
||||
|
@ -904,9 +920,7 @@ public class StochasticLoadBalancer extends BaseLoadBalancer {
|
|||
stats[i] = cluster.regionsPerServer[i].length;
|
||||
// Use some weight on regions assigned to active/backup masters,
|
||||
// so that they won't carry as many regions as normal regionservers.
|
||||
if (cluster.isActiveMaster(i)) {
|
||||
stats[i] += cluster.numUserRegionsOnMaster * (activeMasterWeight - 1);
|
||||
} else if (cluster.isBackupMaster(i)) {
|
||||
if (cluster.isBackupMaster(i)) {
|
||||
stats[i] *= backupMasterWeight;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -29,7 +29,6 @@ import java.util.List;
|
|||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.hadoop.hbase.client.Admin;
|
||||
import org.apache.hadoop.hbase.client.HBaseAdmin;
|
||||
import org.apache.hadoop.hbase.client.HTable;
|
||||
import org.apache.hadoop.hbase.client.RegionLocator;
|
||||
|
@ -96,6 +95,7 @@ public class TestRegionRebalancing {
|
|||
* @throws InterruptedException
|
||||
*/
|
||||
@Test (timeout=300000)
|
||||
@SuppressWarnings("deprecation")
|
||||
public void testRebalanceOnRegionServerNumberChange()
|
||||
throws IOException, InterruptedException {
|
||||
HBaseAdmin admin = new HBaseAdmin(UTIL.getConfiguration());
|
||||
|
@ -149,15 +149,7 @@ public class TestRegionRebalancing {
|
|||
assert(UTIL.getHBaseCluster().getMaster().balance() == true);
|
||||
assertRegionsAreBalanced();
|
||||
table.close();
|
||||
}
|
||||
|
||||
/** figure out how many regions are currently being served. */
|
||||
private int getRegionCount() throws IOException {
|
||||
int total = 0; // Regions on master are ignored since not counted for balancing
|
||||
for (HRegionServer server : getOnlineRegionServers()) {
|
||||
total += ProtobufUtil.getOnlineRegions(server.getRSRpcServices()).size();
|
||||
}
|
||||
return total;
|
||||
admin.close();
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -177,7 +169,7 @@ public class TestRegionRebalancing {
|
|||
// make sure all the regions are reassigned before we test balance
|
||||
waitForAllRegionsAssigned();
|
||||
|
||||
int regionCount = getRegionCount();
|
||||
long regionCount = UTIL.getMiniHBaseCluster().countServedRegions();
|
||||
List<HRegionServer> servers = getOnlineRegionServers();
|
||||
double avg = UTIL.getHBaseCluster().getMaster().getAverageLoad();
|
||||
int avgLoadPlusSlop = (int)Math.ceil(avg * (1 + slop));
|
||||
|
@ -241,9 +233,10 @@ public class TestRegionRebalancing {
|
|||
*/
|
||||
private void waitForAllRegionsAssigned() throws IOException {
|
||||
int totalRegions = HBaseTestingUtility.KEYS.length;
|
||||
while (getRegionCount() < totalRegions) {
|
||||
while (UTIL.getMiniHBaseCluster().countServedRegions() < totalRegions) {
|
||||
// while (!cluster.getMaster().allRegionsAssigned()) {
|
||||
LOG.debug("Waiting for there to be "+ totalRegions +" regions, but there are " + getRegionCount() + " right now.");
|
||||
LOG.debug("Waiting for there to be "+ totalRegions +" regions, but there are "
|
||||
+ UTIL.getMiniHBaseCluster().countServedRegions() + " right now.");
|
||||
try {
|
||||
Thread.sleep(200);
|
||||
} catch (InterruptedException e) {}
|
||||
|
|
|
@ -194,7 +194,7 @@ public class TestStochasticLoadBalancer extends BalancerTestBase {
|
|||
public void testSkewCost() {
|
||||
Configuration conf = HBaseConfiguration.create();
|
||||
StochasticLoadBalancer.CostFunction
|
||||
costFunction = new StochasticLoadBalancer.RegionCountSkewCostFunction(conf, 1, 1);
|
||||
costFunction = new StochasticLoadBalancer.RegionCountSkewCostFunction(conf, 1);
|
||||
for (int[] mockCluster : clusterStateMocks) {
|
||||
costFunction.init(mockCluster(mockCluster));
|
||||
double cost = costFunction.cost();
|
||||
|
@ -240,6 +240,7 @@ public class TestStochasticLoadBalancer extends BalancerTestBase {
|
|||
Configuration conf = HBaseConfiguration.create();
|
||||
StochasticLoadBalancer.CostFromRegionLoadFunction
|
||||
costFunction = new StochasticLoadBalancer.MemstoreSizeCostFunction(conf);
|
||||
costFunction.init(mockCluster(new int[]{0, 0, 0, 0, 1}));
|
||||
|
||||
double[] statOne = new double[100];
|
||||
for (int i =0; i < 100; i++) {
|
||||
|
|
Loading…
Reference in New Issue