HBASE-1017 Region balancing does not bring newly added node within acceptable range

git-svn-id: https://svn.apache.org/repos/asf/hadoop/hbase/trunk@776823 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael Stack 2009-05-20 19:58:38 +00:00
parent a16ee0714c
commit f37b552cbc
4 changed files with 162 additions and 30 deletions

View File

@ -272,6 +272,8 @@ Release 0.20.0 - Unreleased
HBASE-1420 add ability to add and remove (table) indexes on existing
tables (Clint Morgan via Stack)
HBASE-1430 Read the logs in batches during log splitting to avoid OOME
HBASE-1017 Region balancing does not bring newly added node within
acceptable range (Evgeny Ryabitskiy via Stack)
OPTIMIZATIONS
HBASE-1412 Change values for delete column and column family in KeyValue

View File

@ -102,9 +102,9 @@ class RegionManager implements HConstants {
// How many regions to assign a server at a time.
private final int maxAssignInOneGo;
private final HMaster master;
final HMaster master;
private final RegionHistorian historian;
private final float slop;
private final LoadBalancer loadBalancer;
/** Set of regions to split. */
private final SortedMap<byte[], Pair<HRegionInfo,HServerAddress>>
@ -137,7 +137,7 @@ class RegionManager implements HConstants {
this.master = master;
this.historian = RegionHistorian.getInstance();
this.maxAssignInOneGo = conf.getInt("hbase.regions.percheckin", 10);
this.slop = conf.getFloat("hbase.regions.slop", (float)0.1);
this.loadBalancer = new LoadBalancer(conf);
// The root region
rootScannerThread = new RootScanner(master);
@ -199,20 +199,7 @@ class RegionManager implements HConstants {
if (!inSafeMode()) {
// We only do load balancing once all regions are assigned.
// This prevents churn while the cluster is starting up.
double avgLoad = master.serverManager.getAverageLoad();
double avgLoadWithSlop = avgLoad +
((this.slop != 0)? avgLoad * this.slop: avgLoad);
if (avgLoad > 2.0 &&
thisServersLoad.getNumberOfRegions() > avgLoadWithSlop) {
if (LOG.isDebugEnabled()) {
LOG.debug("Server " + info.getServerName() +
" is overloaded. Server load: " +
thisServersLoad.getNumberOfRegions() + " avg: " + avgLoad +
", slop: " + this.slop);
}
unassignSomeRegions(info, thisServersLoad,
avgLoad, mostLoadedRegions, returnMsgs);
}
loadBalancer.loadBalancing(info, mostLoadedRegions, returnMsgs);
}
} else {
// if there's only one server, just give it all the regions
@ -432,10 +419,9 @@ class RegionManager implements HConstants {
* Note that no synchronization is needed because the only caller
* (assignRegions) whose caller owns the monitor for RegionManager
*/
private void unassignSomeRegions(final HServerInfo info,
final HServerLoad load, final double avgLoad,
final HRegionInfo[] mostLoadedRegions, ArrayList<HMsg> returnMsgs) {
int numRegionsToClose = load.getNumberOfRegions() - (int)Math.ceil(avgLoad);
void unassignSomeRegions(final HServerInfo info,
int numRegionsToClose, final HRegionInfo[] mostLoadedRegions,
ArrayList<HMsg> returnMsgs) {
LOG.debug("Choosing to reassign " + numRegionsToClose
+ " regions. mostLoadedRegions has " + mostLoadedRegions.length
+ " regions in it.");
@ -1154,6 +1140,115 @@ class RegionManager implements HConstants {
}
}
/**
 * Class to balance region servers load.
 * It keeps Region Servers load in slop range by unassigning Regions
 * from most loaded servers.
 *
 * Equilibrium is reached when load of all servers are in slop range
 * [avgLoadMinusSlop, avgLoadPlusSlop], where
 * avgLoadPlusSlop = Math.ceil(avgLoad * (1 + this.slop)), and
 * avgLoadMinusSlop = Math.floor(avgLoad * (1 - this.slop)) - 1.
 */
private class LoadBalancer {
  private final float slop;        // hbase.regions.slop
  private final int maxRegToClose; // hbase.regions.close.max

  /**
   * Reads the balancer settings from the configuration.
   *
   * @param conf configuration supplying "hbase.regions.slop" (default 0.1;
   * a value &lt;= 0 is replaced by 1) and "hbase.regions.close.max"
   * (default -1)
   */
  LoadBalancer(HBaseConfiguration conf) {
    float configuredSlop = conf.getFloat("hbase.regions.slop", (float)0.1);
    this.slop = (configuredSlop <= 0) ? 1 : configuredSlop;
    //maxRegToClose to constrain balance closing per one iteration
    // -1 to turn off
    // TODO: change default in HBASE-862, need a suggestion
    this.maxRegToClose = conf.getInt("hbase.regions.close.max", -1);
  }

  /**
   * Balance server load by unassigning some regions.
   *
   * First checks whether the server is above the upper slop border; if it
   * is not, checks whether it is the most loaded server and regions can be
   * moved to servers below the lower slop border. The resulting count is
   * capped by maxRegToClose when that limit is positive.
   *
   * @param info - server info
   * @param mostLoadedRegions - array of most loaded regions
   * @param returnMsgs - array of return messages
   */
  void loadBalancing(HServerInfo info, HRegionInfo[] mostLoadedRegions,
      ArrayList<HMsg> returnMsgs) {
    HServerLoad servLoad = info.getLoad();
    double avg = master.serverManager.getAverageLoad();

    // nothing to balance if server load not more than average load
    if (servLoad.getLoad() <= Math.ceil(avg) || avg <= 2.0) {
      return;
    }

    // check if server is overloaded
    int numRegionsToClose = balanceFromOverloaded(servLoad, avg);

    // check if we can unload server by low loaded servers; the result must
    // be kept, otherwise this path never unassigns anything
    if (numRegionsToClose <= 0) {
      numRegionsToClose = balanceToLowloaded(info.getServerName(), servLoad,
          avg);
    }

    if (maxRegToClose > 0) {
      numRegionsToClose = Math.min(numRegionsToClose, maxRegToClose);
    }

    if (numRegionsToClose > 0) {
      unassignSomeRegions(info, numRegionsToClose, mostLoadedRegions,
          returnMsgs);
    }
  }

  /*
   * Check if server is overloaded (with load > avgLoadPlusSlop).
   * @return number of regions to unassign, or 0 if the server is not
   * overloaded.
   */
  private int balanceFromOverloaded(HServerLoad srvLoad, double avgLoad) {
    int avgLoadPlusSlop = (int)Math.ceil(avgLoad * (1 + this.slop));
    int numSrvRegs = srvLoad.getNumberOfRegions();
    if (numSrvRegs > avgLoadPlusSlop) {
      if (LOG.isDebugEnabled()) {
        LOG.debug("Server is overloaded. Server load: " + numSrvRegs +
            " avg: " + avgLoad + ", slop: " + this.slop);
      }
      // bring the server down to the (rounded up) average
      return numSrvRegs - (int)Math.ceil(avgLoad);
    }
    return 0;
  }

  /*
   * Check if server is most loaded and can be unloaded to
   * low loaded servers (with load < avgLoadMinusSlop).
   * @return number of regions to unassign, or 0 if this server is not
   * among the most loaded or no server is below the lower border.
   */
  private int balanceToLowloaded(String srvName, HServerLoad srvLoad,
      double avgLoad) {
    SortedMap<HServerLoad, Set<String>> loadToServers =
      master.serverManager.getLoadToServers();

    // check if server most loaded
    if (!loadToServers.get(loadToServers.lastKey()).contains(srvName)) {
      return 0;
    }

    // this server is most loaded, we will try to unload it by lowest
    // loaded servers
    int avgLoadMinusSlop = (int)Math.floor(avgLoad * (1 - this.slop)) - 1;
    int lowestLoad = loadToServers.firstKey().getNumberOfRegions();
    if (lowestLoad >= avgLoadMinusSlop) {
      return 0; // there is no low loaded servers
    }

    int lowSrvCount = loadToServers.get(loadToServers.firstKey()).size();
    int numSrvRegs = srvLoad.getNumberOfRegions();
    // regions the lowest loaded servers can take before reaching the
    // lower border
    int numMoveToLowLoaded = (avgLoadMinusSlop - lowestLoad) * lowSrvCount;
    // never drop this server below the (rounded up) average
    int numRegionsToClose = Math.min(numSrvRegs - (int)Math.ceil(avgLoad),
        numMoveToLowLoaded);

    if (LOG.isDebugEnabled()) {
      LOG.debug("Server " + srvName + " will be unloaded for " +
          "balance. Server load: " + numSrvRegs + " avg: " +
          avgLoad + ", regions can be moved: " + numMoveToLowLoaded);
    }
    return numRegionsToClose;
  }
}
/*
* State of a Region as it transitions from closed to open, etc. See
* note on regionsInTransition data member above for listing of state

View File

@ -174,7 +174,10 @@ class ServerManager implements HConstants {
Set<String> servers = loadToServers.get(load);
if (servers != null) {
servers.remove(serverName);
loadToServers.put(load, servers);
if (servers.size() > 0)
loadToServers.put(load, servers);
else
loadToServers.remove(load);
}
}
}
@ -374,7 +377,10 @@ class ServerManager implements HConstants {
// Note that servers should never be null because loadToServers
// and serversToLoad are manipulated in pairs
servers.remove(serverInfo.getServerName());
loadToServers.put(load, servers);
if (servers.size() > 0)
loadToServers.put(load, servers);
else
loadToServers.remove(load);
}
}
}
@ -641,7 +647,10 @@ class ServerManager implements HConstants {
Set<String> servers = loadToServers.get(load);
if (servers != null) {
servers.remove(serverName);
loadToServers.put(load, servers);
if(servers.size() > 0)
loadToServers.put(load, servers);
else
loadToServers.remove(load);
}
}
}
@ -664,7 +673,7 @@ class ServerManager implements HConstants {
for (HServerLoad load : serversToLoad.values()) {
totalLoad += load.getNumberOfRegions();
}
averageLoad = Math.ceil((double)totalLoad / (double)numServers);
averageLoad = (double)totalLoad / (double)numServers;
}
return averageLoad;
}
@ -707,6 +716,15 @@ class ServerManager implements HConstants {
}
}
/**
 * Gives callers read access to the load-to-servers mapping.
 *
 * @return an unmodifiable wrapper around loadToServers; the wrapper is
 * created while holding the loadToServers monitor
 */
SortedMap<HServerLoad, Set<String>> getLoadToServers() {
  final SortedMap<HServerLoad, Set<String>> readOnlyView;
  synchronized (loadToServers) {
    readOnlyView = Collections.unmodifiableSortedMap(loadToServers);
  }
  return readOnlyView;
}
/**
* Wakes up threads waiting on serversToServerInfo
*/
@ -775,7 +793,10 @@ class ServerManager implements HConstants {
Set<String> servers = loadToServers.get(load);
if (servers != null) {
servers.remove(serverName);
loadToServers.put(load, servers);
if(servers.size() > 0)
loadToServers.put(load, servers);
else
loadToServers.remove(load);
}
}
}

View File

@ -122,6 +122,12 @@ public class TestRegionRebalancing extends HBaseClusterTestCase {
LOG.debug("Adding 4th region server");
cluster.startRegionServer();
assertRegionsAreBalanced();
for (int i = 0; i < 6; i++){
LOG.debug("Adding " + (i + 5) + "th region server");
cluster.startRegionServer();
}
assertRegionsAreBalanced();
}
/** figure out how many regions are currently being served. */
@ -140,6 +146,8 @@ public class TestRegionRebalancing extends HBaseClusterTestCase {
*/
private void assertRegionsAreBalanced() {
boolean success = false;
float slop = conf.getFloat("hbase.regions.slop", (float)0.1);
if (slop <= 0) slop = 1;
for (int i = 0; i < 5; i++) {
success = true;
@ -148,14 +156,20 @@ public class TestRegionRebalancing extends HBaseClusterTestCase {
int regionCount = getRegionCount();
List<HRegionServer> servers = getOnlineRegionServers();
double avg = Math.ceil((double)regionCount / (double)servers.size());
double avg = cluster.getMaster().getAverageLoad();
int avgLoadPlusSlop = (int)Math.ceil(avg * (1 + slop));
int avgLoadMinusSlop = (int)Math.floor(avg * (1 - slop)) - 1;
LOG.debug("There are " + servers.size() + " servers and " + regionCount
+ " regions. Load Average: " + avg);
+ " regions. Load Average: " + avg + " low border: " + avgLoadMinusSlop
+ ", up border: " + avgLoadPlusSlop + "; attempt: " + i);
for (HRegionServer server : servers) {
int serverLoad = server.getOnlineRegions().size();
LOG.debug(server.hashCode() + " Avg: " + avg + " actual: " + serverLoad);
if (!(serverLoad <= avg + 2 && serverLoad >= avg - 2)) {
if (!(avg > 2.0 && serverLoad <= avgLoadPlusSlop
&& serverLoad >= avgLoadMinusSlop)) {
LOG.debug(server.hashCode() + " Isn't balanced!!! Avg: " + avg +
" actual: " + serverLoad + " slop: " + slop);
success = false;
}
}
@ -216,4 +230,4 @@ public class TestRegionRebalancing extends HBaseClusterTestCase {
region.getLog().closeAndDelete();
return region;
}
}
}