HBASE-1017 Region balancing does not bring newly added node within acceptable range
git-svn-id: https://svn.apache.org/repos/asf/hadoop/hbase/trunk@776823 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
a16ee0714c
commit
f37b552cbc
|
@ -272,6 +272,8 @@ Release 0.20.0 - Unreleased
|
|||
HBASE-1420 add ability to add and remove (table) indexes on existing
|
||||
tables (Clint Morgan via Stack)
|
||||
HBASE-1430 Read the logs in batches during log splitting to avoid OOME
|
||||
HBASE-1017 Region balancing does not bring newly added node within
|
||||
acceptable range (Evgeny Ryabitskiy via Stack)
|
||||
|
||||
OPTIMIZATIONS
|
||||
HBASE-1412 Change values for delete column and column family in KeyValue
|
||||
|
|
|
@ -102,9 +102,9 @@ class RegionManager implements HConstants {
|
|||
// How many regions to assign a server at a time.
|
||||
private final int maxAssignInOneGo;
|
||||
|
||||
private final HMaster master;
|
||||
final HMaster master;
|
||||
private final RegionHistorian historian;
|
||||
private final float slop;
|
||||
private final LoadBalancer loadBalancer;
|
||||
|
||||
/** Set of regions to split. */
|
||||
private final SortedMap<byte[], Pair<HRegionInfo,HServerAddress>>
|
||||
|
@ -137,7 +137,7 @@ class RegionManager implements HConstants {
|
|||
this.master = master;
|
||||
this.historian = RegionHistorian.getInstance();
|
||||
this.maxAssignInOneGo = conf.getInt("hbase.regions.percheckin", 10);
|
||||
this.slop = conf.getFloat("hbase.regions.slop", (float)0.1);
|
||||
this.loadBalancer = new LoadBalancer(conf);
|
||||
|
||||
// The root region
|
||||
rootScannerThread = new RootScanner(master);
|
||||
|
@ -199,20 +199,7 @@ class RegionManager implements HConstants {
|
|||
if (!inSafeMode()) {
|
||||
// We only do load balancing once all regions are assigned.
|
||||
// This prevents churn while the cluster is starting up.
|
||||
double avgLoad = master.serverManager.getAverageLoad();
|
||||
double avgLoadWithSlop = avgLoad +
|
||||
((this.slop != 0)? avgLoad * this.slop: avgLoad);
|
||||
if (avgLoad > 2.0 &&
|
||||
thisServersLoad.getNumberOfRegions() > avgLoadWithSlop) {
|
||||
if (LOG.isDebugEnabled()) {
|
||||
LOG.debug("Server " + info.getServerName() +
|
||||
" is overloaded. Server load: " +
|
||||
thisServersLoad.getNumberOfRegions() + " avg: " + avgLoad +
|
||||
", slop: " + this.slop);
|
||||
}
|
||||
unassignSomeRegions(info, thisServersLoad,
|
||||
avgLoad, mostLoadedRegions, returnMsgs);
|
||||
}
|
||||
loadBalancer.loadBalancing(info, mostLoadedRegions, returnMsgs);
|
||||
}
|
||||
} else {
|
||||
// if there's only one server, just give it all the regions
|
||||
|
@ -432,10 +419,9 @@ class RegionManager implements HConstants {
|
|||
* Note that no synchronization is needed because the only caller
|
||||
* (assignRegions) whose caller owns the monitor for RegionManager
|
||||
*/
|
||||
private void unassignSomeRegions(final HServerInfo info,
|
||||
final HServerLoad load, final double avgLoad,
|
||||
final HRegionInfo[] mostLoadedRegions, ArrayList<HMsg> returnMsgs) {
|
||||
int numRegionsToClose = load.getNumberOfRegions() - (int)Math.ceil(avgLoad);
|
||||
void unassignSomeRegions(final HServerInfo info,
|
||||
int numRegionsToClose, final HRegionInfo[] mostLoadedRegions,
|
||||
ArrayList<HMsg> returnMsgs) {
|
||||
LOG.debug("Choosing to reassign " + numRegionsToClose
|
||||
+ " regions. mostLoadedRegions has " + mostLoadedRegions.length
|
||||
+ " regions in it.");
|
||||
|
@ -1154,6 +1140,115 @@ class RegionManager implements HConstants {
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Class to balance region servers load.
|
||||
* It keeps Region Servers load in slop range by unassigning Regions
|
||||
* from most loaded servers.
|
||||
*
|
||||
* Equilibrium is reached when load of all serves are in slop range
|
||||
* [avgLoadMinusSlop, avgLoadPlusSlop], where
|
||||
* avgLoadPlusSlop = Math.ceil(avgLoad * (1 + this.slop)), and
|
||||
* avgLoadMinusSlop = Math.floor(avgLoad * (1 - this.slop)) - 1.
|
||||
*/
|
||||
private class LoadBalancer {
|
||||
private float slop; // hbase.regions.slop
|
||||
private final int maxRegToClose; // hbase.regions.close.max
|
||||
|
||||
LoadBalancer(HBaseConfiguration conf) {
|
||||
this.slop = conf.getFloat("hbase.regions.slop", (float)0.1);
|
||||
if (this.slop <= 0) this.slop = 1;
|
||||
//maxRegToClose to constrain balance closing per one iteration
|
||||
// -1 to turn off
|
||||
// TODO: change default in HBASE-862, need a suggestion
|
||||
this.maxRegToClose = conf.getInt("hbase.regions.close.max", -1);
|
||||
}
|
||||
|
||||
/**
|
||||
* Balance server load by unassigning some regions.
|
||||
*
|
||||
* @param info - server info
|
||||
* @param mostLoadedRegions - array of most loaded regions
|
||||
* @param returnMsgs - array of return massages
|
||||
*/
|
||||
void loadBalancing(HServerInfo info, HRegionInfo[] mostLoadedRegions,
|
||||
ArrayList<HMsg> returnMsgs) {
|
||||
HServerLoad servLoad = info.getLoad();
|
||||
double avg = master.serverManager.getAverageLoad();
|
||||
|
||||
// nothing to balance if server load not more then average load
|
||||
if (servLoad.getLoad() <= Math.ceil(avg) || avg <= 2.0) return;
|
||||
|
||||
// check if server is overloaded
|
||||
int numRegionsToClose = balanceFromOverloaded(servLoad, avg);
|
||||
|
||||
// check if we can unload server by low loaded servers
|
||||
if (numRegionsToClose <= 0)
|
||||
balanceToLowloaded(info.getServerName(), servLoad, avg);
|
||||
|
||||
if (maxRegToClose > 0)
|
||||
numRegionsToClose = Math.min(numRegionsToClose, maxRegToClose);
|
||||
|
||||
if (numRegionsToClose > 0){
|
||||
unassignSomeRegions(info, numRegionsToClose, mostLoadedRegions,
|
||||
returnMsgs);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Check if server load is not overloaded (with load > avgLoadPlusSlop).
|
||||
* @return number of regions to unassign.
|
||||
*/
|
||||
private int balanceFromOverloaded(HServerLoad srvLoad, double avgLoad) {
|
||||
int avgLoadPlusSlop = (int)Math.ceil(avgLoad * (1 + this.slop));
|
||||
int numSrvRegs = srvLoad.getNumberOfRegions();
|
||||
if (numSrvRegs > avgLoadPlusSlop) {
|
||||
if (LOG.isDebugEnabled()) {
|
||||
LOG.debug("Server is overloaded. Server load: " + numSrvRegs +
|
||||
" avg: " + avgLoad + ", slop: " + this.slop);
|
||||
}
|
||||
return numSrvRegs - (int)Math.ceil(avgLoad);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Check if server is most loaded and can be unloaded to
|
||||
* low loaded servers (with load < avgLoadMinusSlop).
|
||||
* @return number of regions to unassign.
|
||||
*/
|
||||
private int balanceToLowloaded(String srvName, HServerLoad srvLoad,
|
||||
double avgLoad) {
|
||||
|
||||
SortedMap<HServerLoad, Set<String>> loadToServers =
|
||||
master.serverManager.getLoadToServers();
|
||||
// check if server most loaded
|
||||
if (!loadToServers.get(loadToServers.lastKey()).contains(srvName))
|
||||
return 0;
|
||||
|
||||
// this server is most loaded, we will try to unload it by lowest
|
||||
// loaded servers
|
||||
int avgLoadMinusSlop = (int)Math.floor(avgLoad * (1 - this.slop)) - 1;
|
||||
int lowestLoad = loadToServers.firstKey().getNumberOfRegions();
|
||||
|
||||
if(lowestLoad >= avgLoadMinusSlop)
|
||||
return 0; // there is no low loaded servers
|
||||
|
||||
int lowSrvCount = loadToServers.get(loadToServers.firstKey()).size();
|
||||
int numRegionsToClose = 0;
|
||||
|
||||
int numSrvRegs = srvLoad.getNumberOfRegions();
|
||||
int numMoveToLowLoaded = (avgLoadMinusSlop - lowestLoad) * lowSrvCount;
|
||||
numRegionsToClose = numSrvRegs - (int)Math.ceil(avgLoad);
|
||||
numRegionsToClose = Math.min(numRegionsToClose, numMoveToLowLoaded);
|
||||
if (LOG.isDebugEnabled()) {
|
||||
LOG.debug("Server " + srvName + " will be unloaded for " +
|
||||
"balance. Server load: " + numSrvRegs + " avg: " +
|
||||
avgLoad + ", regions can be moved: " + numMoveToLowLoaded);
|
||||
}
|
||||
return numRegionsToClose;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* State of a Region as it transitions from closed to open, etc. See
|
||||
* note on regionsInTransition data member above for listing of state
|
||||
|
|
|
@ -174,7 +174,10 @@ class ServerManager implements HConstants {
|
|||
Set<String> servers = loadToServers.get(load);
|
||||
if (servers != null) {
|
||||
servers.remove(serverName);
|
||||
loadToServers.put(load, servers);
|
||||
if (servers.size() > 0)
|
||||
loadToServers.put(load, servers);
|
||||
else
|
||||
loadToServers.remove(load);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -374,7 +377,10 @@ class ServerManager implements HConstants {
|
|||
// Note that servers should never be null because loadToServers
|
||||
// and serversToLoad are manipulated in pairs
|
||||
servers.remove(serverInfo.getServerName());
|
||||
loadToServers.put(load, servers);
|
||||
if (servers.size() > 0)
|
||||
loadToServers.put(load, servers);
|
||||
else
|
||||
loadToServers.remove(load);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -641,7 +647,10 @@ class ServerManager implements HConstants {
|
|||
Set<String> servers = loadToServers.get(load);
|
||||
if (servers != null) {
|
||||
servers.remove(serverName);
|
||||
loadToServers.put(load, servers);
|
||||
if(servers.size() > 0)
|
||||
loadToServers.put(load, servers);
|
||||
else
|
||||
loadToServers.remove(load);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -664,7 +673,7 @@ class ServerManager implements HConstants {
|
|||
for (HServerLoad load : serversToLoad.values()) {
|
||||
totalLoad += load.getNumberOfRegions();
|
||||
}
|
||||
averageLoad = Math.ceil((double)totalLoad / (double)numServers);
|
||||
averageLoad = (double)totalLoad / (double)numServers;
|
||||
}
|
||||
return averageLoad;
|
||||
}
|
||||
|
@ -707,6 +716,15 @@ class ServerManager implements HConstants {
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @return Read-only map of load to servers.
|
||||
*/
|
||||
SortedMap<HServerLoad, Set<String>> getLoadToServers() {
|
||||
synchronized (loadToServers) {
|
||||
return Collections.unmodifiableSortedMap(loadToServers);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Wakes up threads waiting on serversToServerInfo
|
||||
*/
|
||||
|
@ -775,7 +793,10 @@ class ServerManager implements HConstants {
|
|||
Set<String> servers = loadToServers.get(load);
|
||||
if (servers != null) {
|
||||
servers.remove(serverName);
|
||||
loadToServers.put(load, servers);
|
||||
if(servers.size() > 0)
|
||||
loadToServers.put(load, servers);
|
||||
else
|
||||
loadToServers.remove(load);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -122,6 +122,12 @@ public class TestRegionRebalancing extends HBaseClusterTestCase {
|
|||
LOG.debug("Adding 4th region server");
|
||||
cluster.startRegionServer();
|
||||
assertRegionsAreBalanced();
|
||||
|
||||
for (int i = 0; i < 6; i++){
|
||||
LOG.debug("Adding " + (i + 5) + "th region server");
|
||||
cluster.startRegionServer();
|
||||
}
|
||||
assertRegionsAreBalanced();
|
||||
}
|
||||
|
||||
/** figure out how many regions are currently being served. */
|
||||
|
@ -140,6 +146,8 @@ public class TestRegionRebalancing extends HBaseClusterTestCase {
|
|||
*/
|
||||
private void assertRegionsAreBalanced() {
|
||||
boolean success = false;
|
||||
float slop = conf.getFloat("hbase.regions.slop", (float)0.1);
|
||||
if (slop <= 0) slop = 1;
|
||||
|
||||
for (int i = 0; i < 5; i++) {
|
||||
success = true;
|
||||
|
@ -148,14 +156,20 @@ public class TestRegionRebalancing extends HBaseClusterTestCase {
|
|||
|
||||
int regionCount = getRegionCount();
|
||||
List<HRegionServer> servers = getOnlineRegionServers();
|
||||
double avg = Math.ceil((double)regionCount / (double)servers.size());
|
||||
double avg = cluster.getMaster().getAverageLoad();
|
||||
int avgLoadPlusSlop = (int)Math.ceil(avg * (1 + slop));
|
||||
int avgLoadMinusSlop = (int)Math.floor(avg * (1 - slop)) - 1;
|
||||
LOG.debug("There are " + servers.size() + " servers and " + regionCount
|
||||
+ " regions. Load Average: " + avg);
|
||||
+ " regions. Load Average: " + avg + " low border: " + avgLoadMinusSlop
|
||||
+ ", up border: " + avgLoadPlusSlop + "; attempt: " + i);
|
||||
|
||||
for (HRegionServer server : servers) {
|
||||
int serverLoad = server.getOnlineRegions().size();
|
||||
LOG.debug(server.hashCode() + " Avg: " + avg + " actual: " + serverLoad);
|
||||
if (!(serverLoad <= avg + 2 && serverLoad >= avg - 2)) {
|
||||
if (!(avg > 2.0 && serverLoad <= avgLoadPlusSlop
|
||||
&& serverLoad >= avgLoadMinusSlop)) {
|
||||
LOG.debug(server.hashCode() + " Isn't balanced!!! Avg: " + avg +
|
||||
" actual: " + serverLoad + " slop: " + slop);
|
||||
success = false;
|
||||
}
|
||||
}
|
||||
|
@ -216,4 +230,4 @@ public class TestRegionRebalancing extends HBaseClusterTestCase {
|
|||
region.getLog().closeAndDelete();
|
||||
return region;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue