HBASE-920 Make region balancing sloppier

git-svn-id: https://svn.apache.org/repos/asf/hadoop/hbase/trunk@704781 13f79535-47bb-0310-9956-ffa450edef68
2008-10-15 05:18:31 +00:00 · 2008-10-15 05:18:31 +00:00 · db217e6a15
parent 50a795c543
commit db217e6a15
4 changed files with 90 additions and 77 deletions
--- a/CHANGES.txt
+++ b/CHANGES.txt
@ -40,6 +40,7 @@ Release 0.19.0 - Unreleased
   HBASE-908   Add approximate counting to CountingBloomFilter
               (Andrzej Bialecki via Stack)
   HBASE-576   Investigate IPC performance
+   HBASE-920   Make region balancing sloppier

  NEW FEATURES
   HBASE-875   Use MurmurHash instead of JenkinsHash [in bloomfilters]
--- a/conf/hbase-default.xml
+++ b/conf/hbase-default.xml
@ -22,14 +22,6 @@
 */
 -->
 <configuration>
-  <property>
-    <name>hbase.master</name>
-    <value>local</value>
-    <description>The host and port that the HBase master runs at.
-    A value of 'local' runs the master and a regionserver in
-    a single process.
-    </description>
-  </property>
  <property>
    <name>hbase.rootdir</name>
    <value>file:///tmp/hbase-${user.name}/hbase</value>
@ -38,6 +30,14 @@
    E.g: hdfs://NAMENODE_SERVER:PORT/HBASE_ROOTDIR
    </description>
  </property>
+  <property>
+    <name>hbase.master</name>
+    <value>local</value>
+    <description>The host and port that the HBase master runs at.
+    A value of 'local' runs the master and a regionserver in
+    a single process.
+    </description>
+  </property>
  <property>
    <name>hbase.master.info.port</name>
    <value>60010</value>
@ -51,6 +51,21 @@
    <description>The address for the hbase master web UI
    </description>
  </property>
+  <property>
+    <name>hbase.master.meta.thread.rescanfrequency</name>
+    <value>60000</value>
+    <description>How long the HMaster sleeps (in milliseconds) between scans of
+    the root and meta tables.
+    </description>
+  </property>
+  <property>
+    <name>hbase.master.lease.period</name>
+    <value>120000</value>
+    <description>HMaster server lease period in milliseconds. Default is
+    120 seconds.  Region servers must report in within this period else
+    they are considered dead.  On a loaded cluster, you may need to increase
+    this period.</description>
+  </property>
  <property>
    <name>hbase.regionserver</name>
    <value>0.0.0.0:60020</value>
@ -110,28 +125,6 @@
    calls of next may take longer and longer times when the cache is empty.
    </description>
  </property>
-  <property>
-    <name>hbase.master.meta.thread.rescanfrequency</name>
-    <value>60000</value>
-    <description>How long the HMaster sleeps (in milliseconds) between scans of
-    the root and meta tables.
-    </description>
-  </property>
-  <property>
-    <name>hbase.master.lease.period</name>
-    <value>120000</value>
-    <description>HMaster server lease period in milliseconds. Default is
-    120 seconds.  Region servers must report in within this period else
-    they are considered dead.  On loaded cluster, may need to up this
-    period.</description>
-  </property>
-  <property>
-    <name>hbase.hbasemaster.maxregionopen</name>
-    <value>120000</value>
-    <description>Period to wait for a region open.  If regionserver
-    takes longer than this interval, assign to a new regionserver.
-    </description>
-  </property>
  <property>
    <name>hbase.regionserver.lease.period</name>
    <value>60000</value>
@ -139,13 +132,6 @@
    60 seconds. Clients must report in within this period else they are
    considered dead.</description>
  </property>
-  <property>
-    <name>hbase.server.thread.wakefrequency</name>
-    <value>10000</value>
-    <description>Time to sleep in between searches for work (in milliseconds).
-    Used as sleep interval by service threads such as META scanner and log roller.
-    </description>
-  </property>
  <property>
    <name>hbase.regionserver.handler.count</name>
    <value>10</value>
@ -189,6 +175,50 @@
    Default: 30 minutes (in miliseconds)
    </description>
  </property>
+  <property>
+    <name>hbase.regionserver.thread.splitcompactcheckfrequency</name>
+    <value>20000</value>
+    <description>How often a region server runs the split/compaction check.
+    </description>
+  </property>
+  <property>
+    <name>hbase.regionserver.nbreservationblocks</name>
+    <value>4</value>
+    <description>The number of reservation blocks which are used to prevent
+    unstable region servers caused by an OOME.
+    </description>
+  </property>
+  <property>
+    <name>hbase.regionserver.globalMemcacheLimit</name>
+    <value>536870912</value>
+    <description>Maximum size of all memcaches in a region server before new 
+      updates are blocked and flushes are forced. Defaults to 512MB.
+    </description>
+  </property>
+  <property>
+    <name>hbase.regionserver.globalMemcacheLimitlowMark</name>
+    <value>256435456</value>
+    <description>When memcaches are being forced to flush to make room in
+      memory, keep flushing until we hit this mark. Defaults to 256MB. Setting
+      this value equal to hbase.regionserver.globalMemcacheLimit causes the 
+      minimum possible flushing to occur when updates are blocked due to 
+      memcache limiting.
+    </description>
+  </property>  
+  <property>
+    <name>hbase.hbasemaster.maxregionopen</name>
+    <value>120000</value>
+    <description>Period to wait for a region open.  If regionserver
+    takes longer than this interval, assign to a new regionserver.
+    </description>
+  </property>
+  <property>
+    <name>hbase.server.thread.wakefrequency</name>
+    <value>10000</value>
+    <description>Time to sleep in between searches for work (in milliseconds).
+    Used as sleep interval by service threads such as META scanner and log roller.
+    </description>
+  </property>
  <property>
    <name>hbase.hregion.memcache.flush.size</name>
    <value>67108864</value>
@ -234,12 +264,6 @@
    If too large, clients timeout during compaction.
    </description>
  </property>
-  <property>
-    <name>hbase.regionserver.thread.splitcompactcheckfrequency</name>
-    <value>20000</value>
-    <description>How often a region server runs the split/compaction check.
-    </description>
-  </property>
  <property>
    <name>hbase.hstore.compaction.max</name>
    <value>10</value>
@ -254,10 +278,10 @@
    </description>
  </property>
  <property>
-    <name>hbase.regionserver.nbreservationblocks</name>
-    <value>4</value>
-    <description>The number of reservation blocks which are used to prevent
-    unstable region servers caused by an OOME.
+    <name>hbase.regions.slop</name>
+    <value>0.1</value>
+    <description>Rebalance if a regionserver has more than average + (average * slop)
+    regions. Default is 10% slop.
    </description>
  </property>
  <property>
@ -283,23 +307,6 @@
    <description>The size of each block in any block caches.
    </description>
  </property>
-  <property>
-    <name>hbase.regionserver.globalMemcacheLimit</name>
-    <value>536870912</value>
-    <description>Maximum size of all memcaches in a region server before new 
-      updates are blocked and flushes are forced. Defaults to 512MB.
-    </description>
-  </property>
-  <property>
-    <name>hbase.regionserver.globalMemcacheLimitlowMark</name>
-    <value>256435456</value>
-    <description>When memcaches are being forced to flush to make room in
-      memory, keep flushing until we hit this mark. Defaults to 256MB. Setting
-      this value equal to hbase.regionserver.globalmemcachelimit causes the 
-      minimum possible flushing to occur when updates are blocked due to 
-      memcache limiting.
-    </description>
-  </property>  
  <property>
    <name>hbase.hash.type</name>
    <value>murmur</value>
--- a/src/java/org/apache/hadoop/hbase/master/RegionManager.java
+++ b/src/java/org/apache/hadoop/hbase/master/RegionManager.java
@ -118,15 +118,17 @@ class RegionManager implements HConstants {
  private final int maxAssignInOneGo;

  private final HMaster master;
-  
  private final RegionHistorian historian;
+  private final float slop;
  
  RegionManager(HMaster master) {
    this.master = master;
    this.historian = RegionHistorian.getInstance();
    this.maxAssignInOneGo = this.master.getConfiguration().
      getInt("hbase.regions.percheckin", 10);
-    
+    this.slop = this.master.getConfiguration().getFloat("hbase.regions.slop",
+      (float)0.1);
+
    // The root region
    rootScannerThread = new RootScanner(master, this);

@ -183,13 +185,18 @@ class RegionManager implements HConstants {
          // We only do load balancing once all regions are assigned.
          // This prevents churn while the cluster is starting up.
          double avgLoad = master.serverManager.getAverageLoad();
-          if (avgLoad > 2.0 && thisServersLoad.getNumberOfRegions() > avgLoad) {
+          double avgLoadWithSlop = avgLoad +
+            ((this.slop != 0)? avgLoad * this.slop: avgLoad);
+          if (avgLoad > 2.0 &&
+              thisServersLoad.getNumberOfRegions() > avgLoadWithSlop) {
            if (LOG.isDebugEnabled()) {
-              LOG.debug("Server " + serverName + " is overloaded. Server load: " + 
-                  thisServersLoad.getNumberOfRegions() + " avg: " + avgLoad);
+              LOG.debug("Server " + serverName +
+                " is overloaded. Server load: " + 
+                thisServersLoad.getNumberOfRegions() + " avg: " + avgLoad +
+                ", slop: " + this.slop);
            }
-            unassignSomeRegions(thisServersLoad, avgLoad, mostLoadedRegions, 
-                returnMsgs);
+            unassignSomeRegions(thisServersLoad, avgLoad, mostLoadedRegions,
+              returnMsgs);
          }
        }
      } else {
--- a/src/java/org/apache/hadoop/hbase/regionserver/HStoreFile.java
+++ b/src/java/org/apache/hadoop/hbase/regionserver/HStoreFile.java
@ -36,6 +36,10 @@ import org.apache.hadoop.fs.FSDataInputStream;
 import org.apache.hadoop.fs.FSDataOutputStream;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.HRegionInfo;
+import org.apache.hadoop.hbase.HStoreKey;
 import org.apache.hadoop.hbase.io.BlockFSInputStream;
 import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
 import org.apache.hadoop.hbase.util.Bytes;
@ -46,14 +50,8 @@ import org.apache.hadoop.io.SequenceFile;
 import org.apache.hadoop.io.Writable;
 import org.apache.hadoop.io.WritableComparable;
 import org.onelab.filter.BloomFilter;
-import org.onelab.filter.HashFunction;
 import org.onelab.filter.Key;

-import org.apache.hadoop.hbase.HConstants;
-import org.apache.hadoop.hbase.HBaseConfiguration;
-import org.apache.hadoop.hbase.HRegionInfo;
-import org.apache.hadoop.hbase.HStoreKey;
-
 /**
 * A HStore data file.  HStores usually have one or more of these files.  They
 * are produced by flushing the memcache to disk.