HBASE-920 Make region balancing sloppier

git-svn-id: https://svn.apache.org/repos/asf/hadoop/hbase/trunk@704781 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael Stack 2008-10-15 05:18:31 +00:00
parent 50a795c543
commit db217e6a15
4 changed files with 90 additions and 77 deletions

View File

@ -40,6 +40,7 @@ Release 0.19.0 - Unreleased
HBASE-908 Add approximate counting to CountingBloomFilter HBASE-908 Add approximate counting to CountingBloomFilter
(Andrzej Bialecki via Stack) (Andrzej Bialecki via Stack)
HBASE-576 Investigate IPC performance HBASE-576 Investigate IPC performance
HBASE-920 Make region balancing sloppier
NEW FEATURES NEW FEATURES
HBASE-875 Use MurmurHash instead of JenkinsHash [in bloomfilters] HBASE-875 Use MurmurHash instead of JenkinsHash [in bloomfilters]

View File

@ -22,14 +22,6 @@
*/ */
--> -->
<configuration> <configuration>
<property>
<name>hbase.master</name>
<value>local</value>
<description>The host and port that the HBase master runs at.
A value of 'local' runs the master and a regionserver in
a single process.
</description>
</property>
<property> <property>
<name>hbase.rootdir</name> <name>hbase.rootdir</name>
<value>file:///tmp/hbase-${user.name}/hbase</value> <value>file:///tmp/hbase-${user.name}/hbase</value>
@ -38,6 +30,14 @@
E.g: hdfs://NAMENODE_SERVER:PORT/HBASE_ROOTDIR E.g: hdfs://NAMENODE_SERVER:PORT/HBASE_ROOTDIR
</description> </description>
</property> </property>
<property>
<name>hbase.master</name>
<value>local</value>
<description>The host and port that the HBase master runs at.
A value of 'local' runs the master and a regionserver in
a single process.
</description>
</property>
<property> <property>
<name>hbase.master.info.port</name> <name>hbase.master.info.port</name>
<value>60010</value> <value>60010</value>
@ -51,6 +51,21 @@
<description>The address for the hbase master web UI <description>The address for the hbase master web UI
</description> </description>
</property> </property>
<property>
<name>hbase.master.meta.thread.rescanfrequency</name>
<value>60000</value>
<description>How long the HMaster sleeps (in milliseconds) between scans of
the root and meta tables.
</description>
</property>
<property>
<name>hbase.master.lease.period</name>
<value>120000</value>
<description>HMaster server lease period in milliseconds. Default is
120 seconds. Region servers must report in within this period else
they are considered dead. On a loaded cluster, this period may need to be
increased.</description>
</property>
<property> <property>
<name>hbase.regionserver</name> <name>hbase.regionserver</name>
<value>0.0.0.0:60020</value> <value>0.0.0.0:60020</value>
@ -110,28 +125,6 @@
calls of next may take longer and longer times when the cache is empty. calls of next may take longer and longer times when the cache is empty.
</description> </description>
</property> </property>
<property>
<name>hbase.master.meta.thread.rescanfrequency</name>
<value>60000</value>
<description>How long the HMaster sleeps (in milliseconds) between scans of
the root and meta tables.
</description>
</property>
<property>
<name>hbase.master.lease.period</name>
<value>120000</value>
<description>HMaster server lease period in milliseconds. Default is
120 seconds. Region servers must report in within this period else
they are considered dead. On a loaded cluster, this period may need to be
increased.</description>
</property>
<property>
<name>hbase.hbasemaster.maxregionopen</name>
<value>120000</value>
<description>Period to wait for a region open. If regionserver
takes longer than this interval, assign to a new regionserver.
</description>
</property>
<property> <property>
<name>hbase.regionserver.lease.period</name> <name>hbase.regionserver.lease.period</name>
<value>60000</value> <value>60000</value>
@ -139,13 +132,6 @@
60 seconds. Clients must report in within this period else they are 60 seconds. Clients must report in within this period else they are
considered dead.</description> considered dead.</description>
</property> </property>
<property>
<name>hbase.server.thread.wakefrequency</name>
<value>10000</value>
<description>Time to sleep in between searches for work (in milliseconds).
Used as sleep interval by service threads such as META scanner and log roller.
</description>
</property>
<property> <property>
<name>hbase.regionserver.handler.count</name> <name>hbase.regionserver.handler.count</name>
<value>10</value> <value>10</value>
@ -189,6 +175,50 @@
Default: 30 minutes (in milliseconds) Default: 30 minutes (in milliseconds)
</description> </description>
</property> </property>
<property>
<name>hbase.regionserver.thread.splitcompactcheckfrequency</name>
<value>20000</value>
<description>How often a region server runs the split/compaction check.
</description>
</property>
<property>
<name>hbase.regionserver.nbreservationblocks</name>
<value>4</value>
<description>The number of reservation blocks which are used to prevent
unstable region servers caused by an OOME.
</description>
</property>
<property>
<name>hbase.regionserver.globalMemcacheLimit</name>
<value>536870912</value>
<description>Maximum size of all memcaches in a region server before new
updates are blocked and flushes are forced. Defaults to 512MB.
</description>
</property>
<property>
<name>hbase.regionserver.globalMemcacheLimitlowMark</name>
<value>256435456</value>
<description>When memcaches are being forced to flush to make room in
memory, keep flushing until we hit this mark. Defaults to 256MB. Setting
this value equal to hbase.regionserver.globalMemcacheLimit causes the
minimum possible flushing to occur when updates are blocked due to
memcache limiting.
</description>
</property>
<property>
<name>hbase.hbasemaster.maxregionopen</name>
<value>120000</value>
<description>Period to wait for a region open. If regionserver
takes longer than this interval, assign to a new regionserver.
</description>
</property>
<property>
<name>hbase.server.thread.wakefrequency</name>
<value>10000</value>
<description>Time to sleep in between searches for work (in milliseconds).
Used as sleep interval by service threads such as META scanner and log roller.
</description>
</property>
<property> <property>
<name>hbase.hregion.memcache.flush.size</name> <name>hbase.hregion.memcache.flush.size</name>
<value>67108864</value> <value>67108864</value>
@ -234,12 +264,6 @@
If too large, clients timeout during compaction. If too large, clients timeout during compaction.
</description> </description>
</property> </property>
<property>
<name>hbase.regionserver.thread.splitcompactcheckfrequency</name>
<value>20000</value>
<description>How often a region server runs the split/compaction check.
</description>
</property>
<property> <property>
<name>hbase.hstore.compaction.max</name> <name>hbase.hstore.compaction.max</name>
<value>10</value> <value>10</value>
@ -254,10 +278,10 @@
</description> </description>
</property> </property>
<property> <property>
<name>hbase.regionserver.nbreservationblocks</name> <name>hbase.regions.slop</name>
<value>4</value> <value>0.1</value>
<description>The number of reservation blocks which are used to prevent <description>Rebalance if a regionserver has more than average + (average * slop) regions.
unstable region servers caused by an OOME. Default is 10% slop.
</description> </description>
</property> </property>
<property> <property>
@ -283,23 +307,6 @@
<description>The size of each block in any block caches. <description>The size of each block in any block caches.
</description> </description>
</property> </property>
<property>
<name>hbase.regionserver.globalMemcacheLimit</name>
<value>536870912</value>
<description>Maximum size of all memcaches in a region server before new
updates are blocked and flushes are forced. Defaults to 512MB.
</description>
</property>
<property>
<name>hbase.regionserver.globalMemcacheLimitlowMark</name>
<value>256435456</value>
<description>When memcaches are being forced to flush to make room in
memory, keep flushing until we hit this mark. Defaults to 256MB. Setting
this value equal to hbase.regionserver.globalMemcacheLimit causes the
minimum possible flushing to occur when updates are blocked due to
memcache limiting.
</description>
</property>
<property> <property>
<name>hbase.hash.type</name> <name>hbase.hash.type</name>
<value>murmur</value> <value>murmur</value>

View File

@ -118,14 +118,16 @@ class RegionManager implements HConstants {
private final int maxAssignInOneGo; private final int maxAssignInOneGo;
private final HMaster master; private final HMaster master;
private final RegionHistorian historian; private final RegionHistorian historian;
private final float slop;
RegionManager(HMaster master) { RegionManager(HMaster master) {
this.master = master; this.master = master;
this.historian = RegionHistorian.getInstance(); this.historian = RegionHistorian.getInstance();
this.maxAssignInOneGo = this.master.getConfiguration(). this.maxAssignInOneGo = this.master.getConfiguration().
getInt("hbase.regions.percheckin", 10); getInt("hbase.regions.percheckin", 10);
this.slop = this.master.getConfiguration().getFloat("hbase.regions.slop",
(float)0.1);
// The root region // The root region
rootScannerThread = new RootScanner(master, this); rootScannerThread = new RootScanner(master, this);
@ -183,10 +185,15 @@ class RegionManager implements HConstants {
// We only do load balancing once all regions are assigned. // We only do load balancing once all regions are assigned.
// This prevents churn while the cluster is starting up. // This prevents churn while the cluster is starting up.
double avgLoad = master.serverManager.getAverageLoad(); double avgLoad = master.serverManager.getAverageLoad();
if (avgLoad > 2.0 && thisServersLoad.getNumberOfRegions() > avgLoad) { double avgLoadWithSlop = avgLoad +
((this.slop != 0)? avgLoad * this.slop: avgLoad);
if (avgLoad > 2.0 &&
thisServersLoad.getNumberOfRegions() > avgLoadWithSlop) {
if (LOG.isDebugEnabled()) { if (LOG.isDebugEnabled()) {
LOG.debug("Server " + serverName + " is overloaded. Server load: " + LOG.debug("Server " + serverName +
thisServersLoad.getNumberOfRegions() + " avg: " + avgLoad); " is overloaded. Server load: " +
thisServersLoad.getNumberOfRegions() + " avg: " + avgLoad +
", slop: " + this.slop);
} }
unassignSomeRegions(thisServersLoad, avgLoad, mostLoadedRegions, unassignSomeRegions(thisServersLoad, avgLoad, mostLoadedRegions,
returnMsgs); returnMsgs);

View File

@ -36,6 +36,10 @@ import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.HStoreKey;
import org.apache.hadoop.hbase.io.BlockFSInputStream; import org.apache.hadoop.hbase.io.BlockFSInputStream;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable; import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.Bytes;
@ -46,14 +50,8 @@ import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Writable; import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable; import org.apache.hadoop.io.WritableComparable;
import org.onelab.filter.BloomFilter; import org.onelab.filter.BloomFilter;
import org.onelab.filter.HashFunction;
import org.onelab.filter.Key; import org.onelab.filter.Key;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.HStoreKey;
/** /**
* A HStore data file. HStores usually have one or more of these files. They * A HStore data file. HStores usually have one or more of these files. They
* are produced by flushing the memcache to disk. * are produced by flushing the memcache to disk.