HBASE-19148 Reevaluate default values of configurations

Removed unused:
    <name>hbase.fs.tmp.dir</name>
Added hbase.master.loadbalance.bytable

Edit of description text. Moved stuff around to put configs beside each
other.

M hbase-server/src/main/java/org/apache/hadoop/hbase/util/ServerCommandLine.java

 Emit some hbase configs in log on startup.

Signed-off-by: Michael Stack <stack@apache.org>
This commit is contained in:
Michael Stack 2017-12-15 17:56:38 -08:00
parent 59baf12c03
commit 4d6b928682
No known key found for this signature in database
GPG Key ID: 9816C7FC8ACC93D2
5 changed files with 140 additions and 88 deletions

View File

@ -62,13 +62,6 @@ possible configurations would overwhelm and obscure the important.
so change this configuration or else all data will be lost on
machine restart.</description>
</property>
<property >
<name>hbase.fs.tmp.dir</name>
<value>/user/${user.name}/hbase-staging</value>
<description>A staging directory in default file system (HDFS)
for keeping temporary data.
</description>
</property>
<property >
<name>hbase.cluster.distributed</name>
<value>false</value>
@ -146,8 +139,8 @@ possible configurations would overwhelm and obscure the important.
<property>
<name>hbase.master.procedurewalcleaner.ttl</name>
<value>604800000</value>
<description>How long a Procedure WAL stays will remain in the
{hbase.rootdir}/oldWALs/masterProcedureWALs directory, after which it will be cleaned
<description>How long a Procedure WAL will remain in the
{hbase.rootdir}/MasterProcedureWALs directory, after which it will be cleaned
by a Master thread. The value is in milliseconds.</description>
</property>
<property>
@ -166,7 +159,19 @@ possible configurations would overwhelm and obscure the important.
<value>true</value>
<description>Whether or not the Master listens to the Master web
UI port (hbase.master.info.port) and redirects requests to the web
UI server shared by the Master and RegionServer.</description>
UI server shared by the Master and RegionServer. This configuration
only makes sense when the Master is serving Regions (not the default).</description>
</property>
<property>
<name>hbase.master.fileSplitTimeout</name>
<value>600000</value>
<description>Splitting a region, how long to wait on the file-splitting
step before aborting the attempt. Default: 600000. This setting used
to be known as hbase.regionserver.fileSplitTimeout in hbase-1.x.
Split is now run master-side, hence the rename. (If a
'hbase.regionserver.fileSplitTimeout' setting is found, it will be used
to prime the current 'hbase.master.fileSplitTimeout'
Configuration.)</description>
</property>
<!--RegionServer configurations-->
@ -198,7 +203,10 @@ possible configurations would overwhelm and obscure the important.
<name>hbase.regionserver.handler.count</name>
<value>30</value>
<description>Count of RPC Listener instances spun up on RegionServers.
Same property is used by the Master for count of master handlers.</description>
Same property is used by the Master for count of master handlers.
Too many handlers can be counter-productive. Make it a multiple of
CPU count. If mostly read-only, handlers count close to cpu count
does well. Start with twice the CPU count and tune from there.</description>
</property>
<property>
<name>hbase.ipc.server.callqueue.handler.factor</name>
@ -292,31 +300,37 @@ possible configurations would overwhelm and obscure the important.
Updates are blocked and flushes are forced until size of all memstores
in a region server hits hbase.regionserver.global.memstore.size.lower.limit.
The default value in this configuration has been intentionally left empty in order to
honor the old hbase.regionserver.global.memstore.upperLimit property if present.</description>
honor the old hbase.regionserver.global.memstore.upperLimit property if present.
</description>
</property>
<property>
<name>hbase.regionserver.global.memstore.size.lower.limit</name>
<value></value>
<description>Maximum size of all memstores in a region server before flushes are forced.
Defaults to 95% of hbase.regionserver.global.memstore.size (0.95).
A 100% value for this value causes the minimum possible flushing to occur when updates are
blocked due to memstore limiting.
The default value in this configuration has been intentionally left empty in order to
honor the old hbase.regionserver.global.memstore.lowerLimit property if present.</description>
<description>Maximum size of all memstores in a region server before flushes
are forced. Defaults to 95% of hbase.regionserver.global.memstore.size
(0.95). A 100% value for this value causes the minimum possible flushing
to occur when updates are blocked due to memstore limiting. The default
value in this configuration has been intentionally left empty in order to
honor the old hbase.regionserver.global.memstore.lowerLimit property if
present.
</description>
</property>
<property>
<name>hbase.systemtables.compacting.memstore.type</name>
<value>NONE</value>
<description>Determines the type of memstore to be used for system tables like META, namespace tables etc.
By default NONE is the type and hence we use the default memstore for all the system tables. If we
need to use compacting memstore for system tables then set this property to BASIC/EAGER</description>
<description>Determines the type of memstore to be used for system tables like
META, namespace tables etc. By default NONE is the type and hence we use the
default memstore for all the system tables. If we need to use compacting
memstore for system tables then set this property to BASIC/EAGER
</description>
</property>
<property>
<name>hbase.regionserver.optionalcacheflushinterval</name>
<value>3600000</value>
<description>
Maximum amount of time an edit lives in memory before being automatically flushed.
Default 1 hour. Set it to 0 to disable automatic flushing.</description>
Default 1 hour. Set it to 0 to disable automatic flushing.
</description>
</property>
<property>
<name>hbase.regionserver.dns.interface</name>
@ -335,19 +349,21 @@ possible configurations would overwhelm and obscure the important.
<name>hbase.regionserver.region.split.policy</name>
<value>org.apache.hadoop.hbase.regionserver.SteppingSplitPolicy</value>
<description>
A split policy determines when a region should be split. The various other split policies that
are available currently are BusyRegionSplitPolicy, ConstantSizeRegionSplitPolicy, DisabledRegionSplitPolicy,
DelimitedKeyPrefixRegionSplitPolicy, KeyPrefixRegionSplitPolicy, and SteppingSplitPolicy.
DisabledRegionSplitPolicy blocks manual region splitting.
A split policy determines when a region should be split. The various
other split policies that are available currently are BusyRegionSplitPolicy,
ConstantSizeRegionSplitPolicy, DisabledRegionSplitPolicy,
DelimitedKeyPrefixRegionSplitPolicy, KeyPrefixRegionSplitPolicy, and
SteppingSplitPolicy. DisabledRegionSplitPolicy blocks manual region splitting.
</description>
</property>
<property>
<name>hbase.regionserver.regionSplitLimit</name>
<value>1000</value>
<description>
Limit for the number of regions after which no more region splitting should take place.
This is not hard limit for the number of regions but acts as a guideline for the regionserver
to stop splitting after a certain limit. Default is set to 1000.
Limit for the number of regions after which no more region splitting
should take place. This is not a hard limit for the number of regions
but acts as a guideline for the regionserver to stop splitting after
a certain limit. Default is set to 1000.
</description>
</property>
@ -357,14 +373,15 @@ possible configurations would overwhelm and obscure the important.
<value>90000</value>
<description>ZooKeeper session timeout in milliseconds. It is used in two different ways.
First, this value is used in the ZK client that HBase uses to connect to the ensemble.
It is also used by HBase when it starts a ZK server and it is passed as the 'maxSessionTimeout'. See
http://hadoop.apache.org/zookeeper/docs/current/zookeeperProgrammers.html#ch_zkSessions.
It is also used by HBase when it starts a ZK server and it is passed as the 'maxSessionTimeout'.
See http://hadoop.apache.org/zookeeper/docs/current/zookeeperProgrammers.html#ch_zkSessions.
For example, if an HBase region server connects to a ZK ensemble that's also managed
by HBase, then the
session timeout will be the one specified by this configuration. But, a region server that connects
to an ensemble managed with a different configuration will be subjected that ensemble's maxSessionTimeout. So,
even though HBase might propose using 90 seconds, the ensemble can have a max timeout lower than this and
it will take precedence. The current default that ZK ships with is 40 seconds, which is lower than HBase's.
by HBase, then the session timeout will be the one specified by this configuration.
But, a region server that connects to an ensemble managed with a different configuration
will be subjected to that ensemble's maxSessionTimeout. So, even though HBase
might propose using 90 seconds, the ensemble can have a max timeout lower than
this and it will take precedence. The current default that ZK ships with is 40
seconds, which is lower than HBase's.
</description>
</property>
<property>
@ -373,7 +390,8 @@ possible configurations would overwhelm and obscure the important.
<description>Root ZNode for HBase in ZooKeeper. All of HBase's ZooKeeper
files that are configured with a relative path will go under this node.
By default, all of HBase's ZooKeeper file paths are configured with a
relative path, so they will all go under this directory unless changed.</description>
relative path, so they will all go under this directory unless changed.
</description>
</property>
<property>
<name>zookeeper.znode.acl.parent</name>
@ -1119,6 +1137,26 @@ possible configurations would overwhelm and obscure the important.
A coprocessor can also be loaded on demand by setting HTableDescriptor or the
HBase shell.</description>
</property>
<property>
<name>hbase.coprocessor.master.classes</name>
<value></value>
<description>A comma-separated list of
org.apache.hadoop.hbase.coprocessor.MasterObserver coprocessors that are
loaded by default on the active HMaster process. For any implemented
coprocessor methods, the listed classes will be called in order. After
implementing your own MasterObserver, just put it in HBase's classpath
and add the fully qualified class name here.</description>
</property>
<property>
<name>hbase.coprocessor.abortonerror</name>
<value>true</value>
<description>Set to true to cause the hosting server (master or regionserver)
to abort if a coprocessor fails to load, fails to initialize, or throws an
unexpected Throwable object. Setting this to false will allow the server to
continue execution but the system wide state of the coprocessor in question
will become inconsistent as it will be properly executing in only a subset
of servers, so this is most useful for debugging only.</description>
</property>
<property>
<name>hbase.rest.port</name>
<value>8080</value>
@ -1171,26 +1209,6 @@ possible configurations would overwhelm and obscure the important.
seems to be for and old version of HBase (\${hbase.version}), this
version is X.X.X-SNAPSHOT"</description>
</property>
<property>
<name>hbase.coprocessor.master.classes</name>
<value></value>
<description>A comma-separated list of
org.apache.hadoop.hbase.coprocessor.MasterObserver coprocessors that are
loaded by default on the active HMaster process. For any implemented
coprocessor methods, the listed classes will be called in order. After
implementing your own MasterObserver, just put it in HBase's classpath
and add the fully qualified class name here.</description>
</property>
<property>
<name>hbase.coprocessor.abortonerror</name>
<value>true</value>
<description>Set to true to cause the hosting server (master or regionserver)
to abort if a coprocessor fails to load, fails to initialize, or throws an
unexpected Throwable object. Setting this to false will allow the server to
continue execution but the system wide state of the coprocessor in question
will become inconsistent as it will be properly executing in only a subset
of servers, so this is most useful for debugging only.</description>
</property>
<property>
<name>hbase.table.lock.enable</name>
<value>true</value>
@ -1388,7 +1406,6 @@ possible configurations would overwhelm and obscure the important.
are NULL, CRC32, CRC32C.
</description>
</property>
<property>
<name>hbase.client.scanner.max.result.size</name>
<value>2097152</value>
@ -1398,7 +1415,6 @@ possible configurations would overwhelm and obscure the important.
With faster and/or high latency networks this value should be increased.
</description>
</property>
<property>
<name>hbase.server.scanner.max.result.size</name>
<value>104857600</value>
@ -1408,7 +1424,6 @@ possible configurations would overwhelm and obscure the important.
This is a safety setting to protect the server from OOM situations.
</description>
</property>
<property>
<name>hbase.status.published</name>
<value>false</value>
@ -1447,7 +1462,6 @@ possible configurations would overwhelm and obscure the important.
Multicast port to use for the status publication by multicast.
</description>
</property>
<property>
<name>hbase.dynamic.jars.dir</name>
<value>${hbase.rootdir}/lib</value>
@ -1486,6 +1500,13 @@ possible configurations would overwhelm and obscure the important.
as the SimpleLoadBalancer).
</description>
</property>
<property>
<name>hbase.master.loadbalance.bytable</name>
<value>false</value>
<description>Factor in table name when the balancer runs; i.e. balance
regions on a per-table basis rather than cluster-wide.
Default: false.
</description>
</property>
<property>
<name>hbase.master.normalizer.class</name>
<value>org.apache.hadoop.hbase.master.normalizer.SimpleRegionNormalizer</value>
@ -1604,8 +1625,8 @@ possible configurations would overwhelm and obscure the important.
<name>hbase.security.visibility.mutations.checkauths</name>
<value>false</value>
<description>
This property if enabled, will check whether the labels in the visibility expression are associated
with the user issuing the mutation
This property if enabled, will check whether the labels in the visibility
expression are associated with the user issuing the mutation
</description>
</property>
<property>
@ -1633,9 +1654,9 @@ possible configurations would overwhelm and obscure the important.
<description>
The maximum number of threads any replication source will use for
shipping edits to the sinks in parallel. This also limits the number of
chunks each replication batch is broken into.
Larger values can improve the replication throughput between the master and
slave clusters. The default of 10 will rarely need to be changed.
chunks each replication batch is broken into. Larger values can improve
the replication throughput between the master and slave clusters. The
default of 10 will rarely need to be changed.
</description>
</property>
<property>
@ -1644,22 +1665,22 @@ possible configurations would overwhelm and obscure the important.
<description>
By default, in replication we can not make sure the order of operations in slave cluster is
same as the order in master. If set REPLICATION_SCOPE to 2, we will push edits by the order
of written. This configure is to set how long (in ms) we will wait before next checking if a
log can not push right now because there are some logs written before it have not been pushed.
A larger waiting will decrease the number of queries on hbase:meta but will enlarge the delay
of replication. This feature relies on zk-less assignment, so users must set
of written. This configuration is to set how long (in ms) we will wait before next checking if
a log can NOT be pushed because there are some logs written before it that have yet to be
pushed. A larger waiting will decrease the number of queries on hbase:meta but will enlarge
the delay of replication. This feature relies on zk-less assignment, so users must set
hbase.assignment.usezk to false to support it.
</description>
</property>
<!-- Static Web User Filter properties. -->
<property>
<name>hbase.http.staticuser.user</name>
<value>dr.stack</value>
<description>
The user name to filter as, on static web filters
while rendering content. An example use is the HDFS
web UI (user to be used for browsing files).
</description>
<name>hbase.http.staticuser.user</name>
<value>dr.stack</value>
</property>
<property>
<name>hbase.regionserver.handler.abort.on.error.percent</name>
@ -1703,10 +1724,10 @@ possible configurations would overwhelm and obscure the important.
<value>86400</value>
<description>
The period that ExpiredMobFileCleanerChore runs. The unit is second.
The default value is one day.
The MOB file name uses only the date part of the file creation time in it. We use this
time for deciding TTL expiry of the files. So the removal of TTL expired files might be
delayed. The max delay might be 24 hrs.
The default value is one day. The MOB file name uses only the date part of
the file creation time in it. We use this time for deciding TTL expiry of
the files. So the removal of TTL expired files might be delayed. The max
delay might be 24 hrs.
</description>
</property>
<property>
@ -1764,14 +1785,14 @@ possible configurations would overwhelm and obscure the important.
<name>hbase.snapshot.master.timeout.millis</name>
<value>300000</value>
<description>
Timeout for master for the snapshot procedure execution
Timeout for master for the snapshot procedure execution.
</description>
</property>
<property>
<name>hbase.snapshot.region.timeout</name>
<value>300000</value>
<description>
Timeout for regionservers to keep threads in snapshot request pool waiting
Timeout for regionservers to keep threads in snapshot request pool waiting.
</description>
</property>
<property>

View File

@ -1399,7 +1399,7 @@ public class HMaster extends HRegionServer implements MasterServices {
}
}
boolean isByTable = getConfiguration().getBoolean("hbase.master.loadbalance.bytable", false);
boolean isByTable = getConfiguration().getBoolean("hbase.master.loadbalance.bytable", true);
Map<TableName, Map<ServerName, List<RegionInfo>>> assignmentsByTable =
this.assignmentManager.getRegionStates().getAssignmentsByTable(!isByTable);

View File

@ -609,8 +609,11 @@ public class SplitTableRegionProcedure
// Shutdown the pool
threadPool.shutdown();
// Wait for all the tasks to finish
long fileSplitTimeout = conf.getLong("hbase.master.fileSplitTimeout", 30000);
// Wait for all the tasks to finish.
// When splits ran on the RegionServer, how-long-to-wait-configuration was named
// hbase.regionserver.fileSplitTimeout. If set, use its value.
long fileSplitTimeout = conf.getLong("hbase.master.fileSplitTimeout",
conf.getLong("hbase.regionserver.fileSplitTimeout", 600000));
try {
boolean stillRunning = !threadPool.awaitTermination(fileSplitTimeout, TimeUnit.MILLISECONDS);
if (stillRunning) {

View File

@ -402,12 +402,18 @@ public abstract class AbstractFSWAL<W extends WriterBase> implements WAL {
}
this.coprocessorHost = new WALCoprocessorHost(this, conf);
// Get size to roll log at. Roll at 95% of HDFS block size so we avoid crossing HDFS blocks
// (it costs a little x'ing bocks)
// Schedule a WAL roll when the WAL is 50% of the HDFS block size. Scheduling at 50% of block
// size should make it so WAL rolls before we get to the end-of-block (Block transitions cost
// some latency). In hbase-1 we did this differently. We scheduled a roll when we hit 95% of
// the block size but experience from the field has it that this was not enough time for the
// roll to happen before end-of-block. So the new accounting makes WALs of about the same
// size as those made in hbase-1 (to prevent surprise), we now have default block size as
// 2 times the DFS default: i.e. 2 * DFS default block size rolling at 50% full will generally
// make similar size logs to 1 * DFS default block size rolling at 95% full. See HBASE-19148.
final long blocksize = this.conf.getLong("hbase.regionserver.hlog.blocksize",
CommonFSUtils.getDefaultBlockSize(this.fs, this.walDir));
CommonFSUtils.getDefaultBlockSize(this.fs, this.walDir) * 2);
this.logrollsize =
(long) (blocksize * conf.getFloat("hbase.regionserver.logroll.multiplier", 0.95f));
(long) (blocksize * conf.getFloat("hbase.regionserver.logroll.multiplier", 0.5f));
boolean maxLogsDefined = conf.get("hbase.regionserver.maxlogs") != null;
if (maxLogsDefined) {

View File

@ -26,14 +26,16 @@ import java.util.Locale;
import java.util.Map.Entry;
import java.util.Set;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* Base class for command lines that start up various HBase daemons.
@ -83,6 +85,23 @@ public abstract class ServerCommandLine extends Configured implements Tool {
}
}
/**
 * Print into log some of the important hbase attributes.
 */
private static void logHBaseConfigs(Configuration conf) {
  // Expand this list as you see fit.
  final String[] interestingKeys = {
    "hbase.tmp.dir",
    HConstants.HBASE_DIR,
    HConstants.CLUSTER_DISTRIBUTED,
    HConstants.ZOOKEEPER_QUORUM,
  };
  // One line per key so operators can grep startup logs for a single config.
  for (final String configKey : interestingKeys) {
    LOG.info(configKey + ": " + conf.get(configKey));
  }
}
/**
* Logs information about the currently running JVM process including
* the environment variables. Logging of env vars can be disabled by
@ -92,6 +111,8 @@ public abstract class ServerCommandLine extends Configured implements Tool {
* to comma separated list of such substrings.
*/
public static void logProcessInfo(Configuration conf) {
logHBaseConfigs(conf);
// log environment variables unless asked not to
if (conf == null || !conf.getBoolean("hbase.envvars.logging.disabled", false)) {
Set<String> skipWords = new HashSet<>(DEFAULT_SKIP_WORDS);
@ -114,6 +135,7 @@ public abstract class ServerCommandLine extends Configured implements Tool {
LOG.info("env:"+entry);
}
}
// and JVM info
logJVMInfo();
}