From 950b318877e2a7a9d3ee5061e69b675521dbfa2f Mon Sep 17 00:00:00 2001 From: Michael Stack Date: Wed, 22 Jun 2011 05:26:13 +0000 Subject: [PATCH] HBASE-3986 troubleshooting.xml - adding client long-pause and NotServingRegionException entries git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1138304 13f79535-47bb-0310-9956-ffa450edef68 --- src/docbkx/troubleshooting.xml | 21 +++++++++++++++++ .../hbase/regionserver/HRegionServer.java | 23 +++++++++++++++++-- 2 files changed, 42 insertions(+), 2 deletions(-) diff --git a/src/docbkx/troubleshooting.xml b/src/docbkx/troubleshooting.xml index d114330abe4..731bc419ab9 100644 --- a/src/docbkx/troubleshooting.xml +++ b/src/docbkx/troubleshooting.xml @@ -442,6 +442,21 @@ hadoop 17789 155 35.2 9067824 8604364 ? S<l Mar04 9855:48 /usr/java/j On your clients, edit $HBASE_HOME/conf/log4j.properties and change this: log4j.logger.org.apache.hadoop.hbase=DEBUG to this: log4j.logger.org.apache.hadoop.hbase=INFO, or even log4j.logger.org.apache.hadoop.hbase=WARN. +
+ Long Client Pauses With Compression + This is a fairly frequent question on the HBase dist-list. The scenario is that a client is typically inserting a lot of data into a + relatively un-optimized HBase cluster. Compression can exacerbate the pauses, although it is not the source of the problem. + See the section on the pattern for pre-creating regions and confirm that the table isn't starting with a single region. + See the section on cluster configuration, particularly hbase.hstore.blockingStoreFiles, hbase.hregion.memstore.block.multiplier, + MAX_FILESIZE (region size), and MEMSTORE_FLUSHSIZE. + A slightly longer explanation of why pauses can happen is as follows: Puts are sometimes blocked on the MemStores which are blocked by the flusher thread which is blocked because there are + too many files to compact because the compactor is given too many small files to compact and has to compact the same data repeatedly. This situation can occur even with minor compactions. + Compounding this situation, HBase doesn't compress data in memory. Thus, the 64MB that lives in the MemStore could become a 6MB file after compression - which results in a smaller StoreFile. The upside is that + more data is packed into the same region, but performance is achieved by being able to write larger files - which is why HBase waits until the flush size is reached before writing a new StoreFile. And smaller StoreFiles + become targets for compaction. Without compression the files are much bigger and don't need as much compaction; however, this comes at the expense of I/O. + + 
@@ -586,6 +601,12 @@ ERROR org.apache.hadoop.hbase.regionserver.HRegionServer: ZooKeeper session expi See for other general information about ZooKeeper troubleshooting.
+
+ NotServingRegionException + This exception is "normal" when found in the RegionServer logs at DEBUG level. This exception is returned to the client + and then the client goes back to .META. to find the new location of the moved region. + However, if the NotServingRegionException is logged at ERROR level, then the client ran out of retries and something is probably wrong. + 
diff --git a/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java b/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java index bbfdcbe8fdc..c947995e9e5 100644 --- a/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java +++ b/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java @@ -1050,12 +1050,22 @@ public class HRegionServer implements HRegionInterface, HBaseRPCErrorHandler, */ private static class CompactionChecker extends Chore { private final HRegionServer instance; + private final int majorCompactPriority; + private final static int DEFAULT_PRIORITY = -1; CompactionChecker(final HRegionServer h, final int sleepTime, final Stoppable stopper) { super("CompactionChecker", sleepTime, h); this.instance = h; LOG.info("Runs every " + StringUtils.formatTime(sleepTime)); + + /* MajorCompactPriority is configurable. + * If not set, it will get the value of hbase.hstore.blockingStoreFiles, + * and the compaction will use default priority. + */ + this.majorCompactPriority = this.instance.conf. + getInt("hbase.regionserver.compactionChecker.majorCompactPriority", + DEFAULT_PRIORITY); } @Override @@ -1065,10 +1075,19 @@ public class HRegionServer implements HRegionInterface, HBaseRPCErrorHandler, continue; for (Store s : r.getStores().values()) { try { - if (s.isMajorCompaction() || s.needsCompaction()) { + if (s.needsCompaction()) { // Queue a compaction. Will recognize if major is needed. 
this.instance.compactSplitThread.requestCompaction(r, s, - getName() + " requests major compaction"); + getName() + " requests compaction"); + } else if (s.isMajorCompaction()) { + if (majorCompactPriority == DEFAULT_PRIORITY ) { + this.instance.compactSplitThread.requestCompaction(r, s, + getName() + " requests major compaction; use default priority"); + } else { + this.instance.compactSplitThread.requestCompaction(r, s, + getName() + " requests major compaction; use configured priority", + this.majorCompactPriority); + } } } catch (IOException e) { LOG.warn("Failed major compaction check on " + r, e);