HBASE-15392 Single Cell Get reads two HFileBlocks

M hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/ScanQueryMatcher.java moreRowsMayExistAfter: Exploit the fact that a Scan may be a Get Scan. Also save compares when there is no non-default stopRow. M hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreScanner.java optimize: Add doc on what is being optimized. Also, if a Get Scan, do not optimize, else we'll keep going after our row is DONE. Another place to make use of the Get Scan fact is when we are DONE: if it is a Get Scan, we can close out the scan. Signed-off-by: stack <stack@apache.org>
2016-04-21 12:33:37 -07:00 · 2016-04-21 12:33:37 -07:00 · 27446a5c4a
parent 5e552e57a5
commit 27446a5c4a
8 changed files with 133 additions and 65 deletions
--- a/hbase-common/src/main/java/org/apache/hadoop/hbase/KeyValue.java
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/KeyValue.java
@ -2035,6 +2035,11 @@ public class KeyValue implements Cell, HeapSize, Cloneable, SettableSequenceId,
      right.getRowArray(), right.getRowOffset(), right.getRowLength());
    }

+    public int compareRows(Cell left, byte[] right, int roffset, int rlength) {
+      return compareRows(left.getRowArray(), left.getRowOffset(), left.getRowLength(), right,
+          roffset, rlength);
+    }
+
    /**
     * Get the b[],o,l for left and right rowkey portions and compare.
     * @param left
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CombinedBlockCache.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CombinedBlockCache.java
@ -61,8 +61,8 @@ public class CombinedBlockCache implements ResizableBlockCache, HeapSize {
  @Override
  public void cacheBlock(BlockCacheKey cacheKey, Cacheable buf, boolean inMemory,
      final boolean cacheDataInL1) {
-    boolean isMetaBlock = buf.getBlockType().getCategory() != BlockCategory.DATA;
-    if (isMetaBlock || cacheDataInL1) {
+    boolean metaBlock = buf.getBlockType().getCategory() != BlockCategory.DATA;
+    if (metaBlock || cacheDataInL1) {
      lruCache.cacheBlock(cacheKey, buf, inMemory, cacheDataInL1);
    } else {
      l2Cache.cacheBlock(cacheKey, buf, inMemory, false);
@ -79,12 +79,9 @@ public class CombinedBlockCache implements ResizableBlockCache, HeapSize {
      boolean repeat, boolean updateCacheMetrics) {
    // TODO: is there a hole here, or just awkwardness since in the lruCache getBlock
    // we end up calling l2Cache.getBlock.
-    if (lruCache.containsBlock(cacheKey)) {
-      return lruCache.getBlock(cacheKey, caching, repeat, updateCacheMetrics);
-    }
-    Cacheable result = l2Cache.getBlock(cacheKey, caching, repeat, updateCacheMetrics);
-
-    return result;
+    return lruCache.containsBlock(cacheKey)?
+        lruCache.getBlock(cacheKey, caching, repeat, updateCacheMetrics):
+        l2Cache.getBlock(cacheKey, caching, repeat, updateCacheMetrics);
  }

  @Override
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/KeyValueScanner.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/KeyValueScanner.java
@ -29,6 +29,9 @@ import org.apache.hadoop.hbase.client.Scan;
 * Scanner that returns the next KeyValue.
 */
@InterfaceAudience.Private
+// TODO: Rename KeyValueScanner to CellScanner; however, we already have a simple CellScanner,
+// so this should be something else altogether: a decoration on our base CellScanner.
+// This class shows up in CPs, so do it all in one fell swoop in HBase-2.0.0.
 public interface KeyValueScanner {
  /**
   * The byte array represents for NO_NEXT_INDEXED_KEY;
@ -161,8 +164,9 @@ public interface KeyValueScanner {
  public boolean seekToLastRow() throws IOException;

  /**
-   * @return the next key in the index (the key to seek to the next block)
-   * if known, or null otherwise
+   * @return the next key in the index, usually the first key of the next block OR a key that
+   * falls between the last key of the current block and the first key of the next block
+   * (see HFileWriterImpl#getMidpoint), or null if not known.
   */
  public Cell getNextIndexedKey();
 }
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/ScanQueryMatcher.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/ScanQueryMatcher.java
@ -145,17 +145,17 @@ public class ScanQueryMatcher {

  private final boolean isReversed;

+  /**
+   * True if we are doing a 'Get' Scan. Every Get is actually a one-row Scan.
+   */
+  private final boolean get;
+
  /**
   * Construct a QueryMatcher for a scan
-   * @param scan
   * @param scanInfo The store's immutable scan info
-   * @param columns
   * @param scanType Type of the scan
   * @param earliestPutTs Earliest put seen in any of the store files.
-   * @param oldestUnexpiredTS the oldest timestamp we are interested in,
-   *  based on TTL
-   * @param regionCoprocessorHost 
-   * @throws IOException 
+   * @param oldestUnexpiredTS the oldest timestamp we are interested in, based on TTL
   */
  public ScanQueryMatcher(Scan scan, ScanInfo scanInfo, NavigableSet<byte[]> columns,
      ScanType scanType, long readPointToUse, long earliestPutTs, long oldestUnexpiredTS,
@ -166,6 +166,7 @@ public class ScanQueryMatcher {
    } else {
      this.tr = timeRange;
    }
+    this.get = scan.isGetScan();
    this.rowComparator = scanInfo.getComparator();
    this.regionCoprocessorHost = regionCoprocessorHost;
    this.deletes =  instantiateDeleteTracker();
@ -502,24 +503,27 @@ public class ScanQueryMatcher {
    }
  }

+  /**
+   * @return false if we know there are no more rows to be scanned (we've reached the
+   * <code>stopRow</code>, or we are scanning one row only because this Scan is for a Get, etc.).
+   */
  public boolean moreRowsMayExistAfter(Cell kv) {
-    if (this.isReversed) {
-      if (rowComparator.compareRows(kv.getRowArray(), kv.getRowOffset(),
-          kv.getRowLength(), stopRow, 0, stopRow.length) <= 0) {
+    // If a 'get' Scan -- we are doing a Get (every Get is a single-row Scan in implementation) --
+    // then we are looking at one row only, the one specified in the Get coordinate, so we know
+    // for sure that there are no more rows on this Scan.
+    if (this.get) {
      return false;
-      } else {
-        return true;
-      }
-    }
-    if (!Bytes.equals(stopRow , HConstants.EMPTY_END_ROW) &&
-        rowComparator.compareRows(kv.getRowArray(),kv.getRowOffset(),
-            kv.getRowLength(), stopRow, 0, stopRow.length) >= 0) {
-      // KV >= STOPROW
-      // then NO there is nothing left.
-      return false;
-    } else {
+    }
+    // If there is no stopRow, report that there may be more rows. The compares that follow only
+    // make sense with a non-empty, non-default stopRow, so this check short-circuits them.
+    // Note this is a cheap identity check; a distinct but empty stopRow array falls through.
+    if (this.stopRow == null || this.stopRow == HConstants.EMPTY_BYTE_ARRAY) {
       return true;
    }
+    return this.isReversed?
+      rowComparator.compareRows(kv, stopRow, 0, stopRow.length) > 0:
+      Bytes.equals(stopRow, HConstants.EMPTY_END_ROW) ||
+        rowComparator.compareRows(kv, stopRow, 0, stopRow.length) < 0;
  }

  /**
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreScanner.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreScanner.java
@ -587,6 +587,12 @@ public class StoreScanner extends NonReversedNonLazyKeyValueScanner
          continue;

        case DONE:
+          // Optimization for Gets! If DONE, no more to get on this row, early exit!
+          if (this.scan.isGetScan()) {
+            // Then no more to this row... exit.
+            close();// Do all cleanup except heap.close()
+            return scannerContext.setScannerState(NextState.NO_MORE_VALUES).hasMoreValues();
+          }
          // We are sure that this row is done and we are in the next row.
          // So subsequent StoresScanner.next() call need not do another compare
          // and set the matcher.row
@ -642,11 +648,60 @@ public class StoreScanner extends NonReversedNonLazyKeyValueScanner
    return scannerContext.setScannerState(NextState.NO_MORE_VALUES).hasMoreValues();
  }

-  /*
-   * See if we should actually SEEK or rather just SKIP to the next Cell.
-   * (see HBASE-13109)
+  /**
+   * See if we should actually SEEK or rather just SKIP to the next Cell (see HBASE-13109).
+   * This method works together with ColumnTrackers and Filters. ColumnTrackers may issue SEEK
+   * hints, such as seek to next column, next row, or seek to an arbitrary seek key.
+   * This method intercepts these qcodes and decides whether a seek is the most efficient _actual_
+   * way to get us to the requested cell (SEEKs are more expensive than SKIP, SKIP, SKIP inside the
+   * current, loaded block).
+   * It does this by looking at the next indexed key of the current HFile. This key
+   * is then compared with the _SEEK_ key, where a SEEK key is an artificial 'last possible key
+   * on the row' (only in here, we avoid actually creating a SEEK key; in the compare we work with
+   * the current Cell but compare as though it were a seek key; see down in
+   * matcher.compareKeyForNextRow, etc). If the compare gets us onto the next block, we *_SEEK;
+   * otherwise we just INCLUDE or SKIP and let the ColumnTrackers or Filters go through the next
+   * Cell, and so on.
+   *
+   * <p>The ColumnTrackers and Filters must behave correctly in all cases, i.e. if they are past the
+   * Cells they care about they must issue a SKIP or SEEK.
+   *
+   * <p>Other notes:
+   * <ul>
+   * <li>Rows can straddle block boundaries</li>
+   * <li>Versions of columns can straddle block boundaries (i.e. column C1 at T1 might be in a
+   * different block than column C1 at T2)</li>
+   * <li>We want to SKIP and INCLUDE if the chance is high that we'll find the desired Cell after a
+   * few SKIPs...</li>
+   * <li>We want to INCLUDE_AND_SEEK and SEEK when the chance is high that we'll be able to seek
+   * past many Cells, especially if we know we need to go to the next block.</li>
+   * </ul>
+   * <p>A good proxy (best effort) to determine whether INCLUDE/SKIP is better than SEEK is whether
+   * we'll likely end up seeking to the next block (or past the next block) to get our next column.
+   * Example:
+   * <pre>
+   * |    BLOCK 1              |     BLOCK 2                   |
+   * |  r1/c1, r1/c2, r1/c3    |    r1/c4, r1/c5, r2/c1        |
+   *                                   ^         ^
+   *                                   |         |
+   *                           Next Index Key   SEEK_NEXT_ROW (before r2/c1)
+   *
+   *
+   * |    BLOCK 1                       |     BLOCK 2                      |
+   * |  r1/c1/t5, r1/c1/t4, r1/c1/t3    |    r1/c1/t2, r1/c1/T1, r1/c2/T3  |
+   *                                            ^              ^
+   *                                            |              |
+   *                                    Next Index Key        SEEK_NEXT_COL
+   * </pre>
+   * Now imagine we want columns c1 and c3 (see first diagram above). The 'Next Index Key' of
+   * r1/c4 is &gt; r1/c3, so we should seek to get to the c1 on the next row, r2. In the second
+   * case, say we only want one version of c1; after we have it, a SEEK_NEXT_COL will be issued to
+   * get to c2. Looking at the 'Next Index Key', it would land us in the next block, so we should
+   * SEEK. In other scenarios, where the SEEK will not land us in the next block, it is very likely
+   * better to issue a series of SKIPs.
   */
-  private ScanQueryMatcher.MatchCode optimize(ScanQueryMatcher.MatchCode qcode, Cell cell) {
+  @VisibleForTesting
+  protected ScanQueryMatcher.MatchCode optimize(ScanQueryMatcher.MatchCode qcode, Cell cell) {
    switch(qcode) {
    case INCLUDE_AND_SEEK_NEXT_COL:
    case SEEK_NEXT_COL:
@ -661,11 +716,17 @@ public class StoreScanner extends NonReversedNonLazyKeyValueScanner
    case INCLUDE_AND_SEEK_NEXT_ROW:
    case SEEK_NEXT_ROW:
    {
+      // If it is a Get Scan, then we know that we are done with this row; there are no more
+      // rows beyond the current one: don't try to optimize. We are DONE. Return the *_NEXT_ROW
+      // qcode as is. When the caller gets these flags on a Get Scan, it knows it can shut down the
+      // Scan.
+      if (!this.scan.isGetScan()) {
        Cell nextIndexedKey = getNextIndexedKey();
        if (nextIndexedKey != null && nextIndexedKey != KeyValueScanner.NO_NEXT_INDEXED_KEY
-          && matcher.compareKeyForNextRow(nextIndexedKey, cell) >= 0) {
+            && matcher.compareKeyForNextRow(nextIndexedKey, cell) > 0) {
          return qcode == MatchCode.SEEK_NEXT_ROW ? MatchCode.SKIP : MatchCode.INCLUDE;
        }
+      }
      break;
    }
    default:
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/CollectionBackedScanner.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/CollectionBackedScanner.java
@ -30,8 +30,7 @@ import org.apache.hadoop.hbase.KeyValue;
 import org.apache.hadoop.hbase.regionserver.NonReversedNonLazyKeyValueScanner;

 /**
- * Utility scanner that wraps a sortable collection and serves
- * as a KeyValueScanner.
+ * Utility scanner that wraps a sortable collection and serves as a KeyValueScanner.
 */
@InterfaceAudience.Private
 public class CollectionBackedScanner extends NonReversedNonLazyKeyValueScanner {
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/KeyValueScanFixture.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/KeyValueScanFixture.java
@ -33,8 +33,7 @@ import java.util.List;
 * to be a store file scanner.
 */
 public class KeyValueScanFixture extends CollectionBackedScanner {
-  public KeyValueScanFixture(KeyValue.KVComparator comparator,
-                             KeyValue... incData) {
+  public KeyValueScanFixture(KeyValue.KVComparator comparator, KeyValue... incData) {
    super(comparator, incData);
  }

--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestKeyValueScanFixture.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestKeyValueScanFixture.java
@ -44,8 +44,7 @@ public class TestKeyValueScanFixture extends TestCase {
        KeyValueTestUtil.create("RowB", "family", "qf1",
            10, KeyValue.Type.Put, "value-10")
    };
-    KeyValueScanner scan = new KeyValueScanFixture(
-        KeyValue.COMPARATOR, kvs);
+    KeyValueScanner scan = new KeyValueScanFixture(KeyValue.COMPARATOR, kvs);

    KeyValue kv = KeyValueUtil.createFirstOnRow(Bytes.toBytes("RowA"));
    // should seek to this: