HBASE-15238 HFileReaderV2 prefetch overreaches; runs off the end of the data

Author: stack
Date:   2016-02-09 20:55:20 -08:00
Parent: 01b73e9877
Commit: 869c0cf43c
5 changed files with 37 additions and 30 deletions

hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/ChecksumUtil.java

@@ -86,7 +86,7 @@ public class ChecksumUtil {
    * The header is extracted from the specified HFileBlock while the
    * data-to-be-verified is extracted from 'data'.
    */
-  static boolean validateBlockChecksum(String pathName, HFileBlock block,
+  static boolean validateBlockChecksum(String pathName, long offset, HFileBlock block,
     byte[] data, int hdrSize) throws IOException {
     // If this is an older version of the block that does not have
@@ -116,12 +116,13 @@ public class ChecksumUtil {
     assert dataChecksum != null;
     int sizeWithHeader = block.getOnDiskDataSizeWithHeader();
     if (LOG.isTraceEnabled()) {
-      LOG.info("length of data = " + data.length
-          + " OnDiskDataSizeWithHeader = " + sizeWithHeader
-          + " checksum type = " + cktype.getName()
-          + " file =" + pathName
-          + " header size = " + hdrSize
-          + " bytesPerChecksum = " + bytesPerChecksum);
+      LOG.info("dataLength=" + data.length
+          + ", sizeWithHeader=" + sizeWithHeader
+          + ", checksumType=" + cktype.getName()
+          + ", file=" + pathName
+          + ", offset=" + offset
+          + ", headerSize=" + hdrSize
+          + ", bytesPerChecksum=" + bytesPerChecksum);
     }
     try {
       dataChecksum.verifyChunkedSums(ByteBuffer.wrap(data, 0, sizeWithHeader),
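
For context on what the verifyChunkedSums call above checks: Hadoop-style checksums cover the on-disk data in bytesPerChecksum-sized chunks, with one four-byte checksum per chunk. Below is a minimal sketch of that chunk-by-chunk verification, using java.util.zip.CRC32 in place of Hadoop's DataChecksum; the class and helper names are illustrative, not HBase code.

    import java.nio.ByteBuffer;
    import java.util.zip.CRC32;

    public class ChunkedChecksumSketch {
      // One CRC32 per bytesPerChecksum-sized chunk, stored big-endian in 'sums'.
      static boolean verifyChunkedSums(byte[] data, int dataLen, byte[] sums,
          int bytesPerChecksum) {
        CRC32 crc = new CRC32();
        ByteBuffer sumBuf = ByteBuffer.wrap(sums);
        for (int off = 0; off < dataLen; off += bytesPerChecksum) {
          int chunkLen = Math.min(bytesPerChecksum, dataLen - off);
          crc.reset();
          crc.update(data, off, chunkLen);
          if ((int) crc.getValue() != sumBuf.getInt()) {
            return false; // first mismatching chunk fails the block
          }
        }
        return true;
      }
    }

The real DataChecksum.verifyChunkedSums throws a ChecksumException on mismatch rather than returning false; the try block in the hunk above suggests ChecksumUtil catches that and reports failure to its caller, which (per the HFileBlock hunk below) returns null for the block on a checksum mismatch.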

hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/FixedFileTrailer.java

@@ -41,8 +41,7 @@ import org.apache.hadoop.hbase.util.Bytes;
  * trailer size is fixed within a given {@link HFile} format version only, but
  * we always store the version number as the last four-byte integer of the file.
  * The version number itself is split into two portions, a major
- * version and a minor version.
- * The last three bytes of a file is the major
+ * version and a minor version. The last three bytes of a file are the major
  * version and a single preceding byte is the minor number. The major version
  * determines which readers/writers to use to read/write a hfile while a minor
  * version determines smaller changes in hfile format that do not need a new
@@ -50,7 +49,6 @@ import org.apache.hadoop.hbase.util.Bytes;
  */
 @InterfaceAudience.Private
 public class FixedFileTrailer {
-
   /**
    * We store the comparator class name as a fixed-length field in the trailer.
    */
@@ -65,8 +63,9 @@ public class FixedFileTrailer {
   /**
    * In version 1, the offset to the data block index. Starting from version 2,
    * the meaning of this field is the offset to the section of the file that
-   * should be loaded at the time the file is being opened, and as of the time
-   * of writing, this happens to be the offset of the file info section.
+   * should be loaded at the time the file is being opened: i.e. on open we load
+   * the root index, file info, etc. See http://hbase.apache.org/book.html#_hfile_format_2
+   * in the reference guide.
    */
   private long loadOnOpenDataOffset;
 
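
The trailer javadoc above (the hunk at line 41) spells out the version encoding: the file ends with a four-byte integer whose last three bytes are the major version and whose single preceding byte is the minor version. A minimal sketch of unpacking that integer follows; the helper names are chosen for this sketch, though FixedFileTrailer itself has equivalent static extract methods.

    public class TrailerVersionSketch {
      // The serialized version int, read big-endian from the file's last 4 bytes:
      // high byte = minor version, low three bytes = major version.
      static int extractMajorVersion(int serializedVersion) {
        return serializedVersion & 0x00ffffff;
      }

      static int extractMinorVersion(int serializedVersion) {
        return serializedVersion >>> 24;
      }

      public static void main(String[] args) {
        int serialized = (3 << 24) | 2; // minor=3, major=2
        System.out.println("major=" + extractMajorVersion(serialized)
            + ", minor=" + extractMinorVersion(serialized)); // major=2, minor=3
      }
    }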

hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java

@@ -1695,7 +1695,7 @@ public class HFileBlock implements Cacheable {
         b.assumeUncompressed();
       }
-      if (verifyChecksum && !validateBlockChecksum(b, onDiskBlock, hdrSize)) {
+      if (verifyChecksum && !validateBlockChecksum(b, offset, onDiskBlock, hdrSize)) {
         return null; // checksum mismatch
       }
@@ -1744,9 +1744,10 @@ public class HFileBlock implements Cacheable {
      * If there is a checksum mismatch, then return false. Otherwise
      * return true.
      */
-    protected boolean validateBlockChecksum(HFileBlock block, byte[] data, int hdrSize)
-        throws IOException {
-      return ChecksumUtil.validateBlockChecksum(pathName, block, data, hdrSize);
+    protected boolean validateBlockChecksum(HFileBlock block, long offset, byte[] data,
+        int hdrSize)
+        throws IOException {
+      return ChecksumUtil.validateBlockChecksum(pathName, offset, block, data, hdrSize);
     }
 
     @Override

hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderV2.java

@@ -189,10 +189,14 @@ public class HFileReaderV2 extends AbstractHFileReader {
     if (cacheConf.shouldPrefetchOnOpen()) {
       PrefetchExecutor.request(path, new Runnable() {
         public void run() {
+          long offset = 0;
+          long end = 0;
           try {
-            long offset = 0;
-            long end = fileSize - getTrailer().getTrailerSize();
+            end = getTrailer().getLoadOnOpenDataOffset();
             HFileBlock prevBlock = null;
+            if (LOG.isTraceEnabled()) {
+              LOG.trace("File=" + path.toString() + ", offset=" + offset + ", end=" + end);
+            }
             while (offset < end) {
               if (Thread.interrupted()) {
                 break;
@@ -209,11 +213,11 @@ public class HFileReaderV2 extends AbstractHFileReader {
           } catch (IOException e) {
             // IOExceptions are probably due to region closes (relocation, etc.)
             if (LOG.isTraceEnabled()) {
-              LOG.trace("Exception encountered while prefetching " + path + ":", e);
+              LOG.trace("File=" + path.toString() + ", offset=" + offset + ", end=" + end, e);
             }
           } catch (Exception e) {
             // Other exceptions are interesting
-            LOG.warn("Exception encountered while prefetching " + path + ":", e);
+            LOG.warn("File=" + path.toString() + ", offset=" + offset + ", end=" + end, e);
           } finally {
             PrefetchExecutor.complete(path);
           }
@@ -380,9 +384,11 @@ public class HFileReaderV2 extends AbstractHFileReader {
     if (dataBlockIndexReader == null) {
       throw new IOException("Block index not loaded");
     }
-    if (dataBlockOffset < 0 || dataBlockOffset >= trailer.getLoadOnOpenDataOffset()) {
+    long trailerOffset = trailer.getLoadOnOpenDataOffset();
+    if (dataBlockOffset < 0 || dataBlockOffset >= trailerOffset) {
       throw new IOException("Requested block is out of range: " + dataBlockOffset +
-          ", lastDataBlockOffset: " + trailer.getLastDataBlockOffset());
+          ", lastDataBlockOffset: " + trailer.getLastDataBlockOffset() +
+          ", trailer.getLoadOnOpenDataOffset: " + trailerOffset);
     }
 
     // For any given block from any given file, synchronize reads for said block.
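
The substance of the fix is in the prefetch hunk above: the loop's end bound changes from fileSize - getTrailer().getTrailerSize() to getTrailer().getLoadOnOpenDataOffset(), so prefetch now stops at the end of the data blocks instead of running into the load-on-open section (root index, file info) that sits between the data blocks and the trailer. A self-contained sketch of the arithmetic with made-up sizes; only the layout relationship fileSize = dataBlocks + loadOnOpen + trailer reflects the HFile v2 format:

    public class PrefetchBoundSketch {
      public static void main(String[] args) {
        long dataBlocksSize = 900;   // data blocks occupy [0, 900)
        long loadOnOpenSize = 80;    // root index + file info occupy [900, 980)
        long trailerSize = 20;       // fixed trailer occupies [980, 1000)
        long fileSize = dataBlocksSize + loadOnOpenSize + trailerSize;

        long loadOnOpenDataOffset = dataBlocksSize; // what the trailer records

        long oldEnd = fileSize - trailerSize;  // 980: overreaches into load-on-open
        long newEnd = loadOnOpenDataOffset;    // 900: end of the data blocks

        System.out.println("old bound=" + oldEnd + ", new bound=" + newEnd
            + ", overshoot=" + (oldEnd - newEnd) + " bytes");
      }
    }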

hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestChecksum.java

@@ -348,7 +348,7 @@ public class TestChecksum {
     }
 
     @Override
-    protected boolean validateBlockChecksum(HFileBlock block,
+    protected boolean validateBlockChecksum(HFileBlock block, long offset,
       byte[] data, int hdrSize) throws IOException {
       return false; // checksum validation failure
     }