HBASE-15238 HFileReaderV2 prefetch overreaches; runs off the end of the data

    M hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/ChecksumUtil.java
      Clean up trace message and include offset; makes debugging easier.

    M hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/FixedFileTrailer.java
      Fix incorrect data member javadoc.

    M hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java
      Pass along the offset we are checksumming at.

    M hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderImpl.java
      Add trace logging for debugging and set the end of the prefetch to the
      end of the last data block, not file size minus trailer size (the root
      indices and file info that follow the data blocks must be skipped);
      see the sketch below.
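
      For context, a minimal sketch (not part of the patch) of the boundary
      change; prefetchEnd is an illustrative helper, not a method in the
      codebase, and 'fileSize' stands in for the reader's field of that name:

        // Sketch only: where prefetch should stop, given the already-read trailer.
        static long prefetchEnd(FixedFileTrailer trailer, long fileSize) {
          // Old bound: everything except the trailer. That range still covers the
          // root index and file info ("load-on-open" section), so prefetch ran
          // past the last data block.
          long oldEnd = fileSize - trailer.getTrailerSize();
          // New bound: the load-on-open offset, i.e. just past the last data block.
          long newEnd = trailer.getLoadOnOpenDataOffset();
          assert newEnd <= oldEnd;
          return newEnd;
        }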
stack 2016-02-09 11:35:33 -08:00
parent 7cab24729d
commit b6328eb803
5 changed files with 37 additions and 30 deletions

hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/ChecksumUtil.java

@@ -38,11 +38,11 @@ public class ChecksumUtil {
/** This is used to reserve space in a byte buffer */
private static byte[] DUMMY_VALUE = new byte[128 * HFileBlock.CHECKSUM_SIZE];
/**
* This is used by unit tests to make checksum failures throw an
* exception instead of returning null. Returning a null value from
* checksum validation will cause the higher layer to retry that
* read with hdfs-level checksums. Instead, we would like checksum
* failures to cause the entire unit test to fail.
*/
private static boolean generateExceptions = false;
@@ -86,7 +86,7 @@ public class ChecksumUtil {
* The header is extracted from the specified HFileBlock while the
* data-to-be-verified is extracted from 'data'.
*/
- static boolean validateBlockChecksum(String pathName, HFileBlock block,
+ static boolean validateBlockChecksum(String pathName, long offset, HFileBlock block,
byte[] data, int hdrSize) throws IOException {
// If this is an older version of the block that does not have
@@ -100,7 +100,7 @@ public class ChecksumUtil {
}
// Get a checksum object based on the type of checksum that is
// set in the HFileBlock header. A ChecksumType.NULL indicates that
// the caller is not interested in validating checksums, so we
// always return true.
ChecksumType cktype = ChecksumType.codeToType(block.getChecksumType());
@@ -116,12 +116,13 @@ public class ChecksumUtil {
assert dataChecksum != null;
int sizeWithHeader = block.getOnDiskDataSizeWithHeader();
if (LOG.isTraceEnabled()) {
LOG.info("length of data = " + data.length
+ " OnDiskDataSizeWithHeader = " + sizeWithHeader
+ " checksum type = " + cktype.getName()
+ " file =" + pathName
+ " header size = " + hdrSize
+ " bytesPerChecksum = " + bytesPerChecksum);
LOG.info("dataLength=" + data.length
+ ", sizeWithHeader=" + sizeWithHeader
+ ", checksumType=" + cktype.getName()
+ ", file=" + pathName
+ ", offset=" + offset
+ ", headerSize=" + hdrSize
+ ", bytesPerChecksum=" + bytesPerChecksum);
}
try {
dataChecksum.verifyChunkedSums(ByteBuffer.wrap(data, 0, sizeWithHeader),
@@ -140,7 +141,7 @@ public class ChecksumUtil {
* @return The number of bytes needed to store the checksum values
*/
static long numBytes(long datasize, int bytesPerChecksum) {
return numChunks(datasize, bytesPerChecksum) *
HFileBlock.CHECKSUM_SIZE;
}
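
For illustration only (not part of the patch), a worked instance of the formula
above; the chunk size and per-chunk checksum width below are assumed defaults
(16 KB chunks, 4-byte CHECKSUM_SIZE), not values read from this diff:

    // Hypothetical example: a 64 KB block split into 16 KB chunks needs
    // ceil(65536 / 16384) = 4 chunks, so 4 * 4 = 16 bytes of checksums.
    long datasize = 64 * 1024;
    int bytesPerChecksum = 16 * 1024;  // assumed default chunk size
    int checksumSize = 4;              // assumed HFileBlock.CHECKSUM_SIZE
    long chunks = (datasize + bytesPerChecksum - 1) / bytesPerChecksum;  // 4
    long checksumBytes = chunks * checksumSize;                          // 16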

hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/FixedFileTrailer.java

@@ -43,8 +43,7 @@ import org.apache.hadoop.hbase.util.Bytes;
* trailer size is fixed within a given {@link HFile} format version only, but
* we always store the version number as the last four-byte integer of the file.
* The version number itself is split into two portions, a major
- * version and a minor version.
- * The last three bytes of a file is the major
+ * version and a minor version. The last three bytes of a file are the major
* version and a single preceding byte is the minor number. The major version
* determines which readers/writers to use to read/write a hfile while a minor
* version determines smaller changes in hfile format that do not need a new
@@ -52,7 +51,6 @@ import org.apache.hadoop.hbase.util.Bytes;
*/
@InterfaceAudience.Private
public class FixedFileTrailer {
/**
* We store the comparator class name as a fixed-length field in the trailer.
*/
@@ -67,8 +65,9 @@ public class FixedFileTrailer {
/**
* In version 1, the offset to the data block index. Starting from version 2,
* the meaning of this field is the offset to the section of the file that
- * should be loaded at the time the file is being opened, and as of the time
- * of writing, this happens to be the offset of the file info section.
+ * should be loaded at the time the file is being opened: i.e. on open we load
+ * the root index, file info, etc. See http://hbase.apache.org/book.html#_hfile_format_2
+ * in the reference guide.
*/
private long loadOnOpenDataOffset;
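
As an aside (not part of the patch), the regions this offset separates, using
only trailer fields that already appear in this change; 'trailer' and
'fileSize' are assumed to come from an open reader:

    // Illustrative only: the byte ranges an HFile v2 reader distinguishes.
    long loadOnOpen   = trailer.getLoadOnOpenDataOffset();
    long trailerStart = fileSize - trailer.getTrailerSize();
    // [0, loadOnOpen)            blocks written before the load-on-open section
    //                            (data blocks plus any inline blocks) -- the
    //                            range prefetch should cover
    // [loadOnOpen, trailerStart) load-on-open section: root index, file info, ...
    // [trailerStart, fileSize)   the fixed file trailer itself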

hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java

@@ -1694,7 +1694,7 @@ public class HFileBlock implements Cacheable {
b.assumeUncompressed();
}
- if (verifyChecksum && !validateBlockChecksum(b, onDiskBlock, hdrSize)) {
+ if (verifyChecksum && !validateBlockChecksum(b, offset, onDiskBlock, hdrSize)) {
return null; // checksum mismatch
}
@@ -1743,9 +1743,10 @@ public class HFileBlock implements Cacheable {
* If there is a checksum mismatch, then return false. Otherwise
* return true.
*/
- protected boolean validateBlockChecksum(HFileBlock block, byte[] data, int hdrSize)
-     throws IOException {
-   return ChecksumUtil.validateBlockChecksum(pathName, block, data, hdrSize);
+ protected boolean validateBlockChecksum(HFileBlock block, long offset, byte[] data,
+     int hdrSize)
+     throws IOException {
+   return ChecksumUtil.validateBlockChecksum(pathName, offset, block, data, hdrSize);
}
@Override

hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderImpl.java

@@ -248,10 +248,14 @@ public class HFileReaderImpl implements HFile.Reader, Configurable {
if (cacheConf.shouldPrefetchOnOpen()) {
PrefetchExecutor.request(path, new Runnable() {
public void run() {
+ long offset = 0;
+ long end = 0;
try {
-   long offset = 0;
-   long end = fileSize - getTrailer().getTrailerSize();
+   end = getTrailer().getLoadOnOpenDataOffset();
HFileBlock prevBlock = null;
+   if (LOG.isTraceEnabled()) {
+     LOG.trace("File=" + path.toString() + ", offset=" + offset + ", end=" + end);
+   }
while (offset < end) {
if (Thread.interrupted()) {
break;
@@ -273,11 +277,11 @@ public class HFileReaderImpl implements HFile.Reader, Configurable {
} catch (IOException e) {
// IOExceptions are probably due to region closes (relocation, etc.)
if (LOG.isTraceEnabled()) {
LOG.trace("Exception encountered while prefetching " + path + ":", e);
LOG.trace("File=" + path.toString() + ", offset=" + offset + ", end=" + end, e);
}
} catch (Exception e) {
// Other exceptions are interesting
LOG.warn("Exception encountered while prefetching " + path + ":", e);
LOG.warn("File=" + path.toString() + ", offset=" + offset + ", end=" + end, e);
} finally {
PrefetchExecutor.complete(path);
}
@@ -1457,9 +1461,11 @@ public class HFileReaderImpl implements HFile.Reader, Configurable {
if (dataBlockIndexReader == null) {
throw new IOException("Block index not loaded");
}
- if (dataBlockOffset < 0 || dataBlockOffset >= trailer.getLoadOnOpenDataOffset()) {
+ long trailerOffset = trailer.getLoadOnOpenDataOffset();
+ if (dataBlockOffset < 0 || dataBlockOffset >= trailerOffset) {
throw new IOException("Requested block is out of range: " + dataBlockOffset +
-     ", lastDataBlockOffset: " + trailer.getLastDataBlockOffset());
+     ", lastDataBlockOffset: " + trailer.getLastDataBlockOffset() +
+     ", trailer.getLoadOnOpenDataOffset: " + trailerOffset);
}
// For any given block from any given file, synchronize reads for said
// block.

TestChecksum.java

@@ -349,7 +349,7 @@ public class TestChecksum {
}
@Override
- protected boolean validateBlockChecksum(HFileBlock block,
+ protected boolean validateBlockChecksum(HFileBlock block, long offset,
byte[] data, int hdrSize) throws IOException {
return false; // checksum validation failure
}