From 869c0cf43c0dd76e80e7b6cbbb855da773a7a360 Mon Sep 17 00:00:00 2001
From: stack
Date: Tue, 9 Feb 2016 20:55:20 -0800
Subject: [PATCH] HBASE-15238 HFileReaderV2 prefetch overreaches; runs off the
 end of the data

---
 .../hadoop/hbase/io/hfile/ChecksumUtil.java   | 29 ++++++++++---------
 .../hbase/io/hfile/FixedFileTrailer.java      |  9 +++---
 .../hadoop/hbase/io/hfile/HFileBlock.java     |  9 +++---
 .../hadoop/hbase/io/hfile/HFileReaderV2.java  | 18 ++++++++----
 .../hadoop/hbase/io/hfile/TestChecksum.java   |  2 +-
 5 files changed, 37 insertions(+), 30 deletions(-)

diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/ChecksumUtil.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/ChecksumUtil.java
index 402caa8942e..69f4330f61a 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/ChecksumUtil.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/ChecksumUtil.java
@@ -38,11 +38,11 @@ public class ChecksumUtil {
   /** This is used to reserve space in a byte buffer */
   private static byte[] DUMMY_VALUE = new byte[128 * HFileBlock.CHECKSUM_SIZE];
 
-  /** 
-   * This is used by unit tests to make checksum failures throw an 
-   * exception instead of returning null. Returning a null value from 
-   * checksum validation will cause the higher layer to retry that 
-   * read with hdfs-level checksums. Instead, we would like checksum 
+  /**
+   * This is used by unit tests to make checksum failures throw an
+   * exception instead of returning null. Returning a null value from
+   * checksum validation will cause the higher layer to retry that
+   * read with hdfs-level checksums. Instead, we would like checksum
    * failures to cause the entire unit test to fail.
    */
   private static boolean generateExceptions = false;
@@ -86,7 +86,7 @@ public class ChecksumUtil {
    * The header is extracted from the specified HFileBlock while the
    * data-to-be-verified is extracted from 'data'.
    */
-  static boolean validateBlockChecksum(String pathName, HFileBlock block,
+  static boolean validateBlockChecksum(String pathName, long offset, HFileBlock block,
     byte[] data, int hdrSize) throws IOException {
 
     // If this is an older version of the block that does not have
@@ -100,7 +100,7 @@ public class ChecksumUtil {
     }
 
     // Get a checksum object based on the type of checksum that is
-    // set in the HFileBlock header. A ChecksumType.NULL indicates that 
+    // set in the HFileBlock header. A ChecksumType.NULL indicates that
     // the caller is not interested in validating checksums, so we
     // always return true.
     ChecksumType cktype = ChecksumType.codeToType(block.getChecksumType());
@@ -116,12 +116,13 @@ public class ChecksumUtil {
     assert dataChecksum != null;
     int sizeWithHeader = block.getOnDiskDataSizeWithHeader();
     if (LOG.isTraceEnabled()) {
-      LOG.info("length of data = " + data.length
-          + " OnDiskDataSizeWithHeader = " + sizeWithHeader
-          + " checksum type = " + cktype.getName()
-          + " file =" + pathName
-          + " header size = " + hdrSize
-          + " bytesPerChecksum = " + bytesPerChecksum);
+      LOG.info("dataLength=" + data.length
+          + ", sizeWithHeader=" + sizeWithHeader
+          + ", checksumType=" + cktype.getName()
+          + ", file=" + pathName
+          + ", offset=" + offset
+          + ", headerSize=" + hdrSize
+          + ", bytesPerChecksum=" + bytesPerChecksum);
     }
     try {
       dataChecksum.verifyChunkedSums(ByteBuffer.wrap(data, 0, sizeWithHeader),
@@ -140,7 +141,7 @@ public class ChecksumUtil {
    * @return The number of bytes needed to store the checksum values
    */
   static long numBytes(long datasize, int bytesPerChecksum) {
-    return numChunks(datasize, bytesPerChecksum) * 
+    return numChunks(datasize, bytesPerChecksum) *
                      HFileBlock.CHECKSUM_SIZE;
  }
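Note on the ChecksumUtil change above: the new offset parameter only enriches
the trace message; the verification itself still delegates to Hadoop's
DataChecksum. The following is a minimal standalone sketch, not code from this
patch, of the chunked verification that ChecksumUtil performs, assuming CRC32C
and illustrative names (blockBytes, checksumBytes, baseOffset).

    import java.nio.ByteBuffer;
    import org.apache.hadoop.util.DataChecksum;

    public class ChunkedChecksumSketch {
      static void verify(byte[] blockBytes, int sizeWithHeader, byte[] checksumBytes,
          int bytesPerChecksum, String fileName, long baseOffset) throws java.io.IOException {
        // One 4-byte CRC32C checksum per bytesPerChecksum bytes of data.
        DataChecksum sum =
            DataChecksum.newDataChecksum(DataChecksum.Type.CRC32C, bytesPerChecksum);
        // Throws ChecksumException (an IOException) on mismatch; the exception
        // message includes fileName and a position computed from baseOffset,
        // which is why threading the block offset through aids debugging.
        sum.verifyChunkedSums(ByteBuffer.wrap(blockBytes, 0, sizeWithHeader),
            ByteBuffer.wrap(checksumBytes), fileName, baseOffset);
      }
    }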
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/FixedFileTrailer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/FixedFileTrailer.java
index 56510f06c5a..67350366c50 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/FixedFileTrailer.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/FixedFileTrailer.java
@@ -41,8 +41,7 @@ import org.apache.hadoop.hbase.util.Bytes;
  * trailer size is fixed within a given {@link HFile} format version only, but
  * we always store the version number as the last four-byte integer of the file.
  * The version number itself is split into two portions, a major
- * version and a minor version. 
- * The last three bytes of a file is the major
+ * version and a minor version. The last three bytes of a file are the major
  * version and a single preceding byte is the minor number. The major version
  * determines which readers/writers to use to read/write a hfile while a minor
  * version determines smaller changes in hfile format that do not need a new
@@ -50,7 +49,6 @@ import org.apache.hadoop.hbase.util.Bytes;
  */
 @InterfaceAudience.Private
 public class FixedFileTrailer {
-
   /**
    * We store the comparator class name as a fixed-length field in the trailer.
    */
@@ -65,8 +63,9 @@ public class FixedFileTrailer {
   /**
    * In version 1, the offset to the data block index. Starting from version 2,
    * the meaning of this field is the offset to the section of the file that
-   * should be loaded at the time the file is being opened, and as of the time
-   * of writing, this happens to be the offset of the file info section.
+   * should be loaded at the time the file is being opened: i.e. on open we load
+   * the root index, file info, etc. See http://hbase.apache.org/book.html#_hfile_format_2
+   * in the reference guide.
    */
   private long loadOnOpenDataOffset;
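The trailer javadoc above describes how the last four bytes of an HFile pack
both version numbers: the minor version in the high byte and the major version
in the low three bytes. A minimal sketch of that packing follows; the method
names are illustrative, not the FixedFileTrailer API.

    public class TrailerVersionSketch {
      static int pack(int majorVersion, int minorVersion) {
        // Low three bytes: major version; high byte: minor version.
        return (majorVersion & 0x00ffffff) | (minorVersion << 24);
      }

      static int extractMajor(int serialized) {
        return serialized & 0x00ffffff;
      }

      static int extractMinor(int serialized) {
        return serialized >>> 24;
      }

      public static void main(String[] args) {
        int v = pack(2, 3); // e.g. HFile major version 2, minor version 3
        System.out.println(extractMajor(v) + "." + extractMinor(v)); // prints 2.3
      }
    }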
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java
index 0f63e0f2c43..edccfb59dbe 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java
@@ -1695,7 +1695,7 @@ public class HFileBlock implements Cacheable {
         b.assumeUncompressed();
       }
 
-      if (verifyChecksum && !validateBlockChecksum(b, onDiskBlock, hdrSize)) {
+      if (verifyChecksum && !validateBlockChecksum(b, offset, onDiskBlock, hdrSize)) {
         return null;             // checksum mismatch
       }
 
@@ -1744,9 +1744,10 @@ public class HFileBlock implements Cacheable {
      * If there is a checksum mismatch, then return false. Otherwise
      * return true.
      */
-    protected boolean validateBlockChecksum(HFileBlock block, byte[] data, int hdrSize)
-        throws IOException {
-      return ChecksumUtil.validateBlockChecksum(pathName, block, data, hdrSize);
+    protected boolean validateBlockChecksum(HFileBlock block, long offset, byte[] data,
+        int hdrSize)
+    throws IOException {
+      return ChecksumUtil.validateBlockChecksum(pathName, offset, block, data, hdrSize);
     }
 
     @Override
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderV2.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderV2.java
index 12c46e8dc95..a1b4c340d69 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderV2.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderV2.java
@@ -189,10 +189,14 @@ public class HFileReaderV2 extends AbstractHFileReader {
     if (cacheConf.shouldPrefetchOnOpen()) {
       PrefetchExecutor.request(path, new Runnable() {
         public void run() {
+          long offset = 0;
+          long end = 0;
           try {
-            long offset = 0;
-            long end = fileSize - getTrailer().getTrailerSize();
+            end = getTrailer().getLoadOnOpenDataOffset();
             HFileBlock prevBlock = null;
+            if (LOG.isTraceEnabled()) {
+              LOG.trace("File=" + path.toString() + ", offset=" + offset + ", end=" + end);
+            }
             while (offset < end) {
               if (Thread.interrupted()) {
                 break;
@@ -209,11 +213,11 @@ public class HFileReaderV2 extends AbstractHFileReader {
           } catch (IOException e) {
             // IOExceptions are probably due to region closes (relocation, etc.)
             if (LOG.isTraceEnabled()) {
-              LOG.trace("Exception encountered while prefetching " + path + ":", e);
+              LOG.trace("File=" + path.toString() + ", offset=" + offset + ", end=" + end, e);
             }
           } catch (Exception e) {
             // Other exceptions are interesting
-            LOG.warn("Exception encountered while prefetching " + path + ":", e);
+            LOG.warn("File=" + path.toString() + ", offset=" + offset + ", end=" + end, e);
           } finally {
             PrefetchExecutor.complete(path);
           }
@@ -380,9 +384,11 @@ public class HFileReaderV2 extends AbstractHFileReader {
     if (dataBlockIndexReader == null) {
       throw new IOException("Block index not loaded");
     }
-    if (dataBlockOffset < 0 || dataBlockOffset >= trailer.getLoadOnOpenDataOffset()) {
+    long trailerOffset = trailer.getLoadOnOpenDataOffset();
+    if (dataBlockOffset < 0 || dataBlockOffset >= trailerOffset) {
       throw new IOException("Requested block is out of range: " + dataBlockOffset +
-        ", lastDataBlockOffset: " + trailer.getLastDataBlockOffset());
+        ", lastDataBlockOffset: " + trailer.getLastDataBlockOffset() +
+        ", trailer.getLoadOnOpenDataOffset: " + trailerOffset);
     }
 
     // For any given block from any given file, synchronize reads for said block.
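The HFileReaderV2 hunk above is the heart of HBASE-15238: the prefetch loop
previously ran to fileSize minus the trailer size, which walks past the last
data block into the load-on-open section (root index, file info) that is not
laid out as a run of blocks, hence the overreach. The loop must stop at
loadOnOpenDataOffset instead. Below is a minimal sketch of the corrected
bound, assuming a hypothetical helper around HFileReaderV2; argument choices
such as pread=true are assumptions of this sketch, not the patch's exact call.

    import java.io.IOException;
    import org.apache.hadoop.hbase.io.hfile.HFileBlock;
    import org.apache.hadoop.hbase.io.hfile.HFileReaderV2;

    class PrefetchSketch {
      static void prefetchBlocks(HFileReaderV2 reader) throws IOException {
        long offset = 0;
        // Stop where the load-on-open section begins, not at the trailer:
        // the bytes between loadOnOpenDataOffset and the trailer hold indexes
        // and file info, not a sequence of data blocks.
        long end = reader.getTrailer().getLoadOnOpenDataOffset();
        while (offset < end && !Thread.interrupted()) {
          // -1: on-disk block size unknown; cacheBlock=true warms the block
          // cache; pread=true reads positionally without seeking the stream.
          HFileBlock block = reader.readBlock(offset, -1, true, true, false, false, null, null);
          offset += block.getOnDiskSizeWithHeader();
        }
      }
    }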
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestChecksum.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestChecksum.java
index 9f19251405b..4f29ff555b0 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestChecksum.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestChecksum.java
@@ -348,7 +348,7 @@ public class TestChecksum {
     }
 
     @Override
-    protected boolean validateBlockChecksum(HFileBlock block,
+    protected boolean validateBlockChecksum(HFileBlock block, long offset,
       byte[] data, int hdrSize) throws IOException {
       return false; // checksum validation failure
     }
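The TestChecksum hunk simply threads the new offset parameter through a test
double whose validation always fails, exercising the fallback path described
in the ChecksumUtil javadoc: a false return makes the higher layer retry the
read with HDFS-level checksums. A self-contained illustration of that
test-double pattern follows; BaseReader is a stand-in, not the real
HFileBlock reader API.

    import java.io.IOException;

    // Stand-in for the reader class; models only the checksum hook.
    class BaseReader {
      protected boolean validateBlockChecksum(byte[] data, long offset, int hdrSize)
          throws IOException {
        return true; // the real implementation verifies CRC chunks over data
      }
    }

    // Test double: every block "fails" its HBase checksum, forcing callers
    // onto the retry path that re-reads with HDFS-level checksums.
    class AlwaysFailingReader extends BaseReader {
      @Override
      protected boolean validateBlockChecksum(byte[] data, long offset, int hdrSize)
          throws IOException {
        return false; // simulate corruption unconditionally
      }
    }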