diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/EncodedDataBlock.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/EncodedDataBlock.java index e6f4765f732..23a1d610360 100644 --- a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/EncodedDataBlock.java +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/EncodedDataBlock.java @@ -70,12 +70,13 @@ public class EncodedDataBlock { /** * Provides access to compressed value. + * @param headerSize header size of the block. * @return Forwards sequential iterator. */ - public Iterator getIterator() { + public Iterator getIterator(int headerSize) { final int rawSize = rawKVs.length; byte[] encodedDataWithHeader = getEncodedData(); - int bytesToSkip = encodingCtx.getHeaderSize() + Bytes.SIZEOF_SHORT; + int bytesToSkip = headerSize + Bytes.SIZEOF_SHORT; ByteArrayInputStream bais = new ByteArrayInputStream(encodedDataWithHeader, bytesToSkip, encodedDataWithHeader.length - bytesToSkip); final DataInputStream dis = new DataInputStream(bais); diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/HFileBlockDefaultEncodingContext.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/HFileBlockDefaultEncodingContext.java index c3bffe66421..086e6e43420 100644 --- a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/HFileBlockDefaultEncodingContext.java +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/HFileBlockDefaultEncodingContext.java @@ -60,7 +60,7 @@ public class HFileBlockDefaultEncodingContext implements private ByteArrayOutputStream encodedStream = new ByteArrayOutputStream(); private DataOutputStream dataOut = new DataOutputStream(encodedStream); - private final byte[] dummyHeader; + private byte[] dummyHeader; /** * @param compressionAlgorithm compression algorithm used @@ -86,8 +86,13 @@ public class HFileBlockDefaultEncodingContext implements + compressionAlgorithm, e); } } - dummyHeader = Preconditions.checkNotNull(headerBytes, - "Please pass HFileBlock.HFILEBLOCK_DUMMY_HEADER instead of null for param headerBytes"); + dummyHeader = Preconditions.checkNotNull(headerBytes, + "Please pass HConstants.HFILEBLOCK_DUMMY_HEADER instead of null for param headerBytes"); + } + + @Override + public void setDummyHeader(byte[] headerBytes) { + dummyHeader = headerBytes; } /** @@ -107,7 +112,7 @@ public class HFileBlockDefaultEncodingContext implements public void postEncoding(BlockType blockType) throws IOException { dataOut.flush(); - compressAfterEncoding(encodedStream.toByteArray(), blockType); + compressAfterEncodingWithBlockType(encodedStream.toByteArray(), blockType); this.blockType = blockType; } @@ -116,7 +121,7 @@ public class HFileBlockDefaultEncodingContext implements * @param blockType * @throws IOException */ - public void compressAfterEncoding(byte[] uncompressedBytesWithHeader, + public void compressAfterEncodingWithBlockType(byte[] uncompressedBytesWithHeader, BlockType blockType) throws IOException { compressAfterEncoding(uncompressedBytesWithHeader, blockType, dummyHeader); } @@ -187,10 +192,4 @@ public class HFileBlockDefaultEncodingContext implements public DataBlockEncoding getDataBlockEncoding() { return this.encodingAlgo; } - - @Override - public int getHeaderSize() { - return this.dummyHeader.length; - } - } diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/HFileBlockEncodingContext.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/HFileBlockEncodingContext.java index 0072f5d9110..78e2c740624 100644 --- a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/HFileBlockEncodingContext.java +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/HFileBlockEncodingContext.java @@ -59,9 +59,9 @@ public interface HFileBlockEncodingContext { public Compression.Algorithm getCompression(); /** - * @return the header size used + * sets the dummy header bytes */ - public int getHeaderSize(); + public void setDummyHeader(byte[] headerBytes); /** * @return the {@link DataBlockEncoding} encoding used diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/FixedFileTrailer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/FixedFileTrailer.java index d8f70906559..96ed4001d67 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/FixedFileTrailer.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/FixedFileTrailer.java @@ -580,7 +580,7 @@ public class FixedFileTrailer { /** * Returns the minor version of this HFile format */ - int getMinorVersion() { + public int getMinorVersion() { return minorVersion; } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java index 70f479415bb..e3fbb337632 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java @@ -813,7 +813,7 @@ public class HFileBlock implements Cacheable { if (blockType == BlockType.DATA) { encodeDataBlockForDisk(); } else { - defaultBlockEncodingCtx.compressAfterEncoding( + defaultBlockEncodingCtx.compressAfterEncodingWithBlockType( uncompressedBytesWithHeader, blockType); onDiskBytesWithHeader = defaultBlockEncodingCtx.getOnDiskBytesWithHeader(); @@ -1748,13 +1748,30 @@ public class HFileBlock implements Cacheable { /** * Maps a minor version to the size of the header. */ - static private int headerSize(int minorVersion) { + public static int headerSize(int minorVersion) { if (minorVersion < MINOR_VERSION_WITH_CHECKSUM) { return HConstants.HFILEBLOCK_HEADER_SIZE_NO_CHECKSUM; } return HConstants.HFILEBLOCK_HEADER_SIZE; } + /** + * Return the appropriate DUMMY_HEADER for the minor version + */ + public byte[] getDummyHeaderForVersion() { + return getDummyHeaderForVersion(minorVersion); + } + + /** + * Return the appropriate DUMMY_HEADER for the minor version + */ + static private byte[] getDummyHeaderForVersion(int minorVersion) { + if (minorVersion < MINOR_VERSION_WITH_CHECKSUM) { + return DUMMY_HEADER_NO_CHECKSUM; + } + return HConstants.HFILEBLOCK_DUMMY_HEADER; + } + /** * Convert the contents of the block header into a human readable string. * This is mostly helpful for debugging. This assumes that the block diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileDataBlockEncoderImpl.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileDataBlockEncoderImpl.java index 03679048c31..e13090202fe 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileDataBlockEncoderImpl.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileDataBlockEncoderImpl.java @@ -57,7 +57,7 @@ public class HFileDataBlockEncoderImpl implements HFileDataBlockEncoder { */ public HFileDataBlockEncoderImpl(DataBlockEncoding onDisk, DataBlockEncoding inCache) { - this(onDisk, inCache, null); + this(onDisk, inCache, HConstants.HFILEBLOCK_DUMMY_HEADER); } /** @@ -71,7 +71,6 @@ public class HFileDataBlockEncoderImpl implements HFileDataBlockEncoder { */ public HFileDataBlockEncoderImpl(DataBlockEncoding onDisk, DataBlockEncoding inCache, byte[] dummyHeader) { - dummyHeader = dummyHeader == null ? HConstants.HFILEBLOCK_DUMMY_HEADER : dummyHeader; this.onDisk = onDisk != null ? onDisk : DataBlockEncoding.NONE; this.inCache = inCache != null ? @@ -96,18 +95,25 @@ public class HFileDataBlockEncoderImpl implements HFileDataBlockEncoder { public static HFileDataBlockEncoder createFromFileInfo( FileInfo fileInfo, DataBlockEncoding preferredEncodingInCache) throws IOException { + boolean hasPreferredCacheEncoding = preferredEncodingInCache != null + && preferredEncodingInCache != DataBlockEncoding.NONE; + byte[] dataBlockEncodingType = fileInfo.get(DATA_BLOCK_ENCODING); - if (dataBlockEncodingType == null) { + if (dataBlockEncodingType == null && !hasPreferredCacheEncoding) { return NoOpDataBlockEncoder.INSTANCE; } - String dataBlockEncodingStr = Bytes.toString(dataBlockEncodingType); DataBlockEncoding onDisk; - try { - onDisk = DataBlockEncoding.valueOf(dataBlockEncodingStr); - } catch (IllegalArgumentException ex) { - throw new IOException("Invalid data block encoding type in file info: " + - dataBlockEncodingStr, ex); + if (dataBlockEncodingType == null) { + onDisk = DataBlockEncoding.NONE; + } else { + String dataBlockEncodingStr = Bytes.toString(dataBlockEncodingType); + try { + onDisk = DataBlockEncoding.valueOf(dataBlockEncodingStr); + } catch (IllegalArgumentException ex) { + throw new IOException("Invalid data block encoding type in file info: " + + dataBlockEncodingStr, ex); + } } DataBlockEncoding inCache; @@ -123,6 +129,8 @@ public class HFileDataBlockEncoderImpl implements HFileDataBlockEncoder { // but new files will be generated with the new encoding. inCache = onDisk; } + // TODO: we are not passing proper header size here based on minor version, presumably + // because this encoder will never actually be used for encoding. return new HFileDataBlockEncoderImpl(onDisk, inCache); } @@ -189,7 +197,7 @@ public class HFileDataBlockEncoderImpl implements HFileDataBlockEncoder { BlockType blockType) throws IOException { if (onDisk == DataBlockEncoding.NONE) { // there is no need to encode the block before writing it to disk - ((HFileBlockDefaultEncodingContext) encodeCtx).compressAfterEncoding( + ((HFileBlockDefaultEncodingContext) encodeCtx).compressAfterEncodingWithBlockType( in.array(), blockType); return; } @@ -231,12 +239,13 @@ public class HFileDataBlockEncoderImpl implements HFileDataBlockEncoder { private HFileBlock encodeDataBlock(HFileBlock block, DataBlockEncoding algo, boolean includesMemstoreTS, HFileBlockEncodingContext encodingCtx) { + encodingCtx.setDummyHeader(block.getDummyHeaderForVersion()); encodeBufferToHFileBlockBuffer( block.getBufferWithoutHeader(), algo, includesMemstoreTS, encodingCtx); byte[] encodedUncompressedBytes = encodingCtx.getUncompressedBytesWithHeader(); ByteBuffer bufferWrapper = ByteBuffer.wrap(encodedUncompressedBytes); - int sizeWithoutHeader = bufferWrapper.limit() - encodingCtx.getHeaderSize(); + int sizeWithoutHeader = bufferWrapper.limit() - block.headerSize(); HFileBlock encodedBlock = new HFileBlock(BlockType.ENCODED_DATA, block.getOnDiskSizeWithoutHeader(), sizeWithoutHeader, block.getPrevBlockOffset(), diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/NoOpDataBlockEncoder.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/NoOpDataBlockEncoder.java index 27da395e950..22a9b0ff47a 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/NoOpDataBlockEncoder.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/NoOpDataBlockEncoder.java @@ -61,7 +61,7 @@ public class NoOpDataBlockEncoder implements HFileDataBlockEncoder { HFileBlockDefaultEncodingContext defaultContext = (HFileBlockDefaultEncodingContext) encodeCtx; - defaultContext.compressAfterEncoding(in.array(), blockType); + defaultContext.compressAfterEncodingWithBlockType(in.array(), blockType); } @Override diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFile.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFile.java index b4ccdc9a680..7fbdeb53ea9 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFile.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFile.java @@ -1532,6 +1532,10 @@ public class StoreFile { return reader.getTrailer().getMajorVersion(); } + public int getHFileMinorVersion() { + return reader.getTrailer().getMinorVersion(); + } + public HFile.Reader getHFileReader() { return reader; } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlock.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlock.java index 9396b530370..7138d3d9981 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlock.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlock.java @@ -419,7 +419,7 @@ public class TestHFileBlock { new byte[rawBuf.array().length + headerLen]; System.arraycopy(rawBuf.array(), 0, rawBufWithHeader, headerLen, rawBuf.array().length); - defaultEncodingCtx.compressAfterEncoding(rawBufWithHeader, + defaultEncodingCtx.compressAfterEncodingWithBlockType(rawBufWithHeader, BlockType.DATA); encodedResultWithHeader = defaultEncodingCtx.getUncompressedBytesWithHeader(); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlockCompatibility.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlockCompatibility.java index 7b3e5beea8f..b4f0d5fdd91 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlockCompatibility.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlockCompatibility.java @@ -494,7 +494,7 @@ public class TestHFileBlockCompatibility { if (blockType == BlockType.DATA) { encodeDataBlockForDisk(); } else { - defaultBlockEncodingCtx.compressAfterEncoding( + defaultBlockEncodingCtx.compressAfterEncodingWithBlockType( uncompressedBytesWithHeader, blockType); onDiskBytesWithHeader = defaultBlockEncodingCtx.getOnDiskBytesWithHeader(); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileDataBlockEncoder.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileDataBlockEncoder.java index f27efd7721c..6a5a742c5b2 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileDataBlockEncoder.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileDataBlockEncoder.java @@ -104,6 +104,41 @@ public class TestHFileDataBlockEncoder { } } + /** Test for HBASE-5746. */ + @Test + public void testHeaderSizeInCacheWithoutChecksum() throws Exception { + int headerSize = HConstants.HFILEBLOCK_HEADER_SIZE_NO_CHECKSUM; + // Create some KVs and create the block with old-style header. + ByteBuffer keyValues = RedundantKVGenerator.convertKvToByteBuffer( + generator.generateTestKeyValues(60), includesMemstoreTS); + int size = keyValues.limit(); + ByteBuffer buf = ByteBuffer.allocate(size + headerSize); + buf.position(headerSize); + keyValues.rewind(); + buf.put(keyValues); + HFileBlock block = new HFileBlock(BlockType.DATA, size, size, -1, buf, + HFileBlock.FILL_HEADER, 0, includesMemstoreTS, + HFileBlock.MINOR_VERSION_NO_CHECKSUM, 0, ChecksumType.NULL.getCode(), 0); + HFileBlock cacheBlock = blockEncoder.diskToCacheFormat(createBlockOnDisk(block), false); + assertEquals(headerSize, cacheBlock.getDummyHeaderForVersion().length); + } + + private HFileBlock createBlockOnDisk(HFileBlock block) throws IOException { + int size; + HFileBlockEncodingContext context = new HFileBlockDefaultEncodingContext( + Compression.Algorithm.NONE, blockEncoder.getEncodingOnDisk(), + HConstants.HFILEBLOCK_DUMMY_HEADER); + context.setDummyHeader(block.getDummyHeaderForVersion()); + blockEncoder.beforeWriteToDisk(block.getBufferWithoutHeader(), + includesMemstoreTS, context, block.getBlockType()); + byte[] encodedBytes = context.getUncompressedBytesWithHeader(); + size = encodedBytes.length - block.getDummyHeaderForVersion().length; + return new HFileBlock(context.getBlockType(), size, size, -1, + ByteBuffer.wrap(encodedBytes), HFileBlock.FILL_HEADER, 0, includesMemstoreTS, + block.getMinorVersion(), block.getBytesPerChecksum(), block.getChecksumType(), + block.getOnDiskDataSizeWithHeader()); + } + /** * Test writing to disk. * @throws IOException @@ -112,19 +147,7 @@ public class TestHFileDataBlockEncoder { public void testEncodingWritePath() throws IOException { // usually we have just block without headers, but don't complicate that HFileBlock block = getSampleHFileBlock(); - HFileBlockEncodingContext context = new HFileBlockDefaultEncodingContext( - Compression.Algorithm.NONE, blockEncoder.getEncodingOnDisk(), HConstants.HFILEBLOCK_DUMMY_HEADER); - blockEncoder.beforeWriteToDisk(block.getBufferWithoutHeader(), - includesMemstoreTS, context, block.getBlockType()); - - byte[] encodedBytes = context.getUncompressedBytesWithHeader(); - int size = encodedBytes.length - HConstants.HFILEBLOCK_HEADER_SIZE; - HFileBlock blockOnDisk = - new HFileBlock(context.getBlockType(), size, size, -1, - ByteBuffer.wrap(encodedBytes), HFileBlock.FILL_HEADER, 0, - includesMemstoreTS, block.getMinorVersion(), - block.getBytesPerChecksum(), block.getChecksumType(), - block.getOnDiskDataSizeWithHeader()); + HFileBlock blockOnDisk = createBlockOnDisk(block); if (blockEncoder.getEncodingOnDisk() != DataBlockEncoding.NONE) { diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/DataBlockEncodingTool.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/DataBlockEncodingTool.java index 0aeaf36b36c..d098fb18654 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/DataBlockEncodingTool.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/DataBlockEncodingTool.java @@ -44,6 +44,7 @@ import org.apache.hadoop.hbase.io.encoding.DataBlockEncoder; import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding; import org.apache.hadoop.hbase.io.encoding.EncodedDataBlock; import org.apache.hadoop.hbase.io.hfile.CacheConfig; +import org.apache.hadoop.hbase.io.hfile.HFileBlock; import org.apache.hadoop.hbase.io.hfile.NoOpDataBlockEncoder; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.io.compress.CompressionOutputStream; @@ -119,6 +120,7 @@ public class DataBlockEncodingTool { private long totalCFLength = 0; private byte[] rawKVs; + private int minorVersion = 0; private final String compressionAlgorithmName; private final Algorithm compressionAlgorithm; @@ -228,7 +230,7 @@ public class DataBlockEncodingTool { List> codecIterators = new ArrayList>(); for(EncodedDataBlock codec : codecs) { - codecIterators.add(codec.getIterator()); + codecIterators.add(codec.getIterator(HFileBlock.headerSize(minorVersion))); } int j = 0; @@ -320,7 +322,7 @@ public class DataBlockEncodingTool { Iterator it; - it = codec.getIterator(); + it = codec.getIterator(HFileBlock.headerSize(minorVersion)); // count only the algorithm time, without memory allocations // (expect first time) @@ -590,6 +592,7 @@ public class DataBlockEncodingTool { // run the utilities DataBlockEncodingTool comp = new DataBlockEncodingTool(compressionName); + comp.minorVersion = reader.getHFileMinorVersion(); comp.checkStatistics(scanner, kvLimit); if (doVerify) { comp.verifyCodecs(scanner, kvLimit);