HBASE-5746 HFileDataBlockEncoderImpl uses wrong header size when reading HFiles with no checksums (0.96)
git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1478966 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
515d19c52e
commit
d30d4ef8e7
|
@ -70,12 +70,13 @@ public class EncodedDataBlock {
|
|||
|
||||
/**
|
||||
* Provides access to compressed value.
|
||||
* @param headerSize header size of the block.
|
||||
* @return Forward sequential iterator.
|
||||
*/
|
||||
public Iterator<KeyValue> getIterator() {
|
||||
public Iterator<KeyValue> getIterator(int headerSize) {
|
||||
final int rawSize = rawKVs.length;
|
||||
byte[] encodedDataWithHeader = getEncodedData();
|
||||
int bytesToSkip = encodingCtx.getHeaderSize() + Bytes.SIZEOF_SHORT;
|
||||
int bytesToSkip = headerSize + Bytes.SIZEOF_SHORT;
|
||||
ByteArrayInputStream bais = new ByteArrayInputStream(encodedDataWithHeader,
|
||||
bytesToSkip, encodedDataWithHeader.length - bytesToSkip);
|
||||
final DataInputStream dis = new DataInputStream(bais);
|
||||
|
|
|
@ -60,7 +60,7 @@ public class HFileBlockDefaultEncodingContext implements
|
|||
private ByteArrayOutputStream encodedStream = new ByteArrayOutputStream();
|
||||
private DataOutputStream dataOut = new DataOutputStream(encodedStream);
|
||||
|
||||
private final byte[] dummyHeader;
|
||||
private byte[] dummyHeader;
|
||||
|
||||
/**
|
||||
* @param compressionAlgorithm compression algorithm used
|
||||
|
@ -87,7 +87,12 @@ public class HFileBlockDefaultEncodingContext implements
|
|||
}
|
||||
}
|
||||
dummyHeader = Preconditions.checkNotNull(headerBytes,
|
||||
"Please pass HFileBlock.HFILEBLOCK_DUMMY_HEADER instead of null for param headerBytes");
|
||||
"Please pass HConstants.HFILEBLOCK_DUMMY_HEADER instead of null for param headerBytes");
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setDummyHeader(byte[] headerBytes) {
|
||||
dummyHeader = headerBytes;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -107,7 +112,7 @@ public class HFileBlockDefaultEncodingContext implements
|
|||
public void postEncoding(BlockType blockType)
|
||||
throws IOException {
|
||||
dataOut.flush();
|
||||
compressAfterEncoding(encodedStream.toByteArray(), blockType);
|
||||
compressAfterEncodingWithBlockType(encodedStream.toByteArray(), blockType);
|
||||
this.blockType = blockType;
|
||||
}
|
||||
|
||||
|
@ -116,7 +121,7 @@ public class HFileBlockDefaultEncodingContext implements
|
|||
* @param blockType
|
||||
* @throws IOException
|
||||
*/
|
||||
public void compressAfterEncoding(byte[] uncompressedBytesWithHeader,
|
||||
public void compressAfterEncodingWithBlockType(byte[] uncompressedBytesWithHeader,
|
||||
BlockType blockType) throws IOException {
|
||||
compressAfterEncoding(uncompressedBytesWithHeader, blockType, dummyHeader);
|
||||
}
|
||||
|
@ -187,10 +192,4 @@ public class HFileBlockDefaultEncodingContext implements
|
|||
public DataBlockEncoding getDataBlockEncoding() {
|
||||
return this.encodingAlgo;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getHeaderSize() {
|
||||
return this.dummyHeader.length;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -59,9 +59,9 @@ public interface HFileBlockEncodingContext {
|
|||
public Compression.Algorithm getCompression();
|
||||
|
||||
/**
|
||||
* @return the header size used
|
||||
* sets the dummy header bytes
|
||||
*/
|
||||
public int getHeaderSize();
|
||||
public void setDummyHeader(byte[] headerBytes);
|
||||
|
||||
/**
|
||||
* @return the {@link DataBlockEncoding} encoding used
|
||||
|
|
|
@ -580,7 +580,7 @@ public class FixedFileTrailer {
|
|||
/**
|
||||
* Returns the minor version of this HFile format
|
||||
*/
|
||||
int getMinorVersion() {
|
||||
public int getMinorVersion() {
|
||||
return minorVersion;
|
||||
}
|
||||
|
||||
|
|
|
@ -813,7 +813,7 @@ public class HFileBlock implements Cacheable {
|
|||
if (blockType == BlockType.DATA) {
|
||||
encodeDataBlockForDisk();
|
||||
} else {
|
||||
defaultBlockEncodingCtx.compressAfterEncoding(
|
||||
defaultBlockEncodingCtx.compressAfterEncodingWithBlockType(
|
||||
uncompressedBytesWithHeader, blockType);
|
||||
onDiskBytesWithHeader =
|
||||
defaultBlockEncodingCtx.getOnDiskBytesWithHeader();
|
||||
|
@ -1748,13 +1748,30 @@ public class HFileBlock implements Cacheable {
|
|||
/**
|
||||
* Maps a minor version to the size of the header.
|
||||
*/
|
||||
static private int headerSize(int minorVersion) {
|
||||
public static int headerSize(int minorVersion) {
|
||||
if (minorVersion < MINOR_VERSION_WITH_CHECKSUM) {
|
||||
return HConstants.HFILEBLOCK_HEADER_SIZE_NO_CHECKSUM;
|
||||
}
|
||||
return HConstants.HFILEBLOCK_HEADER_SIZE;
|
||||
}
|
||||
|
||||
/**
|
||||
* Return the appropriate DUMMY_HEADER for the minor version
|
||||
*/
|
||||
public byte[] getDummyHeaderForVersion() {
|
||||
return getDummyHeaderForVersion(minorVersion);
|
||||
}
|
||||
|
||||
/**
|
||||
* Return the appropriate DUMMY_HEADER for the minor version
|
||||
*/
|
||||
static private byte[] getDummyHeaderForVersion(int minorVersion) {
|
||||
if (minorVersion < MINOR_VERSION_WITH_CHECKSUM) {
|
||||
return DUMMY_HEADER_NO_CHECKSUM;
|
||||
}
|
||||
return HConstants.HFILEBLOCK_DUMMY_HEADER;
|
||||
}
|
||||
|
||||
/**
|
||||
* Convert the contents of the block header into a human readable string.
|
||||
* This is mostly helpful for debugging. This assumes that the block
|
||||
|
|
|
@ -57,7 +57,7 @@ public class HFileDataBlockEncoderImpl implements HFileDataBlockEncoder {
|
|||
*/
|
||||
public HFileDataBlockEncoderImpl(DataBlockEncoding onDisk,
|
||||
DataBlockEncoding inCache) {
|
||||
this(onDisk, inCache, null);
|
||||
this(onDisk, inCache, HConstants.HFILEBLOCK_DUMMY_HEADER);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -71,7 +71,6 @@ public class HFileDataBlockEncoderImpl implements HFileDataBlockEncoder {
|
|||
*/
|
||||
public HFileDataBlockEncoderImpl(DataBlockEncoding onDisk,
|
||||
DataBlockEncoding inCache, byte[] dummyHeader) {
|
||||
dummyHeader = dummyHeader == null ? HConstants.HFILEBLOCK_DUMMY_HEADER : dummyHeader;
|
||||
this.onDisk = onDisk != null ?
|
||||
onDisk : DataBlockEncoding.NONE;
|
||||
this.inCache = inCache != null ?
|
||||
|
@ -96,18 +95,25 @@ public class HFileDataBlockEncoderImpl implements HFileDataBlockEncoder {
|
|||
public static HFileDataBlockEncoder createFromFileInfo(
|
||||
FileInfo fileInfo, DataBlockEncoding preferredEncodingInCache)
|
||||
throws IOException {
|
||||
boolean hasPreferredCacheEncoding = preferredEncodingInCache != null
|
||||
&& preferredEncodingInCache != DataBlockEncoding.NONE;
|
||||
|
||||
byte[] dataBlockEncodingType = fileInfo.get(DATA_BLOCK_ENCODING);
|
||||
if (dataBlockEncodingType == null) {
|
||||
if (dataBlockEncodingType == null && !hasPreferredCacheEncoding) {
|
||||
return NoOpDataBlockEncoder.INSTANCE;
|
||||
}
|
||||
|
||||
String dataBlockEncodingStr = Bytes.toString(dataBlockEncodingType);
|
||||
DataBlockEncoding onDisk;
|
||||
if (dataBlockEncodingType == null) {
|
||||
onDisk = DataBlockEncoding.NONE;
|
||||
} else {
|
||||
String dataBlockEncodingStr = Bytes.toString(dataBlockEncodingType);
|
||||
try {
|
||||
onDisk = DataBlockEncoding.valueOf(dataBlockEncodingStr);
|
||||
} catch (IllegalArgumentException ex) {
|
||||
throw new IOException("Invalid data block encoding type in file info: " +
|
||||
dataBlockEncodingStr, ex);
|
||||
throw new IOException("Invalid data block encoding type in file info: "
|
||||
+ dataBlockEncodingStr, ex);
|
||||
}
|
||||
}
|
||||
|
||||
DataBlockEncoding inCache;
|
||||
|
@ -123,6 +129,8 @@ public class HFileDataBlockEncoderImpl implements HFileDataBlockEncoder {
|
|||
// but new files will be generated with the new encoding.
|
||||
inCache = onDisk;
|
||||
}
|
||||
// TODO: we are not passing proper header size here based on minor version, presumably
|
||||
// because this encoder will never actually be used for encoding.
|
||||
return new HFileDataBlockEncoderImpl(onDisk, inCache);
|
||||
}
|
||||
|
||||
|
@ -189,7 +197,7 @@ public class HFileDataBlockEncoderImpl implements HFileDataBlockEncoder {
|
|||
BlockType blockType) throws IOException {
|
||||
if (onDisk == DataBlockEncoding.NONE) {
|
||||
// there is no need to encode the block before writing it to disk
|
||||
((HFileBlockDefaultEncodingContext) encodeCtx).compressAfterEncoding(
|
||||
((HFileBlockDefaultEncodingContext) encodeCtx).compressAfterEncodingWithBlockType(
|
||||
in.array(), blockType);
|
||||
return;
|
||||
}
|
||||
|
@ -231,12 +239,13 @@ public class HFileDataBlockEncoderImpl implements HFileDataBlockEncoder {
|
|||
private HFileBlock encodeDataBlock(HFileBlock block,
|
||||
DataBlockEncoding algo, boolean includesMemstoreTS,
|
||||
HFileBlockEncodingContext encodingCtx) {
|
||||
encodingCtx.setDummyHeader(block.getDummyHeaderForVersion());
|
||||
encodeBufferToHFileBlockBuffer(
|
||||
block.getBufferWithoutHeader(), algo, includesMemstoreTS, encodingCtx);
|
||||
byte[] encodedUncompressedBytes =
|
||||
encodingCtx.getUncompressedBytesWithHeader();
|
||||
ByteBuffer bufferWrapper = ByteBuffer.wrap(encodedUncompressedBytes);
|
||||
int sizeWithoutHeader = bufferWrapper.limit() - encodingCtx.getHeaderSize();
|
||||
int sizeWithoutHeader = bufferWrapper.limit() - block.headerSize();
|
||||
HFileBlock encodedBlock = new HFileBlock(BlockType.ENCODED_DATA,
|
||||
block.getOnDiskSizeWithoutHeader(),
|
||||
sizeWithoutHeader, block.getPrevBlockOffset(),
|
||||
|
|
|
@ -61,7 +61,7 @@ public class NoOpDataBlockEncoder implements HFileDataBlockEncoder {
|
|||
|
||||
HFileBlockDefaultEncodingContext defaultContext =
|
||||
(HFileBlockDefaultEncodingContext) encodeCtx;
|
||||
defaultContext.compressAfterEncoding(in.array(), blockType);
|
||||
defaultContext.compressAfterEncodingWithBlockType(in.array(), blockType);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -1532,6 +1532,10 @@ public class StoreFile {
|
|||
return reader.getTrailer().getMajorVersion();
|
||||
}
|
||||
|
||||
public int getHFileMinorVersion() {
|
||||
return reader.getTrailer().getMinorVersion();
|
||||
}
|
||||
|
||||
public HFile.Reader getHFileReader() {
|
||||
return reader;
|
||||
}
|
||||
|
|
|
@ -419,7 +419,7 @@ public class TestHFileBlock {
|
|||
new byte[rawBuf.array().length + headerLen];
|
||||
System.arraycopy(rawBuf.array(), 0, rawBufWithHeader,
|
||||
headerLen, rawBuf.array().length);
|
||||
defaultEncodingCtx.compressAfterEncoding(rawBufWithHeader,
|
||||
defaultEncodingCtx.compressAfterEncodingWithBlockType(rawBufWithHeader,
|
||||
BlockType.DATA);
|
||||
encodedResultWithHeader =
|
||||
defaultEncodingCtx.getUncompressedBytesWithHeader();
|
||||
|
|
|
@ -494,7 +494,7 @@ public class TestHFileBlockCompatibility {
|
|||
if (blockType == BlockType.DATA) {
|
||||
encodeDataBlockForDisk();
|
||||
} else {
|
||||
defaultBlockEncodingCtx.compressAfterEncoding(
|
||||
defaultBlockEncodingCtx.compressAfterEncodingWithBlockType(
|
||||
uncompressedBytesWithHeader, blockType);
|
||||
onDiskBytesWithHeader =
|
||||
defaultBlockEncodingCtx.getOnDiskBytesWithHeader();
|
||||
|
|
|
@ -104,6 +104,41 @@ public class TestHFileDataBlockEncoder {
|
|||
}
|
||||
}
|
||||
|
||||
/** Test for HBASE-5746. */
|
||||
@Test
|
||||
public void testHeaderSizeInCacheWithoutChecksum() throws Exception {
|
||||
int headerSize = HConstants.HFILEBLOCK_HEADER_SIZE_NO_CHECKSUM;
|
||||
// Create some KVs and create the block with old-style header.
|
||||
ByteBuffer keyValues = RedundantKVGenerator.convertKvToByteBuffer(
|
||||
generator.generateTestKeyValues(60), includesMemstoreTS);
|
||||
int size = keyValues.limit();
|
||||
ByteBuffer buf = ByteBuffer.allocate(size + headerSize);
|
||||
buf.position(headerSize);
|
||||
keyValues.rewind();
|
||||
buf.put(keyValues);
|
||||
HFileBlock block = new HFileBlock(BlockType.DATA, size, size, -1, buf,
|
||||
HFileBlock.FILL_HEADER, 0, includesMemstoreTS,
|
||||
HFileBlock.MINOR_VERSION_NO_CHECKSUM, 0, ChecksumType.NULL.getCode(), 0);
|
||||
HFileBlock cacheBlock = blockEncoder.diskToCacheFormat(createBlockOnDisk(block), false);
|
||||
assertEquals(headerSize, cacheBlock.getDummyHeaderForVersion().length);
|
||||
}
|
||||
|
||||
private HFileBlock createBlockOnDisk(HFileBlock block) throws IOException {
|
||||
int size;
|
||||
HFileBlockEncodingContext context = new HFileBlockDefaultEncodingContext(
|
||||
Compression.Algorithm.NONE, blockEncoder.getEncodingOnDisk(),
|
||||
HConstants.HFILEBLOCK_DUMMY_HEADER);
|
||||
context.setDummyHeader(block.getDummyHeaderForVersion());
|
||||
blockEncoder.beforeWriteToDisk(block.getBufferWithoutHeader(),
|
||||
includesMemstoreTS, context, block.getBlockType());
|
||||
byte[] encodedBytes = context.getUncompressedBytesWithHeader();
|
||||
size = encodedBytes.length - block.getDummyHeaderForVersion().length;
|
||||
return new HFileBlock(context.getBlockType(), size, size, -1,
|
||||
ByteBuffer.wrap(encodedBytes), HFileBlock.FILL_HEADER, 0, includesMemstoreTS,
|
||||
block.getMinorVersion(), block.getBytesPerChecksum(), block.getChecksumType(),
|
||||
block.getOnDiskDataSizeWithHeader());
|
||||
}
|
||||
|
||||
/**
|
||||
* Test writing to disk.
|
||||
* @throws IOException
|
||||
|
@ -112,19 +147,7 @@ public class TestHFileDataBlockEncoder {
|
|||
public void testEncodingWritePath() throws IOException {
|
||||
// usually we have just a block without headers, but don't complicate that
|
||||
HFileBlock block = getSampleHFileBlock();
|
||||
HFileBlockEncodingContext context = new HFileBlockDefaultEncodingContext(
|
||||
Compression.Algorithm.NONE, blockEncoder.getEncodingOnDisk(), HConstants.HFILEBLOCK_DUMMY_HEADER);
|
||||
blockEncoder.beforeWriteToDisk(block.getBufferWithoutHeader(),
|
||||
includesMemstoreTS, context, block.getBlockType());
|
||||
|
||||
byte[] encodedBytes = context.getUncompressedBytesWithHeader();
|
||||
int size = encodedBytes.length - HConstants.HFILEBLOCK_HEADER_SIZE;
|
||||
HFileBlock blockOnDisk =
|
||||
new HFileBlock(context.getBlockType(), size, size, -1,
|
||||
ByteBuffer.wrap(encodedBytes), HFileBlock.FILL_HEADER, 0,
|
||||
includesMemstoreTS, block.getMinorVersion(),
|
||||
block.getBytesPerChecksum(), block.getChecksumType(),
|
||||
block.getOnDiskDataSizeWithHeader());
|
||||
HFileBlock blockOnDisk = createBlockOnDisk(block);
|
||||
|
||||
if (blockEncoder.getEncodingOnDisk() !=
|
||||
DataBlockEncoding.NONE) {
|
||||
|
|
|
@ -44,6 +44,7 @@ import org.apache.hadoop.hbase.io.encoding.DataBlockEncoder;
|
|||
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
|
||||
import org.apache.hadoop.hbase.io.encoding.EncodedDataBlock;
|
||||
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
|
||||
import org.apache.hadoop.hbase.io.hfile.HFileBlock;
|
||||
import org.apache.hadoop.hbase.io.hfile.NoOpDataBlockEncoder;
|
||||
import org.apache.hadoop.hbase.util.Bytes;
|
||||
import org.apache.hadoop.io.compress.CompressionOutputStream;
|
||||
|
@ -119,6 +120,7 @@ public class DataBlockEncodingTool {
|
|||
private long totalCFLength = 0;
|
||||
|
||||
private byte[] rawKVs;
|
||||
private int minorVersion = 0;
|
||||
|
||||
private final String compressionAlgorithmName;
|
||||
private final Algorithm compressionAlgorithm;
|
||||
|
@ -228,7 +230,7 @@ public class DataBlockEncodingTool {
|
|||
List<Iterator<KeyValue>> codecIterators =
|
||||
new ArrayList<Iterator<KeyValue>>();
|
||||
for(EncodedDataBlock codec : codecs) {
|
||||
codecIterators.add(codec.getIterator());
|
||||
codecIterators.add(codec.getIterator(HFileBlock.headerSize(minorVersion)));
|
||||
}
|
||||
|
||||
int j = 0;
|
||||
|
@ -320,7 +322,7 @@ public class DataBlockEncodingTool {
|
|||
|
||||
Iterator<KeyValue> it;
|
||||
|
||||
it = codec.getIterator();
|
||||
it = codec.getIterator(HFileBlock.headerSize(minorVersion));
|
||||
|
||||
// count only the algorithm time, without memory allocations
|
||||
// (except first time)
|
||||
|
@ -590,6 +592,7 @@ public class DataBlockEncodingTool {
|
|||
|
||||
// run the utilities
|
||||
DataBlockEncodingTool comp = new DataBlockEncodingTool(compressionName);
|
||||
comp.minorVersion = reader.getHFileMinorVersion();
|
||||
comp.checkStatistics(scanner, kvLimit);
|
||||
if (doVerify) {
|
||||
comp.verifyCodecs(scanner, kvLimit);
|
||||
|
|
Loading…
Reference in New Issue