HBASE-5746 HFileDataBlockEncoderImpl uses wrong header size when reading HFiles with no checksums (0.96)

git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1478966 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
sershe 2013-05-03 20:31:46 +00:00
parent 515d19c52e
commit d30d4ef8e7
12 changed files with 103 additions and 47 deletions

View File

@ -70,12 +70,13 @@ public class EncodedDataBlock {
/** /**
* Provides access to compressed value. * Provides access to compressed value.
* @param headerSize header size of the block.
* @return Forwards sequential iterator. * @return Forwards sequential iterator.
*/ */
public Iterator<KeyValue> getIterator() { public Iterator<KeyValue> getIterator(int headerSize) {
final int rawSize = rawKVs.length; final int rawSize = rawKVs.length;
byte[] encodedDataWithHeader = getEncodedData(); byte[] encodedDataWithHeader = getEncodedData();
int bytesToSkip = encodingCtx.getHeaderSize() + Bytes.SIZEOF_SHORT; int bytesToSkip = headerSize + Bytes.SIZEOF_SHORT;
ByteArrayInputStream bais = new ByteArrayInputStream(encodedDataWithHeader, ByteArrayInputStream bais = new ByteArrayInputStream(encodedDataWithHeader,
bytesToSkip, encodedDataWithHeader.length - bytesToSkip); bytesToSkip, encodedDataWithHeader.length - bytesToSkip);
final DataInputStream dis = new DataInputStream(bais); final DataInputStream dis = new DataInputStream(bais);

View File

@ -60,7 +60,7 @@ public class HFileBlockDefaultEncodingContext implements
private ByteArrayOutputStream encodedStream = new ByteArrayOutputStream(); private ByteArrayOutputStream encodedStream = new ByteArrayOutputStream();
private DataOutputStream dataOut = new DataOutputStream(encodedStream); private DataOutputStream dataOut = new DataOutputStream(encodedStream);
private final byte[] dummyHeader; private byte[] dummyHeader;
/** /**
* @param compressionAlgorithm compression algorithm used * @param compressionAlgorithm compression algorithm used
@ -87,7 +87,12 @@ public class HFileBlockDefaultEncodingContext implements
} }
} }
dummyHeader = Preconditions.checkNotNull(headerBytes, dummyHeader = Preconditions.checkNotNull(headerBytes,
"Please pass HFileBlock.HFILEBLOCK_DUMMY_HEADER instead of null for param headerBytes"); "Please pass HConstants.HFILEBLOCK_DUMMY_HEADER instead of null for param headerBytes");
}
@Override
public void setDummyHeader(byte[] headerBytes) {
dummyHeader = headerBytes;
} }
/** /**
@ -107,7 +112,7 @@ public class HFileBlockDefaultEncodingContext implements
public void postEncoding(BlockType blockType) public void postEncoding(BlockType blockType)
throws IOException { throws IOException {
dataOut.flush(); dataOut.flush();
compressAfterEncoding(encodedStream.toByteArray(), blockType); compressAfterEncodingWithBlockType(encodedStream.toByteArray(), blockType);
this.blockType = blockType; this.blockType = blockType;
} }
@ -116,7 +121,7 @@ public class HFileBlockDefaultEncodingContext implements
* @param blockType * @param blockType
* @throws IOException * @throws IOException
*/ */
public void compressAfterEncoding(byte[] uncompressedBytesWithHeader, public void compressAfterEncodingWithBlockType(byte[] uncompressedBytesWithHeader,
BlockType blockType) throws IOException { BlockType blockType) throws IOException {
compressAfterEncoding(uncompressedBytesWithHeader, blockType, dummyHeader); compressAfterEncoding(uncompressedBytesWithHeader, blockType, dummyHeader);
} }
@ -187,10 +192,4 @@ public class HFileBlockDefaultEncodingContext implements
public DataBlockEncoding getDataBlockEncoding() { public DataBlockEncoding getDataBlockEncoding() {
return this.encodingAlgo; return this.encodingAlgo;
} }
@Override
public int getHeaderSize() {
return this.dummyHeader.length;
}
} }

View File

@ -59,9 +59,9 @@ public interface HFileBlockEncodingContext {
public Compression.Algorithm getCompression(); public Compression.Algorithm getCompression();
/** /**
* @return the header size used * sets the dummy header bytes
*/ */
public int getHeaderSize(); public void setDummyHeader(byte[] headerBytes);
/** /**
* @return the {@link DataBlockEncoding} encoding used * @return the {@link DataBlockEncoding} encoding used

View File

@ -580,7 +580,7 @@ public class FixedFileTrailer {
/** /**
* Returns the minor version of this HFile format * Returns the minor version of this HFile format
*/ */
int getMinorVersion() { public int getMinorVersion() {
return minorVersion; return minorVersion;
} }

View File

@ -813,7 +813,7 @@ public class HFileBlock implements Cacheable {
if (blockType == BlockType.DATA) { if (blockType == BlockType.DATA) {
encodeDataBlockForDisk(); encodeDataBlockForDisk();
} else { } else {
defaultBlockEncodingCtx.compressAfterEncoding( defaultBlockEncodingCtx.compressAfterEncodingWithBlockType(
uncompressedBytesWithHeader, blockType); uncompressedBytesWithHeader, blockType);
onDiskBytesWithHeader = onDiskBytesWithHeader =
defaultBlockEncodingCtx.getOnDiskBytesWithHeader(); defaultBlockEncodingCtx.getOnDiskBytesWithHeader();
@ -1748,13 +1748,30 @@ public class HFileBlock implements Cacheable {
/** /**
* Maps a minor version to the size of the header. * Maps a minor version to the size of the header.
*/ */
static private int headerSize(int minorVersion) { public static int headerSize(int minorVersion) {
if (minorVersion < MINOR_VERSION_WITH_CHECKSUM) { if (minorVersion < MINOR_VERSION_WITH_CHECKSUM) {
return HConstants.HFILEBLOCK_HEADER_SIZE_NO_CHECKSUM; return HConstants.HFILEBLOCK_HEADER_SIZE_NO_CHECKSUM;
} }
return HConstants.HFILEBLOCK_HEADER_SIZE; return HConstants.HFILEBLOCK_HEADER_SIZE;
} }
/**
* Return the appropriate DUMMY_HEADER for the minor version
*/
public byte[] getDummyHeaderForVersion() {
return getDummyHeaderForVersion(minorVersion);
}
/**
* Return the appropriate DUMMY_HEADER for the minor version
*/
static private byte[] getDummyHeaderForVersion(int minorVersion) {
if (minorVersion < MINOR_VERSION_WITH_CHECKSUM) {
return DUMMY_HEADER_NO_CHECKSUM;
}
return HConstants.HFILEBLOCK_DUMMY_HEADER;
}
/** /**
* Convert the contents of the block header into a human readable string. * Convert the contents of the block header into a human readable string.
* This is mostly helpful for debugging. This assumes that the block * This is mostly helpful for debugging. This assumes that the block

View File

@ -57,7 +57,7 @@ public class HFileDataBlockEncoderImpl implements HFileDataBlockEncoder {
*/ */
public HFileDataBlockEncoderImpl(DataBlockEncoding onDisk, public HFileDataBlockEncoderImpl(DataBlockEncoding onDisk,
DataBlockEncoding inCache) { DataBlockEncoding inCache) {
this(onDisk, inCache, null); this(onDisk, inCache, HConstants.HFILEBLOCK_DUMMY_HEADER);
} }
/** /**
@ -71,7 +71,6 @@ public class HFileDataBlockEncoderImpl implements HFileDataBlockEncoder {
*/ */
public HFileDataBlockEncoderImpl(DataBlockEncoding onDisk, public HFileDataBlockEncoderImpl(DataBlockEncoding onDisk,
DataBlockEncoding inCache, byte[] dummyHeader) { DataBlockEncoding inCache, byte[] dummyHeader) {
dummyHeader = dummyHeader == null ? HConstants.HFILEBLOCK_DUMMY_HEADER : dummyHeader;
this.onDisk = onDisk != null ? this.onDisk = onDisk != null ?
onDisk : DataBlockEncoding.NONE; onDisk : DataBlockEncoding.NONE;
this.inCache = inCache != null ? this.inCache = inCache != null ?
@ -96,18 +95,25 @@ public class HFileDataBlockEncoderImpl implements HFileDataBlockEncoder {
public static HFileDataBlockEncoder createFromFileInfo( public static HFileDataBlockEncoder createFromFileInfo(
FileInfo fileInfo, DataBlockEncoding preferredEncodingInCache) FileInfo fileInfo, DataBlockEncoding preferredEncodingInCache)
throws IOException { throws IOException {
boolean hasPreferredCacheEncoding = preferredEncodingInCache != null
&& preferredEncodingInCache != DataBlockEncoding.NONE;
byte[] dataBlockEncodingType = fileInfo.get(DATA_BLOCK_ENCODING); byte[] dataBlockEncodingType = fileInfo.get(DATA_BLOCK_ENCODING);
if (dataBlockEncodingType == null) { if (dataBlockEncodingType == null && !hasPreferredCacheEncoding) {
return NoOpDataBlockEncoder.INSTANCE; return NoOpDataBlockEncoder.INSTANCE;
} }
String dataBlockEncodingStr = Bytes.toString(dataBlockEncodingType);
DataBlockEncoding onDisk; DataBlockEncoding onDisk;
if (dataBlockEncodingType == null) {
onDisk = DataBlockEncoding.NONE;
} else {
String dataBlockEncodingStr = Bytes.toString(dataBlockEncodingType);
try { try {
onDisk = DataBlockEncoding.valueOf(dataBlockEncodingStr); onDisk = DataBlockEncoding.valueOf(dataBlockEncodingStr);
} catch (IllegalArgumentException ex) { } catch (IllegalArgumentException ex) {
throw new IOException("Invalid data block encoding type in file info: " + throw new IOException("Invalid data block encoding type in file info: "
dataBlockEncodingStr, ex); + dataBlockEncodingStr, ex);
}
} }
DataBlockEncoding inCache; DataBlockEncoding inCache;
@ -123,6 +129,8 @@ public class HFileDataBlockEncoderImpl implements HFileDataBlockEncoder {
// but new files will be generated with the new encoding. // but new files will be generated with the new encoding.
inCache = onDisk; inCache = onDisk;
} }
// TODO: we are not passing proper header size here based on minor version, presumably
// because this encoder will never actually be used for encoding.
return new HFileDataBlockEncoderImpl(onDisk, inCache); return new HFileDataBlockEncoderImpl(onDisk, inCache);
} }
@ -189,7 +197,7 @@ public class HFileDataBlockEncoderImpl implements HFileDataBlockEncoder {
BlockType blockType) throws IOException { BlockType blockType) throws IOException {
if (onDisk == DataBlockEncoding.NONE) { if (onDisk == DataBlockEncoding.NONE) {
// there is no need to encode the block before writing it to disk // there is no need to encode the block before writing it to disk
((HFileBlockDefaultEncodingContext) encodeCtx).compressAfterEncoding( ((HFileBlockDefaultEncodingContext) encodeCtx).compressAfterEncodingWithBlockType(
in.array(), blockType); in.array(), blockType);
return; return;
} }
@ -231,12 +239,13 @@ public class HFileDataBlockEncoderImpl implements HFileDataBlockEncoder {
private HFileBlock encodeDataBlock(HFileBlock block, private HFileBlock encodeDataBlock(HFileBlock block,
DataBlockEncoding algo, boolean includesMemstoreTS, DataBlockEncoding algo, boolean includesMemstoreTS,
HFileBlockEncodingContext encodingCtx) { HFileBlockEncodingContext encodingCtx) {
encodingCtx.setDummyHeader(block.getDummyHeaderForVersion());
encodeBufferToHFileBlockBuffer( encodeBufferToHFileBlockBuffer(
block.getBufferWithoutHeader(), algo, includesMemstoreTS, encodingCtx); block.getBufferWithoutHeader(), algo, includesMemstoreTS, encodingCtx);
byte[] encodedUncompressedBytes = byte[] encodedUncompressedBytes =
encodingCtx.getUncompressedBytesWithHeader(); encodingCtx.getUncompressedBytesWithHeader();
ByteBuffer bufferWrapper = ByteBuffer.wrap(encodedUncompressedBytes); ByteBuffer bufferWrapper = ByteBuffer.wrap(encodedUncompressedBytes);
int sizeWithoutHeader = bufferWrapper.limit() - encodingCtx.getHeaderSize(); int sizeWithoutHeader = bufferWrapper.limit() - block.headerSize();
HFileBlock encodedBlock = new HFileBlock(BlockType.ENCODED_DATA, HFileBlock encodedBlock = new HFileBlock(BlockType.ENCODED_DATA,
block.getOnDiskSizeWithoutHeader(), block.getOnDiskSizeWithoutHeader(),
sizeWithoutHeader, block.getPrevBlockOffset(), sizeWithoutHeader, block.getPrevBlockOffset(),

View File

@ -61,7 +61,7 @@ public class NoOpDataBlockEncoder implements HFileDataBlockEncoder {
HFileBlockDefaultEncodingContext defaultContext = HFileBlockDefaultEncodingContext defaultContext =
(HFileBlockDefaultEncodingContext) encodeCtx; (HFileBlockDefaultEncodingContext) encodeCtx;
defaultContext.compressAfterEncoding(in.array(), blockType); defaultContext.compressAfterEncodingWithBlockType(in.array(), blockType);
} }
@Override @Override

View File

@ -1532,6 +1532,10 @@ public class StoreFile {
return reader.getTrailer().getMajorVersion(); return reader.getTrailer().getMajorVersion();
} }
public int getHFileMinorVersion() {
return reader.getTrailer().getMinorVersion();
}
public HFile.Reader getHFileReader() { public HFile.Reader getHFileReader() {
return reader; return reader;
} }

View File

@ -419,7 +419,7 @@ public class TestHFileBlock {
new byte[rawBuf.array().length + headerLen]; new byte[rawBuf.array().length + headerLen];
System.arraycopy(rawBuf.array(), 0, rawBufWithHeader, System.arraycopy(rawBuf.array(), 0, rawBufWithHeader,
headerLen, rawBuf.array().length); headerLen, rawBuf.array().length);
defaultEncodingCtx.compressAfterEncoding(rawBufWithHeader, defaultEncodingCtx.compressAfterEncodingWithBlockType(rawBufWithHeader,
BlockType.DATA); BlockType.DATA);
encodedResultWithHeader = encodedResultWithHeader =
defaultEncodingCtx.getUncompressedBytesWithHeader(); defaultEncodingCtx.getUncompressedBytesWithHeader();

View File

@ -494,7 +494,7 @@ public class TestHFileBlockCompatibility {
if (blockType == BlockType.DATA) { if (blockType == BlockType.DATA) {
encodeDataBlockForDisk(); encodeDataBlockForDisk();
} else { } else {
defaultBlockEncodingCtx.compressAfterEncoding( defaultBlockEncodingCtx.compressAfterEncodingWithBlockType(
uncompressedBytesWithHeader, blockType); uncompressedBytesWithHeader, blockType);
onDiskBytesWithHeader = onDiskBytesWithHeader =
defaultBlockEncodingCtx.getOnDiskBytesWithHeader(); defaultBlockEncodingCtx.getOnDiskBytesWithHeader();

View File

@ -104,6 +104,41 @@ public class TestHFileDataBlockEncoder {
} }
} }
/** Test for HBASE-5746. */
@Test
public void testHeaderSizeInCacheWithoutChecksum() throws Exception {
int headerSize = HConstants.HFILEBLOCK_HEADER_SIZE_NO_CHECKSUM;
// Create some KVs and create the block with old-style header.
ByteBuffer keyValues = RedundantKVGenerator.convertKvToByteBuffer(
generator.generateTestKeyValues(60), includesMemstoreTS);
int size = keyValues.limit();
ByteBuffer buf = ByteBuffer.allocate(size + headerSize);
buf.position(headerSize);
keyValues.rewind();
buf.put(keyValues);
HFileBlock block = new HFileBlock(BlockType.DATA, size, size, -1, buf,
HFileBlock.FILL_HEADER, 0, includesMemstoreTS,
HFileBlock.MINOR_VERSION_NO_CHECKSUM, 0, ChecksumType.NULL.getCode(), 0);
HFileBlock cacheBlock = blockEncoder.diskToCacheFormat(createBlockOnDisk(block), false);
assertEquals(headerSize, cacheBlock.getDummyHeaderForVersion().length);
}
private HFileBlock createBlockOnDisk(HFileBlock block) throws IOException {
int size;
HFileBlockEncodingContext context = new HFileBlockDefaultEncodingContext(
Compression.Algorithm.NONE, blockEncoder.getEncodingOnDisk(),
HConstants.HFILEBLOCK_DUMMY_HEADER);
context.setDummyHeader(block.getDummyHeaderForVersion());
blockEncoder.beforeWriteToDisk(block.getBufferWithoutHeader(),
includesMemstoreTS, context, block.getBlockType());
byte[] encodedBytes = context.getUncompressedBytesWithHeader();
size = encodedBytes.length - block.getDummyHeaderForVersion().length;
return new HFileBlock(context.getBlockType(), size, size, -1,
ByteBuffer.wrap(encodedBytes), HFileBlock.FILL_HEADER, 0, includesMemstoreTS,
block.getMinorVersion(), block.getBytesPerChecksum(), block.getChecksumType(),
block.getOnDiskDataSizeWithHeader());
}
/** /**
* Test writing to disk. * Test writing to disk.
* @throws IOException * @throws IOException
@ -112,19 +147,7 @@ public class TestHFileDataBlockEncoder {
public void testEncodingWritePath() throws IOException { public void testEncodingWritePath() throws IOException {
// usually we have just block without headers, but don't complicate that // usually we have just block without headers, but don't complicate that
HFileBlock block = getSampleHFileBlock(); HFileBlock block = getSampleHFileBlock();
HFileBlockEncodingContext context = new HFileBlockDefaultEncodingContext( HFileBlock blockOnDisk = createBlockOnDisk(block);
Compression.Algorithm.NONE, blockEncoder.getEncodingOnDisk(), HConstants.HFILEBLOCK_DUMMY_HEADER);
blockEncoder.beforeWriteToDisk(block.getBufferWithoutHeader(),
includesMemstoreTS, context, block.getBlockType());
byte[] encodedBytes = context.getUncompressedBytesWithHeader();
int size = encodedBytes.length - HConstants.HFILEBLOCK_HEADER_SIZE;
HFileBlock blockOnDisk =
new HFileBlock(context.getBlockType(), size, size, -1,
ByteBuffer.wrap(encodedBytes), HFileBlock.FILL_HEADER, 0,
includesMemstoreTS, block.getMinorVersion(),
block.getBytesPerChecksum(), block.getChecksumType(),
block.getOnDiskDataSizeWithHeader());
if (blockEncoder.getEncodingOnDisk() != if (blockEncoder.getEncodingOnDisk() !=
DataBlockEncoding.NONE) { DataBlockEncoding.NONE) {

View File

@ -44,6 +44,7 @@ import org.apache.hadoop.hbase.io.encoding.DataBlockEncoder;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding; import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.apache.hadoop.hbase.io.encoding.EncodedDataBlock; import org.apache.hadoop.hbase.io.encoding.EncodedDataBlock;
import org.apache.hadoop.hbase.io.hfile.CacheConfig; import org.apache.hadoop.hbase.io.hfile.CacheConfig;
import org.apache.hadoop.hbase.io.hfile.HFileBlock;
import org.apache.hadoop.hbase.io.hfile.NoOpDataBlockEncoder; import org.apache.hadoop.hbase.io.hfile.NoOpDataBlockEncoder;
import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.compress.CompressionOutputStream; import org.apache.hadoop.io.compress.CompressionOutputStream;
@ -119,6 +120,7 @@ public class DataBlockEncodingTool {
private long totalCFLength = 0; private long totalCFLength = 0;
private byte[] rawKVs; private byte[] rawKVs;
private int minorVersion = 0;
private final String compressionAlgorithmName; private final String compressionAlgorithmName;
private final Algorithm compressionAlgorithm; private final Algorithm compressionAlgorithm;
@ -228,7 +230,7 @@ public class DataBlockEncodingTool {
List<Iterator<KeyValue>> codecIterators = List<Iterator<KeyValue>> codecIterators =
new ArrayList<Iterator<KeyValue>>(); new ArrayList<Iterator<KeyValue>>();
for(EncodedDataBlock codec : codecs) { for(EncodedDataBlock codec : codecs) {
codecIterators.add(codec.getIterator()); codecIterators.add(codec.getIterator(HFileBlock.headerSize(minorVersion)));
} }
int j = 0; int j = 0;
@ -320,7 +322,7 @@ public class DataBlockEncodingTool {
Iterator<KeyValue> it; Iterator<KeyValue> it;
it = codec.getIterator(); it = codec.getIterator(HFileBlock.headerSize(minorVersion));
// count only the algorithm time, without memory allocations // count only the algorithm time, without memory allocations
// (expect first time) // (expect first time)
@ -590,6 +592,7 @@ public class DataBlockEncodingTool {
// run the utilities // run the utilities
DataBlockEncodingTool comp = new DataBlockEncodingTool(compressionName); DataBlockEncodingTool comp = new DataBlockEncodingTool(compressionName);
comp.minorVersion = reader.getHFileMinorVersion();
comp.checkStatistics(scanner, kvLimit); comp.checkStatistics(scanner, kvLimit);
if (doVerify) { if (doVerify) {
comp.verifyCodecs(scanner, kvLimit); comp.verifyCodecs(scanner, kvLimit);