HBASE-5746 HFileDataBlockEncoderImpl uses wrong header size when reading HFiles with no checksums (0.96)

git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1478966 13f79535-47bb-0310-9956-ffa450edef68
commit d30d4ef8e7 (parent 515d19c52e)
Author: sershe
Date:   2013-05-03 20:31:46 +00:00

12 changed files with 103 additions and 47 deletions
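Background on the bug: HFile blocks written before per-block checksums existed (minor version < MINOR_VERSION_WITH_CHECKSUM) carry a shorter header than checksummed blocks, so read-path code that assumes the with-checksum header size mis-parses those older files. The sketch below mirrors the version-to-size mapping this patch centralizes in HFileBlock.headerSize(int); the concrete sizes (24 and 33 bytes) and the version threshold are assumptions for illustration, not values quoted from this diff.

    // Minimal sketch, not the real org.apache.hadoop.hbase.io.hfile.HFileBlock.
    public final class HeaderSizeSketch {
      static final int MINOR_VERSION_WITH_CHECKSUM = 1; // assumed threshold
      static final int HEADER_SIZE_NO_CHECKSUM = 24;    // assumed: magic + sizes + prev offset
      static final int HEADER_SIZE_WITH_CHECKSUM = 33;  // assumed: + checksum fields

      static int headerSize(int minorVersion) {
        return minorVersion < MINOR_VERSION_WITH_CHECKSUM
            ? HEADER_SIZE_NO_CHECKSUM
            : HEADER_SIZE_WITH_CHECKSUM;
      }

      public static void main(String[] args) {
        System.out.println(headerSize(0)); // pre-checksum file header
        System.out.println(headerSize(1)); // checksummed file header
      }
    }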


@@ -70,12 +70,13 @@ public class EncodedDataBlock {
   /**
    * Provides access to compressed value.
+   * @param headerSize header size of the block.
    * @return Forwards sequential iterator.
    */
-  public Iterator<KeyValue> getIterator() {
+  public Iterator<KeyValue> getIterator(int headerSize) {
     final int rawSize = rawKVs.length;
     byte[] encodedDataWithHeader = getEncodedData();
-    int bytesToSkip = encodingCtx.getHeaderSize() + Bytes.SIZEOF_SHORT;
+    int bytesToSkip = headerSize + Bytes.SIZEOF_SHORT;
     ByteArrayInputStream bais = new ByteArrayInputStream(encodedDataWithHeader,
         bytesToSkip, encodedDataWithHeader.length - bytesToSkip);
     final DataInputStream dis = new DataInputStream(bais);


@@ -60,7 +60,7 @@ public class HFileBlockDefaultEncodingContext implements
   private ByteArrayOutputStream encodedStream = new ByteArrayOutputStream();
   private DataOutputStream dataOut = new DataOutputStream(encodedStream);

-  private final byte[] dummyHeader;
+  private byte[] dummyHeader;

   /**
    * @param compressionAlgorithm compression algorithm used
@@ -87,7 +87,12 @@ public class HFileBlockDefaultEncodingContext implements
       }
     }
     dummyHeader = Preconditions.checkNotNull(headerBytes,
-        "Please pass HFileBlock.HFILEBLOCK_DUMMY_HEADER instead of null for param headerBytes");
+        "Please pass HConstants.HFILEBLOCK_DUMMY_HEADER instead of null for param headerBytes");
   }

+  @Override
+  public void setDummyHeader(byte[] headerBytes) {
+    dummyHeader = headerBytes;
+  }
+
   /**
@@ -107,7 +112,7 @@ public class HFileBlockDefaultEncodingContext implements
   public void postEncoding(BlockType blockType)
       throws IOException {
     dataOut.flush();
-    compressAfterEncoding(encodedStream.toByteArray(), blockType);
+    compressAfterEncodingWithBlockType(encodedStream.toByteArray(), blockType);
     this.blockType = blockType;
   }
@@ -116,7 +121,7 @@ public class HFileBlockDefaultEncodingContext implements
    * @param blockType
    * @throws IOException
    */
-  public void compressAfterEncoding(byte[] uncompressedBytesWithHeader,
+  public void compressAfterEncodingWithBlockType(byte[] uncompressedBytesWithHeader,
       BlockType blockType) throws IOException {
     compressAfterEncoding(uncompressedBytesWithHeader, blockType, dummyHeader);
   }
@@ -187,10 +192,4 @@ public class HFileBlockDefaultEncodingContext implements
   public DataBlockEncoding getDataBlockEncoding() {
     return this.encodingAlgo;
   }
-
-  @Override
-  public int getHeaderSize() {
-    return this.dummyHeader.length;
-  }
 }


@@ -59,9 +59,9 @@ public interface HFileBlockEncodingContext {
   public Compression.Algorithm getCompression();

   /**
-   * @return the header size used
+   * sets the dummy header bytes
    */
-  public int getHeaderSize();
+  public void setDummyHeader(byte[] headerBytes);

   /**
    * @return the {@link DataBlockEncoding} encoding used
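The interface change above is the heart of the fix: getHeaderSize() let the encoding context answer with whatever dummy header it happened to be constructed with, while setDummyHeader(byte[]) lets the block, which knows its own minor version, push the correct placeholder in (see encodeDataBlock in HFileDataBlockEncoderImpl below). A toy model of the new flow, using local stand-in types rather than the real HBase classes; the 24- and 33-byte sizes are assumptions:

    // Toy stand-in for HFileBlockEncodingContext; not the real class.
    class ToyContext {
      private byte[] dummyHeader = new byte[33];  // assumed with-checksum default
      void setDummyHeader(byte[] headerBytes) {   // replaces int getHeaderSize()
        this.dummyHeader = headerBytes;
      }
      int headerLength() {
        return dummyHeader.length;
      }
    }

    public class SetDummyHeaderDemo {
      public static void main(String[] args) {
        ToyContext ctx = new ToyContext();
        ctx.setDummyHeader(new byte[24]);        // pre-checksum block supplies 24 bytes
        System.out.println(ctx.headerLength());  // 24, not the default 33
      }
    }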


@@ -580,7 +580,7 @@ public class FixedFileTrailer {
   /**
    * Returns the minor version of this HFile format
    */
-  int getMinorVersion() {
+  public int getMinorVersion() {
     return minorVersion;
   }


@@ -813,7 +813,7 @@ public class HFileBlock implements Cacheable {
       if (blockType == BlockType.DATA) {
         encodeDataBlockForDisk();
       } else {
-        defaultBlockEncodingCtx.compressAfterEncoding(
+        defaultBlockEncodingCtx.compressAfterEncodingWithBlockType(
             uncompressedBytesWithHeader, blockType);
         onDiskBytesWithHeader =
             defaultBlockEncodingCtx.getOnDiskBytesWithHeader();
@@ -1748,13 +1748,30 @@ public class HFileBlock implements Cacheable {
   /**
    * Maps a minor version to the size of the header.
    */
-  static private int headerSize(int minorVersion) {
+  public static int headerSize(int minorVersion) {
     if (minorVersion < MINOR_VERSION_WITH_CHECKSUM) {
       return HConstants.HFILEBLOCK_HEADER_SIZE_NO_CHECKSUM;
     }
     return HConstants.HFILEBLOCK_HEADER_SIZE;
   }

+  /**
+   * Return the appropriate DUMMY_HEADER for the minor version
+   */
+  public byte[] getDummyHeaderForVersion() {
+    return getDummyHeaderForVersion(minorVersion);
+  }
+
+  /**
+   * Return the appropriate DUMMY_HEADER for the minor version
+   */
+  static private byte[] getDummyHeaderForVersion(int minorVersion) {
+    if (minorVersion < MINOR_VERSION_WITH_CHECKSUM) {
+      return DUMMY_HEADER_NO_CHECKSUM;
+    }
+    return HConstants.HFILEBLOCK_DUMMY_HEADER;
+  }
+
   /**
    * Convert the contents of the block header into a human readable string.
    * This is mostly helpful for debugging. This assumes that the block
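As background (this note is not part of the diff): the block writer serializes contents behind a placeholder header and back-fills the real header once the on-disk sizes are known, which is why the dummy header must match the final header length exactly for the block's minor version. A toy illustration of that reserve-then-backfill pattern, with an assumed 24-byte header:

    import java.nio.ByteBuffer;

    // Reserve headerSize bytes, write the payload, then overwrite the
    // reservation; the sizes here are assumptions, not HBase constants.
    public class BackfillDemo {
      public static void main(String[] args) {
        int headerSize = 24;
        byte[] payload = {1, 2, 3, 4};
        ByteBuffer block = ByteBuffer.allocate(headerSize + payload.length);
        block.position(headerSize);      // leave the header slot empty
        block.put(payload);
        block.rewind();
        block.put(new byte[headerSize]); // back-fill the real header bytes
        System.out.println("total block size = " + block.capacity());
      }
    }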


@@ -57,7 +57,7 @@ public class HFileDataBlockEncoderImpl implements HFileDataBlockEncoder {
    */
   public HFileDataBlockEncoderImpl(DataBlockEncoding onDisk,
       DataBlockEncoding inCache) {
-    this(onDisk, inCache, null);
+    this(onDisk, inCache, HConstants.HFILEBLOCK_DUMMY_HEADER);
   }

   /**
@@ -71,7 +71,6 @@ public class HFileDataBlockEncoderImpl implements HFileDataBlockEncoder {
    */
   public HFileDataBlockEncoderImpl(DataBlockEncoding onDisk,
       DataBlockEncoding inCache, byte[] dummyHeader) {
-    dummyHeader = dummyHeader == null ? HConstants.HFILEBLOCK_DUMMY_HEADER : dummyHeader;
     this.onDisk = onDisk != null ?
         onDisk : DataBlockEncoding.NONE;
     this.inCache = inCache != null ?
@@ -96,18 +95,25 @@ public class HFileDataBlockEncoderImpl implements HFileDataBlockEncoder {
   public static HFileDataBlockEncoder createFromFileInfo(
       FileInfo fileInfo, DataBlockEncoding preferredEncodingInCache)
       throws IOException {
+    boolean hasPreferredCacheEncoding = preferredEncodingInCache != null
+        && preferredEncodingInCache != DataBlockEncoding.NONE;
     byte[] dataBlockEncodingType = fileInfo.get(DATA_BLOCK_ENCODING);
-    if (dataBlockEncodingType == null) {
+    if (dataBlockEncodingType == null && !hasPreferredCacheEncoding) {
       return NoOpDataBlockEncoder.INSTANCE;
     }

-    String dataBlockEncodingStr = Bytes.toString(dataBlockEncodingType);
     DataBlockEncoding onDisk;
+    if (dataBlockEncodingType == null) {
+      onDisk = DataBlockEncoding.NONE;
+    } else {
+      String dataBlockEncodingStr = Bytes.toString(dataBlockEncodingType);
       try {
         onDisk = DataBlockEncoding.valueOf(dataBlockEncodingStr);
       } catch (IllegalArgumentException ex) {
-        throw new IOException("Invalid data block encoding type in file info: " +
-            dataBlockEncodingStr, ex);
+        throw new IOException("Invalid data block encoding type in file info: "
+            + dataBlockEncodingStr, ex);
       }
+    }

     DataBlockEncoding inCache;
@@ -123,6 +129,8 @@ public class HFileDataBlockEncoderImpl implements HFileDataBlockEncoder {
       // but new files will be generated with the new encoding.
       inCache = onDisk;
     }
+    // TODO: we are not passing proper header size here based on minor version, presumably
+    // because this encoder will never actually be used for encoding.
     return new HFileDataBlockEncoderImpl(onDisk, inCache);
   }
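One behavioral change in createFromFileInfo worth noting: a file with no on-disk encoding previously got the no-op encoder unconditionally, silently dropping any preferred in-cache encoding; now the no-op encoder is returned only when neither side wants encoding. A condensed sketch of that decision, with a stand-in enum rather than the real DataBlockEncoding:

    // Stand-in enum; only the NONE case matters for the decision.
    enum Enc { NONE, PREFIX, DIFF }

    public class EncoderChoiceDemo {
      static String choose(Enc onDiskFromFileInfo, Enc preferredInCache) {
        boolean hasPreferred = preferredInCache != null && preferredInCache != Enc.NONE;
        if (onDiskFromFileInfo == Enc.NONE && !hasPreferred) {
          return "NoOpDataBlockEncoder";     // nothing to encode anywhere
        }
        return "HFileDataBlockEncoderImpl";  // encodes on disk, in cache, or both
      }

      public static void main(String[] args) {
        System.out.println(choose(Enc.NONE, Enc.PREFIX)); // now a real encoder
        System.out.println(choose(Enc.NONE, Enc.NONE));   // still the no-op encoder
      }
    }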
@@ -189,7 +197,7 @@ public class HFileDataBlockEncoderImpl implements HFileDataBlockEncoder {
       BlockType blockType) throws IOException {
     if (onDisk == DataBlockEncoding.NONE) {
       // there is no need to encode the block before writing it to disk
-      ((HFileBlockDefaultEncodingContext) encodeCtx).compressAfterEncoding(
+      ((HFileBlockDefaultEncodingContext) encodeCtx).compressAfterEncodingWithBlockType(
           in.array(), blockType);
       return;
     }
@@ -231,12 +239,13 @@ public class HFileDataBlockEncoderImpl implements HFileDataBlockEncoder {
   private HFileBlock encodeDataBlock(HFileBlock block,
       DataBlockEncoding algo, boolean includesMemstoreTS,
       HFileBlockEncodingContext encodingCtx) {
+    encodingCtx.setDummyHeader(block.getDummyHeaderForVersion());
     encodeBufferToHFileBlockBuffer(
         block.getBufferWithoutHeader(), algo, includesMemstoreTS, encodingCtx);
     byte[] encodedUncompressedBytes =
         encodingCtx.getUncompressedBytesWithHeader();
     ByteBuffer bufferWrapper = ByteBuffer.wrap(encodedUncompressedBytes);
-    int sizeWithoutHeader = bufferWrapper.limit() - encodingCtx.getHeaderSize();
+    int sizeWithoutHeader = bufferWrapper.limit() - block.headerSize();
     HFileBlock encodedBlock = new HFileBlock(BlockType.ENCODED_DATA,
         block.getOnDiskSizeWithoutHeader(),
         sizeWithoutHeader, block.getPrevBlockOffset(),


@@ -61,7 +61,7 @@ public class NoOpDataBlockEncoder implements HFileDataBlockEncoder {
     HFileBlockDefaultEncodingContext defaultContext =
         (HFileBlockDefaultEncodingContext) encodeCtx;
-    defaultContext.compressAfterEncoding(in.array(), blockType);
+    defaultContext.compressAfterEncodingWithBlockType(in.array(), blockType);
   }

   @Override


@@ -1532,6 +1532,10 @@ public class StoreFile {
       return reader.getTrailer().getMajorVersion();
     }

+    public int getHFileMinorVersion() {
+      return reader.getTrailer().getMinorVersion();
+    }
+
     public HFile.Reader getHFileReader() {
       return reader;
     }


@@ -419,7 +419,7 @@ public class TestHFileBlock {
             new byte[rawBuf.array().length + headerLen];
         System.arraycopy(rawBuf.array(), 0, rawBufWithHeader,
             headerLen, rawBuf.array().length);
-        defaultEncodingCtx.compressAfterEncoding(rawBufWithHeader,
+        defaultEncodingCtx.compressAfterEncodingWithBlockType(rawBufWithHeader,
             BlockType.DATA);
         encodedResultWithHeader =
             defaultEncodingCtx.getUncompressedBytesWithHeader();


@@ -494,7 +494,7 @@ public class TestHFileBlockCompatibility {
       if (blockType == BlockType.DATA) {
         encodeDataBlockForDisk();
       } else {
-        defaultBlockEncodingCtx.compressAfterEncoding(
+        defaultBlockEncodingCtx.compressAfterEncodingWithBlockType(
             uncompressedBytesWithHeader, blockType);
         onDiskBytesWithHeader =
             defaultBlockEncodingCtx.getOnDiskBytesWithHeader();


@@ -104,6 +104,41 @@ public class TestHFileDataBlockEncoder {
     }
   }

+  /** Test for HBASE-5746. */
+  @Test
+  public void testHeaderSizeInCacheWithoutChecksum() throws Exception {
+    int headerSize = HConstants.HFILEBLOCK_HEADER_SIZE_NO_CHECKSUM;
+    // Create some KVs and create the block with old-style header.
+    ByteBuffer keyValues = RedundantKVGenerator.convertKvToByteBuffer(
+        generator.generateTestKeyValues(60), includesMemstoreTS);
+    int size = keyValues.limit();
+    ByteBuffer buf = ByteBuffer.allocate(size + headerSize);
+    buf.position(headerSize);
+    keyValues.rewind();
+    buf.put(keyValues);
+    HFileBlock block = new HFileBlock(BlockType.DATA, size, size, -1, buf,
+        HFileBlock.FILL_HEADER, 0, includesMemstoreTS,
+        HFileBlock.MINOR_VERSION_NO_CHECKSUM, 0, ChecksumType.NULL.getCode(), 0);
+    HFileBlock cacheBlock = blockEncoder.diskToCacheFormat(createBlockOnDisk(block), false);
+    assertEquals(headerSize, cacheBlock.getDummyHeaderForVersion().length);
+  }
+
+  private HFileBlock createBlockOnDisk(HFileBlock block) throws IOException {
+    int size;
+    HFileBlockEncodingContext context = new HFileBlockDefaultEncodingContext(
+        Compression.Algorithm.NONE, blockEncoder.getEncodingOnDisk(),
+        HConstants.HFILEBLOCK_DUMMY_HEADER);
+    context.setDummyHeader(block.getDummyHeaderForVersion());
+    blockEncoder.beforeWriteToDisk(block.getBufferWithoutHeader(),
+        includesMemstoreTS, context, block.getBlockType());
+    byte[] encodedBytes = context.getUncompressedBytesWithHeader();
+    size = encodedBytes.length - block.getDummyHeaderForVersion().length;
+    return new HFileBlock(context.getBlockType(), size, size, -1,
+        ByteBuffer.wrap(encodedBytes), HFileBlock.FILL_HEADER, 0, includesMemstoreTS,
+        block.getMinorVersion(), block.getBytesPerChecksum(), block.getChecksumType(),
+        block.getOnDiskDataSizeWithHeader());
+  }
+
   /**
    * Test writing to disk.
    * @throws IOException
@@ -112,19 +147,7 @@ public class TestHFileDataBlockEncoder {
   public void testEncodingWritePath() throws IOException {
     // usually we have just block without headers, but don't complicate that
     HFileBlock block = getSampleHFileBlock();
-    HFileBlockEncodingContext context = new HFileBlockDefaultEncodingContext(
-        Compression.Algorithm.NONE, blockEncoder.getEncodingOnDisk(),
-        HConstants.HFILEBLOCK_DUMMY_HEADER);
-    blockEncoder.beforeWriteToDisk(block.getBufferWithoutHeader(),
-        includesMemstoreTS, context, block.getBlockType());
-    byte[] encodedBytes = context.getUncompressedBytesWithHeader();
-    int size = encodedBytes.length - HConstants.HFILEBLOCK_HEADER_SIZE;
-    HFileBlock blockOnDisk =
-        new HFileBlock(context.getBlockType(), size, size, -1,
-            ByteBuffer.wrap(encodedBytes), HFileBlock.FILL_HEADER, 0,
-            includesMemstoreTS, block.getMinorVersion(),
-            block.getBytesPerChecksum(), block.getChecksumType(),
-            block.getOnDiskDataSizeWithHeader());
+    HFileBlock blockOnDisk = createBlockOnDisk(block);

     if (blockEncoder.getEncodingOnDisk() !=
         DataBlockEncoding.NONE) {


@@ -44,6 +44,7 @@ import org.apache.hadoop.hbase.io.encoding.DataBlockEncoder;
 import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
 import org.apache.hadoop.hbase.io.encoding.EncodedDataBlock;
 import org.apache.hadoop.hbase.io.hfile.CacheConfig;
+import org.apache.hadoop.hbase.io.hfile.HFileBlock;
 import org.apache.hadoop.hbase.io.hfile.NoOpDataBlockEncoder;
 import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.io.compress.CompressionOutputStream;
@@ -119,6 +120,7 @@ public class DataBlockEncodingTool {
   private long totalCFLength = 0;

   private byte[] rawKVs;
+  private int minorVersion = 0;

   private final String compressionAlgorithmName;
   private final Algorithm compressionAlgorithm;
@@ -228,7 +230,7 @@ public class DataBlockEncodingTool {
     List<Iterator<KeyValue>> codecIterators =
         new ArrayList<Iterator<KeyValue>>();
     for(EncodedDataBlock codec : codecs) {
-      codecIterators.add(codec.getIterator());
+      codecIterators.add(codec.getIterator(HFileBlock.headerSize(minorVersion)));
     }

     int j = 0;
@@ -320,7 +322,7 @@ public class DataBlockEncodingTool {
       Iterator<KeyValue> it;

-      it = codec.getIterator();
+      it = codec.getIterator(HFileBlock.headerSize(minorVersion));

       // count only the algorithm time, without memory allocations
       // (expect first time)
@@ -590,6 +592,7 @@ public class DataBlockEncodingTool {
     // run the utilities
     DataBlockEncodingTool comp = new DataBlockEncodingTool(compressionName);
+    comp.minorVersion = reader.getHFileMinorVersion();
     comp.checkStatistics(scanner, kvLimit);
     if (doVerify) {
       comp.verifyCodecs(scanner, kvLimit);