HBASE-11625 - Verifies data before building HFileBlock. - Adds HFileBlock.Header class which contains information about location of fields. Testing: Adds CorruptedFSReaderImpl to TestChecksum.

Change-Id: I107e9636b28abb6b15ec329e885f1e31b1b1b988

Signed-off-by: stack <stack@apache.org>
This commit is contained in:
Apekshit 2016-05-05 17:05:17 -07:00 committed by stack
parent 9ee0cbb995
commit 48b77dbcbc
3 changed files with 126 additions and 72 deletions

View File

@ -78,46 +78,35 @@ public class ChecksumUtil {
}
/**
* Validates that the data in the specified HFileBlock matches the
* checksum. Generates the checksum for the data and
* then validate that it matches the value stored in the header.
* If there is a checksum mismatch, then return false. Otherwise
* return true.
* The header is extracted from the specified HFileBlock while the
* data-to-be-verified is extracted from 'data'.
* Validates that the data in the specified HFileBlock matches the checksum. Generates the
* checksums for the data and then validate that it matches those stored in the end of the data.
* @param buffer Contains the data in following order: HFileBlock header, data, checksums.
* @param pathName Path of the HFile to which the {@code data} belongs. Only used for logging.
* @param offset offset of the data being validated. Only used for logging.
* @param hdrSize Size of the block header in {@code data}. Only used for logging.
* @return True if checksum matches, else false.
*/
static boolean validateBlockChecksum(String pathName, long offset, HFileBlock block,
byte[] data, int hdrSize) throws IOException {
// If this is an older version of the block that does not have
// checksums, then return false indicating that checksum verification
// did not succeed. Actually, this method should never be called
// when the minorVersion is 0, thus this is a defensive check for a
// cannot-happen case. Since this is a cannot-happen case, it is
// better to return false to indicate a checksum validation failure.
if (!block.getHFileContext().isUseHBaseChecksum()) {
return false;
}
// Get a checksum object based on the type of checksum that is
// set in the HFileBlock header. A ChecksumType.NULL indicates that
// the caller is not interested in validating checksums, so we
// always return true.
ChecksumType cktype = ChecksumType.codeToType(block.getChecksumType());
static boolean validateChecksum(ByteBuffer buffer, String pathName, long offset, int hdrSize)
throws IOException {
// A ChecksumType.NULL indicates that the caller is not interested in validating checksums,
// so we always return true.
ChecksumType cktype =
ChecksumType.codeToType(buffer.get(HFileBlock.Header.CHECKSUM_TYPE_INDEX));
if (cktype == ChecksumType.NULL) {
return true; // No checksum validations needed for this block.
}
// read in the stored value of the checksum size from the header.
int bytesPerChecksum = block.getBytesPerChecksum();
int bytesPerChecksum = buffer.getInt(HFileBlock.Header.BYTES_PER_CHECKSUM_INDEX);
DataChecksum dataChecksum = DataChecksum.newDataChecksum(
cktype.getDataChecksumType(), bytesPerChecksum);
assert dataChecksum != null;
int sizeWithHeader = block.getOnDiskDataSizeWithHeader();
int onDiskDataSizeWithHeader =
buffer.getInt(HFileBlock.Header.ON_DISK_DATA_SIZE_WITH_HEADER_INDEX);
if (LOG.isTraceEnabled()) {
LOG.info("dataLength=" + data.length
+ ", sizeWithHeader=" + sizeWithHeader
LOG.info("dataLength=" + buffer.capacity()
+ ", sizeWithHeader=" + onDiskDataSizeWithHeader
+ ", checksumType=" + cktype.getName()
+ ", file=" + pathName
+ ", offset=" + offset
@ -125,8 +114,10 @@ public class ChecksumUtil {
+ ", bytesPerChecksum=" + bytesPerChecksum);
}
try {
dataChecksum.verifyChunkedSums(ByteBuffer.wrap(data, 0, sizeWithHeader),
ByteBuffer.wrap(data, sizeWithHeader, data.length - sizeWithHeader), pathName, 0);
ByteBuffer data = (ByteBuffer) buffer.duplicate().position(0).limit(onDiskDataSizeWithHeader);
ByteBuffer checksums = (ByteBuffer) buffer.duplicate().position(onDiskDataSizeWithHeader)
.limit(buffer.capacity());
dataChecksum.verifyChunkedSums(data, checksums, pathName, 0);
} catch (ChecksumException e) {
return false;
}

View File

@ -276,6 +276,26 @@ public class HFileBlock implements Cacheable {
CacheableDeserializerIdManager.registerDeserializer(BLOCK_DESERIALIZER);
}
// Todo: encapsulate Header related logic in this inner class.
static class Header {
// Format of header is:
// 8 bytes - block magic
// 4 bytes int - onDiskSizeWithoutHeader
// 4 bytes int - uncompressedSizeWithoutHeader
// 8 bytes long - prevBlockOffset
// The following 3 are only present if header contains checksum information
// 1 byte - checksum type
// 4 byte int - bytes per checksum
// 4 byte int - onDiskDataSizeWithHeader
static int BLOCK_MAGIC_INDEX = 0;
static int ON_DISK_SIZE_WITHOUT_HEADER_INDEX = 8;
static int UNCOMPRESSED_SIZE_WITHOUT_HEADER_INDEX = 12;
static int PREV_BLOCK_OFFSET_INDEX = 16;
static int CHECKSUM_TYPE_INDEX = 24;
static int BYTES_PER_CHECKSUM_INDEX = 25;
static int ON_DISK_DATA_SIZE_WITH_HEADER_INDEX = 29;
}
/**
* Copy constructor. Creates a shallow copy of {@code that}'s buffer.
*/
@ -305,7 +325,7 @@ public class HFileBlock implements Cacheable {
* @param onDiskSizeWithoutHeader see {@link #onDiskSizeWithoutHeader}
* @param uncompressedSizeWithoutHeader see {@link #uncompressedSizeWithoutHeader}
* @param prevBlockOffset see {@link #prevBlockOffset}
* @param buf block header ({@link HConstants#HFILEBLOCK_HEADER_SIZE} bytes) followed by
* @param b block header ({@link HConstants#HFILEBLOCK_HEADER_SIZE} bytes) followed by
* uncompressed data.
* @param fillHeader when true, write the first 4 header fields into passed buffer.
* @param offset the file offset the block was read from
@ -335,12 +355,13 @@ public class HFileBlock implements Cacheable {
final int nextBlockOnDiskSize, HFileContext fileContext) throws IOException {
buf.rewind();
final BlockType blockType = BlockType.read(buf);
final int onDiskSizeWithoutHeader = buf.getInt();
final int uncompressedSizeWithoutHeader = buf.getInt();
final long prevBlockOffset = buf.getLong();
byte checksumType = buf.get();
int bytesPerChecksum = buf.getInt();
int onDiskDataSizeWithHeader = buf.getInt();
final int onDiskSizeWithoutHeader = buf.getInt(Header.ON_DISK_SIZE_WITHOUT_HEADER_INDEX);
final int uncompressedSizeWithoutHeader =
buf.getInt(Header.UNCOMPRESSED_SIZE_WITHOUT_HEADER_INDEX);
final long prevBlockOffset = buf.getLong(Header.PREV_BLOCK_OFFSET_INDEX);
byte checksumType = buf.get(Header.CHECKSUM_TYPE_INDEX);
int bytesPerChecksum = buf.getInt(Header.BYTES_PER_CHECKSUM_INDEX);
int onDiskDataSizeWithHeader = buf.getInt(Header.ON_DISK_DATA_SIZE_WITH_HEADER_INDEX);
// This constructor is called when we deserialize a block from cache and when we read a block in
// from the fs. fileCache is null when deserialized from cache so need to make up one.
HFileContextBuilder fileContextBuilder = fileContext != null?
@ -384,14 +405,13 @@ public class HFileBlock implements Cacheable {
}
/**
* Parse total ondisk size including header and checksum. Its second field in header after
* the magic bytes.
* Parse total ondisk size including header and checksum.
* @param headerBuf Header ByteBuffer. Presumed exact size of header.
* @return Size of the block with header included.
*/
private static int getOnDiskSizeWithHeader(final ByteBuffer headerBuf) {
// Set hbase checksum to true always calling headerSize.
return headerBuf.getInt(BlockType.MAGIC_LENGTH) + headerSize(true);
return headerBuf.getInt(Header.ON_DISK_SIZE_WITHOUT_HEADER_INDEX) + headerSize(true);
}
/**
@ -1459,8 +1479,7 @@ public class HFileBlock implements Cacheable {
* @throws IOException
*/
protected int readAtOffset(FSDataInputStream istream, byte [] dest, int destOffset, int size,
boolean peekIntoNextBlock, long fileOffset, boolean pread)
throws IOException {
boolean peekIntoNextBlock, long fileOffset, boolean pread) throws IOException {
if (peekIntoNextBlock && destOffset + size + hdrSize > dest.length) {
// We are asked to read the next block's header as well, but there is
// not enough room in the array.
@ -1647,9 +1666,8 @@ public class HFileBlock implements Cacheable {
* If HBase checksum is switched off, then use HDFS checksum.
* @return the HFileBlock or null if there is a HBase checksum mismatch
*/
private HFileBlock readBlockDataInternal(FSDataInputStream is, long offset,
long onDiskSizeWithHeaderL, boolean pread, boolean verifyChecksum)
throws IOException {
protected HFileBlock readBlockDataInternal(FSDataInputStream is, long offset,
long onDiskSizeWithHeaderL, boolean pread, boolean verifyChecksum) throws IOException {
if (offset < 0) {
throw new IOException("Invalid offset=" + offset + " trying to read "
+ "block (onDiskSize=" + onDiskSizeWithHeaderL + ")");
@ -1691,20 +1709,23 @@ public class HFileBlock implements Cacheable {
// Do a few checks before we go instantiate HFileBlock.
assert onDiskSizeWithHeader > this.hdrSize;
verifyOnDiskSizeMatchesHeader(onDiskSizeWithHeader, headerBuf, offset);
ByteBuffer onDiskBlockByteBuffer = ByteBuffer.wrap(onDiskBlock, 0, onDiskSizeWithHeader);
// Verify checksum of the data before using it for building HFileBlock.
if (verifyChecksum &&
!validateChecksum(offset, onDiskBlockByteBuffer.asReadOnlyBuffer(), hdrSize)) {
return null;
}
// The onDiskBlock will become the headerAndDataBuffer for this block.
// If nextBlockOnDiskSizeWithHeader is not zero, the onDiskBlock already
// contains the header of next block, so no need to set next block's header in it.
HFileBlock hFileBlock =
new HFileBlock(new SingleByteBuff(ByteBuffer.wrap(onDiskBlock, 0, onDiskSizeWithHeader)),
new HFileBlock(new SingleByteBuff(onDiskBlockByteBuffer),
this.fileContext.isUseHBaseChecksum(), MemoryType.EXCLUSIVE, offset,
nextBlockOnDiskSize, fileContext);
// Run check on uncompressed sizings.
if (!fileContext.isCompressedOrEncrypted()) {
hFileBlock.sanityCheckUncompressed();
}
if (verifyChecksum && !validateBlockChecksum(hFileBlock, offset, onDiskBlock, hdrSize)) {
return null;
}
if (LOG.isTraceEnabled()) {
LOG.trace("Read " + hFileBlock);
}
@ -1737,15 +1758,21 @@ public class HFileBlock implements Cacheable {
}
/**
* Generates the checksum for the header as well as the data and
* then validates that it matches the value stored in the header.
* If there is a checksum mismatch, then return false. Otherwise
* return true.
* Generates the checksum for the header as well as the data and then validates it.
* If the block doesn't uses checksum, returns false.
* @return True if checksum matches, else false.
*/
protected boolean validateBlockChecksum(HFileBlock block, long offset, byte[] data,
int hdrSize)
throws IOException {
return ChecksumUtil.validateBlockChecksum(pathName, offset, block, data, hdrSize);
protected boolean validateChecksum(long offset, ByteBuffer data, int hdrSize)
throws IOException {
// If this is an older version of the block that does not have checksums, then return false
// indicating that checksum verification did not succeed. Actually, this method should never
// be called when the minorVersion is 0, thus this is a defensive check for a cannot-happen
// case. Since this is a cannot-happen case, it is better to return false to indicate a
// checksum validation failure.
if (!fileContext.isUseHBaseChecksum()) {
return false;
}
return ChecksumUtil.validateChecksum(data, pathName, offset, hdrSize);
}
@Override

View File

@ -108,9 +108,9 @@ public class TestChecksum {
ChecksumType cktype = itr.next();
Path path = new Path(TEST_UTIL.getDataTestDir(), "checksum" + cktype.getName());
FSDataOutputStream os = fs.create(path);
HFileContext meta = new HFileContextBuilder().
withChecksumType(cktype).
build();
HFileContext meta = new HFileContextBuilder()
.withChecksumType(cktype)
.build();
HFileBlock.Writer hbw = new HFileBlock.Writer(null, meta);
DataOutputStream dos = hbw.startWriting(BlockType.DATA);
for (int i = 0; i < 1000; ++i) {
@ -189,7 +189,7 @@ public class TestChecksum {
.withIncludesTags(useTags)
.withHBaseCheckSum(true)
.build();
HFileBlock.FSReader hbr = new FSReaderImplTest(is, totalSize, fs, path, meta);
HFileBlock.FSReader hbr = new CorruptedFSReaderImpl(is, totalSize, fs, path, meta);
HFileBlock b = hbr.readBlockData(0, -1, pread);
b.sanityCheck();
assertEquals(4936, b.getUncompressedSizeWithoutHeader());
@ -231,7 +231,7 @@ public class TestChecksum {
HFileSystem newfs = new HFileSystem(TEST_UTIL.getConfiguration(), false);
assertEquals(false, newfs.useHBaseChecksum());
is = new FSDataInputStreamWrapper(newfs, path);
hbr = new FSReaderImplTest(is, totalSize, newfs, path, meta);
hbr = new CorruptedFSReaderImpl(is, totalSize, newfs, path, meta);
b = hbr.readBlockData(0, -1, pread);
is.close();
b.sanityCheck();
@ -252,7 +252,7 @@ public class TestChecksum {
}
}
/**
/**
* Test different values of bytesPerChecksum
*/
@Test
@ -340,20 +340,56 @@ public class TestChecksum {
}
/**
* A class that introduces hbase-checksum failures while
* reading data from hfiles. This should trigger the hdfs level
* checksum validations.
* This class is to test checksum behavior when data is corrupted. It mimics the following
* behavior:
* - When fs checksum is disabled, hbase may get corrupted data from hdfs. If verifyChecksum
* is true, it means hbase checksum is on and fs checksum is off, so we corrupt the data.
* - When fs checksum is enabled, hdfs will get a different copy from another node, and will
* always return correct data. So we don't corrupt the data when verifyChecksum for hbase is
* off.
*/
static private class FSReaderImplTest extends HFileBlock.FSReaderImpl {
public FSReaderImplTest(FSDataInputStreamWrapper istream, long fileSize, FileSystem fs,
static private class CorruptedFSReaderImpl extends HFileBlock.FSReaderImpl {
/**
* If set to true, corrupt reads using readAtOffset(...).
*/
boolean corruptDataStream = false;
public CorruptedFSReaderImpl(FSDataInputStreamWrapper istream, long fileSize, FileSystem fs,
Path path, HFileContext meta) throws IOException {
super(istream, fileSize, (HFileSystem) fs, path, meta);
}
@Override
protected boolean validateBlockChecksum(HFileBlock block, long offset,
byte[] data, int hdrSize) throws IOException {
return false; // checksum validation failure
protected HFileBlock readBlockDataInternal(FSDataInputStream is, long offset,
long onDiskSizeWithHeaderL, boolean pread, boolean verifyChecksum) throws IOException {
if (verifyChecksum) {
corruptDataStream = true;
}
HFileBlock b = super.readBlockDataInternal(is, offset, onDiskSizeWithHeaderL, pread,
verifyChecksum);
corruptDataStream = false;
return b;
}
@Override
protected int readAtOffset(FSDataInputStream istream, byte [] dest, int destOffset, int size,
boolean peekIntoNextBlock, long fileOffset, boolean pread) throws IOException {
int returnValue = super.readAtOffset(istream, dest, destOffset, size, peekIntoNextBlock,
fileOffset, pread);
if (!corruptDataStream) {
return returnValue;
}
// Corrupt 3rd character of block magic of next block's header.
if (peekIntoNextBlock) {
dest[destOffset + size + 3] = 0b00000000;
}
// We might be reading this block's header too, corrupt it.
dest[destOffset + 1] = 0b00000000;
// Corrupt non header data
if (size > hdrSize) {
dest[destOffset + hdrSize + 1] = 0b00000000;
}
return returnValue;
}
}
}