HBASE-11625 - Verifies data before building HFileBlock. - Adds HFileBlock.Header class which contains information about location of fields. Testing: Adds CorruptedFSReaderImpl to TestChecksum.

Change-Id: I6777f2ddf8922691c84ca86d0cffa9a37dc879ae

Signed-off-by: stack <stack@apache.org>
This commit is contained in:
Apekshit 2016-05-05 17:05:17 -07:00 committed by stack
parent d07d316113
commit 2f282aca15
3 changed files with 125 additions and 72 deletions

View File

@ -78,46 +78,35 @@ public class ChecksumUtil {
} }
/** /**
* Validates that the data in the specified HFileBlock matches the * Validates that the data in the specified HFileBlock matches the checksum. Generates the
* checksum. Generates the checksum for the data and * checksums for the data and then validate that it matches those stored in the end of the data.
* then validate that it matches the value stored in the header. * @param buffer Contains the data in following order: HFileBlock header, data, checksums.
* If there is a checksum mismatch, then return false. Otherwise * @param pathName Path of the HFile to which the {@code data} belongs. Only used for logging.
* return true. * @param offset offset of the data being validated. Only used for logging.
* The header is extracted from the specified HFileBlock while the * @param hdrSize Size of the block header in {@code data}. Only used for logging.
* data-to-be-verified is extracted from 'data'. * @return True if checksum matches, else false.
*/ */
static boolean validateBlockChecksum(String pathName, long offset, HFileBlock block, static boolean validateChecksum(ByteBuffer buffer, String pathName, long offset, int hdrSize)
byte[] data, int hdrSize) throws IOException { throws IOException {
// A ChecksumType.NULL indicates that the caller is not interested in validating checksums,
// If this is an older version of the block that does not have // so we always return true.
// checksums, then return false indicating that checksum verification ChecksumType cktype =
// did not succeed. Actually, this method should never be called ChecksumType.codeToType(buffer.get(HFileBlock.Header.CHECKSUM_TYPE_INDEX));
// when the minorVersion is 0, thus this is a defensive check for a
// cannot-happen case. Since this is a cannot-happen case, it is
// better to return false to indicate a checksum validation failure.
if (!block.getHFileContext().isUseHBaseChecksum()) {
return false;
}
// Get a checksum object based on the type of checksum that is
// set in the HFileBlock header. A ChecksumType.NULL indicates that
// the caller is not interested in validating checksums, so we
// always return true.
ChecksumType cktype = ChecksumType.codeToType(block.getChecksumType());
if (cktype == ChecksumType.NULL) { if (cktype == ChecksumType.NULL) {
return true; // No checksum validations needed for this block. return true; // No checksum validations needed for this block.
} }
// read in the stored value of the checksum size from the header. // read in the stored value of the checksum size from the header.
int bytesPerChecksum = block.getBytesPerChecksum(); int bytesPerChecksum = buffer.getInt(HFileBlock.Header.BYTES_PER_CHECKSUM_INDEX);
DataChecksum dataChecksum = DataChecksum.newDataChecksum( DataChecksum dataChecksum = DataChecksum.newDataChecksum(
cktype.getDataChecksumType(), bytesPerChecksum); cktype.getDataChecksumType(), bytesPerChecksum);
assert dataChecksum != null; assert dataChecksum != null;
int sizeWithHeader = block.getOnDiskDataSizeWithHeader(); int onDiskDataSizeWithHeader =
buffer.getInt(HFileBlock.Header.ON_DISK_DATA_SIZE_WITH_HEADER_INDEX);
if (LOG.isTraceEnabled()) { if (LOG.isTraceEnabled()) {
LOG.info("dataLength=" + data.length LOG.info("dataLength=" + buffer.capacity()
+ ", sizeWithHeader=" + sizeWithHeader + ", sizeWithHeader=" + onDiskDataSizeWithHeader
+ ", checksumType=" + cktype.getName() + ", checksumType=" + cktype.getName()
+ ", file=" + pathName + ", file=" + pathName
+ ", offset=" + offset + ", offset=" + offset
@ -125,8 +114,10 @@ public class ChecksumUtil {
+ ", bytesPerChecksum=" + bytesPerChecksum); + ", bytesPerChecksum=" + bytesPerChecksum);
} }
try { try {
dataChecksum.verifyChunkedSums(ByteBuffer.wrap(data, 0, sizeWithHeader), ByteBuffer data = (ByteBuffer) buffer.duplicate().position(0).limit(onDiskDataSizeWithHeader);
ByteBuffer.wrap(data, sizeWithHeader, data.length - sizeWithHeader), pathName, 0); ByteBuffer checksums = (ByteBuffer) buffer.duplicate().position(onDiskDataSizeWithHeader)
.limit(buffer.capacity());
dataChecksum.verifyChunkedSums(data, checksums, pathName, 0);
} catch (ChecksumException e) { } catch (ChecksumException e) {
return false; return false;
} }

View File

@ -263,6 +263,26 @@ public class HFileBlock implements Cacheable {
CacheableDeserializerIdManager.registerDeserializer(BLOCK_DESERIALIZER); CacheableDeserializerIdManager.registerDeserializer(BLOCK_DESERIALIZER);
} }
// Todo: encapsulate Header related logic in this inner class.
static class Header {
// Format of header is:
// 8 bytes - block magic
// 4 bytes int - onDiskSizeWithoutHeader
// 4 bytes int - uncompressedSizeWithoutHeader
// 8 bytes long - prevBlockOffset
// The following 3 are only present if header contains checksum information
// 1 byte - checksum type
// 4 byte int - bytes per checksum
// 4 byte int - onDiskDataSizeWithHeader
static int BLOCK_MAGIC_INDEX = 0;
static int ON_DISK_SIZE_WITHOUT_HEADER_INDEX = 8;
static int UNCOMPRESSED_SIZE_WITHOUT_HEADER_INDEX = 12;
static int PREV_BLOCK_OFFSET_INDEX = 16;
static int CHECKSUM_TYPE_INDEX = 24;
static int BYTES_PER_CHECKSUM_INDEX = 25;
static int ON_DISK_DATA_SIZE_WITH_HEADER_INDEX = 29;
}
/** /**
* Copy constructor. Creates a shallow copy of {@code that}'s buffer. * Copy constructor. Creates a shallow copy of {@code that}'s buffer.
*/ */
@ -292,7 +312,7 @@ public class HFileBlock implements Cacheable {
* @param onDiskSizeWithoutHeader see {@link #onDiskSizeWithoutHeader} * @param onDiskSizeWithoutHeader see {@link #onDiskSizeWithoutHeader}
* @param uncompressedSizeWithoutHeader see {@link #uncompressedSizeWithoutHeader} * @param uncompressedSizeWithoutHeader see {@link #uncompressedSizeWithoutHeader}
* @param prevBlockOffset see {@link #prevBlockOffset} * @param prevBlockOffset see {@link #prevBlockOffset}
* @param buf block header ({@link HConstants#HFILEBLOCK_HEADER_SIZE} bytes) followed by * @param b block header ({@link HConstants#HFILEBLOCK_HEADER_SIZE} bytes) followed by
* uncompressed data. * uncompressed data.
* @param fillHeader when true, write the first 4 header fields into passed buffer. * @param fillHeader when true, write the first 4 header fields into passed buffer.
* @param offset the file offset the block was read from * @param offset the file offset the block was read from
@ -322,12 +342,13 @@ public class HFileBlock implements Cacheable {
final int nextBlockOnDiskSize, HFileContext fileContext) throws IOException { final int nextBlockOnDiskSize, HFileContext fileContext) throws IOException {
buf.rewind(); buf.rewind();
final BlockType blockType = BlockType.read(buf); final BlockType blockType = BlockType.read(buf);
final int onDiskSizeWithoutHeader = buf.getInt(); final int onDiskSizeWithoutHeader = buf.getInt(Header.ON_DISK_SIZE_WITHOUT_HEADER_INDEX);
final int uncompressedSizeWithoutHeader = buf.getInt(); final int uncompressedSizeWithoutHeader =
final long prevBlockOffset = buf.getLong(); buf.getInt(Header.UNCOMPRESSED_SIZE_WITHOUT_HEADER_INDEX);
byte checksumType = buf.get(); final long prevBlockOffset = buf.getLong(Header.PREV_BLOCK_OFFSET_INDEX);
int bytesPerChecksum = buf.getInt(); byte checksumType = buf.get(Header.CHECKSUM_TYPE_INDEX);
int onDiskDataSizeWithHeader = buf.getInt(); int bytesPerChecksum = buf.getInt(Header.BYTES_PER_CHECKSUM_INDEX);
int onDiskDataSizeWithHeader = buf.getInt(Header.ON_DISK_DATA_SIZE_WITH_HEADER_INDEX);
// This constructor is called when we deserialize a block from cache and when we read a block in // This constructor is called when we deserialize a block from cache and when we read a block in
// from the fs. fileCache is null when deserialized from cache so need to make up one. // from the fs. fileCache is null when deserialized from cache so need to make up one.
HFileContextBuilder fileContextBuilder = fileContext != null? HFileContextBuilder fileContextBuilder = fileContext != null?
@ -370,14 +391,13 @@ public class HFileBlock implements Cacheable {
} }
/** /**
* Parse total ondisk size including header and checksum. Its second field in header after * Parse total ondisk size including header and checksum.
* the magic bytes.
* @param headerBuf Header ByteBuffer. Presumed exact size of header. * @param headerBuf Header ByteBuffer. Presumed exact size of header.
* @return Size of the block with header included. * @return Size of the block with header included.
*/ */
private static int getOnDiskSizeWithHeader(final ByteBuffer headerBuf) { private static int getOnDiskSizeWithHeader(final ByteBuffer headerBuf) {
// Set hbase checksum to true always calling headerSize. // Set hbase checksum to true always calling headerSize.
return headerBuf.getInt(BlockType.MAGIC_LENGTH) + headerSize(true); return headerBuf.getInt(Header.ON_DISK_SIZE_WITHOUT_HEADER_INDEX) + headerSize(true);
} }
/** /**
@ -1409,8 +1429,7 @@ public class HFileBlock implements Cacheable {
* @throws IOException * @throws IOException
*/ */
protected int readAtOffset(FSDataInputStream istream, byte [] dest, int destOffset, int size, protected int readAtOffset(FSDataInputStream istream, byte [] dest, int destOffset, int size,
boolean peekIntoNextBlock, long fileOffset, boolean pread) boolean peekIntoNextBlock, long fileOffset, boolean pread) throws IOException {
throws IOException {
if (peekIntoNextBlock && destOffset + size + hdrSize > dest.length) { if (peekIntoNextBlock && destOffset + size + hdrSize > dest.length) {
// We are asked to read the next block's header as well, but there is // We are asked to read the next block's header as well, but there is
// not enough room in the array. // not enough room in the array.
@ -1658,9 +1677,8 @@ public class HFileBlock implements Cacheable {
* If HBase checksum is switched off, then use HDFS checksum. * If HBase checksum is switched off, then use HDFS checksum.
* @return the HFileBlock or null if there is a HBase checksum mismatch * @return the HFileBlock or null if there is a HBase checksum mismatch
*/ */
private HFileBlock readBlockDataInternal(FSDataInputStream is, long offset, protected HFileBlock readBlockDataInternal(FSDataInputStream is, long offset,
long onDiskSizeWithHeaderL, boolean pread, boolean verifyChecksum) long onDiskSizeWithHeaderL, boolean pread, boolean verifyChecksum) throws IOException {
throws IOException {
if (offset < 0) { if (offset < 0) {
throw new IOException("Invalid offset=" + offset + " trying to read " throw new IOException("Invalid offset=" + offset + " trying to read "
+ "block (onDiskSize=" + onDiskSizeWithHeaderL + ")"); + "block (onDiskSize=" + onDiskSizeWithHeaderL + ")");
@ -1704,20 +1722,22 @@ public class HFileBlock implements Cacheable {
// Do a few checks before we go instantiate HFileBlock. // Do a few checks before we go instantiate HFileBlock.
assert onDiskSizeWithHeader > this.hdrSize; assert onDiskSizeWithHeader > this.hdrSize;
verifyOnDiskSizeMatchesHeader(onDiskSizeWithHeader, headerBuf, offset); verifyOnDiskSizeMatchesHeader(onDiskSizeWithHeader, headerBuf, offset);
ByteBuffer onDiskBlockByteBuffer = ByteBuffer.wrap(onDiskBlock, 0, onDiskSizeWithHeader);
// Verify checksum of the data before using it for building HFileBlock.
if (verifyChecksum &&
!validateChecksum(offset, onDiskBlockByteBuffer.asReadOnlyBuffer(), hdrSize)) {
return null;
}
// The onDiskBlock will become the headerAndDataBuffer for this block. // The onDiskBlock will become the headerAndDataBuffer for this block.
// If nextBlockOnDiskSizeWithHeader is not zero, the onDiskBlock already // If nextBlockOnDiskSizeWithHeader is not zero, the onDiskBlock already
// contains the header of next block, so no need to set next block's header in it. // contains the header of next block, so no need to set next block's header in it.
HFileBlock hFileBlock = HFileBlock hFileBlock =
new HFileBlock(ByteBuffer.wrap(onDiskBlock, 0, onDiskSizeWithHeader), new HFileBlock(onDiskBlockByteBuffer, this.fileContext.isUseHBaseChecksum(), offset,
this.fileContext.isUseHBaseChecksum(), offset,
nextBlockOnDiskSize, fileContext); nextBlockOnDiskSize, fileContext);
// Run check on uncompressed sizings. // Run check on uncompressed sizings.
if (!fileContext.isCompressedOrEncrypted()) { if (!fileContext.isCompressedOrEncrypted()) {
hFileBlock.sanityCheckUncompressed(); hFileBlock.sanityCheckUncompressed();
} }
if (verifyChecksum && !validateBlockChecksum(hFileBlock, offset, onDiskBlock, hdrSize)) {
return null;
}
if (LOG.isTraceEnabled()) { if (LOG.isTraceEnabled()) {
LOG.trace("Read " + hFileBlock); LOG.trace("Read " + hFileBlock);
} }
@ -1748,15 +1768,21 @@ public class HFileBlock implements Cacheable {
} }
/** /**
* Generates the checksum for the header as well as the data and * Generates the checksum for the header as well as the data and then validates it.
* then validates that it matches the value stored in the header. * If the block doesn't uses checksum, returns false.
* If there is a checksum mismatch, then return false. Otherwise * @return True if checksum matches, else false.
* return true.
*/ */
protected boolean validateBlockChecksum(HFileBlock block, long offset, byte[] data, protected boolean validateChecksum(long offset, ByteBuffer data, int hdrSize)
int hdrSize)
throws IOException { throws IOException {
return ChecksumUtil.validateBlockChecksum(pathName, offset, block, data, hdrSize); // If this is an older version of the block that does not have checksums, then return false
// indicating that checksum verification did not succeed. Actually, this method should never
// be called when the minorVersion is 0, thus this is a defensive check for a cannot-happen
// case. Since this is a cannot-happen case, it is better to return false to indicate a
// checksum validation failure.
if (!fileContext.isUseHBaseChecksum()) {
return false;
}
return ChecksumUtil.validateChecksum(data, pathName, offset, hdrSize);
} }
@Override @Override

View File

@ -107,9 +107,9 @@ public class TestChecksum {
ChecksumType cktype = itr.next(); ChecksumType cktype = itr.next();
Path path = new Path(TEST_UTIL.getDataTestDir(), "checksum" + cktype.getName()); Path path = new Path(TEST_UTIL.getDataTestDir(), "checksum" + cktype.getName());
FSDataOutputStream os = fs.create(path); FSDataOutputStream os = fs.create(path);
HFileContext meta = new HFileContextBuilder(). HFileContext meta = new HFileContextBuilder()
withChecksumType(cktype). .withChecksumType(cktype)
build(); .build();
HFileBlock.Writer hbw = new HFileBlock.Writer(null, meta); HFileBlock.Writer hbw = new HFileBlock.Writer(null, meta);
DataOutputStream dos = hbw.startWriting(BlockType.DATA); DataOutputStream dos = hbw.startWriting(BlockType.DATA);
for (int i = 0; i < 1000; ++i) { for (int i = 0; i < 1000; ++i) {
@ -188,7 +188,7 @@ public class TestChecksum {
.withIncludesTags(useTags) .withIncludesTags(useTags)
.withHBaseCheckSum(true) .withHBaseCheckSum(true)
.build(); .build();
HFileBlock.FSReader hbr = new FSReaderImplTest(is, totalSize, fs, path, meta); HFileBlock.FSReader hbr = new CorruptedFSReaderImpl(is, totalSize, fs, path, meta);
HFileBlock b = hbr.readBlockData(0, -1, pread); HFileBlock b = hbr.readBlockData(0, -1, pread);
b.sanityCheck(); b.sanityCheck();
assertEquals(4936, b.getUncompressedSizeWithoutHeader()); assertEquals(4936, b.getUncompressedSizeWithoutHeader());
@ -230,7 +230,7 @@ public class TestChecksum {
HFileSystem newfs = new HFileSystem(TEST_UTIL.getConfiguration(), false); HFileSystem newfs = new HFileSystem(TEST_UTIL.getConfiguration(), false);
assertEquals(false, newfs.useHBaseChecksum()); assertEquals(false, newfs.useHBaseChecksum());
is = new FSDataInputStreamWrapper(newfs, path); is = new FSDataInputStreamWrapper(newfs, path);
hbr = new FSReaderImplTest(is, totalSize, newfs, path, meta); hbr = new CorruptedFSReaderImpl(is, totalSize, newfs, path, meta);
b = hbr.readBlockData(0, -1, pread); b = hbr.readBlockData(0, -1, pread);
is.close(); is.close();
b.sanityCheck(); b.sanityCheck();
@ -339,20 +339,56 @@ public class TestChecksum {
} }
/** /**
* A class that introduces hbase-checksum failures while * This class is to test checksum behavior when data is corrupted. It mimics the following
* reading data from hfiles. This should trigger the hdfs level * behavior:
* checksum validations. * - When fs checksum is disabled, hbase may get corrupted data from hdfs. If verifyChecksum
* is true, it means hbase checksum is on and fs checksum is off, so we corrupt the data.
* - When fs checksum is enabled, hdfs will get a different copy from another node, and will
* always return correct data. So we don't corrupt the data when verifyChecksum for hbase is
* off.
*/ */
static private class FSReaderImplTest extends HFileBlock.FSReaderImpl { static private class CorruptedFSReaderImpl extends HFileBlock.FSReaderImpl {
public FSReaderImplTest(FSDataInputStreamWrapper istream, long fileSize, FileSystem fs, /**
* If set to true, corrupt reads using readAtOffset(...).
*/
boolean corruptDataStream = false;
public CorruptedFSReaderImpl(FSDataInputStreamWrapper istream, long fileSize, FileSystem fs,
Path path, HFileContext meta) throws IOException { Path path, HFileContext meta) throws IOException {
super(istream, fileSize, (HFileSystem) fs, path, meta); super(istream, fileSize, (HFileSystem) fs, path, meta);
} }
@Override @Override
protected boolean validateBlockChecksum(HFileBlock block, long offset, protected HFileBlock readBlockDataInternal(FSDataInputStream is, long offset,
byte[] data, int hdrSize) throws IOException { long onDiskSizeWithHeaderL, boolean pread, boolean verifyChecksum) throws IOException {
return false; // checksum validation failure if (verifyChecksum) {
corruptDataStream = true;
}
HFileBlock b = super.readBlockDataInternal(is, offset, onDiskSizeWithHeaderL, pread,
verifyChecksum);
corruptDataStream = false;
return b;
}
@Override
protected int readAtOffset(FSDataInputStream istream, byte [] dest, int destOffset, int size,
boolean peekIntoNextBlock, long fileOffset, boolean pread) throws IOException {
int returnValue = super.readAtOffset(istream, dest, destOffset, size, peekIntoNextBlock,
fileOffset, pread);
if (!corruptDataStream) {
return returnValue;
}
// Corrupt 3rd character of block magic of next block's header.
if (peekIntoNextBlock) {
dest[destOffset + size + 3] = 0b00000000;
}
// We might be reading this block's header too, corrupt it.
dest[destOffset + 1] = 0b00000000;
// Corrupt non header data
if (size > hdrSize) {
dest[destOffset + hdrSize + 1] = 0b00000000;
}
return returnValue;
} }
} }
} }