From d2daada970b424263533517d2a26e36e31ec2746 Mon Sep 17 00:00:00 2001 From: Zach York Date: Wed, 21 Mar 2018 15:37:09 -0700 Subject: [PATCH] HBASE-20447 Only fail cacheBlock if block collisions aren't related to next block metadata When we pread, we don't force the read to read all of the next block header. However, when we get into a race condition where two opener threads try to cache the same block and one thread read all of the next block header and the other one didn't, it will fail the open process. This is especially important in a splitting case where it will potentially fail the split process. Instead, in the caches, we should only fail if the required blocks are different. Signed-off-by: Andrew Purtell --- .../hbase/io/hfile/MemcachedBlockCache.java | 2 +- .../hadoop/hbase/io/hfile/BlockCacheUtil.java | 38 ++++++++++++- .../hadoop/hbase/io/hfile/Cacheable.java | 3 +- .../hadoop/hbase/io/hfile/HFileBlock.java | 28 +++++----- .../hadoop/hbase/io/hfile/LruBlockCache.java | 21 ++++--- .../hbase/io/hfile/bucket/BucketCache.java | 25 ++++++--- .../hadoop/hbase/io/hfile/CacheTestUtils.java | 13 ++++- .../hbase/io/hfile/TestCacheConfig.java | 2 +- .../hbase/io/hfile/TestCachedBlockQueue.java | 2 +- .../hadoop/hbase/io/hfile/TestHFileBlock.java | 25 ++++++++- .../hbase/io/hfile/TestLruBlockCache.java | 56 ++++++++++++++++++- .../io/hfile/bucket/TestBucketCache.java | 51 +++++++++++++++-- 12 files changed, 220 insertions(+), 46 deletions(-) diff --git a/hbase-external-blockcache/src/main/java/org/apache/hadoop/hbase/io/hfile/MemcachedBlockCache.java b/hbase-external-blockcache/src/main/java/org/apache/hadoop/hbase/io/hfile/MemcachedBlockCache.java index ba6ae1e39ea..51fe7542938 100644 --- a/hbase-external-blockcache/src/main/java/org/apache/hadoop/hbase/io/hfile/MemcachedBlockCache.java +++ b/hbase-external-blockcache/src/main/java/org/apache/hadoop/hbase/io/hfile/MemcachedBlockCache.java @@ -263,7 +263,7 @@ public class MemcachedBlockCache implements BlockCache { @Override public CachedData encode(HFileBlock block) { ByteBuffer bb = ByteBuffer.allocate(block.getSerializedLength()); - block.serialize(bb); + block.serialize(bb, true); return new CachedData(0, bb.array(), CachedData.MAX_SIZE); } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCacheUtil.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCacheUtil.java index 3c04fa81cb5..8a100aec437 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCacheUtil.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCacheUtil.java @@ -33,6 +33,8 @@ import org.apache.yetus.audience.InterfaceAudience; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.metrics.impl.FastLongHistogram; import org.apache.hadoop.hbase.util.Bytes; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** * Utilty for aggregating counts in CachedBlocks and toString/toJSON CachedBlocks and BlockCaches. @@ -41,6 +43,7 @@ import org.apache.hadoop.hbase.util.Bytes; @InterfaceAudience.Private public class BlockCacheUtil { + private static final Logger LOG = LoggerFactory.getLogger(BlockCacheUtil.class); public static final long NANOS_PER_SECOND = 1000000000; @@ -173,15 +176,44 @@ public class BlockCacheUtil { return cbsbf; } - public static int compareCacheBlock(Cacheable left, Cacheable right) { + private static int compareCacheBlock(Cacheable left, Cacheable right, + boolean includeNextBlockMetadata) { ByteBuffer l = ByteBuffer.allocate(left.getSerializedLength()); - left.serialize(l); + left.serialize(l, includeNextBlockMetadata); ByteBuffer r = ByteBuffer.allocate(right.getSerializedLength()); - right.serialize(r); + right.serialize(r, includeNextBlockMetadata); return Bytes.compareTo(l.array(), l.arrayOffset(), l.limit(), r.array(), r.arrayOffset(), r.limit()); } + /** + * Validate that the existing and newBlock are the same without including the nextBlockMetadata, + * if not, throw an exception. If they are the same without the nextBlockMetadata, + * return the comparison. + * + * @param existing block that is existing in the cache. + * @param newBlock block that is trying to be cached. + * @param cacheKey the cache key of the blocks. + * @return comparison of the existing block to the newBlock. + */ + public static int validateBlockAddition(Cacheable existing, Cacheable newBlock, + BlockCacheKey cacheKey) { + int comparison = compareCacheBlock(existing, newBlock, true); + if (comparison != 0) { + LOG.warn("Cached block contents differ, trying to just compare the block contents " + + "without the next block. CacheKey: " + cacheKey); + + // compare the contents, if they are not equal, we are in big trouble + int comparisonWithoutNextBlockMetadata = compareCacheBlock(existing, newBlock, false); + + if (comparisonWithoutNextBlockMetadata != 0) { + throw new RuntimeException("Cached block contents differ, which should not have happened." + + "cacheKey:" + cacheKey); + } + } + return comparison; + } + /** * Use one of these to keep a running account of cached blocks by file. Throw it away when done. * This is different than metrics in that it is stats on current state of a cache. diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/Cacheable.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/Cacheable.java index ca33b5da4eb..0224dea3971 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/Cacheable.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/Cacheable.java @@ -46,8 +46,9 @@ public interface Cacheable extends HeapSize { /** * Serializes its data into destination. * @param destination Where to serialize to + * @param includeNextBlockMetadata Whether to include nextBlockMetadata in the Cache block. */ - void serialize(ByteBuffer destination); + void serialize(ByteBuffer destination, boolean includeNextBlockMetadata); /** * Returns CacheableDeserializer instance which reconstructs original object from ByteBuffer. diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java index dcea3181b79..238cd70bc96 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java @@ -95,8 +95,8 @@ import org.apache.hbase.thirdparty.com.google.common.base.Preconditions; * Caches cache whole blocks with trailing checksums if any. We then tag on some metadata, the * content of BLOCK_METADATA_SPACE which will be flag on if we are doing 'hbase' * checksums and then the offset into the file which is needed when we re-make a cache key - * when we return the block to the cache as 'done'. See {@link Cacheable#serialize(ByteBuffer)} and - * {@link Cacheable#getDeserializer()}. + * when we return the block to the cache as 'done'. + * See {@link Cacheable#serialize(ByteBuffer, boolean)} and {@link Cacheable#getDeserializer()}. * *

TODO: Should we cache the checksums? Down in Writer#getBlockForCaching(CacheConfig) where * we make a block to cache-on-write, there is an attempt at turning off checksums. This is not the @@ -325,7 +325,6 @@ public class HFileBlock implements Cacheable { * Creates a new {@link HFile} block from the given fields. This constructor * is used only while writing blocks and caching, * and is sitting in a byte buffer and we want to stuff the block into cache. - * See {@link Writer#getBlockForCaching(CacheConfig)}. * *

TODO: The caller presumes no checksumming * required of this block instance since going into cache; checksum already verified on @@ -341,9 +340,11 @@ public class HFileBlock implements Cacheable { * @param onDiskDataSizeWithHeader see {@link #onDiskDataSizeWithHeader} * @param fileContext HFile meta data */ - HFileBlock(BlockType blockType, int onDiskSizeWithoutHeader, int uncompressedSizeWithoutHeader, - long prevBlockOffset, ByteBuffer b, boolean fillHeader, long offset, - final int nextBlockOnDiskSize, int onDiskDataSizeWithHeader, HFileContext fileContext) { + @VisibleForTesting + public HFileBlock(BlockType blockType, int onDiskSizeWithoutHeader, + int uncompressedSizeWithoutHeader, long prevBlockOffset, ByteBuffer b, boolean fillHeader, + long offset, final int nextBlockOnDiskSize, int onDiskDataSizeWithHeader, + HFileContext fileContext) { init(blockType, onDiskSizeWithoutHeader, uncompressedSizeWithoutHeader, prevBlockOffset, offset, onDiskDataSizeWithHeader, nextBlockOnDiskSize, fileContext); this.buf = new SingleByteBuff(b); @@ -620,6 +621,7 @@ public class HFileBlock implements Cacheable { .append(", buf=[").append(buf).append("]") .append(", dataBeginsWith=").append(dataBegin) .append(", fileContext=").append(fileContext) + .append(", nextBlockOnDiskSize=").append(nextBlockOnDiskSize) .append("]"); return sb.toString(); } @@ -1893,11 +1895,9 @@ public class HFileBlock implements Cacheable { // Cacheable implementation @Override - public void serialize(ByteBuffer destination) { - // BE CAREFUL!! There is a custom version of this serialization over in BucketCache#doDrain. - // Make sure any changes in here are reflected over there. + public void serialize(ByteBuffer destination, boolean includeNextBlockMetadata) { this.buf.get(destination, 0, getSerializedLength() - BLOCK_METADATA_SPACE); - destination = addMetaData(destination); + destination = addMetaData(destination, includeNextBlockMetadata); // Make it ready for reading. flip sets position to zero and limit to current position which // is what we want if we do not want to serialize the block plus checksums if present plus @@ -1910,7 +1910,7 @@ public class HFileBlock implements Cacheable { */ public ByteBuffer getMetaData() { ByteBuffer bb = ByteBuffer.allocate(BLOCK_METADATA_SPACE); - bb = addMetaData(bb); + bb = addMetaData(bb, true); bb.flip(); return bb; } @@ -1919,10 +1919,12 @@ public class HFileBlock implements Cacheable { * Adds metadata at current position (position is moved forward). Does not flip or reset. * @return The passed destination with metadata added. */ - private ByteBuffer addMetaData(final ByteBuffer destination) { + private ByteBuffer addMetaData(final ByteBuffer destination, boolean includeNextBlockMetadata) { destination.put(this.fileContext.isUseHBaseChecksum() ? (byte) 1 : (byte) 0); destination.putLong(this.offset); - destination.putInt(this.nextBlockOnDiskSize); + if (includeNextBlockMetadata) { + destination.putInt(this.nextBlockOnDiskSize); + } return destination; } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/LruBlockCache.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/LruBlockCache.java index 2b63e540fea..c2e55989f28 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/LruBlockCache.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/LruBlockCache.java @@ -380,15 +380,20 @@ public class LruBlockCache implements ResizableBlockCache, HeapSize { LruCachedBlock cb = map.get(cacheKey); if (cb != null) { - // compare the contents, if they are not equal, we are in big trouble - if (BlockCacheUtil.compareCacheBlock(buf, cb.getBuffer()) != 0) { - throw new RuntimeException("Cached block contents differ, which should not have happened." - + "cacheKey:" + cacheKey); + int comparison = BlockCacheUtil.validateBlockAddition(cb.getBuffer(), buf, cacheKey); + if (comparison != 0) { + if (comparison < 0) { + LOG.warn("Cached block contents differ by nextBlockOnDiskSize. Keeping cached block."); + return; + } else { + LOG.warn("Cached block contents differ by nextBlockOnDiskSize. Caching new block."); + } + } else { + String msg = "Cached an already cached block: " + cacheKey + " cb:" + cb.getCacheKey(); + msg += ". This is harmless and can happen in rare cases (see HBASE-8547)"; + LOG.warn(msg); + return; } - String msg = "Cached an already cached block: " + cacheKey + " cb:" + cb.getCacheKey(); - msg += ". This is harmless and can happen in rare cases (see HBASE-8547)"; - LOG.warn(msg); - return; } long currentSize = size.get(); long currentAcceptableSize = acceptableSize(); diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketCache.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketCache.java index 673057c7949..bc45d398862 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketCache.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketCache.java @@ -425,21 +425,28 @@ public class BucketCache implements BlockCache, HeapSize { return; } - if (backingMap.containsKey(cacheKey)) { + if (backingMap.containsKey(cacheKey) || ramCache.containsKey(cacheKey)) { Cacheable existingBlock = getBlock(cacheKey, false, false, false); + try { - if (BlockCacheUtil.compareCacheBlock(cachedItem, existingBlock) != 0) { - throw new RuntimeException("Cached block contents differ, which should not have happened." - + "cacheKey:" + cacheKey); + int comparison = BlockCacheUtil.validateBlockAddition(existingBlock, cachedItem, cacheKey); + if (comparison != 0) { + if (comparison < 0) { + LOG.warn("Cached block contents differ by nextBlockOnDiskSize. Keeping cached block."); + return; + } else { + LOG.warn("Cached block contents differ by nextBlockOnDiskSize. Caching new block."); + } + } else { + String msg = "Caching an already cached block: " + cacheKey; + msg += ". This is harmless and can happen in rare cases (see HBASE-8547)"; + LOG.warn(msg); + return; } - String msg = "Caching an already cached block: " + cacheKey; - msg += ". This is harmless and can happen in rare cases (see HBASE-8547)"; - LOG.warn(msg); } finally { // return the block since we need to decrement the count returnBlock(cacheKey, existingBlock); } - return; } /* @@ -1505,7 +1512,7 @@ public class BucketCache implements BlockCache, HeapSize { ioEngine.write(metadata, offset + len - metadata.limit()); } else { ByteBuffer bb = ByteBuffer.allocate(len); - data.serialize(bb); + data.serialize(bb, true); ioEngine.write(bb, offset); } } catch (IOException ioe) { diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/CacheTestUtils.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/CacheTestUtils.java index 4300387a5a8..af28912d0ef 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/CacheTestUtils.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/CacheTestUtils.java @@ -311,7 +311,7 @@ public class CacheTestUtils { } @Override - public void serialize(ByteBuffer destination) { + public void serialize(ByteBuffer destination, boolean includeNextBlockMetadata) { destination.putInt(buf.length); Thread.yield(); destination.put(buf); @@ -398,4 +398,15 @@ public class CacheTestUtils { return this.block; } } + + public static void getBlockAndAssertEquals(BlockCache cache, BlockCacheKey key, + Cacheable blockToCache, ByteBuffer destBuffer, + ByteBuffer expectedBuffer) { + destBuffer.clear(); + cache.cacheBlock(key, blockToCache); + Cacheable actualBlock = cache.getBlock(key, false, false, false); + actualBlock.serialize(destBuffer, true); + assertEquals(expectedBuffer, destBuffer); + cache.returnBlock(key, actualBlock); + } } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestCacheConfig.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestCacheConfig.java index cd0bdc23f88..f84a3194398 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestCacheConfig.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestCacheConfig.java @@ -136,7 +136,7 @@ public class TestCacheConfig { } @Override - public void serialize(ByteBuffer destination) { + public void serialize(ByteBuffer destination, boolean includeNextBlockMetadata) { LOG.info("Serialized " + this + " to " + destination); } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestCachedBlockQueue.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestCachedBlockQueue.java index baa672a5eae..ab039ec3ef5 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestCachedBlockQueue.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestCachedBlockQueue.java @@ -131,7 +131,7 @@ public class TestCachedBlockQueue extends TestCase { } @Override - public void serialize(ByteBuffer destination) { + public void serialize(ByteBuffer destination, boolean includeNextBlockMetadata) { } @Override diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlock.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlock.java index 829c4b84756..f4e6696d7d6 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlock.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlock.java @@ -468,7 +468,7 @@ public class TestHFileBlock { // test serialized blocks for (boolean reuseBuffer : new boolean[] { false, true }) { ByteBuffer serialized = ByteBuffer.allocate(blockFromHFile.getSerializedLength()); - blockFromHFile.serialize(serialized); + blockFromHFile.serialize(serialized, true); HFileBlock deserialized = (HFileBlock) blockFromHFile.getDeserializer().deserialize( new SingleByteBuff(serialized), reuseBuffer, MemoryType.EXCLUSIVE); @@ -858,4 +858,27 @@ public class TestHFileBlock { block.heapSize()); } } + + @Test + public void testSerializeWithoutNextBlockMetadata() { + int size = 100; + int length = HConstants.HFILEBLOCK_HEADER_SIZE + size; + byte[] byteArr = new byte[length]; + ByteBuffer buf = ByteBuffer.wrap(byteArr, 0, size); + HFileContext meta = new HFileContextBuilder().build(); + HFileBlock blockWithNextBlockMetadata = new HFileBlock(BlockType.DATA, size, size, -1, buf, + HFileBlock.FILL_HEADER, -1, 52, -1, meta); + HFileBlock blockWithoutNextBlockMetadata = new HFileBlock(BlockType.DATA, size, size, -1, buf, + HFileBlock.FILL_HEADER, -1, -1, -1, meta); + ByteBuffer buff1 = ByteBuffer.allocate(length); + ByteBuffer buff2 = ByteBuffer.allocate(length); + blockWithNextBlockMetadata.serialize(buff1, true); + blockWithoutNextBlockMetadata.serialize(buff2, true); + assertNotEquals(buff1, buff2); + buff1.clear(); + buff2.clear(); + blockWithNextBlockMetadata.serialize(buff1, false); + blockWithoutNextBlockMetadata.serialize(buff2, false); + assertEquals(buff1, buff2); + } } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestLruBlockCache.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestLruBlockCache.java index fab19a405a5..df0bed57052 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestLruBlockCache.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestLruBlockCache.java @@ -31,6 +31,7 @@ import java.util.concurrent.atomic.AtomicInteger; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.HBaseClassTestRule; import org.apache.hadoop.hbase.HBaseConfiguration; +import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.Waiter; import org.apache.hadoop.hbase.Waiter.ExplainingPredicate; import org.apache.hadoop.hbase.io.HeapSize; @@ -803,6 +804,59 @@ public class TestLruBlockCache { assertEquals(0.5, stats.getHitCachingRatioPastNPeriods(), delta); } + @Test + public void testCacheBlockNextBlockMetadataMissing() { + long maxSize = 100000; + long blockSize = calculateBlockSize(maxSize, 10); + int size = 100; + int length = HConstants.HFILEBLOCK_HEADER_SIZE + size; + byte[] byteArr = new byte[length]; + ByteBuffer buf = ByteBuffer.wrap(byteArr, 0, size); + HFileContext meta = new HFileContextBuilder().build(); + HFileBlock blockWithNextBlockMetadata = new HFileBlock(BlockType.DATA, size, size, -1, buf, + HFileBlock.FILL_HEADER, -1, 52, -1, meta); + HFileBlock blockWithoutNextBlockMetadata = new HFileBlock(BlockType.DATA, size, size, -1, buf, + HFileBlock.FILL_HEADER, -1, -1, -1, meta); + + LruBlockCache cache = new LruBlockCache(maxSize, blockSize, false, + (int)Math.ceil(1.2*maxSize/blockSize), + LruBlockCache.DEFAULT_LOAD_FACTOR, + LruBlockCache.DEFAULT_CONCURRENCY_LEVEL, + 0.66f, // min + 0.99f, // acceptable + 0.33f, // single + 0.33f, // multi + 0.34f, // memory + 1.2f, // limit + false, + 1024); + + BlockCacheKey key = new BlockCacheKey("key1", 0); + ByteBuffer actualBuffer = ByteBuffer.allocate(length); + ByteBuffer block1Buffer = ByteBuffer.allocate(length); + ByteBuffer block2Buffer = ByteBuffer.allocate(length); + blockWithNextBlockMetadata.serialize(block1Buffer, true); + blockWithoutNextBlockMetadata.serialize(block2Buffer, true); + + //Add blockWithNextBlockMetadata, expect blockWithNextBlockMetadata back. + CacheTestUtils.getBlockAndAssertEquals(cache, key, blockWithNextBlockMetadata, actualBuffer, + block1Buffer); + + //Add blockWithoutNextBlockMetada, expect blockWithNextBlockMetadata back. + CacheTestUtils.getBlockAndAssertEquals(cache, key, blockWithoutNextBlockMetadata, actualBuffer, + block1Buffer); + + //Clear and add blockWithoutNextBlockMetadata + cache.clearCache(); + assertNull(cache.getBlock(key, false, false, false)); + CacheTestUtils.getBlockAndAssertEquals(cache, key, blockWithoutNextBlockMetadata, actualBuffer, + block2Buffer); + + //Add blockWithNextBlockMetadata, expect blockWithNextBlockMetadata to replace. + CacheTestUtils.getBlockAndAssertEquals(cache, key, blockWithNextBlockMetadata, actualBuffer, + block1Buffer); + } + private CachedItem [] generateFixedBlocks(int numBlocks, int size, String pfx) { CachedItem [] blocks = new CachedItem[numBlocks]; for(int i=0;i