From a50d5c4a8d36ec7043321656e7adc3168d97f1f9 Mon Sep 17 00:00:00 2001
From: Viraj Jasani
Date: Fri, 28 Feb 2020 20:32:26 +0530
Subject: [PATCH] HBASE-23631 : Backport HBASE-23588 Cache index & bloom blocks on write if CacheCompactedBlocksOnWrite is enabled

Signed-off-by: ramkrish86
Signed-off-by: chenxu14 <47170471+chenxu14@users.noreply.github.com>
---
 .../hadoop/hbase/io/hfile/CacheConfig.java | 18 ++++++-
 .../hadoop/hbase/regionserver/HStore.java  | 30 +++++++++++-
 .../hbase/client/TestFromClientSide.java   | 10 ++--
 .../hbase/io/hfile/TestCacheOnWrite.java   | 47 ++++++++++++++++---
 .../TestCacheOnWriteInSchema.java          |  5 +-
 5 files changed, 95 insertions(+), 15 deletions(-)

diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CacheConfig.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CacheConfig.java
index 81ca56f8abc..8c5ef2bacf3 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CacheConfig.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CacheConfig.java
@@ -209,10 +209,10 @@ public class CacheConfig {
   private boolean cacheDataOnWrite;
 
   /** Whether index blocks should be cached when new files are written */
-  private final boolean cacheIndexesOnWrite;
+  private boolean cacheIndexesOnWrite;
 
   /** Whether compound bloom filter blocks should be cached on write */
-  private final boolean cacheBloomsOnWrite;
+  private boolean cacheBloomsOnWrite;
 
   /** Whether blocks of a file should be evicted when the file is closed */
   private boolean evictOnClose;
@@ -438,6 +438,20 @@ public class CacheConfig {
     this.cacheDataInL1 = cacheDataInL1;
   }
 
+
+  /**
+   * Enable cache on write including:
+   * cacheDataOnWrite
+   * cacheIndexesOnWrite
+   * cacheBloomsOnWrite
+   */
+  public void enableCacheOnWrite() {
+    this.cacheDataOnWrite = true;
+    this.cacheIndexesOnWrite = true;
+    this.cacheBloomsOnWrite = true;
+  }
+
+
   /**
    * @return true if index blocks should be written to the cache when an HFile
    *         is written, false if not
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java
index 92a9ea6555d..d2561722a0a 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java
@@ -145,6 +145,8 @@ public class HStore implements Store {
   private AtomicLong storeSize = new AtomicLong();
   private AtomicLong totalUncompressedBytes = new AtomicLong();
 
+  private boolean cacheOnWriteLogged;
+
   /**
    * RWLock for store operations.
    * Locked in shared mode when the list of component stores is looked at:
@@ -371,6 +373,7 @@ public class HStore implements Store {
         ", storagePolicy=" + policyName + ", verifyBulkLoads=" + verifyBulkLoads +
         ", encoding=" + family.getDataBlockEncoding() +
         ", compression=" + family.getCompressionType());
+    cacheOnWriteLogged = false;
   }
 
   /**
@@ -1113,9 +1116,34 @@ public class HStore implements Store {
     if (isCompaction) {
       // Don't cache data on write on compactions, unless specifically configured to do so
       writerCacheConf = new CacheConfig(cacheConf);
-      writerCacheConf.setCacheDataOnWrite(cacheConf.shouldCacheCompactedBlocksOnWrite());
+      final boolean cacheCompactedBlocksOnWrite =
+        cacheConf.shouldCacheCompactedBlocksOnWrite();
+      // if data blocks are to be cached on write
+      // during compaction, we should forcefully
+      // cache index and bloom blocks as well
+      if (cacheCompactedBlocksOnWrite) {
+        writerCacheConf.enableCacheOnWrite();
+        if (!cacheOnWriteLogged) {
+          LOG.info("For Store " + getColumnFamilyName() +
+            " , cacheCompactedBlocksOnWrite is true, hence enabled " +
+            "cacheOnWrite for Data blocks, Index blocks and Bloom filter blocks");
+          cacheOnWriteLogged = true;
+        }
+      } else {
+        writerCacheConf.setCacheDataOnWrite(false);
+      }
     } else {
       writerCacheConf = cacheConf;
+      final boolean shouldCacheDataOnWrite = cacheConf.shouldCacheDataOnWrite();
+      if (shouldCacheDataOnWrite) {
+        writerCacheConf.enableCacheOnWrite();
+        if (!cacheOnWriteLogged) {
+          LOG.info("For Store " + getColumnFamilyName() +
+            " , cacheDataOnWrite is true, hence enabled cacheOnWrite for " +
+            "Index blocks and Bloom filter blocks");
+          cacheOnWriteLogged = true;
+        }
+      }
     }
     InetSocketAddress[] favoredNodes = null;
     if (region.getRegionServerServices() != null) {
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestFromClientSide.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestFromClientSide.java
index 228c1c7d22d..2bb8fa321e0 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestFromClientSide.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestFromClientSide.java
@@ -5286,10 +5286,11 @@ public class TestFromClientSide {
     assertEquals(startBlockHits, cache.getStats().getHitCount());
     assertEquals(startBlockMiss, cache.getStats().getMissCount());
     // flush the data
-    System.out.println("Flushing cache");
+    LOG.debug("Flushing cache");
     region.flush(true);
-    // expect one more block in cache, no change in hits/misses
-    long expectedBlockCount = startBlockCount + 1;
+    // expect two more blocks in cache - DATA and ROOT_INDEX
+    // , no change in hits/misses
+    long expectedBlockCount = startBlockCount + 2;
     long expectedBlockHits = startBlockHits;
     long expectedBlockMiss = startBlockMiss;
     assertEquals(expectedBlockCount, cache.getBlockCount());
@@ -5315,7 +5316,8 @@ public class TestFromClientSide {
     // flush, one new block
     System.out.println("Flushing cache");
     region.flush(true);
-    assertEquals(++expectedBlockCount, cache.getBlockCount());
+    // + 1 for Index Block
+    assertEquals(++expectedBlockCount + 1, cache.getBlockCount());
     assertEquals(expectedBlockHits, cache.getStats().getHitCount());
     assertEquals(expectedBlockMiss, cache.getStats().getMissCount());
     // compact, net minus two blocks, two hits, no misses
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestCacheOnWrite.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestCacheOnWrite.java
index 0bbae69ce0e..4096770a4ef 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestCacheOnWrite.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestCacheOnWrite.java
@@ -30,7 +30,9 @@ import java.util.Collection;
 import java.util.EnumMap;
 import java.util.List;
 import java.util.Random;
+import java.util.Set;
 
+import com.google.common.collect.ImmutableSet;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
@@ -97,6 +99,23 @@ public class TestCacheOnWrite {
   private static final BloomType BLOOM_TYPE = BloomType.ROWCOL;
   private static final int CKBYTES = 512;
 
+
+  private static final Set<BlockType> INDEX_BLOCK_TYPES = ImmutableSet.of(
+    BlockType.INDEX_V1,
+    BlockType.INTERMEDIATE_INDEX,
+    BlockType.ROOT_INDEX,
+    BlockType.LEAF_INDEX
+  );
+  private static final Set<BlockType> BLOOM_BLOCK_TYPES = ImmutableSet.of(
+    BlockType.BLOOM_CHUNK,
+    BlockType.GENERAL_BLOOM_META,
+    BlockType.DELETE_FAMILY_BLOOM_META
+  );
+  private static final Set<BlockType> DATA_BLOCK_TYPES = ImmutableSet.of(
+    BlockType.ENCODED_DATA,
+    BlockType.DATA
+  );
+
   /** The number of valid key types possible in a store file */
   private static final int NUM_VALID_KEY_TYPES =
       KeyValue.Type.values().length - 2;
@@ -442,20 +461,34 @@ public class TestCacheOnWrite {
     LOG.debug("compactStores() returned");
     boolean dataBlockCached = false;
+    boolean bloomBlockCached = false;
+    boolean indexBlockCached = false;
+
     for (CachedBlock block : blockCache) {
-      if (BlockType.ENCODED_DATA.equals(block.getBlockType())
-          || BlockType.DATA.equals(block.getBlockType())) {
+      if (DATA_BLOCK_TYPES.contains(block.getBlockType())) {
         dataBlockCached = true;
-        break;
+      } else if (BLOOM_BLOCK_TYPES.contains(block.getBlockType())) {
+        bloomBlockCached = true;
+      } else if (INDEX_BLOCK_TYPES.contains(block.getBlockType())) {
+        indexBlockCached = true;
       }
     }
 
     // Data blocks should be cached in instances where we are caching blocks on write. In the case
     // of testing
     // BucketCache, we cannot verify block type as it is not stored in the cache.
-    assertTrue(
-      "\nTest description: " + testDescription + "\ncacheBlocksOnCompaction: "
-        + cacheBlocksOnCompaction + "\n",
-      (cacheBlocksOnCompaction && !(blockCache instanceof BucketCache)) == dataBlockCached);
+    boolean cacheOnCompactAndNonBucketCache = cacheBlocksOnCompaction
+      && !(blockCache instanceof BucketCache);
+
+    String assertErrorMessage = "\nTest description: " + testDescription +
+      "\ncacheBlocksOnCompaction: " +
+      cacheBlocksOnCompaction + "\n";
+
+    assertEquals(assertErrorMessage, cacheOnCompactAndNonBucketCache, dataBlockCached);
+
+    if (cacheOnCompactAndNonBucketCache) {
+      assertTrue(assertErrorMessage, bloomBlockCached);
+      assertTrue(assertErrorMessage, indexBlockCached);
+    }
 
     ((HRegion)region).close();
   } finally {
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestCacheOnWriteInSchema.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestCacheOnWriteInSchema.java
index 46a9cb3e518..d595cfda6db 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestCacheOnWriteInSchema.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestCacheOnWriteInSchema.java
@@ -236,7 +236,10 @@ public class TestCacheOnWriteInSchema {
         offset);
       boolean isCached = cache.getBlock(blockCacheKey, true, false, true) != null;
       boolean shouldBeCached = cowType.shouldBeCached(block.getBlockType());
-      if (shouldBeCached != isCached) {
+      final BlockType blockType = block.getBlockType();
+
+      if (shouldBeCached != isCached &&
+          (cowType.blockType1.equals(blockType) || cowType.blockType2.equals(blockType))) {
         throw new AssertionError(
           "shouldBeCached: " + shouldBeCached+ "\n" +
           "isCached: " + isCached + "\n" +