From 275cdc6052281241dc22ac77d0337d18285c588e Mon Sep 17 00:00:00 2001 From: Viraj Jasani Date: Thu, 2 Jan 2020 20:33:17 +0530 Subject: [PATCH] HBASE-23588 : Cache index & bloom blocks on write if CacheCompactedBlocksOnWrite is enabled Signed-off-by: ramkrish86 Signed-off-by: chenxu14 <47170471+chenxu14@users.noreply.github.com> --- .../hadoop/hbase/io/hfile/CacheConfig.java | 18 +++++- .../hadoop/hbase/regionserver/HStore.java | 19 +++++- .../hbase/client/TestFromClientSide.java | 16 +++-- .../hbase/io/hfile/TestCacheOnWrite.java | 59 +++++++++++++++---- .../TestCacheOnWriteInSchema.java | 5 +- 5 files changed, 98 insertions(+), 19 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CacheConfig.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CacheConfig.java index 7dbf4f8ba2d..2558e1e3ebf 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CacheConfig.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CacheConfig.java @@ -117,10 +117,10 @@ public class CacheConfig { private boolean cacheDataOnWrite; /** Whether index blocks should be cached when new files are written */ - private final boolean cacheIndexesOnWrite; + private boolean cacheIndexesOnWrite; /** Whether compound bloom filter blocks should be cached on write */ - private final boolean cacheBloomsOnWrite; + private boolean cacheBloomsOnWrite; /** Whether blocks of a file should be evicted when the file is closed */ private boolean evictOnClose; @@ -275,6 +275,20 @@ public class CacheConfig { this.cacheDataOnWrite = cacheDataOnWrite; } + + /** + * Enable cache on write including: + * cacheDataOnWrite + * cacheIndexesOnWrite + * cacheBloomsOnWrite + */ + public void enableCacheOnWrite() { + this.cacheDataOnWrite = true; + this.cacheIndexesOnWrite = true; + this.cacheBloomsOnWrite = true; + } + + /** * @return true if index blocks should be written to the cache when an HFile * is written, false if not diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java index d5f60783b90..0a4f5cf5942 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java @@ -1120,9 +1120,26 @@ public class HStore implements Store, HeapSize, StoreConfigInformation, Propagat if (isCompaction) { // Don't cache data on write on compactions, unless specifically configured to do so writerCacheConf = new CacheConfig(cacheConf); - writerCacheConf.setCacheDataOnWrite(cacheConf.shouldCacheCompactedBlocksOnWrite()); + final boolean shouldCacheCompactedBlocksOnWrite = cacheConf + .shouldCacheCompactedBlocksOnWrite(); + // if data blocks are to be cached on write + // during compaction, we should forcefully + // cache index and bloom blocks as well + if (shouldCacheCompactedBlocksOnWrite) { + writerCacheConf.enableCacheOnWrite(); + LOG.info("cacheCompactedBlocksOnWrite is true, hence enabled cacheOnWrite for " + + "Data blocks, Index blocks and Bloom filter blocks"); + } else { + writerCacheConf.setCacheDataOnWrite(false); + } } else { writerCacheConf = cacheConf; + final boolean shouldCacheDataOnWrite = cacheConf.shouldCacheDataOnWrite(); + if (shouldCacheDataOnWrite) { + writerCacheConf.enableCacheOnWrite(); + LOG.info("cacheDataOnWrite is true, hence enabled cacheOnWrite for " + + "Index blocks and Bloom filter blocks"); + } } InetSocketAddress[] favoredNodes = null; if (region.getRegionServerServices() != null) { diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestFromClientSide.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestFromClientSide.java index 1322c5bfcc1..61d78ce6430 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestFromClientSide.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestFromClientSide.java @@ -5317,15 +5317,19 @@ public class TestFromClientSide { put.addColumn(FAMILY, QUALIFIER, data); table.put(put); assertTrue(Bytes.equals(table.get(new Get(ROW)).value(), data)); + // data was in memstore so don't expect any changes assertEquals(startBlockCount, cache.getBlockCount()); assertEquals(startBlockHits, cache.getStats().getHitCount()); assertEquals(startBlockMiss, cache.getStats().getMissCount()); + // flush the data - System.out.println("Flushing cache"); + LOG.debug("Flushing cache"); region.flush(true); - // expect one more block in cache, no change in hits/misses - long expectedBlockCount = startBlockCount + 1; + + // expect two more blocks in cache - DATA and ROOT_INDEX + // , no change in hits/misses + long expectedBlockCount = startBlockCount + 2; long expectedBlockHits = startBlockHits; long expectedBlockMiss = startBlockMiss; assertEquals(expectedBlockCount, cache.getBlockCount()); @@ -5351,7 +5355,9 @@ public class TestFromClientSide { // flush, one new block System.out.println("Flushing cache"); region.flush(true); - assertEquals(++expectedBlockCount, cache.getBlockCount()); + + // + 1 for Index Block + assertEquals(++expectedBlockCount + 1, cache.getBlockCount()); assertEquals(expectedBlockHits, cache.getStats().getHitCount()); assertEquals(expectedBlockMiss, cache.getStats().getMissCount()); // compact, net minus two blocks, two hits, no misses @@ -6816,4 +6822,4 @@ public class TestFromClientSide { familyDirs = FSUtils.getFamilyDirs(TEST_UTIL.getTestFileSystem(), regionDirs.get(0)); assertTrue("CF dir count should be 1, but was " + familyDirs.size(), familyDirs.size() == 1); } -} \ No newline at end of file +} diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestCacheOnWrite.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestCacheOnWrite.java index 6f218de3b63..cb7042adefd 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestCacheOnWrite.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestCacheOnWrite.java @@ -31,6 +31,7 @@ import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Random; +import java.util.Set; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; @@ -73,6 +74,7 @@ import org.junit.runners.Parameterized.Parameters; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.apache.hbase.thirdparty.com.google.common.collect.ImmutableSet; import org.apache.hbase.thirdparty.com.google.common.collect.Lists; /** @@ -109,6 +111,23 @@ public class TestCacheOnWrite { private static final BloomType BLOOM_TYPE = BloomType.ROWCOL; private static final int CKBYTES = 512; + + private static final Set INDEX_BLOCK_TYPES = ImmutableSet.of( + BlockType.INDEX_V1, + BlockType.INTERMEDIATE_INDEX, + BlockType.ROOT_INDEX, + BlockType.LEAF_INDEX + ); + private static final Set BLOOM_BLOCK_TYPES = ImmutableSet.of( + BlockType.BLOOM_CHUNK, + BlockType.GENERAL_BLOOM_META, + BlockType.DELETE_FAMILY_BLOOM_META + ); + private static final Set DATA_BLOCK_TYPES = ImmutableSet.of( + BlockType.ENCODED_DATA, + BlockType.DATA + ); + /** The number of valid key types possible in a store file */ private static final int NUM_VALID_KEY_TYPES = KeyValue.Type.values().length - 2; @@ -422,9 +441,13 @@ public class TestCacheOnWrite { final String cf = "myCF"; final byte[] cfBytes = Bytes.toBytes(cf); final int maxVersions = 3; - ColumnFamilyDescriptor cfd = ColumnFamilyDescriptorBuilder.newBuilder(cfBytes) - .setCompressionType(compress).setBloomFilterType(BLOOM_TYPE).setMaxVersions(maxVersions) - .setDataBlockEncoding(NoOpDataBlockEncoder.INSTANCE.getDataBlockEncoding()).build(); + ColumnFamilyDescriptor cfd = ColumnFamilyDescriptorBuilder + .newBuilder(cfBytes) + .setCompressionType(compress) + .setBloomFilterType(BLOOM_TYPE) + .setMaxVersions(maxVersions) + .setDataBlockEncoding(NoOpDataBlockEncoder.INSTANCE.getDataBlockEncoding()) + .build(); HRegion region = TEST_UTIL.createTestRegion(table, cfd, blockCache); int rowIdx = 0; long ts = EnvironmentEdgeManager.currentTime(); @@ -462,21 +485,36 @@ public class TestCacheOnWrite { LOG.debug("compactStores() returned"); boolean dataBlockCached = false; + boolean bloomBlockCached = false; + boolean indexBlockCached = false; + for (CachedBlock block : blockCache) { - if (BlockType.ENCODED_DATA.equals(block.getBlockType()) - || BlockType.DATA.equals(block.getBlockType())) { + if (DATA_BLOCK_TYPES.contains(block.getBlockType())) { dataBlockCached = true; - break; + } else if (BLOOM_BLOCK_TYPES.contains(block.getBlockType())) { + bloomBlockCached = true; + } else if (INDEX_BLOCK_TYPES.contains(block.getBlockType())) { + indexBlockCached = true; } } // Data blocks should be cached in instances where we are caching blocks on write. In the case // of testing // BucketCache, we cannot verify block type as it is not stored in the cache. - assertTrue( - "\nTest description: " + testDescription + "\ncacheBlocksOnCompaction: " - + cacheBlocksOnCompaction + "\n", - (cacheBlocksOnCompaction && !(blockCache instanceof BucketCache)) == dataBlockCached); + boolean cacheOnCompactAndNonBucketCache = cacheBlocksOnCompaction + && !(blockCache instanceof BucketCache); + + String assertErrorMessage = "\nTest description: " + testDescription + + "\ncacheBlocksOnCompaction: " + + cacheBlocksOnCompaction + "\n"; + + assertEquals(assertErrorMessage, cacheOnCompactAndNonBucketCache, dataBlockCached); + + if (cacheOnCompactAndNonBucketCache) { + assertTrue(assertErrorMessage, bloomBlockCached); + assertTrue(assertErrorMessage, indexBlockCached); + } + region.close(); } finally { @@ -496,4 +534,5 @@ public class TestCacheOnWrite { testCachingDataBlocksDuringCompactionInternals(false, false); testCachingDataBlocksDuringCompactionInternals(true, true); } + } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestCacheOnWriteInSchema.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestCacheOnWriteInSchema.java index 5bb103c6e3b..2c5103d6428 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestCacheOnWriteInSchema.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestCacheOnWriteInSchema.java @@ -243,7 +243,10 @@ public class TestCacheOnWriteInSchema { offset); boolean isCached = cache.getBlock(blockCacheKey, true, false, true) != null; boolean shouldBeCached = cowType.shouldBeCached(block.getBlockType()); - if (shouldBeCached != isCached) { + final BlockType blockType = block.getBlockType(); + + if (shouldBeCached != isCached && + (cowType.blockType1.equals(blockType) || cowType.blockType2.equals(blockType))) { throw new AssertionError( "shouldBeCached: " + shouldBeCached+ "\n" + "isCached: " + isCached + "\n" +