From 6556a5ee91c1e8f4b03a4b992297a1d9954bfed7 Mon Sep 17 00:00:00 2001 From: "Tak Lon (Stephen) Wu" Date: Wed, 22 Sep 2021 09:17:18 -0700 Subject: [PATCH] HBASE-26274 Create an option to reintroduce BlockCache to mapreduce job (#3684) Introduce `hfile.onheap.block.cache.fixed.size`, which is disabled by default. When using ClientSideRegionScanner, it is enabled with a fixed size for caching INDEX/LEAF_INDEX blocks, so that a client, e.g. a snapshot scanner, that scans the entire HFile does not need to seek/reseek to the index blocks multiple times. --- .../org/apache/hadoop/hbase/HConstants.java | 11 ++ .../hbase/client/ClientSideRegionScanner.java | 15 +++ .../hadoop/hbase/io/hfile/BlockCache.java | 9 ++ .../hbase/io/hfile/BlockCacheFactory.java | 4 +- .../hbase/io/hfile/CombinedBlockCache.java | 5 - .../io/hfile/IndexOnlyLruBlockCache.java | 49 ++++++++ .../hadoop/hbase/io/util/MemorySizeUtil.java | 8 +- .../client/TestClientSideRegionScanner.java | 115 ++++++++++++++++++ .../hbase/io/hfile/TestCacheConfig.java | 30 +++++ 9 files changed, 238 insertions(+), 8 deletions(-) create mode 100644 hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/IndexOnlyLruBlockCache.java create mode 100644 hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestClientSideRegionScanner.java diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java index 10a38f63c3a..64641585695 100644 --- a/hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java @@ -1049,6 +1049,17 @@ public final class HConstants { public static final float HFILE_BLOCK_CACHE_SIZE_DEFAULT = 0.4f; + /** + * Configuration key for the fixed size of the block cache; by default it does nothing and should + * be explicitly set by user or only used within ClientSideRegionScanner. 
If it's set lower than + * the current max on-heap size, it overrides the max size of the block cache. + */ + public static final String HFILE_ONHEAP_BLOCK_CACHE_FIXED_SIZE_KEY = + "hfile.onheap.block.cache.fixed.size"; + public static final long HFILE_ONHEAP_BLOCK_CACHE_FIXED_SIZE_DEFAULT = 0L; + public static final long HBASE_CLIENT_SCANNER_ONHEAP_BLOCK_CACHE_FIXED_SIZE_DEFAULT = + 32 * 1024 * 1024L; + /* * Minimum percentage of free heap necessary for a successful cluster startup. */ diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/client/ClientSideRegionScanner.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/client/ClientSideRegionScanner.java index 1e2b69927ed..1feafc18993 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/client/ClientSideRegionScanner.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/client/ClientSideRegionScanner.java @@ -25,8 +25,10 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hbase.Cell; +import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.PrivateCellUtil; import org.apache.hadoop.hbase.client.metrics.ScanMetrics; +import org.apache.hadoop.hbase.io.hfile.BlockCacheFactory; import org.apache.hadoop.hbase.mob.MobFileCache; import org.apache.hadoop.hbase.regionserver.HRegion; import org.apache.hadoop.hbase.regionserver.RegionScanner; @@ -60,6 +62,15 @@ public class ClientSideRegionScanner extends AbstractClientScanner { region = HRegion.newHRegion(CommonFSUtils.getTableDir(rootDir, htd.getTableName()), null, fs, conf, hri, htd, null); region.setRestoredRegion(true); + // a non-RS process does not have a block cache, and this is a client side scanner, + // create one for MapReduce jobs to cache the INDEX blocks by setting the policy to + // IndexOnlyLruBlockCache and set a value to HFILE_ONHEAP_BLOCK_CACHE_FIXED_SIZE_KEY + conf.set(BlockCacheFactory.BLOCKCACHE_POLICY_KEY, "IndexOnlyLRU"); + 
conf.setIfUnset(HConstants.HFILE_ONHEAP_BLOCK_CACHE_FIXED_SIZE_KEY, + String.valueOf(HConstants.HBASE_CLIENT_SCANNER_ONHEAP_BLOCK_CACHE_FIXED_SIZE_DEFAULT)); + // don't allow L2 bucket cache for non RS process to avoid unexpected disk usage. + conf.unset(HConstants.BUCKET_CACHE_IOENGINE_KEY); + region.setBlockCache(BlockCacheFactory.createBlockCache(conf)); // we won't initialize the MobFileCache when not running in RS process. so provided an // initialized cache. Consider the case: an CF was set from an mob to non-mob. if we only // initialize cache for MOB region, NPE from HMobStore will still happen. So Initialize the @@ -122,6 +133,10 @@ public class ClientSideRegionScanner extends AbstractClientScanner { } } + HRegion getRegion() { + return region; + } + @Override public boolean renewLease() { throw new UnsupportedOperationException(); diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCache.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCache.java index f54edae31e1..6f32d623c5e 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCache.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCache.java @@ -147,4 +147,13 @@ public interface BlockCache extends Iterable { * @return The list of sub blockcaches that make up this one; returns null if no sub caches. 
*/ BlockCache [] getBlockCaches(); + + /** + * Check if block type is meta or index block + * @param blockType block type of a given HFile block + * @return true if block type is non-data block + */ + default boolean isMetaBlock(BlockType blockType) { + return blockType != null && blockType.getCategory() != BlockType.BlockCategory.DATA; + } } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCacheFactory.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCacheFactory.java index 90dd833c113..12c769ec805 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCacheFactory.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCacheFactory.java @@ -43,7 +43,7 @@ public final class BlockCacheFactory { */ /** - * Configuration key to cache block policy (Lru, TinyLfu). + * Configuration key to cache block policy (Lru, TinyLfu, AdaptiveLRU, IndexOnlyLRU). */ public static final String BLOCKCACHE_POLICY_KEY = "hfile.block.cache.policy"; public static final String BLOCKCACHE_POLICY_DEFAULT = "LRU"; @@ -129,6 +129,8 @@ public final class BlockCacheFactory { StringUtils.byteDesc(cacheSize) + ", blockSize=" + StringUtils.byteDesc(blockSize)); if (policy.equalsIgnoreCase("LRU")) { return new LruBlockCache(cacheSize, blockSize, true, c); + } else if (policy.equalsIgnoreCase("IndexOnlyLRU")) { + return new IndexOnlyLruBlockCache(cacheSize, blockSize, true, c); } else if (policy.equalsIgnoreCase("TinyLFU")) { return new TinyLfuBlockCache(cacheSize, blockSize, ForkJoinPool.commonPool(), c); } else if (policy.equalsIgnoreCase("AdaptiveLRU")) { diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CombinedBlockCache.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CombinedBlockCache.java index 571ae070c8c..dc4f697bae9 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CombinedBlockCache.java +++ 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CombinedBlockCache.java @@ -22,7 +22,6 @@ import java.util.Iterator; import org.apache.yetus.audience.InterfaceAudience; import org.apache.hadoop.hbase.io.HeapSize; -import org.apache.hadoop.hbase.io.hfile.BlockType.BlockCategory; import org.apache.hadoop.hbase.io.hfile.bucket.BucketCache; /** @@ -71,10 +70,6 @@ public class CombinedBlockCache implements ResizableBlockCache, HeapSize { cacheBlock(cacheKey, buf, false); } - private boolean isMetaBlock(BlockType blockType) { - return blockType.getCategory() != BlockCategory.DATA; - } - @Override public Cacheable getBlock(BlockCacheKey cacheKey, boolean caching, boolean repeat, boolean updateCacheMetrics) { diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/IndexOnlyLruBlockCache.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/IndexOnlyLruBlockCache.java new file mode 100644 index 00000000000..50b195dd8e9 --- /dev/null +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/IndexOnlyLruBlockCache.java @@ -0,0 +1,49 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hbase.io.hfile; + +import org.apache.hadoop.conf.Configuration; +import org.apache.yetus.audience.InterfaceAudience; + +/** + * An on-heap block cache implementation that extends LruBlockCache and only caches index blocks. + * This block cache should only be used by + * {@link org.apache.hadoop.hbase.client.ClientSideRegionScanner} that is normally considered to be + * used by clients residing outside of the region server, e.g. a container of a map reduce job. + **/ +@InterfaceAudience.Private +public class IndexOnlyLruBlockCache extends LruBlockCache { + + public IndexOnlyLruBlockCache(long maxSize, long blockSize, boolean evictionThread, + Configuration conf) { + super(maxSize, blockSize, evictionThread, conf); + } + + /** + * Cache the block with the specified name and buffer only if it is an index/meta block + * @param cacheKey block's cache key + * @param buf block buffer + * @param inMemory if block is in-memory + */ + @Override + public void cacheBlock(BlockCacheKey cacheKey, Cacheable buf, boolean inMemory) { + if (isMetaBlock(buf.getBlockType())) { + super.cacheBlock(cacheKey, buf, inMemory); + } + } +} diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/util/MemorySizeUtil.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/util/MemorySizeUtil.java index 910498040e0..b1f298e3772 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/util/MemorySizeUtil.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/util/MemorySizeUtil.java @@ -228,9 +228,13 @@ public class MemorySizeUtil { if (usage != null) { max = usage.getMax(); } - + float onHeapCacheFixedSize = (float) conf + .getLong(HConstants.HFILE_ONHEAP_BLOCK_CACHE_FIXED_SIZE_KEY, + HConstants.HFILE_ONHEAP_BLOCK_CACHE_FIXED_SIZE_DEFAULT) / max; // Calculate the amount of heap to give the heap. - return (long) (max * cachePercentage); + return (onHeapCacheFixedSize > 0 && onHeapCacheFixedSize < cachePercentage) ? 
+ (long) (max * onHeapCacheFixedSize) : + (long) (max * cachePercentage); } /** diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestClientSideRegionScanner.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestClientSideRegionScanner.java new file mode 100644 index 00000000000..859e36f00cb --- /dev/null +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestClientSideRegionScanner.java @@ -0,0 +1,115 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hbase.client; + +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertTrue; +import java.io.IOException; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hbase.HBaseClassTestRule; +import org.apache.hadoop.hbase.HBaseTestingUtil; +import org.apache.hadoop.hbase.HConstants; +import org.apache.hadoop.hbase.TableName; +import org.apache.hadoop.hbase.io.hfile.BlockCache; +import org.apache.hadoop.hbase.io.hfile.IndexOnlyLruBlockCache; +import org.apache.hadoop.hbase.testclassification.ClientTests; +import org.apache.hadoop.hbase.testclassification.SmallTests; +import org.junit.AfterClass; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.ClassRule; +import org.junit.Test; +import org.junit.experimental.categories.Category; + +@Category({ SmallTests.class, ClientTests.class }) +public class TestClientSideRegionScanner { + @ClassRule + public static final HBaseClassTestRule CLASS_RULE = + HBaseClassTestRule.forClass(TestClientSideRegionScanner.class); + + private final static HBaseTestingUtil TEST_UTIL = new HBaseTestingUtil(); + + private Configuration conf; + private Path rootDir; + private FileSystem fs; + private TableDescriptor htd; + private RegionInfo hri; + private Scan scan; + + @BeforeClass + public static void setUpBeforeClass() throws Exception { + TEST_UTIL.startMiniCluster(1); + } + + @AfterClass + public static void tearDownAfterClass() throws Exception { + TEST_UTIL.shutdownMiniCluster(); + } + + @Before + public void setup() throws IOException { + conf = TEST_UTIL.getConfiguration(); + rootDir = TEST_UTIL.getDefaultRootDirPath(); + fs = TEST_UTIL.getTestFileSystem(); + htd = TEST_UTIL.getAdmin().getDescriptor(TableName.META_TABLE_NAME); + hri = TEST_UTIL.getAdmin().getRegions(TableName.META_TABLE_NAME).get(0); + scan = new 
Scan(); + } + + @Test + public void testDefaultBlockCache() throws IOException { + Configuration copyConf = new Configuration(conf); + ClientSideRegionScanner clientSideRegionScanner = + new ClientSideRegionScanner(copyConf, fs, rootDir, htd, hri, scan, null); + + BlockCache blockCache = clientSideRegionScanner.getRegion().getBlockCache(); + assertNotNull(blockCache); + assertTrue(blockCache instanceof IndexOnlyLruBlockCache); + assertTrue(HConstants.HBASE_CLIENT_SCANNER_ONHEAP_BLOCK_CACHE_FIXED_SIZE_DEFAULT == blockCache + .getMaxSize()); + } + + @Test + public void testConfiguredBlockCache() throws IOException { + Configuration copyConf = new Configuration(conf); + // tiny 1MB fixed cache size + long blockCacheFixedSize = 1024 * 1024L; + copyConf.setLong(HConstants.HFILE_ONHEAP_BLOCK_CACHE_FIXED_SIZE_KEY, blockCacheFixedSize); + ClientSideRegionScanner clientSideRegionScanner = + new ClientSideRegionScanner(copyConf, fs, rootDir, htd, hri, scan, null); + + BlockCache blockCache = clientSideRegionScanner.getRegion().getBlockCache(); + assertNotNull(blockCache); + assertTrue(blockCache instanceof IndexOnlyLruBlockCache); + assertTrue(blockCacheFixedSize == blockCache.getMaxSize()); + } + + @Test + public void testNoBlockCache() throws IOException { + Configuration copyConf = new Configuration(conf); + copyConf.setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0.0f); + ClientSideRegionScanner clientSideRegionScanner = + new ClientSideRegionScanner(copyConf, fs, rootDir, htd, hri, scan, null); + + BlockCache blockCache = clientSideRegionScanner.getRegion().getBlockCache(); + assertNull(blockCache); + } +} diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestCacheConfig.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestCacheConfig.java index 65fc3afd9e6..0ec596e685a 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestCacheConfig.java +++ 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestCacheConfig.java @@ -374,4 +374,34 @@ public class TestCacheConfig { } catch (IllegalArgumentException e) { } } + + @Test + public void testIndexOnlyLruBlockCache() { + CacheConfig cc = new CacheConfig(this.conf); + conf.set(BlockCacheFactory.BLOCKCACHE_POLICY_KEY, "IndexOnlyLRU"); + BlockCache blockCache = BlockCacheFactory.createBlockCache(this.conf); + assertTrue(blockCache instanceof IndexOnlyLruBlockCache); + // reject data block + long initialBlockCount = blockCache.getBlockCount(); + BlockCacheKey bck = new BlockCacheKey("bck", 0); + Cacheable c = new DataCacheEntry(); + blockCache.cacheBlock(bck, c, true); + // accept index block + Cacheable indexCacheEntry = new IndexCacheEntry(); + blockCache.cacheBlock(bck, indexCacheEntry, true); + assertEquals(initialBlockCount + 1, blockCache.getBlockCount()); + } + + @Test + public void testGetOnHeapCacheSize() { + Configuration copyConf = new Configuration(conf); + long fixedSize = 1024 * 1024L; + long onHeapCacheSize = MemorySizeUtil.getOnHeapCacheSize(copyConf); + assertEquals(null, copyConf.get(HConstants.HFILE_ONHEAP_BLOCK_CACHE_FIXED_SIZE_KEY)); + assertTrue(onHeapCacheSize > 0 && onHeapCacheSize != fixedSize); + // when HFILE_ONHEAP_BLOCK_CACHE_FIXED_SIZE_KEY is set, the cache size will be that fixed size + copyConf.setLong(HConstants.HFILE_ONHEAP_BLOCK_CACHE_FIXED_SIZE_KEY, fixedSize); + onHeapCacheSize = MemorySizeUtil.getOnHeapCacheSize(copyConf); + assertEquals(fixedSize, onHeapCacheSize); + } }