HBASE-26274 Create an option to reintroduce BlockCache to mapreduce job (#3684)
Introduce `hfile.onheap.block.cache.fixed.size`, which is disabled by default. When ClientSideRegionScanner is used, it is enabled with a fixed size for caching INDEX/LEAF_INDEX blocks, so that a client, e.g. a snapshot scanner, that scans the entire HFile does not need to seek/reseek to index blocks multiple times. Signed-off-by: Josh Elser <elserj@apache.org>
This commit is contained in:
parent
84b5511d13
commit
e0325c40f6
|
@ -1092,6 +1092,17 @@ public final class HConstants {
|
|||
|
||||
public static final float HFILE_BLOCK_CACHE_SIZE_DEFAULT = 0.4f;
|
||||
|
||||
/**
|
||||
* Configuration key for a fixed on-heap block cache size, in bytes. The default (0) leaves the
|
||||
* feature off, so it must be set explicitly by the user; ClientSideRegionScanner sets it itself
|
||||
* when unset. A value below the percentage-based on-heap cache size overrides that size.
|
||||
*/
|
||||
public static final String HFILE_ONHEAP_BLOCK_CACHE_FIXED_SIZE_KEY =
|
||||
"hfile.onheap.block.cache.fixed.size";
|
||||
/** Default for the fixed on-heap block cache size; 0 means no fixed size is applied. */
public static final long HFILE_ONHEAP_BLOCK_CACHE_FIXED_SIZE_DEFAULT = 0L;
|
||||
/** Fixed on-heap block cache size (32 MB) ClientSideRegionScanner uses when the key is unset. */
public static final long HBASE_CLIENT_SCANNER_ONHEAP_BLOCK_CACHE_FIXED_SIZE_DEFAULT =
|
||||
32 * 1024 * 1024L;
|
||||
|
||||
/*
|
||||
* Minimum percentage of free heap necessary for a successful cluster startup.
|
||||
*/
|
||||
|
|
|
@ -25,8 +25,10 @@ import org.apache.hadoop.conf.Configuration;
|
|||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hadoop.hbase.Cell;
|
||||
import org.apache.hadoop.hbase.HConstants;
|
||||
import org.apache.hadoop.hbase.PrivateCellUtil;
|
||||
import org.apache.hadoop.hbase.client.metrics.ScanMetrics;
|
||||
import org.apache.hadoop.hbase.io.hfile.BlockCacheFactory;
|
||||
import org.apache.hadoop.hbase.mob.MobFileCache;
|
||||
import org.apache.hadoop.hbase.regionserver.HRegion;
|
||||
import org.apache.hadoop.hbase.regionserver.RegionScanner;
|
||||
|
@ -60,6 +62,15 @@ public class ClientSideRegionScanner extends AbstractClientScanner {
|
|||
region = HRegion.newHRegion(CommonFSUtils.getTableDir(rootDir, htd.getTableName()), null, fs,
|
||||
conf, hri, htd, null);
|
||||
region.setRestoredRegion(true);
|
||||
// A non-RS process does not have a block cache, and this is a client-side scanner, so
|
||||
// create one for MapReduce jobs to cache the INDEX blocks by selecting
|
||||
// IndexOnlyLruBlockCache and giving HFILE_ONHEAP_BLOCK_CACHE_FIXED_SIZE_KEY a value if unset.
|
||||
conf.set(BlockCacheFactory.BLOCKCACHE_POLICY_KEY, "IndexOnlyLRU");
|
||||
conf.setIfUnset(HConstants.HFILE_ONHEAP_BLOCK_CACHE_FIXED_SIZE_KEY,
|
||||
String.valueOf(HConstants.HBASE_CLIENT_SCANNER_ONHEAP_BLOCK_CACHE_FIXED_SIZE_DEFAULT));
|
||||
// don't allow L2 bucket cache for non RS process to avoid unexpected disk usage.
|
||||
conf.unset(HConstants.BUCKET_CACHE_IOENGINE_KEY);
|
||||
region.setBlockCache(BlockCacheFactory.createBlockCache(conf));
|
||||
// we won't initialize the MobFileCache when not running in RS process. so provided an
|
||||
// initialized cache. Consider the case: an CF was set from an mob to non-mob. if we only
|
||||
// initialize cache for MOB region, NPE from HMobStore will still happen. So Initialize the
|
||||
|
@ -122,6 +133,10 @@ public class ClientSideRegionScanner extends AbstractClientScanner {
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * Returns the {@link HRegion} held in the {@code region} field, which this class assigns from
 * HRegion.newHRegion. Package-private; TestClientSideRegionScanner calls it to inspect the
 * block cache installed on the region.
 */
HRegion getRegion() {
|
||||
return region;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean renewLease() {
|
||||
throw new UnsupportedOperationException();
|
||||
|
|
|
@ -132,4 +132,13 @@ public interface BlockCache extends Iterable<CachedBlock> {
|
|||
* @return The list of sub blockcaches that make up this one; returns null if no sub caches.
|
||||
*/
|
||||
BlockCache [] getBlockCaches();
|
||||
|
||||
/**
|
||||
* Check if block type is meta or index block
|
||||
* @param blockType block type of a given HFile block
|
||||
* @return true if block type is non-data block
|
||||
*/
|
||||
default boolean isMetaBlock(BlockType blockType) {
|
||||
return blockType != null && blockType.getCategory() != BlockType.BlockCategory.DATA;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -43,7 +43,7 @@ public final class BlockCacheFactory {
|
|||
*/
|
||||
|
||||
/**
|
||||
* Configuration key to cache block policy (Lru, TinyLfu).
|
||||
* Configuration key to cache block policy (Lru, TinyLfu, AdaptiveLRU, IndexOnlyLRU).
|
||||
*/
|
||||
public static final String BLOCKCACHE_POLICY_KEY = "hfile.block.cache.policy";
|
||||
public static final String BLOCKCACHE_POLICY_DEFAULT = "LRU";
|
||||
|
@ -129,6 +129,8 @@ public final class BlockCacheFactory {
|
|||
StringUtils.byteDesc(cacheSize) + ", blockSize=" + StringUtils.byteDesc(blockSize));
|
||||
if (policy.equalsIgnoreCase("LRU")) {
|
||||
return new LruBlockCache(cacheSize, blockSize, true, c);
|
||||
} else if (policy.equalsIgnoreCase("IndexOnlyLRU")) {
|
||||
return new IndexOnlyLruBlockCache(cacheSize, blockSize, true, c);
|
||||
} else if (policy.equalsIgnoreCase("TinyLFU")) {
|
||||
return new TinyLfuBlockCache(cacheSize, blockSize, ForkJoinPool.commonPool(), c);
|
||||
} else if (policy.equalsIgnoreCase("AdaptiveLRU")) {
|
||||
|
|
|
@ -22,7 +22,6 @@ import java.util.Iterator;
|
|||
|
||||
import org.apache.yetus.audience.InterfaceAudience;
|
||||
import org.apache.hadoop.hbase.io.HeapSize;
|
||||
import org.apache.hadoop.hbase.io.hfile.BlockType.BlockCategory;
|
||||
import org.apache.hadoop.hbase.io.hfile.bucket.BucketCache;
|
||||
|
||||
/**
|
||||
|
|
|
@ -0,0 +1,49 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.hbase.io.hfile;
|
||||
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.yetus.audience.InterfaceAudience;
|
||||
|
||||
/**
|
||||
* An on heap block cache implementation extended LruBlockCache and only cache index block.
|
||||
* This block cache should be only used by
|
||||
* {@link org.apache.hadoop.hbase.client.ClientSideRegionScanner} that normally considers to be
|
||||
* used by client resides out of the region server, e.g. a container of a map reduce job.
|
||||
**/
|
||||
@InterfaceAudience.Private
|
||||
public class IndexOnlyLruBlockCache extends LruBlockCache {
|
||||
|
||||
public IndexOnlyLruBlockCache(long maxSize, long blockSize, boolean evictionThread,
|
||||
Configuration conf) {
|
||||
super(maxSize, blockSize, evictionThread, conf);
|
||||
}
|
||||
|
||||
/**
|
||||
* Cache only index block with the specified name and buffer
|
||||
* @param cacheKey block's cache key
|
||||
* @param buf block buffer
|
||||
* @param inMemory if block is in-memory
|
||||
*/
|
||||
@Override
|
||||
public void cacheBlock(BlockCacheKey cacheKey, Cacheable buf, boolean inMemory) {
|
||||
if (isMetaBlock(buf.getBlockType())) {
|
||||
super.cacheBlock(cacheKey, buf, inMemory);
|
||||
}
|
||||
}
|
||||
}
|
|
@ -228,9 +228,13 @@ public class MemorySizeUtil {
|
|||
if (usage != null) {
|
||||
max = usage.getMax();
|
||||
}
|
||||
|
||||
float onHeapCacheFixedSize = (float) conf
|
||||
.getLong(HConstants.HFILE_ONHEAP_BLOCK_CACHE_FIXED_SIZE_KEY,
|
||||
HConstants.HFILE_ONHEAP_BLOCK_CACHE_FIXED_SIZE_DEFAULT) / max;
|
||||
// Calculate the amount of heap to give the block cache.
|
||||
return (long) (max * cachePercentage);
|
||||
return (onHeapCacheFixedSize > 0 && onHeapCacheFixedSize < cachePercentage) ?
|
||||
(long) (max * onHeapCacheFixedSize) :
|
||||
(long) (max * cachePercentage);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -0,0 +1,115 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.hbase.client;
|
||||
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import static org.junit.Assert.assertNotNull;
|
||||
import static org.junit.Assert.assertNull;
|
||||
import static org.junit.Assert.assertTrue;
|
||||
import java.io.IOException;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hadoop.hbase.HBaseClassTestRule;
|
||||
import org.apache.hadoop.hbase.HBaseTestingUtil;
|
||||
import org.apache.hadoop.hbase.HConstants;
|
||||
import org.apache.hadoop.hbase.TableName;
|
||||
import org.apache.hadoop.hbase.io.hfile.BlockCache;
|
||||
import org.apache.hadoop.hbase.io.hfile.IndexOnlyLruBlockCache;
|
||||
import org.apache.hadoop.hbase.testclassification.ClientTests;
|
||||
import org.apache.hadoop.hbase.testclassification.SmallTests;
|
||||
import org.junit.AfterClass;
|
||||
import org.junit.Before;
|
||||
import org.junit.BeforeClass;
|
||||
import org.junit.ClassRule;
|
||||
import org.junit.Test;
|
||||
import org.junit.experimental.categories.Category;
|
||||
|
||||
@Category({ SmallTests.class, ClientTests.class })
|
||||
public class TestClientSideRegionScanner {
|
||||
@ClassRule
|
||||
public static final HBaseClassTestRule CLASS_RULE =
|
||||
HBaseClassTestRule.forClass(TestClientSideRegionScanner.class);
|
||||
|
||||
private final static HBaseTestingUtil TEST_UTIL = new HBaseTestingUtil();
|
||||
|
||||
private Configuration conf;
|
||||
private Path rootDir;
|
||||
private FileSystem fs;
|
||||
private TableDescriptor htd;
|
||||
private RegionInfo hri;
|
||||
private Scan scan;
|
||||
|
||||
@BeforeClass
|
||||
public static void setUpBeforeClass() throws Exception {
|
||||
TEST_UTIL.startMiniCluster(1);
|
||||
}
|
||||
|
||||
@AfterClass
|
||||
public static void tearDownAfterClass() throws Exception {
|
||||
TEST_UTIL.shutdownMiniCluster();
|
||||
}
|
||||
|
||||
@Before
|
||||
public void setup() throws IOException {
|
||||
conf = TEST_UTIL.getConfiguration();
|
||||
rootDir = TEST_UTIL.getDefaultRootDirPath();
|
||||
fs = TEST_UTIL.getTestFileSystem();
|
||||
htd = TEST_UTIL.getAdmin().getDescriptor(TableName.META_TABLE_NAME);
|
||||
hri = TEST_UTIL.getAdmin().getRegions(TableName.META_TABLE_NAME).get(0);
|
||||
scan = new Scan();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testDefaultBlockCache() throws IOException {
|
||||
Configuration copyConf = new Configuration(conf);
|
||||
ClientSideRegionScanner clientSideRegionScanner =
|
||||
new ClientSideRegionScanner(copyConf, fs, rootDir, htd, hri, scan, null);
|
||||
|
||||
BlockCache blockCache = clientSideRegionScanner.getRegion().getBlockCache();
|
||||
assertNotNull(blockCache);
|
||||
assertTrue(blockCache instanceof IndexOnlyLruBlockCache);
|
||||
assertTrue(HConstants.HBASE_CLIENT_SCANNER_ONHEAP_BLOCK_CACHE_FIXED_SIZE_DEFAULT == blockCache
|
||||
.getMaxSize());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testConfiguredBlockCache() throws IOException {
|
||||
Configuration copyConf = new Configuration(conf);
|
||||
// tiny 1MB fixed cache size
|
||||
long blockCacheFixedSize = 1024 * 1024L;
|
||||
copyConf.setLong(HConstants.HFILE_ONHEAP_BLOCK_CACHE_FIXED_SIZE_KEY, blockCacheFixedSize);
|
||||
ClientSideRegionScanner clientSideRegionScanner =
|
||||
new ClientSideRegionScanner(copyConf, fs, rootDir, htd, hri, scan, null);
|
||||
|
||||
BlockCache blockCache = clientSideRegionScanner.getRegion().getBlockCache();
|
||||
assertNotNull(blockCache);
|
||||
assertTrue(blockCache instanceof IndexOnlyLruBlockCache);
|
||||
assertTrue(blockCacheFixedSize == blockCache.getMaxSize());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testNoBlockCache() throws IOException {
|
||||
Configuration copyConf = new Configuration(conf);
|
||||
copyConf.setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0.0f);
|
||||
ClientSideRegionScanner clientSideRegionScanner =
|
||||
new ClientSideRegionScanner(copyConf, fs, rootDir, htd, hri, scan, null);
|
||||
|
||||
BlockCache blockCache = clientSideRegionScanner.getRegion().getBlockCache();
|
||||
assertNull(blockCache);
|
||||
}
|
||||
}
|
|
@ -369,4 +369,34 @@ public class TestCacheConfig {
|
|||
} catch (IllegalArgumentException e) {
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testIndexOnlyLruBlockCache() {
|
||||
CacheConfig cc = new CacheConfig(this.conf);
|
||||
conf.set(BlockCacheFactory.BLOCKCACHE_POLICY_KEY, "IndexOnlyLRU");
|
||||
BlockCache blockCache = BlockCacheFactory.createBlockCache(this.conf);
|
||||
assertTrue(blockCache instanceof IndexOnlyLruBlockCache);
|
||||
// reject data block
|
||||
long initialBlockCount = blockCache.getBlockCount();
|
||||
BlockCacheKey bck = new BlockCacheKey("bck", 0);
|
||||
Cacheable c = new DataCacheEntry();
|
||||
blockCache.cacheBlock(bck, c, true);
|
||||
// accept index block
|
||||
Cacheable indexCacheEntry = new IndexCacheEntry();
|
||||
blockCache.cacheBlock(bck, indexCacheEntry, true);
|
||||
assertEquals(initialBlockCount + 1, blockCache.getBlockCount());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testGetOnHeapCacheSize() {
|
||||
Configuration copyConf = new Configuration(conf);
|
||||
long fixedSize = 1024 * 1024L;
|
||||
long onHeapCacheSize = MemorySizeUtil.getOnHeapCacheSize(copyConf);
|
||||
assertEquals(null, copyConf.get(HConstants.HFILE_ONHEAP_BLOCK_CACHE_FIXED_SIZE_KEY));
|
||||
assertTrue(onHeapCacheSize > 0 && onHeapCacheSize != fixedSize);
|
||||
// when HBASE_BLOCK_CACHE_FIXED_SIZE_KEY is set, it will be a fixed size
|
||||
copyConf.setLong(HConstants.HFILE_ONHEAP_BLOCK_CACHE_FIXED_SIZE_KEY, fixedSize);
|
||||
onHeapCacheSize = MemorySizeUtil.getOnHeapCacheSize(copyConf);
|
||||
assertEquals(fixedSize, onHeapCacheSize);
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue