From 5bfe1da9847b88e1ee81bc5e166faf493f042d2d Mon Sep 17 00:00:00 2001
From: Biju Nair
Date: Wed, 6 Sep 2017 16:07:19 -0400
Subject: [PATCH] HBASE-18652 Expose individual cache stats in a CombinedCache
 through JMX

Publish hit/miss counts and hit/miss ratios for the L1 and L2 block caches
as separate region server gauges, in addition to the existing aggregate
block cache metrics. CacheConfig keeps a reference to the L1 and L2
instances it creates and exposes their CacheStats; the metrics wrapper
reads them in initBlockCache(). Every L1/L2 getter reports 0 when the
corresponding stats object is not available.

Signed-off-by: tedyu
---
 .../MetricsRegionServerSource.java            | 17 +++++
 .../MetricsRegionServerWrapper.java           | 40 +++++++++++
 .../MetricsRegionServerSourceImpl.java        | 16 +++++
 .../hadoop/hbase/io/hfile/CacheConfig.java    | 28 ++++++--
 .../MetricsRegionServerWrapperImpl.java       | 74 ++++++++++++++++++-
 .../MetricsRegionServerWrapperStub.java       | 40 +++++++++++
 .../regionserver/TestMetricsRegionServer.java |  8 +++
 7 files changed, 217 insertions(+), 6 deletions(-)

diff --git a/hbase-hadoop-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerSource.java b/hbase-hadoop-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerSource.java
index 3ac678ea3d7..b72deb8070d 100644
--- a/hbase-hadoop-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerSource.java
+++ b/hbase-hadoop-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerSource.java
@@ -345,6 +345,23 @@ public interface MetricsRegionServerSource extends BaseSource, JvmPauseMonitorSo
   String BLOCK_CACHE_DELETE_FAMILY_BLOOM_HIT_COUNT = "blockCacheDeleteFamilyBloomHitCount";
   String BLOCK_CACHE_TRAILER_HIT_COUNT = "blockCacheTrailerHitCount";
 
+  String L1_CACHE_HIT_COUNT = "l1CacheHitCount";
+  String L1_CACHE_HIT_COUNT_DESC = "L1 cache hit count.";
+  String L1_CACHE_MISS_COUNT = "l1CacheMissCount";
+  String L1_CACHE_MISS_COUNT_DESC = "L1 cache miss count.";
+  String L1_CACHE_HIT_RATIO = "l1CacheHitRatio";
+  String L1_CACHE_HIT_RATIO_DESC = "L1 cache hit ratio.";
+  String L1_CACHE_MISS_RATIO = "l1CacheMissRatio";
+  String L1_CACHE_MISS_RATIO_DESC = "L1 cache miss ratio.";
+  String L2_CACHE_HIT_COUNT = "l2CacheHitCount";
+  String L2_CACHE_HIT_COUNT_DESC = "L2 cache hit count.";
+  String L2_CACHE_MISS_COUNT = "l2CacheMissCount";
+  String L2_CACHE_MISS_COUNT_DESC = "L2 cache miss count.";
+  String L2_CACHE_HIT_RATIO = "l2CacheHitRatio";
+  String L2_CACHE_HIT_RATIO_DESC = "L2 cache hit ratio.";
+  String L2_CACHE_MISS_RATIO = "l2CacheMissRatio";
+  String L2_CACHE_MISS_RATIO_DESC = "L2 cache miss ratio.";
+
   String RS_START_TIME_NAME = "regionServerStartTime";
   String ZOOKEEPER_QUORUM_NAME = "zookeeperQuorum";
   String SERVER_NAME_NAME = "serverName";
diff --git a/hbase-hadoop-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerWrapper.java b/hbase-hadoop-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerWrapper.java
index 7d7f66d09d5..3344dce48b8 100644
--- a/hbase-hadoop-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerWrapper.java
+++ b/hbase-hadoop-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerWrapper.java
@@ -258,6 +258,46 @@ public interface MetricsRegionServerWrapper {
    */
   long getBlockCacheFailedInsertions();
 
+  /**
+   * Hit count of L1 cache.
+   */
+  long getL1CacheHitCount();
+
+  /**
+   * Miss count of L1 cache.
+   */
+  long getL1CacheMissCount();
+
+  /**
+   * Hit ratio of L1 cache.
+   */
+  double getL1CacheHitRatio();
+
+  /**
+   * Miss ratio of L1 cache.
+   */
+  double getL1CacheMissRatio();
+
+  /**
+   * Hit count of L2 cache.
+   */
+  long getL2CacheHitCount();
+
+  /**
+   * Miss count of L2 cache.
+   */
+  long getL2CacheMissCount();
+
+  /**
+   * Hit ratio of L2 cache.
+   */
+  double getL2CacheHitRatio();
+
+  /**
+   * Miss ratio of L2 cache.
+   */
+  double getL2CacheMissRatio();
+
   /**
    * Force a re-computation of the metrics.
    */
diff --git a/hbase-hadoop2-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerSourceImpl.java b/hbase-hadoop2-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerSourceImpl.java
index 628a5648136..208188e5c8a 100644
--- a/hbase-hadoop2-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerSourceImpl.java
+++ b/hbase-hadoop2-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerSourceImpl.java
@@ -458,6 +458,22 @@ public class MetricsRegionServerSourceImpl
           .addCounter(Interns.info(BLOCK_CACHE_DELETE_FAMILY_BLOOM_HIT_COUNT, ""),
             rsWrap.getDeleteFamilyBloomHitCount())
           .addCounter(Interns.info(BLOCK_CACHE_TRAILER_HIT_COUNT, ""), rsWrap.getTrailerHitCount())
+          .addGauge(Interns.info(L1_CACHE_HIT_COUNT, L1_CACHE_HIT_COUNT_DESC),
+              rsWrap.getL1CacheHitCount())
+          .addGauge(Interns.info(L1_CACHE_MISS_COUNT, L1_CACHE_MISS_COUNT_DESC),
+              rsWrap.getL1CacheMissCount())
+          .addGauge(Interns.info(L1_CACHE_HIT_RATIO, L1_CACHE_HIT_RATIO_DESC),
+              rsWrap.getL1CacheHitRatio())
+          .addGauge(Interns.info(L1_CACHE_MISS_RATIO, L1_CACHE_MISS_RATIO_DESC),
+              rsWrap.getL1CacheMissRatio())
+          .addGauge(Interns.info(L2_CACHE_HIT_COUNT, L2_CACHE_HIT_COUNT_DESC),
+              rsWrap.getL2CacheHitCount())
+          .addGauge(Interns.info(L2_CACHE_MISS_COUNT, L2_CACHE_MISS_COUNT_DESC),
+              rsWrap.getL2CacheMissCount())
+          .addGauge(Interns.info(L2_CACHE_HIT_RATIO, L2_CACHE_HIT_RATIO_DESC),
+              rsWrap.getL2CacheHitRatio())
+          .addGauge(Interns.info(L2_CACHE_MISS_RATIO, L2_CACHE_MISS_RATIO_DESC),
+              rsWrap.getL2CacheMissRatio())
           .addCounter(Interns.info(UPDATES_BLOCKED_TIME, UPDATES_BLOCKED_DESC),
               rsWrap.getUpdatesBlockedTime())
           .addCounter(Interns.info(FLUSHED_CELLS, FLUSHED_CELLS_DESC),
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CacheConfig.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CacheConfig.java
index 1d68b99a69e..bd80b727db5 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CacheConfig.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CacheConfig.java
@@ -525,6 +525,8 @@ public class CacheConfig {
   // Clear this if in tests you'd make more than one block cache instance.
   @VisibleForTesting
   static BlockCache GLOBAL_BLOCK_CACHE_INSTANCE;
+  private static LruBlockCache GLOBAL_L1_CACHE_INSTANCE = null;
+  private static BlockCache GLOBAL_L2_CACHE_INSTANCE = null;
 
   /** Boolean whether we have disabled the block cache entirely. */
   @VisibleForTesting
@@ -535,6 +537,7 @@ public class CacheConfig {
    * @return An L1 instance. Currently an instance of LruBlockCache.
    */
   private static LruBlockCache getL1(final Configuration c) {
+    if (GLOBAL_L1_CACHE_INSTANCE != null) return GLOBAL_L1_CACHE_INSTANCE;
     final long lruCacheSize = HeapMemorySizeUtil.getLruCacheSize(c);
     if (lruCacheSize < 0) {
       blockCacheDisabled = true;
@@ -543,7 +546,8 @@ public class CacheConfig {
     int blockSize = c.getInt(BLOCKCACHE_BLOCKSIZE_KEY, HConstants.DEFAULT_BLOCKSIZE);
     LOG.info("Allocating LruBlockCache size=" +
       StringUtils.byteDesc(lruCacheSize) + ", blockSize=" + StringUtils.byteDesc(blockSize));
-    return new LruBlockCache(lruCacheSize, blockSize, true, c);
+    GLOBAL_L1_CACHE_INSTANCE = new LruBlockCache(lruCacheSize, blockSize, true, c);
+    return GLOBAL_L1_CACHE_INSTANCE;
   }
 
   /**
@@ -560,10 +564,26 @@ public class CacheConfig {
 
     // If we want to use an external block cache then create that.
     if (useExternal) {
-      return getExternalBlockcache(c);
+      GLOBAL_L2_CACHE_INSTANCE = getExternalBlockcache(c);
+    } else {
+      // otherwise use the bucket cache.
+      GLOBAL_L2_CACHE_INSTANCE = getBucketCache(c);
     }
-    // otherwise use the bucket cache.
-    return getBucketCache(c);
+    return GLOBAL_L2_CACHE_INSTANCE;
+  }
+
+  public CacheStats getL1Stats() {
+    if (GLOBAL_L1_CACHE_INSTANCE != null) {
+      return GLOBAL_L1_CACHE_INSTANCE.getStats();
+    }
+    return null;
+  }
+
+  public CacheStats getL2Stats() {
+    if (GLOBAL_L2_CACHE_INSTANCE != null) {
+      return GLOBAL_L2_CACHE_INSTANCE.getStats();
+    }
+    return null;
   }
 
   private static BlockCache getExternalBlockcache(Configuration c) {
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerWrapperImpl.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerWrapperImpl.java
index e805e75c689..31cfbd16666 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerWrapperImpl.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerWrapperImpl.java
@@ -84,6 +84,8 @@ class MetricsRegionServerWrapperImpl
   private volatile long averageRegionSize = 0L;
 
   private CacheStats cacheStats;
+  private CacheStats l1Stats = null;
+  private CacheStats l2Stats = null;
   private ScheduledExecutorService executor;
   private Runnable runnable;
   private long period;
@@ -113,8 +115,12 @@ class MetricsRegionServerWrapperImpl
    */
   private synchronized void initBlockCache() {
     CacheConfig cacheConfig = this.regionServer.cacheConfig;
-    if (cacheConfig != null && this.blockCache == null) {
-      this.blockCache = cacheConfig.getBlockCache();
+    if (cacheConfig != null) {
+      l1Stats = cacheConfig.getL1Stats();
+      l2Stats = cacheConfig.getL2Stats();
+      if (this.blockCache == null) {
+        this.blockCache = cacheConfig.getBlockCache();
+      }
     }
 
     if (this.blockCache != null && this.cacheStats == null) {
@@ -328,6 +334,70 @@ class MetricsRegionServerWrapperImpl
     return this.cacheStats.getFailedInserts();
   }
 
+  @Override
+  public long getL1CacheHitCount() {
+    if (this.l1Stats == null) {
+      return 0;
+    }
+    return this.l1Stats.getHitCount();
+  }
+
+  @Override
+  public long getL1CacheMissCount() {
+    if (this.l1Stats == null) {
+      return 0;
+    }
+    return this.l1Stats.getMissCount();
+  }
+
+  @Override
+  public double getL1CacheHitRatio() {
+    if (this.l1Stats == null) {
+      return 0;
+    }
+    return this.l1Stats.getHitRatio();
+  }
+
+  @Override
+  public double getL1CacheMissRatio() {
+    if (this.l1Stats == null) {
+      return 0;
+    }
+    return this.l1Stats.getMissRatio();
+  }
+
+  @Override
+  public long getL2CacheHitCount() {
+    if (this.l2Stats == null) {
+      return 0;
+    }
+    return this.l2Stats.getHitCount();
+  }
+
+  @Override
+  public long getL2CacheMissCount() {
+    if (this.l2Stats == null) {
+      return 0;
+    }
+    return this.l2Stats.getMissCount();
+  }
+
+  @Override
+  public double getL2CacheHitRatio() {
+    if (this.l2Stats == null) {
+      return 0;
+    }
+    return this.l2Stats.getHitRatio();
+  }
+
+  @Override
+  public double getL2CacheMissRatio() {
+    if (this.l2Stats == null) {
+      return 0;
+    }
+    return this.l2Stats.getMissRatio();
+  }
+
   @Override public void forceRecompute() {
     this.runnable.run();
   }
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerWrapperStub.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerWrapperStub.java
index b57332309c4..21d06cf0156 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerWrapperStub.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerWrapperStub.java
@@ -260,6 +260,46 @@ public class MetricsRegionServerWrapperStub implements MetricsRegionServerWrappe
     return 36;
   }
 
+  @Override
+  public long getL1CacheHitCount() {
+    return 200;
+  }
+
+  @Override
+  public long getL1CacheMissCount() {
+    return 100;
+  }
+
+  @Override
+  public double getL1CacheHitRatio() {
+    return 80;
+  }
+
+  @Override
+  public double getL1CacheMissRatio() {
+    return 20;
+  }
+
+  @Override
+  public long getL2CacheHitCount() {
+    return 800;
+  }
+
+  @Override
+  public long getL2CacheMissCount() {
+    return 200;
+  }
+
+  @Override
+  public double getL2CacheHitRatio() {
+    return 90;
+  }
+
+  @Override
+  public double getL2CacheMissRatio() {
+    return 10;
+  }
+
   @Override
   public long getUpdatesBlockedTime() {
     return 419;
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestMetricsRegionServer.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestMetricsRegionServer.java
index e1ef971b805..92acad4676a 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestMetricsRegionServer.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestMetricsRegionServer.java
@@ -97,6 +97,14 @@ public class TestMetricsRegionServer {
     HELPER.assertGauge("blockCacheCountHitPercent", 98, serverSource);
     HELPER.assertGauge("blockCacheExpressHitPercent", 97, serverSource);
     HELPER.assertCounter("blockCacheFailedInsertionCount", 36, serverSource);
+    HELPER.assertGauge("l1CacheHitCount", 200, serverSource);
+    HELPER.assertGauge("l1CacheMissCount", 100, serverSource);
+    HELPER.assertGauge("l1CacheHitRatio", 80, serverSource);
+    HELPER.assertGauge("l1CacheMissRatio", 20, serverSource);
+    HELPER.assertGauge("l2CacheHitCount", 800, serverSource);
+    HELPER.assertGauge("l2CacheMissCount", 200, serverSource);
+    HELPER.assertGauge("l2CacheHitRatio", 90, serverSource);
+    HELPER.assertGauge("l2CacheMissRatio", 10, serverSource);
     HELPER.assertCounter("updatesBlockedTime", 419, serverSource);
   }
 