HBASE-17522 Handle JVM throwing runtime exceptions when we ask for details on heap usage the same as a correctly returned 'undefined'.

Signed-off-by: Michael Stack <stack@apache.org>

 Conflicts:
	hbase-common/src/main/java/org/apache/hadoop/hbase/io/util/HeapMemorySizeUtil.java
	hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CacheConfig.java
	hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java
	hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
	hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/MemStoreFlusher.java
	hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestCacheConfig.java
	hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestDefaultMemStore.java
Sean Busbey 2017-01-25 15:30:56 -06:00
parent 3aac1b6884
commit 3d4639f34d
9 changed files with 183 additions and 56 deletions
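
This commit applies one pattern throughout: wrap the MemoryMXBean call in a try/catch, return null on a RuntimeException, and have every caller fall back to the same -1 values the JVM already uses for "undefined". A minimal standalone sketch of that caller-side pattern follows; the class name and the System.err logging are illustrative stand-ins for the HeapMemorySizeUtil helper and the commons-logging call in the diff below.

import java.lang.management.ManagementFactory;
import java.lang.management.MemoryUsage;

public class SafeHeapUsageSketch {
  /** Returns heap usage, or null if the JVM throws while answering. */
  static MemoryUsage safeGetHeapMemoryUsage() {
    try {
      return ManagementFactory.getMemoryMXBean().getHeapMemoryUsage();
    } catch (RuntimeException exception) {
      // The real patch logs JVM_HEAP_EXCEPTION via commons-logging here.
      System.err.println("Could not read JVM heap usage: " + exception);
      return null;
    }
  }

  public static void main(String[] args) {
    // Callers treat null the same as the JVM's documented "undefined" value (-1).
    long maxHeap = -1L;
    final MemoryUsage usage = safeGetHeapMemoryUsage();
    if (usage != null) {
      maxHeap = usage.getMax();
    }
    System.out.println("max heap in bytes (-1 if unknown): " + maxHeap);
  }
}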


@@ -45,6 +45,27 @@ public class HeapMemorySizeUtil {
// a constant to convert a fraction to a percentage
private static final int CONVERT_TO_PERCENTAGE = 100;
private static final String JVM_HEAP_EXCEPTION = "Got an exception while attempting to read " +
"information about the JVM heap. Please submit this log information in a bug report and " +
"include your JVM settings, specifically the GC in use and any -XX options. Consider " +
"restarting the service.";
/**
* Return JVM memory statistics while properly handling runtime exceptions from the JVM.
* @return a memory usage object, null if there was a runtime exception. (n.b. you
* could also get -1 values back from the JVM)
* @see MemoryUsage
*/
public static MemoryUsage safeGetHeapMemoryUsage() {
MemoryUsage usage = null;
try {
usage = ManagementFactory.getMemoryMXBean().getHeapMemoryUsage();
} catch (RuntimeException exception) {
LOG.warn(JVM_HEAP_EXCEPTION, exception);
}
return usage;
}
/**
* Checks whether we have enough heap memory left after the portions for Memstore and block cache.
* We need at least 20% of the heap left for other RS functions.
@@ -143,10 +164,62 @@ public class HeapMemorySizeUtil {
// L2 block cache can be on heap when IOEngine is "heap"
if (bucketCacheIOEngineName != null && bucketCacheIOEngineName.startsWith("heap")) {
float bucketCachePercentage = conf.getFloat(HConstants.BUCKET_CACHE_SIZE_KEY, 0F);
MemoryUsage mu = ManagementFactory.getMemoryMXBean().getHeapMemoryUsage();
long max = -1L;
final MemoryUsage usage = safeGetHeapMemoryUsage();
if (usage != null) {
max = usage.getMax();
}
l2CachePercent = bucketCachePercentage < 1 ? bucketCachePercentage
: (bucketCachePercentage * 1024 * 1024) / mu.getMax();
: (bucketCachePercentage * 1024 * 1024) / max;
}
return l2CachePercent;
}
/**
* @param conf used to read cache configs
* @return the number of bytes to use for LRU, negative if disabled.
* @throws IllegalArgumentException if HFILE_BLOCK_CACHE_SIZE_KEY is > 1.0
*/
public static long getLruCacheSize(final Configuration conf) {
float cachePercentage = conf.getFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY,
HConstants.HFILE_BLOCK_CACHE_SIZE_DEFAULT);
if (cachePercentage <= 0.0001f) {
return -1;
}
if (cachePercentage > 1.0) {
throw new IllegalArgumentException(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY +
" must be between 0.0 and 1.0, and not > 1.0");
}
long max = -1L;
final MemoryUsage usage = safeGetHeapMemoryUsage();
if (usage != null) {
max = usage.getMax();
}
// Calculate the amount of heap to give the LRU block cache.
return (long) (max * cachePercentage);
}
/**
* @param conf used to read config for bucket cache size. (< 1 is treated as % of heap and >= 1 as MiB)
* @return the number of bytes to use for bucket cache, negative if disabled.
*/
public static long getBucketCacheSize(final Configuration conf) {
final float bucketCachePercentage = conf.getFloat(HConstants.BUCKET_CACHE_SIZE_KEY, 0F);
long bucketCacheSize;
// Values < 1 are treated as % of heap
if (bucketCachePercentage < 1) {
long max = -1L;
final MemoryUsage usage = safeGetHeapMemoryUsage();
if (usage != null) {
max = usage.getMax();
}
bucketCacheSize = (long)(max * bucketCachePercentage);
// values >= 1 are treated as # of MiB
} else {
bucketCacheSize = (long)(bucketCachePercentage * 1024 * 1024);
}
return bucketCacheSize;
}
}
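
For reference, the sizing rule the two new helpers consolidate: a configured value below 1 is a fraction of the max heap, a value of 1 or more is a size in MiB. A standalone sketch of that arithmetic follows; the class and method names are illustrative, and the real helpers read a Hadoop Configuration and obtain the max heap via safeGetHeapMemoryUsage().

public class CacheSizingSketch {
  /** Mirrors the arithmetic in getBucketCacheSize() / getLruCacheSize() above. */
  static long bucketCacheBytes(float configured, long maxHeapBytes) {
    if (configured < 1) {
      // below 1: fraction of the max heap (maxHeapBytes is -1 when the JVM gave no answer)
      return (long) (maxHeapBytes * configured);
    }
    // 1 or more: megabytes
    return (long) (configured * 1024 * 1024);
  }

  public static void main(String[] args) {
    long maxHeap = 4L * 1024 * 1024 * 1024;               // pretend a 4 GiB heap
    System.out.println(bucketCacheBytes(0.4f, maxHeap));  // 40% of the heap
    System.out.println(bucketCacheBytes(512f, maxHeap));  // 512 MiB = 536870912 bytes
    // With an unknown heap the fractional form truncates to 0, which CacheConfig
    // below rejects ("bucketCacheSize <= 0") instead of crashing on the JMX call.
    System.out.println(bucketCacheBytes(0.4f, -1L));
  }
}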


@@ -34,7 +34,8 @@ org.apache.hadoop.hbase.protobuf.generated.AdminProtos.ServerInfo;
org.apache.hadoop.hbase.protobuf.generated.ClusterStatusProtos.RegionLoad;
org.apache.hadoop.hbase.util.DirectMemoryUtils;
org.apache.hadoop.util.StringUtils.TraditionalBinaryPrefix;
java.lang.management.ManagementFactory;
java.lang.management.MemoryUsage;
org.apache.hadoop.hbase.io.util.HeapMemorySizeUtil;
</%import>
<div class="tabbable">
<ul class="nav nav-pills">
@@ -94,6 +95,15 @@ java.lang.management.ManagementFactory;
<%args>
MetricsRegionServerWrapper mWrap;
</%args>
<%java
long usedHeap = -1L;
long maxHeap = -1L;
final MemoryUsage usage = HeapMemorySizeUtil.safeGetHeapMemoryUsage();
if (usage != null) {
maxHeap = usage.getMax();
usedHeap = usage.getUsed();
}
%>
<table class="table table-striped">
<tr>
<tr>
@@ -106,12 +116,10 @@ MetricsRegionServerWrapper mWrap;
</tr>
<tr>
<td>
<% TraditionalBinaryPrefix.long2String(
ManagementFactory.getMemoryMXBean().getHeapMemoryUsage().getUsed(), "B", 1) %>
<% TraditionalBinaryPrefix.long2String(usedHeap, "B", 1) %>
</td>
<td>
<% TraditionalBinaryPrefix.long2String(
ManagementFactory.getMemoryMXBean().getHeapMemoryUsage().getMax(), "B", 1) %>
<% TraditionalBinaryPrefix.long2String(maxHeap, "B", 1) %>
</td>
<td>
<% TraditionalBinaryPrefix.long2String(DirectMemoryUtils.getDirectMemoryUsage(), "B", 1) %>


@@ -32,6 +32,7 @@ import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.io.hfile.BlockType.BlockCategory;
import org.apache.hadoop.hbase.io.hfile.bucket.BucketCache;
import org.apache.hadoop.hbase.io.util.HeapMemorySizeUtil;
import org.apache.hadoop.hbase.util.ReflectionUtils;
import org.apache.hadoop.util.StringUtils;
@@ -529,30 +530,16 @@ public class CacheConfig {
@VisibleForTesting
static boolean blockCacheDisabled = false;
static long getLruCacheSize(final Configuration conf, final MemoryUsage mu) {
float cachePercentage = conf.getFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY,
HConstants.HFILE_BLOCK_CACHE_SIZE_DEFAULT);
if (cachePercentage <= 0.0001f) {
blockCacheDisabled = true;
return -1;
}
if (cachePercentage > 1.0) {
throw new IllegalArgumentException(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY +
" must be between 0.0 and 1.0, and not > 1.0");
}
// Calculate the amount of heap to give the heap.
return (long) (mu.getMax() * cachePercentage);
}
/**
* @param c Configuration to use.
* @param mu JMX Memory Bean
* @return An L1 instance. Currently an instance of LruBlockCache.
*/
private static LruBlockCache getL1(final Configuration c, final MemoryUsage mu) {
long lruCacheSize = getLruCacheSize(c, mu);
if (lruCacheSize < 0) return null;
private static LruBlockCache getL1(final Configuration c) {
final long lruCacheSize = HeapMemorySizeUtil.getLruCacheSize(c);
if (lruCacheSize < 0) {
blockCacheDisabled = true;
}
if (blockCacheDisabled) return null;
int blockSize = c.getInt(BLOCKCACHE_BLOCKSIZE_KEY, HConstants.DEFAULT_BLOCKSIZE);
LOG.info("Allocating LruBlockCache size=" +
StringUtils.byteDesc(lruCacheSize) + ", blockSize=" + StringUtils.byteDesc(blockSize));
@@ -561,11 +548,10 @@ public class CacheConfig {
/**
* @param c Configuration to use.
* @param mu JMX Memory Bean
* @return Returns L2 block cache instance (for now it is BucketCache BlockCache all the time)
* or null if not supposed to be a L2.
*/
private static BlockCache getL2(final Configuration c, final MemoryUsage mu) {
private static BlockCache getL2(final Configuration c) {
final boolean useExternal = c.getBoolean(EXTERNAL_BLOCKCACHE_KEY, EXTERNAL_BLOCKCACHE_DEFAULT);
if (LOG.isDebugEnabled()) {
LOG.debug("Trying to use " + (useExternal?" External":" Internal") + " l2 cache");
@@ -577,7 +563,7 @@ public class CacheConfig {
}
// otherwise use the bucket cache.
return getBucketCache(c, mu);
return getBucketCache(c);
}
@@ -607,15 +593,13 @@ public class CacheConfig {
}
private static BlockCache getBucketCache(Configuration c, MemoryUsage mu) {
private static BlockCache getBucketCache(Configuration c) {
// Check for L2. ioengine name must be non-null.
String bucketCacheIOEngineName = c.get(BUCKET_CACHE_IOENGINE_KEY, null);
if (bucketCacheIOEngineName == null || bucketCacheIOEngineName.length() <= 0) return null;
int blockSize = c.getInt(BLOCKCACHE_BLOCKSIZE_KEY, HConstants.DEFAULT_BLOCKSIZE);
float bucketCachePercentage = c.getFloat(BUCKET_CACHE_SIZE_KEY, 0F);
long bucketCacheSize = (long) (bucketCachePercentage < 1? mu.getMax() * bucketCachePercentage:
bucketCachePercentage * 1024 * 1024);
final long bucketCacheSize = HeapMemorySizeUtil.getBucketCacheSize(c);
if (bucketCacheSize <= 0) {
throw new IllegalStateException("bucketCacheSize <= 0; Check " +
BUCKET_CACHE_SIZE_KEY + " setting and/or server java heap size");
@@ -662,11 +646,10 @@ public class CacheConfig {
public static synchronized BlockCache instantiateBlockCache(Configuration conf) {
if (GLOBAL_BLOCK_CACHE_INSTANCE != null) return GLOBAL_BLOCK_CACHE_INSTANCE;
if (blockCacheDisabled) return null;
MemoryUsage mu = ManagementFactory.getMemoryMXBean().getHeapMemoryUsage();
LruBlockCache l1 = getL1(conf, mu);
// blockCacheDisabled is set as a side-effect of getL1(), so check it again after the call.
LruBlockCache l1 = getL1(conf);
// blockCacheDisabled is set as a side-effect of getL1Internal(), so check it again after the call.
if (blockCacheDisabled) return null;
BlockCache l2 = getL2(conf, mu);
BlockCache l2 = getL2(conf);
if (l2 == null) {
GLOBAL_BLOCK_CACHE_INSTANCE = l1;
} else {


@@ -0,0 +1,31 @@
/**
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.regionserver;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
/**
* Reasons we flush.
* @see MemStoreFlusher
* @see FlushRequester
*/
@InterfaceAudience.Private
enum FlushType {
NORMAL, ABOVE_LOWER_MARK, ABOVE_HIGHER_MARK;
}


@@ -7282,7 +7282,16 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi
ClientProtos.RegionLoadStats.Builder stats = ClientProtos.RegionLoadStats.newBuilder();
stats.setMemstoreLoad((int) (Math.min(100, (this.memstoreSize.get() * 100) / this
.memstoreFlushSize)));
stats.setHeapOccupancy((int)rsServices.getHeapMemoryManager().getHeapOccupancyPercent()*100);
if (rsServices.getHeapMemoryManager() != null) {
// the HeapMemoryManager uses -0.0 to signal a problem asking the JVM,
// so we could just do the calculation below and we'll get a 0.
// treating it as a special case analogous to no HMM instead so that it can be
// programmatically treated differently from using <1% of heap.
final float occupancy = rsServices.getHeapMemoryManager().getHeapOccupancyPercent();
if (occupancy != HeapMemoryManager.HEAP_OCCUPANCY_ERROR_VALUE) {
stats.setHeapOccupancy((int)(occupancy * 100));
}
}
stats.setCompactionPressure((int)rsServices.getCompactionPressure()*100 > 100 ? 100 :
(int)rsServices.getCompactionPressure()*100);
return stats.build();
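
A note on the -0.0f sentinel checked here: Java's == cannot tell -0.0f from 0.0f, so the guard relies on a live server's used/committed ratio never being exactly zero, which is what the comment above is alluding to. The snippet below is a small standalone illustration of that behaviour, not HBase code.

public class NegativeZeroSentinelDemo {
  static final float HEAP_OCCUPANCY_ERROR_VALUE = -0.0f;  // same sentinel as HeapMemoryManager

  public static void main(String[] args) {
    System.out.println(0.0f == HEAP_OCCUPANCY_ERROR_VALUE);              // true: == cannot distinguish them
    System.out.println(Float.compare(0.0f, HEAP_OCCUPANCY_ERROR_VALUE)); // 1: Float.compare can

    float occupancy = HEAP_OCCUPANCY_ERROR_VALUE;  // "could not ask the JVM"
    if (occupancy != HEAP_OCCUPANCY_ERROR_VALUE) {
      System.out.println("heapOccupancy=" + (int) (occupancy * 100));
    } else {
      // as in the patch, the RegionLoadStats field is simply left unset
      System.out.println("heap occupancy unknown; stat not reported");
    }
  }
}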


@@ -99,6 +99,7 @@ import org.apache.hadoop.hbase.fs.HFileSystem;
import org.apache.hadoop.hbase.http.HttpServer;
import org.apache.hadoop.hbase.http.InfoServer;
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
import org.apache.hadoop.hbase.io.util.HeapMemorySizeUtil;
import org.apache.hadoop.hbase.ipc.CoprocessorRpcUtils;
import org.apache.hadoop.hbase.ipc.RpcClient;
import org.apache.hadoop.hbase.ipc.RpcClientFactory;
@@ -1214,14 +1215,20 @@ public class HRegionServer extends HasThread implements
// history.
MetricsRegionServerWrapper regionServerWrapper = metricsRegionServer.getRegionServerWrapper();
Collection<Region> regions = getOnlineRegionsLocalContext();
MemoryUsage memory = ManagementFactory.getMemoryMXBean().getHeapMemoryUsage();
long usedMemory = -1L;
long maxMemory = -1L;
final MemoryUsage usage = HeapMemorySizeUtil.safeGetHeapMemoryUsage();
if (usage != null) {
usedMemory = usage.getUsed();
maxMemory = usage.getMax();
}
ClusterStatusProtos.ServerLoad.Builder serverLoad =
ClusterStatusProtos.ServerLoad.newBuilder();
serverLoad.setNumberOfRequests((int) regionServerWrapper.getRequestsPerSecond());
serverLoad.setTotalNumberOfRequests((int) regionServerWrapper.getTotalRequestCount());
serverLoad.setUsedHeapMB((int)(memory.getUsed() / 1024 / 1024));
serverLoad.setMaxHeapMB((int) (memory.getMax() / 1024 / 1024));
serverLoad.setUsedHeapMB((int)(usedMemory / 1024 / 1024));
serverLoad.setMaxHeapMB((int) (maxMemory / 1024 / 1024));
Set<String> coprocessors = getWAL(null).getCoprocessorHost().getCoprocessors();
Builder coprocessorBuilder = Coprocessor.newBuilder();
for (String coprocessor : coprocessors) {


@@ -62,6 +62,8 @@ public class HeapMemoryManager {
public static final String HBASE_RS_HEAP_MEMORY_TUNER_CLASS =
"hbase.regionserver.heapmemory.tuner.class";
public static final float HEAP_OCCUPANCY_ERROR_VALUE = -0.0f;
private float globalMemStorePercent;
private float globalMemStorePercentMinRange;
private float globalMemStorePercentMaxRange;
@@ -83,7 +85,19 @@ public class HeapMemoryManager {
private final int defaultChorePeriod;
private final float heapOccupancyLowWatermark;
private long maxHeapSize = ManagementFactory.getMemoryMXBean().getHeapMemoryUsage().getMax();
private final long maxHeapSize;
{
// note that this initialization still isn't threadsafe, because updating a long isn't atomic.
long tempMaxHeap = -1L;
try {
final MemoryUsage usage = HeapMemorySizeUtil.safeGetHeapMemoryUsage();
if (usage != null) {
tempMaxHeap = usage.getMax();
}
} finally {
maxHeapSize = tempMaxHeap;
}
}
public static HeapMemoryManager create(Configuration conf, FlushRequester memStoreFlusher,
Server server, RegionServerAccounting regionServerAccounting) {
@@ -210,10 +224,10 @@ public class HeapMemoryManager {
}
/**
* @return heap occupancy percentage, 0 &lt;= n &lt;= 1
* @return heap occupancy percentage, 0 &lt;= n &lt;= 1, or -0.0 if there was an error asking the JVM
*/
public float getHeapOccupancyPercent() {
return this.heapOccupancyPercent;
return this.heapOccupancyPercent == Float.MAX_VALUE ? HEAP_OCCUPANCY_ERROR_VALUE : this.heapOccupancyPercent;
}
private class HeapMemoryTunerChore extends ScheduledChore implements FlushRequestListener {
@@ -235,8 +249,15 @@ public class HeapMemoryManager {
@Override
protected void chore() {
// Sample heap occupancy
MemoryUsage memUsage = ManagementFactory.getMemoryMXBean().getHeapMemoryUsage();
heapOccupancyPercent = (float)memUsage.getUsed() / (float)memUsage.getCommitted();
final MemoryUsage usage = HeapMemorySizeUtil.safeGetHeapMemoryUsage();
if (usage != null) {
heapOccupancyPercent = (float)usage.getUsed() / (float)usage.getCommitted();
} else {
// previously, an exception would have meant death for the tuning chore
// so switch to alarming so that we similarly stop tuning until we get
// heap usage information again.
heapOccupancyPercent = Float.MAX_VALUE;
}
// If we are above the heap occupancy alarm low watermark, switch to short
// sleeps for close monitoring. Stop autotuning, we are in a danger zone.
if (heapOccupancyPercent >= heapOccupancyLowWatermark) {
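
The failure path wired up above is worth tracing end to end: a null from safeGetHeapMemoryUsage() makes the chore store Float.MAX_VALUE, which always trips the low-watermark check (so tuning stops and the chore switches to short sleeps) and is reported to callers such as HRegion as the -0.0f error value. The standalone sketch below walks that path under an assumed 0.95 low watermark; the field names mirror the diff, the class itself is illustrative.

public class HeapOccupancyErrorPathSketch {
  static final float HEAP_OCCUPANCY_ERROR_VALUE = -0.0f;
  static final float heapOccupancyLowWatermark = 0.95f;  // assumed value, for illustration only
  static float heapOccupancyPercent = Float.MAX_VALUE;   // what the chore stores when usage == null

  static float getHeapOccupancyPercent() {
    return heapOccupancyPercent == Float.MAX_VALUE
        ? HEAP_OCCUPANCY_ERROR_VALUE : heapOccupancyPercent;
  }

  public static void main(String[] args) {
    // true: the error value always lands in the alarm branch, so no tuning happens
    // until a later sample succeeds.
    System.out.println(heapOccupancyPercent >= heapOccupancyLowWatermark);
    // -0.0: what callers see and can treat as "no data".
    System.out.println(getHeapOccupancyPercent());
  }
}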


@@ -804,7 +804,3 @@ class MemStoreFlusher implements FlushRequester {
}
}
}
enum FlushType {
NORMAL, ABOVE_LOWER_MARK, ABOVE_HIGHER_MARK;
}


@@ -37,6 +37,7 @@ import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.testclassification.LargeTests;
import org.apache.hadoop.hbase.io.hfile.bucket.BucketCache;
import org.apache.hadoop.hbase.io.util.HeapMemorySizeUtil;
import org.apache.hadoop.hbase.util.Threads;
import org.junit.After;
import org.junit.Before;
@@ -251,8 +252,7 @@ public class TestCacheConfig {
BlockCache [] bcs = cbc.getBlockCaches();
assertTrue(bcs[0] instanceof LruBlockCache);
LruBlockCache lbc = (LruBlockCache)bcs[0];
assertEquals(CacheConfig.getLruCacheSize(this.conf,
ManagementFactory.getMemoryMXBean().getHeapMemoryUsage()), lbc.getMaxSize());
assertEquals(HeapMemorySizeUtil.getLruCacheSize(this.conf), lbc.getMaxSize());
assertTrue(bcs[1] instanceof BucketCache);
BucketCache bc = (BucketCache)bcs[1];
// getMaxSize comes back in bytes but we specified size in MB
@@ -269,8 +269,7 @@ public class TestCacheConfig {
// Make lru size is smaller than bcSize for sure. Need this to be true so when eviction
// from L1 happens, it does not fail because L2 can't take the eviction because block too big.
this.conf.setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0.001f);
MemoryUsage mu = ManagementFactory.getMemoryMXBean().getHeapMemoryUsage();
long lruExpectedSize = CacheConfig.getLruCacheSize(this.conf, mu);
long lruExpectedSize = HeapMemorySizeUtil.getLruCacheSize(this.conf);
final int bcSize = 100;
long bcExpectedSize = 100 * 1024 * 1024; // MB.
assertTrue(lruExpectedSize < bcExpectedSize);
@@ -343,4 +342,4 @@ public class TestCacheConfig {
blocks.get(BlockType.DATA) == null? 0:
blocks.get(BlockType.DATA).intValue());
}
}
}