HBASE-17522 Handle JVM throwing runtime exceptions when we ask for details on heap usage the same as a correctly returned 'undefined'.

Signed-off-by: Michael Stack <stack@apache.org>
This commit is contained in:
Sean Busbey 2017-01-25 15:30:56 -06:00
parent b7fc7bf246
commit 6791828698
10 changed files with 191 additions and 66 deletions

View File

@ -34,7 +34,8 @@ org.apache.hadoop.hbase.protobuf.generated.AdminProtos.ServerInfo;
org.apache.hadoop.hbase.protobuf.generated.ClusterStatusProtos.RegionLoad;
org.apache.hadoop.hbase.util.DirectMemoryUtils;
org.apache.hadoop.util.StringUtils.TraditionalBinaryPrefix;
java.lang.management.ManagementFactory;
java.lang.management.MemoryUsage;
org.apache.hadoop.hbase.io.util.MemorySizeUtil;
</%import>
<div class="tabbable">
<ul class="nav nav-pills">
@ -94,6 +95,15 @@ java.lang.management.ManagementFactory;
<%args>
MetricsRegionServerWrapper mWrap;
</%args>
<%java
long usedHeap = -1L;
long maxHeap = -1L;
final MemoryUsage usage = MemorySizeUtil.safeGetHeapMemoryUsage();
if (usage != null) {
maxHeap = usage.getMax();
usedHeap = usage.getUsed();
}
%>
<table class="table table-striped">
<tr>
<tr>
@ -106,12 +116,10 @@ MetricsRegionServerWrapper mWrap;
</tr>
<tr>
<td>
<% TraditionalBinaryPrefix.long2String(
ManagementFactory.getMemoryMXBean().getHeapMemoryUsage().getUsed(), "B", 1) %>
<% TraditionalBinaryPrefix.long2String(usedHeap, "B", 1) %>
</td>
<td>
<% TraditionalBinaryPrefix.long2String(
ManagementFactory.getMemoryMXBean().getHeapMemoryUsage().getMax(), "B", 1) %>
<% TraditionalBinaryPrefix.long2String(maxHeap, "B", 1) %>
</td>
<td>
<% TraditionalBinaryPrefix.long2String(DirectMemoryUtils.getDirectMemoryUsage(), "B", 1) %>

View File

@ -31,6 +31,7 @@ import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.io.hfile.BlockType.BlockCategory;
import org.apache.hadoop.hbase.io.hfile.bucket.BucketCache;
import org.apache.hadoop.hbase.io.util.MemorySizeUtil;
import org.apache.hadoop.hbase.util.ReflectionUtils;
import org.apache.hadoop.util.StringUtils;
@ -550,40 +551,25 @@ public class CacheConfig {
@VisibleForTesting
static boolean blockCacheDisabled = false;
/**
 * Converts the configured block cache fraction into a byte count for the LRU cache.
 * Flags the block cache as disabled when the configured fraction is effectively zero.
 * @param conf configuration holding {@code HConstants.HFILE_BLOCK_CACHE_SIZE_KEY}
 * @param xmx max heap memory, in bytes
 * @return bytes to give the LRU cache, or -1 when the cache is disabled
 * @throws IllegalArgumentException if the configured fraction is greater than 1.0
 */
static long getLruCacheSize(final Configuration conf, final long xmx) {
  final float heapFraction = conf.getFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY,
      HConstants.HFILE_BLOCK_CACHE_SIZE_DEFAULT);
  if (heapFraction > 1.0) {
    throw new IllegalArgumentException(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY +
        " must be between 0.0 and 1.0, and not > 1.0");
  }
  if (heapFraction <= 0.0001f) {
    // A fraction this small is treated as "no block cache at all".
    blockCacheDisabled = true;
    return -1;
  }
  // Share of the heap handed to the cache.
  return (long) (xmx * heapFraction);
}
/**
* @param c Configuration to use.
* @return An L1 instance. Currently an instance of LruBlockCache.
*/
public static LruBlockCache getL1(final Configuration c) {
return getL1(c, ManagementFactory.getMemoryMXBean().getHeapMemoryUsage().getMax());
return getL1Internal(c);
}
/**
* @param c Configuration to use.
* @param xmx Max heap memory
* @return An L1 instance. Currently an instance of LruBlockCache.
*/
private synchronized static LruBlockCache getL1(final Configuration c, final long xmx) {
private synchronized static LruBlockCache getL1Internal(final Configuration c) {
if (GLOBAL_L1_CACHE_INSTANCE != null) return GLOBAL_L1_CACHE_INSTANCE;
final long lruCacheSize = MemorySizeUtil.getLruCacheSize(c);
if (lruCacheSize < 0) {
blockCacheDisabled = true;
}
if (blockCacheDisabled) return null;
long lruCacheSize = getLruCacheSize(c, xmx);
if (lruCacheSize < 0) return null;
int blockSize = c.getInt(BLOCKCACHE_BLOCKSIZE_KEY, HConstants.DEFAULT_BLOCKSIZE);
LOG.info("Allocating LruBlockCache size=" +
StringUtils.byteDesc(lruCacheSize) + ", blockSize=" + StringUtils.byteDesc(blockSize));
@ -593,11 +579,10 @@ public class CacheConfig {
/**
* @param c Configuration to use.
* @param xmx Max heap memory
* @return Returns L2 block cache instance (for now it is BucketCache BlockCache all the time)
* or null if not supposed to be a L2.
*/
private static BlockCache getL2(final Configuration c, final long xmx) {
private static BlockCache getL2(final Configuration c) {
final boolean useExternal = c.getBoolean(EXTERNAL_BLOCKCACHE_KEY, EXTERNAL_BLOCKCACHE_DEFAULT);
if (LOG.isDebugEnabled()) {
LOG.debug("Trying to use " + (useExternal?" External":" Internal") + " l2 cache");
@ -609,7 +594,7 @@ public class CacheConfig {
}
// otherwise use the bucket cache.
return getBucketCache(c, xmx);
return getBucketCache(c);
}
@ -639,15 +624,13 @@ public class CacheConfig {
}
private static BlockCache getBucketCache(Configuration c, long xmx) {
private static BlockCache getBucketCache(Configuration c) {
// Check for L2. ioengine name must be non-null.
String bucketCacheIOEngineName = c.get(BUCKET_CACHE_IOENGINE_KEY, null);
if (bucketCacheIOEngineName == null || bucketCacheIOEngineName.length() <= 0) return null;
int blockSize = c.getInt(BLOCKCACHE_BLOCKSIZE_KEY, HConstants.DEFAULT_BLOCKSIZE);
float bucketCachePercentage = c.getFloat(BUCKET_CACHE_SIZE_KEY, 0F);
long bucketCacheSize = (long) (bucketCachePercentage < 1? xmx * bucketCachePercentage:
bucketCachePercentage * 1024 * 1024);
final long bucketCacheSize = MemorySizeUtil.getBucketCacheSize(c);
if (bucketCacheSize <= 0) {
throw new IllegalStateException("bucketCacheSize <= 0; Check " +
BUCKET_CACHE_SIZE_KEY + " setting and/or server java heap size");
@ -694,11 +677,10 @@ public class CacheConfig {
public static synchronized BlockCache instantiateBlockCache(Configuration conf) {
if (GLOBAL_BLOCK_CACHE_INSTANCE != null) return GLOBAL_BLOCK_CACHE_INSTANCE;
if (blockCacheDisabled) return null;
long xmx = ManagementFactory.getMemoryMXBean().getHeapMemoryUsage().getMax();
LruBlockCache l1 = getL1(conf, xmx);
// blockCacheDisabled is set as a side-effect of getL1(), so check it again after the call.
LruBlockCache l1 = getL1Internal(conf);
// blockCacheDisabled is set as a side-effect of getL1Internal(), so check it again after the call.
if (blockCacheDisabled) return null;
BlockCache l2 = getL2(conf, xmx);
BlockCache l2 = getL2(conf);
if (l2 == null) {
GLOBAL_BLOCK_CACHE_INSTANCE = l1;
} else {

View File

@ -55,6 +55,27 @@ public class MemorySizeUtil {
// a constant to convert a fraction to a percentage
private static final int CONVERT_TO_PERCENTAGE = 100;
private static final String JVM_HEAP_EXCEPTION = "Got an exception while attempting to read " +
"information about the JVM heap. Please submit this log information in a bug report and " +
"include your JVM settings, specifically the GC in use and any -XX options. Consider " +
"restarting the service.";
/**
 * Fetches the JVM's heap memory statistics, shielding callers from runtime exceptions the JVM
 * may raise while producing them.
 * @return the heap usage reported by the memory MXBean, or null if asking for it threw a
 *   runtime exception (the exception is logged as a warning). n.b. a successfully returned
 *   object can still carry -1 values handed back by the JVM.
 * @see MemoryUsage
 */
public static MemoryUsage safeGetHeapMemoryUsage() {
  try {
    return ManagementFactory.getMemoryMXBean().getHeapMemoryUsage();
  } catch (RuntimeException jvmFailure) {
    LOG.warn(JVM_HEAP_EXCEPTION, jvmFailure);
    return null;
  }
}
/**
* Checks whether we have enough heap memory left out after portion for Memstore and Block cache.
* We need at least 20% of heap left out for other RS functions.
@ -167,7 +188,11 @@ public class MemorySizeUtil {
* @return the onheap global memstore limit
*/
public static long getOnheapGlobalMemstoreSize(Configuration conf) {
long max = ManagementFactory.getMemoryMXBean().getHeapMemoryUsage().getMax();
long max = -1L;
final MemoryUsage usage = safeGetHeapMemoryUsage();
if (usage != null) {
max = usage.getMax();
}
float globalMemStorePercent = getGlobalMemStoreHeapPercent(conf, true);
return ((long) (max * globalMemStorePercent));
}
@ -194,10 +219,62 @@ public class MemorySizeUtil {
// L2 block cache can be on heap when IOEngine is "heap"
if (bucketCacheIOEngineName != null && bucketCacheIOEngineName.startsWith("heap")) {
float bucketCachePercentage = conf.getFloat(HConstants.BUCKET_CACHE_SIZE_KEY, 0F);
MemoryUsage mu = ManagementFactory.getMemoryMXBean().getHeapMemoryUsage();
long max = -1L;
final MemoryUsage usage = safeGetHeapMemoryUsage();
if (usage != null) {
max = usage.getMax();
}
l2CachePercent = bucketCachePercentage < 1 ? bucketCachePercentage
: (bucketCachePercentage * 1024 * 1024) / mu.getMax();
: (bucketCachePercentage * 1024 * 1024) / max;
}
return l2CachePercent;
}
/**
 * Works out how many bytes of heap the LRU block cache may use, based on the configured
 * fraction of the maximum heap size.
 * <p>
 * If the heap statistics are unavailable, a max heap of -1 is scaled by the fraction instead,
 * so the result is then 0 for fractions below 1.0 and -1 for a fraction of exactly 1.0.
 * @param conf used to read cache configs
 * @return the number of bytes to use for LRU, negative if disabled.
 * @throws IllegalArgumentException if HFILE_BLOCK_CACHE_SIZE_KEY is > 1.0
 */
public static long getLruCacheSize(final Configuration conf) {
  final float heapFraction = conf.getFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY,
      HConstants.HFILE_BLOCK_CACHE_SIZE_DEFAULT);
  if (heapFraction > 1.0) {
    throw new IllegalArgumentException(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY +
        " must be between 0.0 and 1.0, and not > 1.0");
  }
  if (heapFraction <= 0.0001f) {
    // A fraction this small means the cache is switched off.
    return -1;
  }
  final MemoryUsage heap = safeGetHeapMemoryUsage();
  final long maxHeap = (heap != null) ? heap.getMax() : -1L;
  // Share of the heap handed to the cache.
  return (long) (maxHeap * heapFraction);
}
/**
 * Translates the bucket cache size setting into a byte count.
 * @param conf used to read config for bucket cache size. (a value < 1 is treated as a fraction
 *   of the max heap; any other value is treated as a number of MiB)
 * @return the number of bytes to use for bucket cache, negative if disabled.
 */
public static long getBucketCacheSize(final Configuration conf) {
  final float configured = conf.getFloat(HConstants.BUCKET_CACHE_SIZE_KEY, 0F);
  if (configured < 1) {
    // Fraction of the heap; a max heap of -1 is used when heap statistics are unavailable.
    final MemoryUsage heap = safeGetHeapMemoryUsage();
    final long maxHeap = (heap == null) ? -1L : heap.getMax();
    return (long) (maxHeap * configured);
  }
  // Otherwise the setting is an absolute size in MiB.
  return (long) (configured * 1024 * 1024);
}
}

View File

@ -0,0 +1,37 @@
/**
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.regionserver;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
/**
 * Reasons we flush. Apart from {@link #NORMAL}, each constant names the memstore mark
 * (onheap or offheap, lower or higher) whose breach caused the flush.
 * @see MemStoreFlusher
 * @see FlushRequester
 */
@InterfaceAudience.Private
enum FlushType {
NORMAL,
ABOVE_ONHEAP_LOWER_MARK, /* happens due to lower mark breach of onheap memstore settings.
An offheap memstore can even breach the onheap_lower_mark. */
ABOVE_ONHEAP_HIGHER_MARK,/* happens due to higher mark breach of onheap memstore settings.
An offheap memstore can even breach the onheap_higher_mark. */
ABOVE_OFFHEAP_LOWER_MARK,/* happens due to lower mark breach of offheap memstore settings. */
ABOVE_OFFHEAP_HIGHER_MARK;/* happens due to higher mark breach of offheap memstore settings. */
}

View File

@ -6980,8 +6980,14 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi
stats.setMemstoreLoad((int) (Math.min(100, (this.memstoreDataSize.get() * 100) / this
.memstoreFlushSize)));
if (rsServices.getHeapMemoryManager() != null) {
stats.setHeapOccupancy(
(int) rsServices.getHeapMemoryManager().getHeapOccupancyPercent() * 100);
// The HeapMemoryManager uses -0.0 to signal a problem asking the JVM,
// so we could just do the calculation below and we'd get a 0.
// Instead we treat it as a special case, analogous to having no HMM, so that it can be
// programmatically distinguished from using <1% of heap.
final float occupancy = rsServices.getHeapMemoryManager().getHeapOccupancyPercent();
if (occupancy != HeapMemoryManager.HEAP_OCCUPANCY_ERROR_VALUE) {
stats.setHeapOccupancy((int)(occupancy * 100));
}
}
stats.setCompactionPressure((int)rsServices.getCompactionPressure()*100 > 100 ? 100 :
(int)rsServices.getCompactionPressure()*100);

View File

@ -1243,14 +1243,20 @@ public class HRegionServer extends HasThread implements
// history.
MetricsRegionServerWrapper regionServerWrapper = metricsRegionServer.getRegionServerWrapper();
Collection<Region> regions = getOnlineRegionsLocalContext();
MemoryUsage memory = ManagementFactory.getMemoryMXBean().getHeapMemoryUsage();
long usedMemory = -1L;
long maxMemory = -1L;
final MemoryUsage usage = MemorySizeUtil.safeGetHeapMemoryUsage();
if (usage != null) {
usedMemory = usage.getUsed();
maxMemory = usage.getMax();
}
ClusterStatusProtos.ServerLoad.Builder serverLoad =
ClusterStatusProtos.ServerLoad.newBuilder();
serverLoad.setNumberOfRequests((int) regionServerWrapper.getRequestsPerSecond());
serverLoad.setTotalNumberOfRequests((int) regionServerWrapper.getTotalRequestCount());
serverLoad.setUsedHeapMB((int)(memory.getUsed() / 1024 / 1024));
serverLoad.setMaxHeapMB((int) (memory.getMax() / 1024 / 1024));
serverLoad.setUsedHeapMB((int)(usedMemory / 1024 / 1024));
serverLoad.setMaxHeapMB((int) (maxMemory / 1024 / 1024));
Set<String> coprocessors = getWAL(null).getCoprocessorHost().getCoprocessors();
Builder coprocessorBuilder = Coprocessor.newBuilder();
for (String coprocessor : coprocessors) {
@ -3625,4 +3631,4 @@ public class HRegionServer extends HasThread implements
return new LockServiceClient(conf, lockStub, clusterConnection.getNonceGenerator())
.regionLock(regionInfos, description, abort);
}
}
}

View File

@ -66,6 +66,8 @@ public class HeapMemoryManager {
public static final String HBASE_RS_HEAP_MEMORY_TUNER_CLASS =
"hbase.regionserver.heapmemory.tuner.class";
public static final float HEAP_OCCUPANCY_ERROR_VALUE = -0.0f;
private float globalMemStorePercent;
private float globalMemStorePercentMinRange;
private float globalMemStorePercentMaxRange;
@ -88,7 +90,19 @@ public class HeapMemoryManager {
private final int defaultChorePeriod;
private final float heapOccupancyLowWatermark;
private long maxHeapSize = ManagementFactory.getMemoryMXBean().getHeapMemoryUsage().getMax();
private final long maxHeapSize;
{
// note that this initialization still isn't threadsafe, because updating a long isn't atomic.
long tempMaxHeap = -1L;
try {
final MemoryUsage usage = MemorySizeUtil.safeGetHeapMemoryUsage();
if (usage != null) {
tempMaxHeap = usage.getMax();
}
} finally {
maxHeapSize = tempMaxHeap;
}
}
private MetricsHeapMemoryManager metricsHeapMemoryManager;
@ -222,10 +236,10 @@ public class HeapMemoryManager {
}
/**
* @return heap occupancy percentage, 0 &lt;= n &lt;= 1
* @return heap occupancy percentage, 0 &lt;= n &lt;= 1. or -0.0 for error asking JVM
*/
public float getHeapOccupancyPercent() {
return this.heapOccupancyPercent;
return this.heapOccupancyPercent == Float.MAX_VALUE ? HEAP_OCCUPANCY_ERROR_VALUE : this.heapOccupancyPercent;
}
private class HeapMemoryTunerChore extends ScheduledChore implements FlushRequestListener {
@ -249,8 +263,15 @@ public class HeapMemoryManager {
@Override
protected void chore() {
// Sample heap occupancy
MemoryUsage memUsage = ManagementFactory.getMemoryMXBean().getHeapMemoryUsage();
heapOccupancyPercent = (float)memUsage.getUsed() / (float)memUsage.getCommitted();
final MemoryUsage usage = MemorySizeUtil.safeGetHeapMemoryUsage();
if (usage != null) {
heapOccupancyPercent = (float)usage.getUsed() / (float)usage.getCommitted();
} else {
// previously, an exception would have meant death for the tuning chore
// so switch to alarming so that we similarly stop tuning until we get
// heap usage information again.
heapOccupancyPercent = Float.MAX_VALUE;
}
// If we are above the heap occupancy alarm low watermark, switch to short
// sleeps for close monitoring. Stop autotuning, we are in a danger zone.
if (heapOccupancyPercent >= heapOccupancyLowWatermark) {

View File

@ -826,13 +826,3 @@ class MemStoreFlusher implements FlushRequester {
}
}
}
/* Reasons we flush. */
enum FlushType {
NORMAL,
ABOVE_ONHEAP_LOWER_MARK, /* happens due to lower mark breach of onheap memstore settings.
An offheap memstore can even breach the onheap_lower_mark. */
ABOVE_ONHEAP_HIGHER_MARK,/* happens due to higher mark breach of onheap memstore settings.
An offheap memstore can even breach the onheap_higher_mark. */
ABOVE_OFFHEAP_LOWER_MARK,/* happens due to lower mark breach of offheap memstore settings. */
ABOVE_OFFHEAP_HIGHER_MARK;/* happens due to higher mark breach of offheap memstore settings. */
}

View File

@ -42,6 +42,7 @@ import org.apache.hadoop.hbase.testclassification.LargeTests;
import org.apache.hadoop.hbase.io.hfile.BlockType.BlockCategory;
import org.apache.hadoop.hbase.io.hfile.Cacheable.MemoryType;
import org.apache.hadoop.hbase.io.hfile.bucket.BucketCache;
import org.apache.hadoop.hbase.io.util.MemorySizeUtil;
import org.apache.hadoop.hbase.nio.ByteBuff;
import org.apache.hadoop.hbase.util.Threads;
import org.junit.After;
@ -328,8 +329,7 @@ public class TestCacheConfig {
BlockCache [] bcs = cbc.getBlockCaches();
assertTrue(bcs[0] instanceof LruBlockCache);
LruBlockCache lbc = (LruBlockCache)bcs[0];
assertEquals(CacheConfig.getLruCacheSize(this.conf,
ManagementFactory.getMemoryMXBean().getHeapMemoryUsage().getMax()), lbc.getMaxSize());
assertEquals(MemorySizeUtil.getLruCacheSize(this.conf), lbc.getMaxSize());
assertTrue(bcs[1] instanceof BucketCache);
BucketCache bc = (BucketCache)bcs[1];
// getMaxSize comes back in bytes but we specified size in MB
@ -347,7 +347,7 @@ public class TestCacheConfig {
// from L1 happens, it does not fail because L2 can't take the eviction because block too big.
this.conf.setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0.001f);
MemoryUsage mu = ManagementFactory.getMemoryMXBean().getHeapMemoryUsage();
long lruExpectedSize = CacheConfig.getLruCacheSize(this.conf, mu.getMax());
long lruExpectedSize = MemorySizeUtil.getLruCacheSize(this.conf);
final int bcSize = 100;
long bcExpectedSize = 100 * 1024 * 1024; // MB.
assertTrue(lruExpectedSize < bcExpectedSize);

View File

@ -65,8 +65,6 @@ import org.junit.rules.TestName;
import org.junit.rules.TestRule;
import java.io.IOException;
import java.lang.management.ManagementFactory;
import java.lang.management.MemoryMXBean;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;