HBASE-15102 Fix HeapMemoryTuner overtuning memstore

HeapMemoryTuner often over-tunes the memstore without looking at
the lower limit of the previous memstore size, causing a
situation in which the memstore used size suddenly exceeds the
total memstore size.

Signed-off-by: Elliott Clark <eclark@apache.org>
This commit is contained in:
Ashu Pachauri 2016-01-13 13:49:43 -08:00 committed by Elliott Clark
parent 7d7a2d8712
commit a61f3ecc8b
2 changed files with 172 additions and 127 deletions

View File

@ -124,126 +124,21 @@ class DefaultHeapMemoryTuner implements HeapMemoryTuner {
@Override @Override
public TunerResult tune(TunerContext context) { public TunerResult tune(TunerContext context) {
long blockedFlushCount = context.getBlockedFlushCount(); float curMemstoreSize = context.getCurMemStoreSize();
long unblockedFlushCount = context.getUnblockedFlushCount(); float curBlockCacheSize = context.getCurBlockCacheSize();
long evictCount = context.getEvictCount(); addToRollingStats(context);
long cacheMissCount = context.getCacheMissCount();
long totalFlushCount = blockedFlushCount+unblockedFlushCount;
rollingStatsForCacheMisses.insertDataValue(cacheMissCount);
rollingStatsForFlushes.insertDataValue(totalFlushCount);
rollingStatsForEvictions.insertDataValue(evictCount);
StepDirection newTuneDirection = StepDirection.NEUTRAL;
if (ignoreInitialPeriods < numPeriodsToIgnore) { if (ignoreInitialPeriods < numPeriodsToIgnore) {
// Ignoring the first few tuner periods // Ignoring the first few tuner periods
ignoreInitialPeriods++; ignoreInitialPeriods++;
rollingStatsForTunerSteps.insertDataValue(0); rollingStatsForTunerSteps.insertDataValue(0);
return NO_OP_TUNER_RESULT; return NO_OP_TUNER_RESULT;
} }
String tunerLog = ""; StepDirection newTuneDirection = getTuneDirection(context);
// We can consider memstore or block cache to be sufficient if
// we are using only a minor fraction of what have been already provided to it.
boolean earlyMemstoreSufficientCheck = totalFlushCount == 0
|| context.getCurMemStoreUsed() < context.getCurMemStoreSize()*sufficientMemoryLevel;
boolean earlyBlockCacheSufficientCheck = evictCount == 0 ||
context.getCurBlockCacheUsed() < context.getCurBlockCacheSize()*sufficientMemoryLevel;
float newMemstoreSize; float newMemstoreSize;
float newBlockCacheSize; float newBlockCacheSize;
if (earlyMemstoreSufficientCheck && earlyBlockCacheSufficientCheck) {
// Both memstore and block cache memory seems to be sufficient. No operation required.
newTuneDirection = StepDirection.NEUTRAL;
} else if (earlyMemstoreSufficientCheck) {
// Increase the block cache size and corresponding decrease in memstore size.
newTuneDirection = StepDirection.INCREASE_BLOCK_CACHE_SIZE;
} else if (earlyBlockCacheSufficientCheck) {
// Increase the memstore size and corresponding decrease in block cache size.
newTuneDirection = StepDirection.INCREASE_MEMSTORE_SIZE;
} else {
// Early checks for sufficient memory failed. Tuning memory based on past statistics.
// Boolean indicator to show if we need to revert previous step or not.
boolean isReverting = false;
switch (prevTuneDirection) {
// Here we are using number of evictions rather than cache misses because it is more
// strong indicator for deficient cache size. Improving caching is what we
// would like to optimize for in steady state.
case INCREASE_BLOCK_CACHE_SIZE:
if ((double)evictCount > rollingStatsForEvictions.getMean() ||
(double)totalFlushCount > rollingStatsForFlushes.getMean() +
rollingStatsForFlushes.getDeviation()/2.00) {
// Reverting previous step as it was not useful.
// Tuning failed to decrease evictions or tuning resulted in large number of flushes.
newTuneDirection = StepDirection.INCREASE_MEMSTORE_SIZE;
tunerLog += "Reverting previous tuning.";
if ((double)evictCount > rollingStatsForEvictions.getMean()) {
tunerLog += " As could not decrease evctions sufficiently.";
} else {
tunerLog += " As number of flushes rose significantly.";
}
isReverting = true;
}
break;
case INCREASE_MEMSTORE_SIZE:
if ((double)totalFlushCount > rollingStatsForFlushes.getMean() ||
(double)evictCount > rollingStatsForEvictions.getMean() +
rollingStatsForEvictions.getDeviation()/2.00) {
// Reverting previous step as it was not useful.
// Tuning failed to decrease flushes or tuning resulted in large number of evictions.
newTuneDirection = StepDirection.INCREASE_BLOCK_CACHE_SIZE;
tunerLog += "Reverting previous tuning.";
if ((double)totalFlushCount > rollingStatsForFlushes.getMean()) {
tunerLog += " As could not decrease flushes sufficiently.";
} else {
tunerLog += " As number of evictions rose significantly.";
}
isReverting = true;
}
break;
default:
// Last step was neutral, revert doesn't not apply here.
break;
}
// If we are not reverting. We try to tune memory sizes by looking at cache misses / flushes.
if (!isReverting){
// mean +- deviation*0.8 is considered to be normal
// below it its consider low and above it is considered high.
// We can safely assume that the number cache misses, flushes are normally distributed over
// past periods and hence on all the above mentioned classes (normal, high and low)
// are likely to occur with probability 56%, 22%, 22% respectively. Hence there is at
// least ~10% probability that we will not fall in NEUTRAL step.
// This optimization solution is feedback based and we revert when we
// dont find our steps helpful. Hence we want to do tuning only when we have clear
// indications because too many unnecessary tuning may affect the performance of cluster.
if ((double)cacheMissCount < rollingStatsForCacheMisses.getMean() -
rollingStatsForCacheMisses.getDeviation()*0.80 &&
(double)totalFlushCount < rollingStatsForFlushes.getMean() -
rollingStatsForFlushes.getDeviation()*0.80) {
// Everything is fine no tuning required
newTuneDirection = StepDirection.NEUTRAL;
} else if ((double)cacheMissCount > rollingStatsForCacheMisses.getMean() +
rollingStatsForCacheMisses.getDeviation()*0.80 &&
(double)totalFlushCount < rollingStatsForFlushes.getMean() -
rollingStatsForFlushes.getDeviation()*0.80) {
// more misses , increasing cache size
newTuneDirection = StepDirection.INCREASE_BLOCK_CACHE_SIZE;
tunerLog +=
"Increasing block cache size as observed increase in number of cache misses.";
} else if ((double)cacheMissCount < rollingStatsForCacheMisses.getMean() -
rollingStatsForCacheMisses.getDeviation()*0.80 &&
(double)totalFlushCount > rollingStatsForFlushes.getMean() +
rollingStatsForFlushes.getDeviation()*0.80) {
// more flushes , increasing memstore size
newTuneDirection = StepDirection.INCREASE_MEMSTORE_SIZE;
tunerLog += "Increasing memstore size as observed increase in number of flushes.";
} else if (blockedFlushCount > 0 && prevTuneDirection == StepDirection.NEUTRAL) {
// we do not want blocked flushes
newTuneDirection = StepDirection.INCREASE_MEMSTORE_SIZE;
tunerLog += "Increasing memstore size as observed "
+ blockedFlushCount + " blocked flushes.";
} else {
// Default. Not enough facts to do tuning.
newTuneDirection = StepDirection.NEUTRAL;
}
}
}
// Adjusting step size for tuning to get to steady state or restart from steady state. // Adjusting step size for tuning to get to steady state or restart from steady state.
// Even if the step size was 4% and 32 GB memory size, we will be shifting 1 GB back and forth // Even if the step size was 4% and 32 GB memory size, we will be shifting 1 GB back and forth
// per tuner operation and it can affect the performance of cluster so we keep on decreasing // per tuner operation and it can affect the performance of cluster so we keep on decreasing
@ -265,20 +160,30 @@ class DefaultHeapMemoryTuner implements HeapMemoryTuner {
} }
if (step < minimumStepSize) { if (step < minimumStepSize) {
// If step size is too small then we do nothing. // If step size is too small then we do nothing.
LOG.debug("Tuner step size is too low; we will not perform any tuning this time.");
step = 0.0f; step = 0.0f;
newTuneDirection = StepDirection.NEUTRAL; newTuneDirection = StepDirection.NEUTRAL;
} }
// Increase / decrease the memstore / block cahce sizes depending on new tuner step. // Increase / decrease the memstore / block cahce sizes depending on new tuner step.
float globalMemstoreLowerMark = HeapMemorySizeUtil.getGlobalMemStoreLowerMark(conf,
curMemstoreSize);
// We don't want to exert immediate pressure on memstore. So, we decrease its size gracefully;
// we set a minimum bar in the middle of the total memstore size and the lower limit.
float minMemstoreSize = ((globalMemstoreLowerMark + 1) * curMemstoreSize) / 2.00f;
switch (newTuneDirection) { switch (newTuneDirection) {
case INCREASE_BLOCK_CACHE_SIZE: case INCREASE_BLOCK_CACHE_SIZE:
newBlockCacheSize = context.getCurBlockCacheSize() + step; if (curMemstoreSize - step < minMemstoreSize) {
newMemstoreSize = context.getCurMemStoreSize() - step; step = curMemstoreSize - minMemstoreSize;
}
newMemstoreSize = curMemstoreSize - step;
newBlockCacheSize = curBlockCacheSize + step;
rollingStatsForTunerSteps.insertDataValue(-(int)(step*100000)); rollingStatsForTunerSteps.insertDataValue(-(int)(step*100000));
decayingTunerStepSizeSum = (decayingTunerStepSizeSum - step)/2.00f; decayingTunerStepSizeSum = (decayingTunerStepSizeSum - step)/2.00f;
break; break;
case INCREASE_MEMSTORE_SIZE: case INCREASE_MEMSTORE_SIZE:
newBlockCacheSize = context.getCurBlockCacheSize() - step; newBlockCacheSize = curBlockCacheSize - step;
newMemstoreSize = context.getCurMemStoreSize() + step; newMemstoreSize = curMemstoreSize + step;
rollingStatsForTunerSteps.insertDataValue((int)(step*100000)); rollingStatsForTunerSteps.insertDataValue((int)(step*100000));
decayingTunerStepSizeSum = (decayingTunerStepSizeSum + step)/2.00f; decayingTunerStepSizeSum = (decayingTunerStepSizeSum + step)/2.00f;
break; break;
@ -301,13 +206,146 @@ class DefaultHeapMemoryTuner implements HeapMemoryTuner {
} }
TUNER_RESULT.setBlockCacheSize(newBlockCacheSize); TUNER_RESULT.setBlockCacheSize(newBlockCacheSize);
TUNER_RESULT.setMemstoreSize(newMemstoreSize); TUNER_RESULT.setMemstoreSize(newMemstoreSize);
if (LOG.isDebugEnabled()) {
LOG.debug(tunerLog);
}
prevTuneDirection = newTuneDirection; prevTuneDirection = newTuneDirection;
return TUNER_RESULT; return TUNER_RESULT;
} }
/**
 * Determine the best direction of tuning based on the given context.
 * <p>
 * The decision is made in three stages: (1) early checks that treat memstore / block cache as
 * sufficient when there were no flushes / evictions or when usage is below
 * {@code sufficientMemoryLevel} of the current size; (2) a check whether the previous tuning
 * step (read from {@code prevTuneDirection}) should be reverted; (3) a comparison of the
 * current cache miss and flush counts against the rolling statistics.
 * The reason for the decision, if any was recorded, is logged at debug level.
 * @param context The tuner context.
 * @return tuning direction.
 */
private StepDirection getTuneDirection(TunerContext context) {
  StepDirection newTuneDirection = StepDirection.NEUTRAL;
  long blockedFlushCount = context.getBlockedFlushCount();
  long unblockedFlushCount = context.getUnblockedFlushCount();
  long evictCount = context.getEvictCount();
  long cacheMissCount = context.getCacheMissCount();
  long totalFlushCount = blockedFlushCount + unblockedFlushCount;
  float curMemstoreSize = context.getCurMemStoreSize();
  float curBlockCacheSize = context.getCurBlockCacheSize();
  StringBuilder tunerLog = new StringBuilder();
  // We can consider memstore or block cache to be sufficient if
  // we are using only a minor fraction of what has already been provided to it.
  boolean earlyMemstoreSufficientCheck = totalFlushCount == 0
      || context.getCurMemStoreUsed() < curMemstoreSize * sufficientMemoryLevel;
  boolean earlyBlockCacheSufficientCheck = evictCount == 0 ||
      context.getCurBlockCacheUsed() < curBlockCacheSize * sufficientMemoryLevel;
  if (earlyMemstoreSufficientCheck && earlyBlockCacheSufficientCheck) {
    // Both memstore and block cache memory seem to be sufficient. No operation required.
    newTuneDirection = StepDirection.NEUTRAL;
  } else if (earlyMemstoreSufficientCheck) {
    // Increase the block cache size and correspondingly decrease the memstore size.
    newTuneDirection = StepDirection.INCREASE_BLOCK_CACHE_SIZE;
  } else if (earlyBlockCacheSufficientCheck) {
    // Increase the memstore size and correspondingly decrease the block cache size.
    newTuneDirection = StepDirection.INCREASE_MEMSTORE_SIZE;
  } else {
    // Early checks for sufficient memory failed. Tuning memory based on past statistics.
    // Boolean indicator to show if we need to revert previous step or not.
    boolean isReverting = false;
    switch (prevTuneDirection) {
      // Here we are using number of evictions rather than cache misses because it is a
      // stronger indicator for deficient cache size. Improving caching is what we
      // would like to optimize for in steady state.
      case INCREASE_BLOCK_CACHE_SIZE:
        if ((double)evictCount > rollingStatsForEvictions.getMean() ||
            (double)totalFlushCount > rollingStatsForFlushes.getMean() +
                rollingStatsForFlushes.getDeviation()/2.00) {
          // Reverting previous step as it was not useful.
          // Tuning failed to decrease evictions or tuning resulted in large number of flushes.
          newTuneDirection = StepDirection.INCREASE_MEMSTORE_SIZE;
          tunerLog.append("We will revert previous tuning");
          if ((double)evictCount > rollingStatsForEvictions.getMean()) {
            tunerLog.append(" because we could not decrease evictions sufficiently.");
          } else {
            tunerLog.append(" because the number of flushes rose significantly.");
          }
          isReverting = true;
        }
        break;
      case INCREASE_MEMSTORE_SIZE:
        if ((double)totalFlushCount > rollingStatsForFlushes.getMean() ||
            (double)evictCount > rollingStatsForEvictions.getMean() +
                rollingStatsForEvictions.getDeviation()/2.00) {
          // Reverting previous step as it was not useful.
          // Tuning failed to decrease flushes or tuning resulted in large number of evictions.
          newTuneDirection = StepDirection.INCREASE_BLOCK_CACHE_SIZE;
          tunerLog.append("We will revert previous tuning");
          if ((double)totalFlushCount > rollingStatsForFlushes.getMean()) {
            tunerLog.append(" because we could not decrease flushes sufficiently.");
          } else {
            tunerLog.append(" because number of evictions rose significantly.");
          }
          isReverting = true;
        }
        break;
      default:
        // Last step was neutral, revert does not apply here.
        break;
    }
    // If we are not reverting, we try to tune memory sizes by looking at cache misses / flushes.
    if (!isReverting) {
      // mean +- deviation*0.8 is considered to be normal;
      // below it is considered low and above it is considered high.
      // We can safely assume that the number of cache misses and flushes are normally
      // distributed over past periods and hence the above mentioned classes (normal, high and
      // low) are likely to occur with probability 56%, 22%, 22% respectively. Hence there is at
      // least ~10% probability that we will not fall in NEUTRAL step.
      // This optimization solution is feedback based and we revert when we
      // don't find our steps helpful. Hence we want to do tuning only when we have clear
      // indications because too many unnecessary tunings may affect the performance of cluster.
      if ((double)cacheMissCount < rollingStatsForCacheMisses.getMean() -
          rollingStatsForCacheMisses.getDeviation()*0.80 &&
          (double)totalFlushCount < rollingStatsForFlushes.getMean() -
              rollingStatsForFlushes.getDeviation()*0.80) {
        // Everything is fine, no tuning required
        newTuneDirection = StepDirection.NEUTRAL;
      } else if ((double)cacheMissCount > rollingStatsForCacheMisses.getMean() +
          rollingStatsForCacheMisses.getDeviation()*0.80 &&
          (double)totalFlushCount < rollingStatsForFlushes.getMean() -
              rollingStatsForFlushes.getDeviation()*0.80) {
        // more misses, increasing cache size
        newTuneDirection = StepDirection.INCREASE_BLOCK_CACHE_SIZE;
        tunerLog.append(
            "Going to increase block cache size due to increase in number of cache misses.");
      } else if ((double)cacheMissCount < rollingStatsForCacheMisses.getMean() -
          rollingStatsForCacheMisses.getDeviation()*0.80 &&
          (double)totalFlushCount > rollingStatsForFlushes.getMean() +
              rollingStatsForFlushes.getDeviation()*0.80) {
        // more flushes, increasing memstore size
        newTuneDirection = StepDirection.INCREASE_MEMSTORE_SIZE;
        tunerLog.append("Going to increase memstore size due to increase in number of flushes.");
      } else if (blockedFlushCount > 0 && prevTuneDirection == StepDirection.NEUTRAL) {
        // we do not want blocked flushes
        newTuneDirection = StepDirection.INCREASE_MEMSTORE_SIZE;
        // Note the trailing space after "due to": without it the count is glued to the word.
        tunerLog.append("Going to increase memstore size due to ")
            .append(blockedFlushCount)
            .append(" blocked flushes.");
      } else {
        // Default. Not enough facts to do tuning.
        tunerLog.append("Going to do nothing because we "
            + "could not determine best tuning direction");
        newTuneDirection = StepDirection.NEUTRAL;
      }
    }
  }
  // The early-sufficiency branches record no reason; skip logging rather than emit an
  // empty debug line in that case.
  if (LOG.isDebugEnabled() && tunerLog.length() > 0) {
    LOG.debug(tunerLog.toString());
  }
  return newTuneDirection;
}
/**
 * Feed the counters reported by the given context into the rolling tuner stats:
 * cache misses, total flushes and evictions.
 * @param context The tuner context.
 */
private void addToRollingStats(TunerContext context) {
  long cacheMisses = context.getCacheMissCount();
  rollingStatsForCacheMisses.insertDataValue(cacheMisses);

  // Flushes are tracked as a single total of blocked plus unblocked flushes.
  long totalFlushes = context.getBlockedFlushCount() + context.getUnblockedFlushCount();
  rollingStatsForFlushes.insertDataValue(totalFlushes);

  long evictions = context.getEvictCount();
  rollingStatsForEvictions.insertDataValue(evictions);
}
@Override @Override
public Configuration getConf() { public Configuration getConf() {
return this.conf; return this.conf;

View File

@ -227,6 +227,7 @@ public class TestHeapMemoryManager {
blockCache.setTestBlockSize((long) (maxHeapSize * 0.4 * 0.8)); blockCache.setTestBlockSize((long) (maxHeapSize * 0.4 * 0.8));
regionServerAccounting.setTestMemstoreSize(0); regionServerAccounting.setTestMemstoreSize(0);
Configuration conf = HBaseConfiguration.create(); Configuration conf = HBaseConfiguration.create();
conf.setFloat(HeapMemorySizeUtil.MEMSTORE_SIZE_LOWER_LIMIT_KEY, 0.7f);
conf.setFloat(HeapMemoryManager.MEMSTORE_SIZE_MAX_RANGE_KEY, 0.75f); conf.setFloat(HeapMemoryManager.MEMSTORE_SIZE_MAX_RANGE_KEY, 0.75f);
conf.setFloat(HeapMemoryManager.MEMSTORE_SIZE_MIN_RANGE_KEY, 0.10f); conf.setFloat(HeapMemoryManager.MEMSTORE_SIZE_MIN_RANGE_KEY, 0.10f);
conf.setFloat(HeapMemoryManager.BLOCK_CACHE_SIZE_MAX_RANGE_KEY, 0.7f); conf.setFloat(HeapMemoryManager.BLOCK_CACHE_SIZE_MAX_RANGE_KEY, 0.7f);
@ -238,6 +239,11 @@ public class TestHeapMemoryManager {
new RegionServerStub(conf), new RegionServerAccountingStub()); new RegionServerStub(conf), new RegionServerAccountingStub());
long oldMemstoreHeapSize = memStoreFlusher.memstoreSize; long oldMemstoreHeapSize = memStoreFlusher.memstoreSize;
long oldBlockCacheSize = blockCache.maxSize; long oldBlockCacheSize = blockCache.maxSize;
long oldMemstoreLowerMarkSize = 7 * oldMemstoreHeapSize / 10;
long maxTuneSize = oldMemstoreHeapSize - (oldMemstoreLowerMarkSize + oldMemstoreHeapSize) / 2;
float maxStepValue = (maxTuneSize * 1.0f) / oldMemstoreHeapSize;
maxStepValue = maxStepValue > DefaultHeapMemoryTuner.DEFAULT_MAX_STEP_VALUE ?
DefaultHeapMemoryTuner.DEFAULT_MAX_STEP_VALUE:maxStepValue;
final ChoreService choreService = new ChoreService("TEST_SERVER_NAME"); final ChoreService choreService = new ChoreService("TEST_SERVER_NAME");
heapMemoryManager.start(choreService); heapMemoryManager.start(choreService);
blockCache.evictBlock(null); blockCache.evictBlock(null);
@ -245,20 +251,21 @@ public class TestHeapMemoryManager {
blockCache.evictBlock(null); blockCache.evictBlock(null);
// Allow the tuner to run once and do necessary memory up // Allow the tuner to run once and do necessary memory up
waitForTune(memStoreFlusher, memStoreFlusher.memstoreSize); waitForTune(memStoreFlusher, memStoreFlusher.memstoreSize);
assertHeapSpaceDelta(-(DefaultHeapMemoryTuner.DEFAULT_MAX_STEP_VALUE), oldMemstoreHeapSize, assertHeapSpaceDelta(-maxStepValue, oldMemstoreHeapSize, memStoreFlusher.memstoreSize);
memStoreFlusher.memstoreSize); assertHeapSpaceDelta(maxStepValue, oldBlockCacheSize, blockCache.maxSize);
assertHeapSpaceDelta(DefaultHeapMemoryTuner.DEFAULT_MAX_STEP_VALUE, oldBlockCacheSize,
blockCache.maxSize);
oldMemstoreHeapSize = memStoreFlusher.memstoreSize; oldMemstoreHeapSize = memStoreFlusher.memstoreSize;
oldBlockCacheSize = blockCache.maxSize; oldBlockCacheSize = blockCache.maxSize;
oldMemstoreLowerMarkSize = 7 * oldMemstoreHeapSize / 10;
maxTuneSize = oldMemstoreHeapSize - (oldMemstoreLowerMarkSize + oldMemstoreHeapSize) / 2;
maxStepValue = (maxTuneSize * 1.0f) / oldMemstoreHeapSize;
maxStepValue = maxStepValue > DefaultHeapMemoryTuner.DEFAULT_MAX_STEP_VALUE ?
DefaultHeapMemoryTuner.DEFAULT_MAX_STEP_VALUE:maxStepValue;
// Do some more evictions before the next run of HeapMemoryTuner // Do some more evictions before the next run of HeapMemoryTuner
blockCache.evictBlock(null); blockCache.evictBlock(null);
// Allow the tuner to run once and do necessary memory up // Allow the tuner to run once and do necessary memory up
waitForTune(memStoreFlusher, memStoreFlusher.memstoreSize); waitForTune(memStoreFlusher, memStoreFlusher.memstoreSize);
assertHeapSpaceDelta(-(DefaultHeapMemoryTuner.DEFAULT_MAX_STEP_VALUE), oldMemstoreHeapSize, assertHeapSpaceDelta(-maxStepValue, oldMemstoreHeapSize, memStoreFlusher.memstoreSize);
memStoreFlusher.memstoreSize); assertHeapSpaceDelta(maxStepValue, oldBlockCacheSize, blockCache.maxSize);
assertHeapSpaceDelta(DefaultHeapMemoryTuner.DEFAULT_MAX_STEP_VALUE, oldBlockCacheSize,
blockCache.maxSize);
} }
@Test @Test