HBASE-27225 Add BucketAllocator bucket size statistic logging (#4637)

Signed-off-by: Wellington Chevreuil <wchevreuil@apache.org>
This commit is contained in:
Bryan Beaudreault 2022-07-25 20:54:40 -04:00
parent f7ffeac554
commit 4e4ebe99c5
3 changed files with 171 additions and 35 deletions

View File

@ -168,12 +168,15 @@ public final class BucketAllocator {
// Free bucket means it has space to allocate a block; // Free bucket means it has space to allocate a block;
// Completely free bucket means it has no block. // Completely free bucket means it has no block.
private LinkedMap bucketList, freeBuckets, completelyFreeBuckets; private LinkedMap bucketList, freeBuckets, completelyFreeBuckets;
// only modified under synchronization, but also read outside it.
private volatile long fragmentationBytes;
private int sizeIndex; private int sizeIndex;
BucketSizeInfo(int sizeIndex) { BucketSizeInfo(int sizeIndex) {
bucketList = new LinkedMap(); bucketList = new LinkedMap();
freeBuckets = new LinkedMap(); freeBuckets = new LinkedMap();
completelyFreeBuckets = new LinkedMap(); completelyFreeBuckets = new LinkedMap();
fragmentationBytes = 0;
this.sizeIndex = sizeIndex; this.sizeIndex = sizeIndex;
} }
@ -193,7 +196,7 @@ public final class BucketAllocator {
* Find a bucket to allocate a block * Find a bucket to allocate a block
* @return the offset in the IOEngine * @return the offset in the IOEngine
*/ */
public long allocateBlock() { public long allocateBlock(int blockSize) {
Bucket b = null; Bucket b = null;
if (freeBuckets.size() > 0) { if (freeBuckets.size() > 0) {
// Use up an existing one first... // Use up an existing one first...
@ -206,6 +209,9 @@ public final class BucketAllocator {
if (b == null) return -1; if (b == null) return -1;
long result = b.allocate(); long result = b.allocate();
blockAllocated(b); blockAllocated(b);
if (blockSize < b.getItemAllocationSize()) {
fragmentationBytes += b.getItemAllocationSize() - blockSize;
}
return result; return result;
} }
@ -236,23 +242,38 @@ public final class BucketAllocator {
completelyFreeBuckets.remove(b); completelyFreeBuckets.remove(b);
} }
public void freeBlock(Bucket b, long offset) { public void freeBlock(Bucket b, long offset, int length) {
assert bucketList.containsKey(b); assert bucketList.containsKey(b);
// else we shouldn't have anything to free... // else we shouldn't have anything to free...
assert (!completelyFreeBuckets.containsKey(b)); assert (!completelyFreeBuckets.containsKey(b));
b.free(offset); b.free(offset);
if (length < b.getItemAllocationSize()) {
fragmentationBytes -= b.getItemAllocationSize() - length;
}
if (!freeBuckets.containsKey(b)) freeBuckets.put(b, b); if (!freeBuckets.containsKey(b)) freeBuckets.put(b, b);
if (b.isCompletelyFree()) completelyFreeBuckets.put(b, b); if (b.isCompletelyFree()) completelyFreeBuckets.put(b, b);
} }
public synchronized IndexStatistics statistics() { public synchronized IndexStatistics statistics() {
long free = 0, used = 0; long free = 0, used = 0;
int full = 0;
for (Object obj : bucketList.keySet()) { for (Object obj : bucketList.keySet()) {
Bucket b = (Bucket) obj; Bucket b = (Bucket) obj;
free += b.freeCount(); free += b.freeCount();
used += b.usedCount(); used += b.usedCount();
if (!b.hasFreeSpace()) {
full++;
} }
return new IndexStatistics(free, used, bucketSizes[sizeIndex]); }
int bucketObjectSize = bucketSizes[sizeIndex];
// this is most likely to always be 1 or 0
int fillingBuckets = Math.max(0, freeBuckets.size() - completelyFreeBuckets.size());
// if bucket capacity is not perfectly divisible by a bucket's object size, there will
// be some left over per bucket. for some object sizes this may be large enough to be
// non-trivial and worth tuning by choosing a more divisible object size.
long wastedBytes = (bucketCapacity % bucketObjectSize) * (full + fillingBuckets);
return new IndexStatistics(free, used, bucketObjectSize, full, completelyFreeBuckets.size(),
wastedBytes, fragmentationBytes);
} }
@Override @Override
@ -434,7 +455,7 @@ public final class BucketAllocator {
+ "; adjust BucketCache sizes " + BlockCacheFactory.BUCKET_CACHE_BUCKETS_KEY + "; adjust BucketCache sizes " + BlockCacheFactory.BUCKET_CACHE_BUCKETS_KEY
+ " to accomodate if size seems reasonable and you want it cached."); + " to accomodate if size seems reasonable and you want it cached.");
} }
long offset = bsi.allocateBlock(); long offset = bsi.allocateBlock(blockSize);
// Ask caller to free up space and try again! // Ask caller to free up space and try again!
if (offset < 0) throw new CacheFullException(blockSize, bsi.sizeIndex()); if (offset < 0) throw new CacheFullException(blockSize, bsi.sizeIndex());
@ -455,11 +476,11 @@ public final class BucketAllocator {
* @param offset block's offset * @param offset block's offset
* @return size freed * @return size freed
*/ */
public synchronized int freeBlock(long offset) { public synchronized int freeBlock(long offset, int length) {
int bucketNo = (int) (offset / bucketCapacity); int bucketNo = (int) (offset / bucketCapacity);
assert bucketNo >= 0 && bucketNo < buckets.length; assert bucketNo >= 0 && bucketNo < buckets.length;
Bucket targetBucket = buckets[bucketNo]; Bucket targetBucket = buckets[bucketNo];
bucketSizeInfos[targetBucket.sizeIndex()].freeBlock(targetBucket, offset); bucketSizeInfos[targetBucket.sizeIndex()].freeBlock(targetBucket, offset, length);
usedSize -= targetBucket.getItemAllocationSize(); usedSize -= targetBucket.getItemAllocationSize();
return targetBucket.getItemAllocationSize(); return targetBucket.getItemAllocationSize();
} }
@ -478,50 +499,141 @@ public final class BucketAllocator {
return targetBucket.getItemAllocationSize(); return targetBucket.getItemAllocationSize();
} }
/**
* Statistics to give a glimpse into the distribution of BucketCache objects. Each configured
* bucket size, denoted by {@link BucketSizeInfo}, gets an IndexStatistic. A BucketSizeInfo
* allocates blocks of a configured size from claimed buckets. If you have a bucket size of 512k,
* the corresponding BucketSizeInfo will always allocate chunks of 512k at a time regardless of
* actual request.
* <p>
* Over time, as a BucketSizeInfo gets more allocations, it will claim more buckets from the total
* pool of completelyFreeBuckets. As blocks are freed from a BucketSizeInfo, those buckets may be
* returned to the completelyFreeBuckets pool.
* <p>
* The IndexStatistics help visualize how these buckets are currently distributed, through counts
* of items, bytes, and fullBuckets. Additionally, mismatches between block sizes and bucket sizes
* can manifest in inefficient cache usage. These typically manifest in three ways:
* <p>
* 1. Allocation failures, because block size is larger than max bucket size. These show up in
* logs and can be alleviated by adding larger bucket sizes if appropriate.<br>
* 2. Memory fragmentation, because blocks are typically smaller than the bucket size. See
* {@link #fragmentationBytes()} for details.<br>
* 3. Memory waste, because a bucket's itemSize is not a perfect divisor of bucketCapacity. See
* {@link #wastedBytes()} for details.<br>
*/
static class IndexStatistics { static class IndexStatistics {
private long freeCount, usedCount, itemSize, totalCount; private long freeCount, usedCount, itemSize, totalCount, wastedBytes, fragmentationBytes;
private int fullBuckets, completelyFreeBuckets;
/**
* How many more items can be allocated from the currently claimed buckets of this bucket size
*/
public long freeCount() { public long freeCount() {
return freeCount; return freeCount;
} }
/**
* How many items are currently taking up space in this bucket size's buckets
*/
public long usedCount() { public long usedCount() {
return usedCount; return usedCount;
} }
/**
* Combined {@link #freeCount()} + {@link #usedCount()}
*/
public long totalCount() { public long totalCount() {
return totalCount; return totalCount;
} }
/**
* How many more bytes can be allocated from the currently claimed buckets of this bucket size
*/
public long freeBytes() { public long freeBytes() {
return freeCount * itemSize; return freeCount * itemSize;
} }
/**
* How many bytes are currently taking up space in this bucket size's buckets. Note: If your
* items are less than the bucket size of this bucket, the actual used bytes by items will be
* lower than this value. But since a bucket size can only allocate items of a single size, this
* value is the true number of used bytes. The difference will be counted in
* {@link #fragmentationBytes()}.
*/
public long usedBytes() { public long usedBytes() {
return usedCount * itemSize; return usedCount * itemSize;
} }
/**
* Combined {@link #totalCount()} * {@link #itemSize()}
*/
public long totalBytes() { public long totalBytes() {
return totalCount * itemSize; return totalCount * itemSize;
} }
/**
* This bucket size can only allocate items of this size, even if the requested allocation size
* is smaller. The rest goes towards {@link #fragmentationBytes()}.
*/
public long itemSize() { public long itemSize() {
return itemSize; return itemSize;
} }
public IndexStatistics(long free, long used, long itemSize) { /**
setTo(free, used, itemSize); * How many buckets have been completely filled by blocks for this bucket size. These buckets
* can't accept any more blocks unless some existing are freed.
*/
public int fullBuckets() {
return fullBuckets;
}
/**
* How many buckets are currently claimed by this bucket size but as yet totally unused. These
* buckets are available for reallocation to other bucket sizes if those fill up.
*/
public int completelyFreeBuckets() {
return completelyFreeBuckets;
}
/**
* If {@link #bucketCapacity} is not perfectly divisible by this {@link #itemSize()}, the
* remainder will be unusable in buckets of this size. A high value here may be optimized by
* trying to choose bucket sizes which can better divide {@link #bucketCapacity}.
*/
public long wastedBytes() {
return wastedBytes;
}
/**
* Every time you allocate blocks in these buckets where the block size is less than the bucket
* size, fragmentation increases by that difference. You can reduce fragmentation by lowering
* the bucket size so that it is closer to the typical block size. This may have the consequence
* of bumping some blocks to the next larger bucket size, so experimentation may be needed.
*/
public long fragmentationBytes() {
return fragmentationBytes;
}
public IndexStatistics(long free, long used, long itemSize, int fullBuckets,
int completelyFreeBuckets, long wastedBytes, long fragmentationBytes) {
setTo(free, used, itemSize, fullBuckets, completelyFreeBuckets, wastedBytes,
fragmentationBytes);
} }
public IndexStatistics() { public IndexStatistics() {
setTo(-1, -1, 0); setTo(-1, -1, 0, 0, 0, 0, 0);
} }
public void setTo(long free, long used, long itemSize) { public void setTo(long free, long used, long itemSize, int fullBuckets,
int completelyFreeBuckets, long wastedBytes, long fragmentationBytes) {
this.itemSize = itemSize; this.itemSize = itemSize;
this.freeCount = free; this.freeCount = free;
this.usedCount = used; this.usedCount = used;
this.totalCount = free + used; this.totalCount = free + used;
this.fullBuckets = fullBuckets;
this.completelyFreeBuckets = completelyFreeBuckets;
this.wastedBytes = wastedBytes;
this.fragmentationBytes = fragmentationBytes;
} }
} }
@ -529,26 +641,43 @@ public final class BucketAllocator {
return this.buckets; return this.buckets;
} }
void logStatistics() { void logDebugStatistics() {
if (!LOG.isDebugEnabled()) {
return;
}
IndexStatistics total = new IndexStatistics(); IndexStatistics total = new IndexStatistics();
IndexStatistics[] stats = getIndexStatistics(total); IndexStatistics[] stats = getIndexStatistics(total);
LOG.info("Bucket allocator statistics follow:\n"); LOG.debug("Bucket allocator statistics follow:");
LOG.info(" Free bytes=" + total.freeBytes() + "+; used bytes=" + total.usedBytes() LOG.debug(
+ "; total bytes=" + total.totalBytes()); " Free bytes={}; used bytes={}; total bytes={}; wasted bytes={}; fragmentation bytes={}; "
+ "completelyFreeBuckets={}",
total.freeBytes(), total.usedBytes(), total.totalBytes(), total.wastedBytes(),
total.fragmentationBytes(), total.completelyFreeBuckets());
for (IndexStatistics s : stats) { for (IndexStatistics s : stats) {
LOG.info(" Object size " + s.itemSize() + " used=" + s.usedCount() + "; free=" LOG.debug(
+ s.freeCount() + "; total=" + s.totalCount()); " Object size {}; used={}; free={}; total={}; wasted bytes={}; fragmentation bytes={}, "
+ "full buckets={}",
s.itemSize(), s.usedCount(), s.freeCount(), s.totalCount(), s.wastedBytes(),
s.fragmentationBytes(), s.fullBuckets());
} }
} }
IndexStatistics[] getIndexStatistics(IndexStatistics grandTotal) { IndexStatistics[] getIndexStatistics(IndexStatistics grandTotal) {
IndexStatistics[] stats = getIndexStatistics(); IndexStatistics[] stats = getIndexStatistics();
long totalfree = 0, totalused = 0; long totalfree = 0, totalused = 0, totalWasted = 0, totalFragmented = 0;
int fullBuckets = 0, completelyFreeBuckets = 0;
for (IndexStatistics stat : stats) { for (IndexStatistics stat : stats) {
totalfree += stat.freeBytes(); totalfree += stat.freeBytes();
totalused += stat.usedBytes(); totalused += stat.usedBytes();
totalWasted += stat.wastedBytes();
totalFragmented += stat.fragmentationBytes();
fullBuckets += stat.fullBuckets();
completelyFreeBuckets += stat.completelyFreeBuckets();
} }
grandTotal.setTo(totalfree, totalused, 1); grandTotal.setTo(totalfree, totalused, 1, fullBuckets, completelyFreeBuckets, totalWasted,
totalFragmented);
return stats; return stats;
} }
@ -559,13 +688,6 @@ public final class BucketAllocator {
return stats; return stats;
} }
public long freeBlock(long freeList[]) {
long sz = 0;
for (int i = 0; i < freeList.length; ++i)
sz += freeBlock(freeList[i]);
return sz;
}
public int getBucketIndex(long offset) { public int getBucketIndex(long offset) {
return (int) (offset / bucketCapacity); return (int) (offset / bucketCapacity);
} }

View File

@ -569,7 +569,7 @@ public class BucketCache implements BlockCache, HeapSize {
* {@link BucketEntry#refCnt} becoming 0. * {@link BucketEntry#refCnt} becoming 0.
*/ */
void freeBucketEntry(BucketEntry bucketEntry) { void freeBucketEntry(BucketEntry bucketEntry) {
bucketAllocator.freeBlock(bucketEntry.offset()); bucketAllocator.freeBlock(bucketEntry.offset(), bucketEntry.getLength());
realCacheSize.add(-1 * bucketEntry.getLength()); realCacheSize.add(-1 * bucketEntry.getLength());
} }
@ -727,6 +727,8 @@ public class BucketCache implements BlockCache, HeapSize {
+ cacheStats.getEvictedCount() + ", " + "evictedPerRun=" + cacheStats.evictedPerEviction() + cacheStats.getEvictedCount() + ", " + "evictedPerRun=" + cacheStats.evictedPerEviction()
+ ", " + "allocationFailCount=" + cacheStats.getAllocationFailCount()); + ", " + "allocationFailCount=" + cacheStats.getAllocationFailCount());
cacheStats.reset(); cacheStats.reset();
bucketAllocator.logDebugStatistics();
} }
public long getRealCacheSize() { public long getRealCacheSize() {
@ -1108,8 +1110,9 @@ public class BucketCache implements BlockCache, HeapSize {
checkIOErrorIsTolerated(); checkIOErrorIsTolerated();
// Since we failed sync, free the blocks in bucket allocator // Since we failed sync, free the blocks in bucket allocator
for (int i = 0; i < entries.size(); ++i) { for (int i = 0; i < entries.size(); ++i) {
if (bucketEntries[i] != null) { BucketEntry bucketEntry = bucketEntries[i];
bucketAllocator.freeBlock(bucketEntries[i].offset()); if (bucketEntry != null) {
bucketAllocator.freeBlock(bucketEntry.offset(), bucketEntry.getLength());
bucketEntries[i] = null; bucketEntries[i] = null;
} }
} }
@ -1523,7 +1526,7 @@ public class BucketCache implements BlockCache, HeapSize {
succ = true; succ = true;
} finally { } finally {
if (!succ) { if (!succ) {
alloc.freeBlock(offset); alloc.freeBlock(offset, len);
} }
} }
realCacheSize.add(len); realCacheSize.add(len);

View File

@ -23,6 +23,7 @@ import static org.junit.Assert.assertNotEquals;
import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertNull; import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue; import static org.junit.Assert.assertTrue;
import static org.mockito.Mockito.when;
import java.io.File; import java.io.File;
import java.io.IOException; import java.io.IOException;
@ -56,6 +57,7 @@ import org.apache.hadoop.hbase.io.hfile.bucket.BucketCache.RAMQueueEntry;
import org.apache.hadoop.hbase.nio.ByteBuff; import org.apache.hadoop.hbase.nio.ByteBuff;
import org.apache.hadoop.hbase.testclassification.IOTests; import org.apache.hadoop.hbase.testclassification.IOTests;
import org.apache.hadoop.hbase.testclassification.LargeTests; import org.apache.hadoop.hbase.testclassification.LargeTests;
import org.apache.hadoop.hbase.util.Pair;
import org.junit.After; import org.junit.After;
import org.junit.Assert; import org.junit.Assert;
import org.junit.Before; import org.junit.Before;
@ -170,7 +172,7 @@ public class TestBucketCache {
final List<Integer> BLOCKSIZES = Arrays.asList(4 * 1024, 8 * 1024, 64 * 1024, 96 * 1024); final List<Integer> BLOCKSIZES = Arrays.asList(4 * 1024, 8 * 1024, 64 * 1024, 96 * 1024);
boolean full = false; boolean full = false;
ArrayList<Long> allocations = new ArrayList<>(); ArrayList<Pair<Long, Integer>> allocations = new ArrayList<>();
// Fill the allocated extents by choosing a random blocksize. Continues selecting blocks until // Fill the allocated extents by choosing a random blocksize. Continues selecting blocks until
// the cache is completely filled. // the cache is completely filled.
List<Integer> tmp = new ArrayList<>(BLOCKSIZES); List<Integer> tmp = new ArrayList<>(BLOCKSIZES);
@ -178,7 +180,7 @@ public class TestBucketCache {
Integer blockSize = null; Integer blockSize = null;
try { try {
blockSize = randFrom(tmp); blockSize = randFrom(tmp);
allocations.add(mAllocator.allocateBlock(blockSize)); allocations.add(new Pair<>(mAllocator.allocateBlock(blockSize), blockSize));
} catch (CacheFullException cfe) { } catch (CacheFullException cfe) {
tmp.remove(blockSize); tmp.remove(blockSize);
if (tmp.isEmpty()) full = true; if (tmp.isEmpty()) full = true;
@ -189,10 +191,19 @@ public class TestBucketCache {
BucketSizeInfo bucketSizeInfo = mAllocator.roundUpToBucketSizeInfo(blockSize); BucketSizeInfo bucketSizeInfo = mAllocator.roundUpToBucketSizeInfo(blockSize);
IndexStatistics indexStatistics = bucketSizeInfo.statistics(); IndexStatistics indexStatistics = bucketSizeInfo.statistics();
assertEquals("unexpected freeCount for " + bucketSizeInfo, 0, indexStatistics.freeCount()); assertEquals("unexpected freeCount for " + bucketSizeInfo, 0, indexStatistics.freeCount());
// we know the block sizes above are multiples of 1024, but default bucket sizes give an
// additional 1024 on top of that so this counts towards fragmentation in our test
// real life may have worse fragmentation because blocks may not be perfectly sized to block
// size, given encoding/compression and large rows
assertEquals(1024 * indexStatistics.totalCount(), indexStatistics.fragmentationBytes());
} }
for (long offset : allocations) { mAllocator.logDebugStatistics();
assertEquals(mAllocator.sizeOfAllocation(offset), mAllocator.freeBlock(offset));
for (Pair<Long, Integer> allocation : allocations) {
assertEquals(mAllocator.sizeOfAllocation(allocation.getFirst()),
mAllocator.freeBlock(allocation.getFirst(), allocation.getSecond()));
} }
assertEquals(0, mAllocator.getUsedSize()); assertEquals(0, mAllocator.getUsedSize());
} }
@ -579,7 +590,7 @@ public class TestBucketCache {
// initialize a mocked ioengine. // initialize a mocked ioengine.
IOEngine ioEngine = Mockito.mock(IOEngine.class); IOEngine ioEngine = Mockito.mock(IOEngine.class);
Mockito.when(ioEngine.usesSharedMemory()).thenReturn(false); when(ioEngine.usesSharedMemory()).thenReturn(false);
// Mockito.doNothing().when(ioEngine).write(Mockito.any(ByteBuffer.class), Mockito.anyLong()); // Mockito.doNothing().when(ioEngine).write(Mockito.any(ByteBuffer.class), Mockito.anyLong());
Mockito.doThrow(RuntimeException.class).when(ioEngine).write(Mockito.any(ByteBuffer.class), Mockito.doThrow(RuntimeException.class).when(ioEngine).write(Mockito.any(ByteBuffer.class),
Mockito.anyLong()); Mockito.anyLong());