From 1fd772c3b13daf742a869a457926ed7e169e324f Mon Sep 17 00:00:00 2001 From: Bryan Beaudreault Date: Mon, 25 Jul 2022 20:54:40 -0400 Subject: [PATCH] HBASE-27225 Add BucketAllocator bucket size statistic logging (#4637) Signed-off-by: Wellington Chevreuil --- .../io/hfile/bucket/BucketAllocator.java | 174 +++++++++++++++--- .../hbase/io/hfile/bucket/BucketCache.java | 11 +- .../io/hfile/bucket/TestBucketCache.java | 21 ++- 3 files changed, 171 insertions(+), 35 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketAllocator.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketAllocator.java index 5d89f0cbdd3..54032e79c6f 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketAllocator.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketAllocator.java @@ -168,12 +168,15 @@ public final class BucketAllocator { // Free bucket means it has space to allocate a block; // Completely free bucket means it has no block. private LinkedMap bucketList, freeBuckets, completelyFreeBuckets; + // only modified under synchronization, but also read outside it. + private volatile long fragmentationBytes; private int sizeIndex; BucketSizeInfo(int sizeIndex) { bucketList = new LinkedMap(); freeBuckets = new LinkedMap(); completelyFreeBuckets = new LinkedMap(); + fragmentationBytes = 0; this.sizeIndex = sizeIndex; } @@ -193,7 +196,7 @@ public final class BucketAllocator { * Find a bucket to allocate a block * @return the offset in the IOEngine */ - public long allocateBlock() { + public long allocateBlock(int blockSize) { Bucket b = null; if (freeBuckets.size() > 0) { // Use up an existing one first... @@ -206,6 +209,9 @@ public final class BucketAllocator { if (b == null) return -1; long result = b.allocate(); blockAllocated(b); + if (blockSize < b.getItemAllocationSize()) { + fragmentationBytes += b.getItemAllocationSize() - blockSize; + } return result; } @@ -236,23 +242,38 @@ public final class BucketAllocator { completelyFreeBuckets.remove(b); } - public void freeBlock(Bucket b, long offset) { + public void freeBlock(Bucket b, long offset, int length) { assert bucketList.containsKey(b); // else we shouldn't have anything to free... assert (!completelyFreeBuckets.containsKey(b)); b.free(offset); + if (length < b.getItemAllocationSize()) { + fragmentationBytes -= b.getItemAllocationSize() - length; + } if (!freeBuckets.containsKey(b)) freeBuckets.put(b, b); if (b.isCompletelyFree()) completelyFreeBuckets.put(b, b); } public synchronized IndexStatistics statistics() { long free = 0, used = 0; + int full = 0; for (Object obj : bucketList.keySet()) { Bucket b = (Bucket) obj; free += b.freeCount(); used += b.usedCount(); + if (!b.hasFreeSpace()) { + full++; + } } - return new IndexStatistics(free, used, bucketSizes[sizeIndex]); + int bucketObjectSize = bucketSizes[sizeIndex]; + // this is most likely to always be 1 or 0 + int fillingBuckets = Math.max(0, freeBuckets.size() - completelyFreeBuckets.size()); + // if bucket capacity is not perfectly divisible by a bucket's object size, there will + // be some left over per bucket. for some object sizes this may be large enough to be + // non-trivial and worth tuning by choosing a more divisible object size. 
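+ // for illustration (hypothetical numbers): a 513 KiB bucket capacity with a 5 KiB object
+ // size strands 513 % 5 = 3 KiB in every full or filling bucket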
+ long wastedBytes = (bucketCapacity % bucketObjectSize) * (full + fillingBuckets); + return new IndexStatistics(free, used, bucketObjectSize, full, completelyFreeBuckets.size(), + wastedBytes, fragmentationBytes); } @Override @@ -434,7 +455,7 @@ public final class BucketAllocator { + "; adjust BucketCache sizes " + BlockCacheFactory.BUCKET_CACHE_BUCKETS_KEY + " to accomodate if size seems reasonable and you want it cached."); } - long offset = bsi.allocateBlock(); + long offset = bsi.allocateBlock(blockSize); // Ask caller to free up space and try again! if (offset < 0) throw new CacheFullException(blockSize, bsi.sizeIndex()); @@ -455,11 +476,11 @@ public final class BucketAllocator { * @param offset block's offset * @return size freed */ - public synchronized int freeBlock(long offset) { + public synchronized int freeBlock(long offset, int length) { int bucketNo = (int) (offset / bucketCapacity); assert bucketNo >= 0 && bucketNo < buckets.length; Bucket targetBucket = buckets[bucketNo]; - bucketSizeInfos[targetBucket.sizeIndex()].freeBlock(targetBucket, offset); + bucketSizeInfos[targetBucket.sizeIndex()].freeBlock(targetBucket, offset, length); usedSize -= targetBucket.getItemAllocationSize(); return targetBucket.getItemAllocationSize(); } @@ -478,50 +499,141 @@ public final class BucketAllocator { return targetBucket.getItemAllocationSize(); } + /** + * Statistics to give a glimpse into the distribution of BucketCache objects. Each configured + * bucket size, denoted by {@link BucketSizeInfo}, gets an IndexStatistic. A BucketSizeInfo + * allocates blocks of a configured size from claimed buckets. If you have a bucket size of 512k, + * the corresponding BucketSizeInfo will always allocate chunks of 512k at a time regardless of + * actual request. + *
+ * Over time, as a BucketSizeInfo gets more allocations, it will claim more buckets from the total + * pool of completelyFreeBuckets. As blocks are freed from a BucketSizeInfo, those buckets may be + * returned to the completelyFreeBuckets pool. + *
+ * The IndexStatistics help visualize how these buckets are currently distributed, through counts
+ * of items, bytes, and fullBuckets. Additionally, mismatches between block sizes and bucket sizes
+ * can result in inefficient cache usage, which typically manifests in three ways:
+ *
+ * 1. Allocation failures, because block size is larger than max bucket size. These show up in + * logs and can be alleviated by adding larger bucket sizes if appropriate.
+ * 2. Memory fragmentation, because blocks are typically smaller than the bucket size. See + * {@link #fragmentationBytes()} for details.
+ * 3. Memory waste, because a bucket's itemSize is not a perfect divisor of bucketCapacity. See
+ * {@link #wastedBytes()} for details.
+ */ static class IndexStatistics {
- private long freeCount, usedCount, itemSize, totalCount;
+ private long freeCount, usedCount, itemSize, totalCount, wastedBytes, fragmentationBytes;
+ private int fullBuckets, completelyFreeBuckets;
+ /**
+ * How many more items can be allocated from the currently claimed blocks of this bucket size
+ */ public long freeCount() { return freeCount; }
+ /**
+ * How many items are currently taking up space in this bucket size's buckets
+ */ public long usedCount() { return usedCount; }
+ /**
+ * Combined {@link #freeCount()} + {@link #usedCount()}
+ */ public long totalCount() { return totalCount; }
+ /**
+ * How many more bytes can be allocated from the currently claimed blocks of this bucket size
+ */ public long freeBytes() { return freeCount * itemSize; }
+ /**
+ * How many bytes are currently taking up space in this bucket size's buckets. Note: If your
+ * items are smaller than the bucket size of this bucket, the actual used bytes by items will be
+ * lower than this value. But since a bucket size can only allocate items of a single size, this
+ * value is the true number of used bytes. The difference will be counted in
+ * {@link #fragmentationBytes()}.
+ */ public long usedBytes() { return usedCount * itemSize; }
+ /**
+ * Combined {@link #totalCount()} * {@link #itemSize()}
+ */ public long totalBytes() { return totalCount * itemSize; }
+ /**
+ * This bucket size can only allocate items of this size, even if the requested allocation size
+ * is smaller. The rest goes towards {@link #fragmentationBytes()}.
+ */ public long itemSize() { return itemSize; }
- public IndexStatistics(long free, long used, long itemSize) {
- setTo(free, used, itemSize);
+ /**
+ * How many buckets have been completely filled by blocks for this bucket size. These buckets
+ * can't accept any more blocks unless some existing blocks are freed.
+ */ public int fullBuckets() {
+ return fullBuckets;
+ }
+
+ /**
+ * How many buckets are currently claimed by this bucket size but as yet totally unused. These
+ * buckets are available for reallocation to other bucket sizes if those fill up.
+ */ public int completelyFreeBuckets() {
+ return completelyFreeBuckets;
+ }
+
+ /**
+ * If {@link #bucketCapacity} is not perfectly divisible by this {@link #itemSize()}, the
+ * remainder will be unusable in buckets of this size. A high value here can be reduced by
+ * choosing bucket sizes which more evenly divide {@link #bucketCapacity}.
+ */ public long wastedBytes() {
+ return wastedBytes;
+ }
+
+ /**
+ * Every time you allocate blocks in these buckets where the block size is less than the bucket
+ * size, fragmentation increases by that difference. You can reduce fragmentation by lowering
+ * the bucket size so that it is closer to the typical block size. This may have the consequence
+ * of bumping some blocks to the next larger bucket size, so experimentation may be needed. 
+ */ + public long fragmentationBytes() { + return fragmentationBytes; + } + + public IndexStatistics(long free, long used, long itemSize, int fullBuckets, + int completelyFreeBuckets, long wastedBytes, long fragmentationBytes) { + setTo(free, used, itemSize, fullBuckets, completelyFreeBuckets, wastedBytes, + fragmentationBytes); } public IndexStatistics() { - setTo(-1, -1, 0); + setTo(-1, -1, 0, 0, 0, 0, 0); } - public void setTo(long free, long used, long itemSize) { + public void setTo(long free, long used, long itemSize, int fullBuckets, + int completelyFreeBuckets, long wastedBytes, long fragmentationBytes) { this.itemSize = itemSize; this.freeCount = free; this.usedCount = used; this.totalCount = free + used; + this.fullBuckets = fullBuckets; + this.completelyFreeBuckets = completelyFreeBuckets; + this.wastedBytes = wastedBytes; + this.fragmentationBytes = fragmentationBytes; } } @@ -529,26 +641,43 @@ public final class BucketAllocator { return this.buckets; } - void logStatistics() { + void logDebugStatistics() { + if (!LOG.isDebugEnabled()) { + return; + } + IndexStatistics total = new IndexStatistics(); IndexStatistics[] stats = getIndexStatistics(total); - LOG.info("Bucket allocator statistics follow:\n"); - LOG.info(" Free bytes=" + total.freeBytes() + "+; used bytes=" + total.usedBytes() - + "; total bytes=" + total.totalBytes()); + LOG.debug("Bucket allocator statistics follow:"); + LOG.debug( + " Free bytes={}; used bytes={}; total bytes={}; wasted bytes={}; fragmentation bytes={}; " + + "completelyFreeBuckets={}", + total.freeBytes(), total.usedBytes(), total.totalBytes(), total.wastedBytes(), + total.fragmentationBytes(), total.completelyFreeBuckets()); for (IndexStatistics s : stats) { - LOG.info(" Object size " + s.itemSize() + " used=" + s.usedCount() + "; free=" - + s.freeCount() + "; total=" + s.totalCount()); + LOG.debug( + " Object size {}; used={}; free={}; total={}; wasted bytes={}; fragmentation bytes={}, " + + "full buckets={}", + s.itemSize(), s.usedCount(), s.freeCount(), s.totalCount(), s.wastedBytes(), + s.fragmentationBytes(), s.fullBuckets()); } } IndexStatistics[] getIndexStatistics(IndexStatistics grandTotal) { IndexStatistics[] stats = getIndexStatistics(); - long totalfree = 0, totalused = 0; + long totalfree = 0, totalused = 0, totalWasted = 0, totalFragmented = 0; + int fullBuckets = 0, completelyFreeBuckets = 0; + for (IndexStatistics stat : stats) { totalfree += stat.freeBytes(); totalused += stat.usedBytes(); + totalWasted += stat.wastedBytes(); + totalFragmented += stat.fragmentationBytes(); + fullBuckets += stat.fullBuckets(); + completelyFreeBuckets += stat.completelyFreeBuckets(); } - grandTotal.setTo(totalfree, totalused, 1); + grandTotal.setTo(totalfree, totalused, 1, fullBuckets, completelyFreeBuckets, totalWasted, + totalFragmented); return stats; } @@ -559,13 +688,6 @@ public final class BucketAllocator { return stats; } - public long freeBlock(long freeList[]) { - long sz = 0; - for (int i = 0; i < freeList.length; ++i) - sz += freeBlock(freeList[i]); - return sz; - } - public int getBucketIndex(long offset) { return (int) (offset / bucketCapacity); } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketCache.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketCache.java index 44a45d0fda2..8f4b1830554 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketCache.java +++ 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketCache.java
@@ -569,7 +569,7 @@ public class BucketCache implements BlockCache, HeapSize { * {@link BucketEntry#refCnt} becoming 0. */ void freeBucketEntry(BucketEntry bucketEntry) {
- bucketAllocator.freeBlock(bucketEntry.offset());
+ bucketAllocator.freeBlock(bucketEntry.offset(), bucketEntry.getLength());
realCacheSize.add(-1 * bucketEntry.getLength()); }
@@ -727,6 +727,8 @@ public class BucketCache implements BlockCache, HeapSize { + cacheStats.getEvictedCount() + ", " + "evictedPerRun=" + cacheStats.evictedPerEviction() + ", " + "allocationFailCount=" + cacheStats.getAllocationFailCount()); cacheStats.reset();
+
+ bucketAllocator.logDebugStatistics();
} public long getRealCacheSize() {
@@ -1108,8 +1110,9 @@ public class BucketCache implements BlockCache, HeapSize { checkIOErrorIsTolerated(); // Since we failed sync, free the blocks in bucket allocator for (int i = 0; i < entries.size(); ++i) {
- if (bucketEntries[i] != null) {
- bucketAllocator.freeBlock(bucketEntries[i].offset());
+ BucketEntry bucketEntry = bucketEntries[i];
+ if (bucketEntry != null) {
+ bucketAllocator.freeBlock(bucketEntry.offset(), bucketEntry.getLength());
bucketEntries[i] = null; } }
@@ -1523,7 +1526,7 @@ public class BucketCache implements BlockCache, HeapSize { succ = true; } finally { if (!succ) {
- alloc.freeBlock(offset);
+ alloc.freeBlock(offset, len);
} } realCacheSize.add(len);
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestBucketCache.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestBucketCache.java index b925607b9f8..2ddc31df61f 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestBucketCache.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestBucketCache.java
@@ -23,6 +23,7 @@ import static org.junit.Assert.assertNotEquals; import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertNull; import static org.junit.Assert.assertTrue;
+import static org.mockito.Mockito.when;
import java.io.File; import java.io.IOException;
@@ -56,6 +57,7 @@ import org.apache.hadoop.hbase.io.hfile.bucket.BucketCache.RAMQueueEntry; import org.apache.hadoop.hbase.nio.ByteBuff; import org.apache.hadoop.hbase.testclassification.IOTests; import org.apache.hadoop.hbase.testclassification.LargeTests;
+import org.apache.hadoop.hbase.util.Pair;
import org.junit.After; import org.junit.Assert; import org.junit.Before;
@@ -170,7 +172,7 @@ public class TestBucketCache { final List<Integer> BLOCKSIZES = Arrays.asList(4 * 1024, 8 * 1024, 64 * 1024, 96 * 1024); boolean full = false;
- ArrayList<Long> allocations = new ArrayList<>();
+ ArrayList<Pair<Long, Integer>> allocations = new ArrayList<>();
// Fill the allocated extents by choosing a random blocksize. Continues selecting blocks until
// the cache is completely filled. 
List<Integer> tmp = new ArrayList<>(BLOCKSIZES);
@@ -178,7 +180,7 @@ Integer blockSize = null; try { blockSize = randFrom(tmp);
- allocations.add(mAllocator.allocateBlock(blockSize));
+ allocations.add(new Pair<>(mAllocator.allocateBlock(blockSize), blockSize));
} catch (CacheFullException cfe) { tmp.remove(blockSize); if (tmp.isEmpty()) full = true; } }
@@ -189,10 +191,19 @@ BucketSizeInfo bucketSizeInfo = mAllocator.roundUpToBucketSizeInfo(blockSize); IndexStatistics indexStatistics = bucketSizeInfo.statistics(); assertEquals("unexpected freeCount for " + bucketSizeInfo, 0, indexStatistics.freeCount());
+
+ // we know the block sizes above are multiples of 1024, but default bucket sizes give an
+ // additional 1024 on top of that, so this counts towards fragmentation in our test.
+ // real life may have worse fragmentation because blocks may not be perfectly sized to block
+ // size, given encoding/compression and large rows
+ assertEquals(1024 * indexStatistics.totalCount(), indexStatistics.fragmentationBytes());
}
- for (long offset : allocations) {
- assertEquals(mAllocator.sizeOfAllocation(offset), mAllocator.freeBlock(offset));
+ mAllocator.logDebugStatistics();
+
+ for (Pair<Long, Integer> allocation : allocations) {
+ assertEquals(mAllocator.sizeOfAllocation(allocation.getFirst()),
+ mAllocator.freeBlock(allocation.getFirst(), allocation.getSecond()));
}
assertEquals(0, mAllocator.getUsedSize());
}
@@ -579,7 +590,7 @@ // initialize a mocked ioengine. IOEngine ioEngine = Mockito.mock(IOEngine.class);
- Mockito.when(ioEngine.usesSharedMemory()).thenReturn(false);
+ when(ioEngine.usesSharedMemory()).thenReturn(false);
// Mockito.doNothing().when(ioEngine).write(Mockito.any(ByteBuffer.class), Mockito.anyLong());
Mockito.doThrow(RuntimeException.class).when(ioEngine).write(Mockito.any(ByteBuffer.class), Mockito.anyLong());
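To make the new accounting concrete, below is a minimal standalone sketch; it is not part of the patch, and the class name and numbers are hypothetical, chosen only to keep the arithmetic visible. It mirrors the two metrics added above: wastedBytes is the remainder of bucketCapacity modulo a bucket's object size, counted once per full or filling bucket, while fragmentationBytes grows on every allocation whose requested length is smaller than the object size and shrinks by the same amount when the block is freed with its original length (which is why freeBlock now takes a length argument).

// Standalone illustration with hypothetical numbers; not HBase code.
public class BucketMetricsSketch {
  public static void main(String[] args) {
    long bucketCapacity = 513 * 1024; // assumed capacity of a single bucket
    long itemSize = 5 * 1024; // this bucket size only hands out 5 KiB chunks

    // wastedBytes: the unusable tail of each non-empty bucket, because
    // 5 KiB does not evenly divide 513 KiB
    long leftoverPerBucket = bucketCapacity % itemSize; // 3 KiB
    int fullBuckets = 4; // buckets with no free chunks left
    int fillingBuckets = 1; // buckets that are free but not completely free
    long wastedBytes = leftoverPerBucket * (fullBuckets + fillingBuckets); // 15 KiB

    // fragmentationBytes: slack inside allocated chunks
    long fragmentationBytes = 0;
    long requestedLength = 4 * 1024; // caller asked for 4 KiB, got a 5 KiB chunk
    fragmentationBytes += itemSize - requestedLength; // +1 KiB on allocate
    fragmentationBytes -= itemSize - requestedLength; // -1 KiB on free, back to 0

    System.out.println("wasted=" + wastedBytes + " fragmentation=" + fragmentationBytes);
  }
}

The same arithmetic explains the test assertion above: per the test's own comment, each block size is 1024 bytes smaller than the default bucket size it rounds up to, so while all allocations are live, fragmentationBytes is exactly 1024 * totalCount().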