From ff5dbf584e7fa3871e058addac568314a9a1ccfa Mon Sep 17 00:00:00 2001 From: Enis Soztutar Date: Wed, 15 Jun 2016 14:19:28 -0700 Subject: [PATCH] HBASE-15950 Fix memstore size estimates to be more tighter --- .../org/apache/hadoop/hbase/KeyValue.java | 14 +- .../apache/hadoop/hbase/util/ClassSize.java | 142 ++++++++++++++---- .../hadoop/hbase/regionserver/HRegion.java | 4 +- .../TestPartialResultsFromClientSide.java | 7 +- .../apache/hadoop/hbase/io/TestHeapSize.java | 35 ++++- .../hadoop/hbase/io/hfile/TestHFileBlock.java | 4 +- 6 files changed, 162 insertions(+), 44 deletions(-) diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/KeyValue.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/KeyValue.java index 2d0f618be42..d41b63979e7 100644 --- a/hbase-common/src/main/java/org/apache/hadoop/hbase/KeyValue.java +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/KeyValue.java @@ -42,7 +42,6 @@ import org.apache.hadoop.hbase.io.util.StreamUtils; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.ClassSize; import org.apache.hadoop.io.RawComparator; - import com.google.common.annotations.VisibleForTesting; /** @@ -2684,11 +2683,18 @@ public class KeyValue implements Cell, HeapSize, Cloneable, SettableSequenceId, int sum = 0; sum += ClassSize.OBJECT;// the KeyValue object itself sum += ClassSize.REFERENCE;// pointer to "bytes" - sum += ClassSize.align(ClassSize.ARRAY);// "bytes" - sum += ClassSize.align(length);// number of bytes of data in the "bytes" array sum += 2 * Bytes.SIZEOF_INT;// offset, length sum += Bytes.SIZEOF_LONG;// memstoreTS - return ClassSize.align(sum); + + /* + * Deep object overhead for this KV consists of two parts. The first part is the KV object + * itself, while the second part is the backing byte[]. We will count the array overhead + * from the byte[] only if this is the first KV in there. + */ + return ClassSize.align(sum) + + (offset == 0 + ? 
ClassSize.sizeOf(bytes, length) // count both length and object overhead + : length); // only count the number of bytes } /** diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/ClassSize.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/ClassSize.java index c429924ce70..c0b3128eb0e 100644 --- a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/ClassSize.java +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/ClassSize.java @@ -39,8 +39,6 @@ import org.apache.hadoop.hbase.classification.InterfaceAudience; public class ClassSize { private static final Log LOG = LogFactory.getLog(ClassSize.class); - private static int nrOfRefsPerObj = 2; - /** Array overhead */ public static final int ARRAY; @@ -124,35 +122,125 @@ public class ClassSize { JDK7 = major == 1 && minor == 7; } + /** + * MemoryLayout abstracts details about the JVM object layout. Default implementation is used in + * case Unsafe is not available. + */ + private static class MemoryLayout { + int headerSize() { + return 2 * oopSize(); + } + + int arrayHeaderSize() { + return (int) align(3 * oopSize()); + } + + /** + * Return the size of an "ordinary object pointer". Either 4 or 8, depending on 32/64 bit, + * and CompressedOops + */ + int oopSize() { + return is32BitJVM() ? 4 : 8; + } + + /** + * Aligns a number to 8. + * @param num number to align to 8 + * @return smallest number >= input that is a multiple of 8 + */ + public long align(long num) { + //The 7 comes from the alignSize being 8, which is the number of bytes + //stored and sent together + return ((num + 7) >> 3) << 3; + } + + long sizeOf(byte[] b, int len) { + return align(arrayHeaderSize() + len); + } + } + + /** + * UnsafeLayout uses Unsafe to guesstimate the object-layout related parameters like object header + * sizes and oop sizes + * See HBASE-15950. 
+ */ + private static class UnsafeLayout extends MemoryLayout { + @SuppressWarnings("unused") + private static final class HeaderSize { + private byte a; + } + + public UnsafeLayout() { + } + + @Override + int headerSize() { + try { + return (int) UnsafeAccess.theUnsafe.objectFieldOffset( + HeaderSize.class.getDeclaredField("a")); + } catch (NoSuchFieldException | SecurityException e) { + LOG.error(e); + } + return super.headerSize(); + } + + @Override + int arrayHeaderSize() { + return UnsafeAccess.theUnsafe.arrayBaseOffset(byte[].class); + } + + @Override + @SuppressWarnings("static-access") + int oopSize() { + // Unsafe.addressSize() returns 8, even with CompressedOops. This is how many bytes each + // element is allocated in an Object[]. + return UnsafeAccess.theUnsafe.ARRAY_OBJECT_INDEX_SCALE; + } + + @Override + @SuppressWarnings("static-access") + long sizeOf(byte[] b, int len) { + return align(arrayHeaderSize() + len * UnsafeAccess.theUnsafe.ARRAY_BYTE_INDEX_SCALE); + } + } + + private static MemoryLayout getMemoryLayout() { + // Have a safeguard in case Unsafe estimate is wrong. This is static context, there is + // no configuration, so we look at System property. + String enabled = System.getProperty("hbase.memorylayout.use.unsafe"); + if (UnsafeAvailChecker.isAvailable() && (enabled == null || Boolean.parseBoolean(enabled))) { + LOG.debug("Using Unsafe to estimate memory layout"); + return new UnsafeLayout(); + } + LOG.debug("Not using Unsafe to estimate memory layout"); + return new MemoryLayout(); + } + + private static final MemoryLayout memoryLayout = getMemoryLayout(); + /** * Method for reading the arc settings and setting overheads according * to 32-bit or 64-bit architecture. 
*/ static { - //Default value is set to 8, covering the case when arcModel is unknown - if (is32BitJVM()) { - REFERENCE = 4; - } else { - REFERENCE = 8; - } + REFERENCE = memoryLayout.oopSize(); - OBJECT = 2 * REFERENCE; + OBJECT = memoryLayout.headerSize(); - ARRAY = align(3 * REFERENCE); + ARRAY = memoryLayout.arrayHeaderSize(); - ARRAYLIST = align(OBJECT + align(REFERENCE) + align(ARRAY) + - (2 * Bytes.SIZEOF_INT)); + ARRAYLIST = align(OBJECT + REFERENCE + (2 * Bytes.SIZEOF_INT)) + align(ARRAY); //noinspection PointlessArithmeticExpression - BYTE_BUFFER = align(OBJECT + align(REFERENCE) + align(ARRAY) + + BYTE_BUFFER = align(OBJECT + REFERENCE + (5 * Bytes.SIZEOF_INT) + - (3 * Bytes.SIZEOF_BOOLEAN) + Bytes.SIZEOF_LONG); + (3 * Bytes.SIZEOF_BOOLEAN) + Bytes.SIZEOF_LONG) + align(ARRAY); INTEGER = align(OBJECT + Bytes.SIZEOF_INT); MAP_ENTRY = align(OBJECT + 5 * REFERENCE + Bytes.SIZEOF_BOOLEAN); - TREEMAP = align(OBJECT + (2 * Bytes.SIZEOF_INT) + align(7 * REFERENCE)); + TREEMAP = align(OBJECT + (2 * Bytes.SIZEOF_INT) + 7 * REFERENCE); // STRING is different size in jdk6 and jdk7. Just use what we estimate as size rather than // have a conditional on whether jdk7. 
@@ -172,9 +260,9 @@ public class ClassSize { // The size changes from jdk7 to jdk8, estimate the size rather than use a conditional CONCURRENT_SKIPLISTMAP = (int) estimateBase(ConcurrentSkipListMap.class, false); - CONCURRENT_SKIPLISTMAP_ENTRY = align( + CONCURRENT_SKIPLISTMAP_ENTRY = align(OBJECT + (3 * REFERENCE)) + /* one node per entry */ - align((OBJECT + (3 * REFERENCE))/2)); /* one index per two entries */ + align((OBJECT + (3 * REFERENCE))/2); /* one index per two entries */ REENTRANT_LOCK = align(OBJECT + (3 * REFERENCE)); @@ -214,8 +302,7 @@ public class ClassSize { private static int [] getSizeCoefficients(Class cl, boolean debug) { int primitives = 0; int arrays = 0; - //The number of references that a new object takes - int references = nrOfRefsPerObj; + int references = 0; int index = 0; for ( ; null != cl; cl = cl.getSuperclass()) { @@ -271,15 +358,14 @@ public class ClassSize { * @return the size estimate, in bytes */ private static long estimateBaseFromCoefficients(int [] coeff, boolean debug) { - long prealign_size = coeff[0] + align(coeff[1] * ARRAY) + coeff[2] * REFERENCE; + long prealign_size = OBJECT + coeff[0] + coeff[2] * REFERENCE; // Round up to a multiple of 8 - long size = align(prealign_size); - if(debug) { + long size = align(prealign_size) + align(coeff[1] * ARRAY); + if (debug) { if (LOG.isDebugEnabled()) { LOG.debug("Primitives=" + coeff[0] + ", arrays=" + coeff[1] + - ", references(includes " + nrOfRefsPerObj + - " for object overhead)=" + coeff[2] + ", refSize " + REFERENCE + + ", references=" + coeff[2] + ", refSize " + REFERENCE + ", size=" + size + ", prealign_size=" + prealign_size); } } @@ -317,9 +403,7 @@ public class ClassSize { * @return smallest number >= input that is a multiple of 8 */ public static long align(long num) { - //The 7 comes from that the alignSize is 8 which is the number of bytes - //stored and sent together - return ((num + 7) >> 3) << 3; + return memoryLayout.align(num); } /** @@ -331,5 +415,9 @@ public 
class ClassSize { return model != null && model.equals("32"); } + public static long sizeOf(byte[] b, int len) { + return memoryLayout.sizeOf(b, len); + } + } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java index 8eb16a6c2f5..6bf4577f244 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java @@ -5334,8 +5334,8 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi @Override public void releaseRowLocks(List rowLocks) { if (rowLocks != null) { - for (RowLock rowLock : rowLocks) { - rowLock.release(); + for (int i = 0; i < rowLocks.size(); i++) { + rowLocks.get(i).release(); } rowLocks.clear(); } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/TestPartialResultsFromClientSide.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/TestPartialResultsFromClientSide.java index 47f36e47055..32ca13d9f95 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/TestPartialResultsFromClientSide.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/TestPartialResultsFromClientSide.java @@ -343,7 +343,7 @@ public class TestPartialResultsFromClientSide { // 2. It is the first result we have seen for that row and thus may have been fetched as // the last group of cells that fit inside the maxResultSize assertTrue( - "Result's cell count differed from expected number. result: " + result, + "Result's cell count differed from expected number. 
result: " + result.rawCells().length, result.rawCells().length == expectedNumberOfCells || !result.isPartial() || !Bytes.equals(prevRow, result.getRow())); prevRow = result.getRow(); @@ -362,7 +362,7 @@ public class TestPartialResultsFromClientSide { if (CELL_HEAP_SIZE == -1) { // Do a partial scan that will return a single result with a single cell Scan scan = new Scan(); - scan.setMaxResultSize(1); + scan.setMaxResultSize(2); scan.setAllowPartialResults(true); ResultScanner scanner = TABLE.getScanner(scan); @@ -372,6 +372,9 @@ public class TestPartialResultsFromClientSide { assertTrue(result.rawCells() != null); assertTrue(result.rawCells().length == 1); + // Estimate the cell heap size. One difference is that on server side, the KV Heap size is + // estimated differently in case the cell is backed by MSLAB byte[] (no overhead for + // backing array). Thus the calculation below is a bit brittle. CELL_HEAP_SIZE = CellUtil.estimatedHeapSizeOf(result.rawCells()[0]); if (LOG.isInfoEnabled()) LOG.info("Cell heap size: " + CELL_HEAP_SIZE); scanner.close(); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/TestHeapSize.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/TestHeapSize.java index 07ca2b911fc..27dd96cf600 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/TestHeapSize.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/TestHeapSize.java @@ -55,6 +55,7 @@ import org.junit.Test; import org.junit.experimental.categories.Category; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; /** * Testing the sizing that HeapSize offers and compares to the size given by @@ -66,17 +67,17 @@ public class TestHeapSize { // List of classes implementing HeapSize // BatchOperation, BatchUpdate, BlockIndex, Entry, Entry, HStoreKey // KeyValue, LruBlockCache, LruHashMap, Put, WALKey - + @BeforeClass public static void beforeClass() throws Exception { // Print detail on jvm so we know what 
is different should below test fail. RuntimeMXBean b = ManagementFactory.getRuntimeMXBean(); - LOG.info("name=" + b.getName()); - LOG.info("specname=" + b.getSpecName()); - LOG.info("specvendor=" + b.getSpecVendor()); + LOG.info("name=" + b.getName()); + LOG.info("specname=" + b.getSpecName()); + LOG.info("specvendor=" + b.getSpecVendor()); LOG.info("vmname=" + b.getVmName()); - LOG.info("vmversion=" + b.getVmVersion()); - LOG.info("vmvendor=" + b.getVmVendor()); + LOG.info("vmversion=" + b.getVmVersion()); + LOG.info("vmvendor=" + b.getVmVendor()); Map p = b.getSystemProperties(); LOG.info("properties=" + p); } @@ -130,7 +131,7 @@ public class TestHeapSize { // Object cl = Object.class; expected = ClassSize.estimateBase(cl, false); - actual = ClassSize.OBJECT; + actual = ClassSize.align(ClassSize.OBJECT); if(expected != actual) { ClassSize.estimateBase(cl, true); assertEquals(expected, actual); @@ -391,5 +392,25 @@ public class TestHeapSize { } } + @Test + public void testReferenceSize() { + LOG.info("ClassSize.REFERENCE is " + ClassSize.REFERENCE); + // oop should be either 4 or 8 + assertTrue(ClassSize.REFERENCE == 4 || ClassSize.REFERENCE == 8); + } + + @Test + public void testObjectSize() throws IOException { + LOG.info("header:" + ClassSize.OBJECT); + LOG.info("array header:" + ClassSize.ARRAY); + + if (ClassSize.is32BitJVM()) { + assertEquals(ClassSize.OBJECT, 8); + } else { + assertTrue(ClassSize.OBJECT == 12 || ClassSize.OBJECT == 16); // depending on CompressedOops + } + assertEquals(ClassSize.OBJECT + 4, ClassSize.ARRAY); + } + } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlock.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlock.java index 2e3d4a0ab6c..80f99893b62 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlock.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlock.java @@ -815,9 +815,9 @@ public class TestHFileBlock 
{ protected void testBlockHeapSizeInternals() { if (ClassSize.is32BitJVM()) { - assertTrue(HFileBlock.BYTE_BUFFER_HEAP_SIZE == 64); + assertEquals(HFileBlock.BYTE_BUFFER_HEAP_SIZE, 64); } else { - assertTrue(HFileBlock.BYTE_BUFFER_HEAP_SIZE == 80); + assertEquals(HFileBlock.BYTE_BUFFER_HEAP_SIZE, 64); } for (int size : new int[] { 100, 256, 12345 }) {