HBASE-15950 Fix memstore size estimates to be tighter

This commit is contained in:
Enis Soztutar 2016-06-15 14:19:28 -07:00
parent b64d5e5f4b
commit ff5dbf584e
6 changed files with 162 additions and 44 deletions

View File

@ -42,7 +42,6 @@ import org.apache.hadoop.hbase.io.util.StreamUtils;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.ClassSize;
import org.apache.hadoop.io.RawComparator;
import com.google.common.annotations.VisibleForTesting;
/**
@ -2684,11 +2683,18 @@ public class KeyValue implements Cell, HeapSize, Cloneable, SettableSequenceId,
int sum = 0;
sum += ClassSize.OBJECT;// the KeyValue object itself
sum += ClassSize.REFERENCE;// pointer to "bytes"
sum += ClassSize.align(ClassSize.ARRAY);// "bytes"
sum += ClassSize.align(length);// number of bytes of data in the "bytes" array
sum += 2 * Bytes.SIZEOF_INT;// offset, length
sum += Bytes.SIZEOF_LONG;// memstoreTS
return ClassSize.align(sum);
/*
* Deep object overhead for this KV consists of two parts. The first part is the KV object
* itself, while the second part is the backing byte[]. We count the array overhead of the
* byte[] only if this is the first KV in there.
*/
return ClassSize.align(sum) +
(offset == 0
? ClassSize.sizeOf(bytes, length) // count both length and object overhead
: length); // only count the number of bytes
}
/**

View File

@ -39,8 +39,6 @@ import org.apache.hadoop.hbase.classification.InterfaceAudience;
public class ClassSize {
private static final Log LOG = LogFactory.getLog(ClassSize.class);
private static int nrOfRefsPerObj = 2;
/** Array overhead */
public static final int ARRAY;
@ -124,35 +122,125 @@ public class ClassSize {
JDK7 = major == 1 && minor == 7;
}
/**
 * MemoryLayout abstracts details about the JVM object layout. Default implementation is used in
 * case Unsafe is not available.
 */
private static class MemoryLayout {
  /**
   * @return the estimated object header size in bytes: two ordinary object pointers
   */
  int headerSize() {
    return 2 * oopSize();
  }

  /**
   * @return the estimated array header size in bytes: three ordinary object pointers, rounded
   *         up to a multiple of 8
   */
  int arrayHeaderSize() {
    return (int) align(3 * oopSize());
  }

  /**
   * Return the size of an "ordinary object pointer". This default implementation only
   * distinguishes 32-bit from 64-bit JVMs.
   * @return 4 on a 32-bit JVM, 8 otherwise
   */
  int oopSize() {
    return is32BitJVM() ? 4 : 8;
  }

  /**
   * Aligns a number to 8.
   * @param num number to align to 8
   * @return smallest number >= input that is a multiple of 8
   */
  public long align(long num) {
    // Adding 7 and then clearing the three low-order bits rounds up to the next multiple of 8.
    return ((num + 7) >> 3) << 3;
  }

  /**
   * Estimates the size of a byte array as its header plus {@code len} bytes, aligned to 8.
   * @param b the array being sized; not consulted by this implementation
   * @param len number of bytes to account for
   * @return the aligned size estimate
   */
  long sizeOf(byte[] b, int len) {
    // Widen before adding so that a len close to Integer.MAX_VALUE cannot overflow int.
    return align((long) arrayHeaderSize() + len);
  }
}
/**
* UnsafeLayout uses Unsafe to guesstimate the object-layout related parameters like object header
* sizes and oop sizes
* See HBASE-15950.
*/
private static class UnsafeLayout extends MemoryLayout {
@SuppressWarnings("unused")
private static final class HeaderSize {
private byte a;
}
public UnsafeLayout() {
}
@Override
int headerSize() {
try {
return (int) UnsafeAccess.theUnsafe.objectFieldOffset(
HeaderSize.class.getDeclaredField("a"));
} catch (NoSuchFieldException | SecurityException e) {
LOG.error(e);
}
return super.headerSize();
}
@Override
int arrayHeaderSize() {
return UnsafeAccess.theUnsafe.arrayBaseOffset(byte[].class);
}
@Override
@SuppressWarnings("static-access")
int oopSize() {
// Unsafe.addressSize() returns 8, even with CompressedOops. This is how many bytes each
// element is allocated in an Object[].
return UnsafeAccess.theUnsafe.ARRAY_OBJECT_INDEX_SCALE;
}
@Override
@SuppressWarnings("static-access")
long sizeOf(byte[] b, int len) {
return align(arrayHeaderSize() + len * UnsafeAccess.theUnsafe.ARRAY_BYTE_INDEX_SCALE);
}
}
/**
 * Picks the memory layout estimator. The Unsafe-based layout is chosen when Unsafe is available
 * and the system property "hbase.memorylayout.use.unsafe" is either unset or parses as true;
 * otherwise the default layout is used.
 * @return the layout implementation to use
 */
private static MemoryLayout getMemoryLayout() {
  // Safeguard in case the Unsafe estimate is wrong. This runs in a static context where no
  // configuration is available, so the switch is a System property.
  final String unsafeFlag = System.getProperty("hbase.memorylayout.use.unsafe");
  final boolean optedOut = unsafeFlag != null && !Boolean.parseBoolean(unsafeFlag);
  if (!UnsafeAvailChecker.isAvailable() || optedOut) {
    LOG.debug("Not using Unsafe to estimate memory layout");
    return new MemoryLayout();
  }
  LOG.debug("Using Unsafe to estimate memory layout");
  return new UnsafeLayout();
}
private static final MemoryLayout memoryLayout = getMemoryLayout();
/**
* Method for reading the arc settings and setting overheads according
* to 32-bit or 64-bit architecture.
*/
static {
//Default value is set to 8, covering the case when arcModel is unknown
if (is32BitJVM()) {
REFERENCE = 4;
} else {
REFERENCE = 8;
}
REFERENCE = memoryLayout.oopSize();
OBJECT = 2 * REFERENCE;
OBJECT = memoryLayout.headerSize();
ARRAY = align(3 * REFERENCE);
ARRAY = memoryLayout.arrayHeaderSize();
ARRAYLIST = align(OBJECT + align(REFERENCE) + align(ARRAY) +
(2 * Bytes.SIZEOF_INT));
ARRAYLIST = align(OBJECT + REFERENCE + (2 * Bytes.SIZEOF_INT)) + align(ARRAY);
//noinspection PointlessArithmeticExpression
BYTE_BUFFER = align(OBJECT + align(REFERENCE) + align(ARRAY) +
BYTE_BUFFER = align(OBJECT + REFERENCE +
(5 * Bytes.SIZEOF_INT) +
(3 * Bytes.SIZEOF_BOOLEAN) + Bytes.SIZEOF_LONG);
(3 * Bytes.SIZEOF_BOOLEAN) + Bytes.SIZEOF_LONG) + align(ARRAY);
INTEGER = align(OBJECT + Bytes.SIZEOF_INT);
MAP_ENTRY = align(OBJECT + 5 * REFERENCE + Bytes.SIZEOF_BOOLEAN);
TREEMAP = align(OBJECT + (2 * Bytes.SIZEOF_INT) + align(7 * REFERENCE));
TREEMAP = align(OBJECT + (2 * Bytes.SIZEOF_INT) + 7 * REFERENCE);
// STRING is different size in jdk6 and jdk7. Just use what we estimate as size rather than
// have a conditional on whether jdk7.
@ -172,9 +260,9 @@ public class ClassSize {
// The size changes from jdk7 to jdk8, estimate the size rather than use a conditional
CONCURRENT_SKIPLISTMAP = (int) estimateBase(ConcurrentSkipListMap.class, false);
CONCURRENT_SKIPLISTMAP_ENTRY = align(
CONCURRENT_SKIPLISTMAP_ENTRY =
align(OBJECT + (3 * REFERENCE)) + /* one node per entry */
align((OBJECT + (3 * REFERENCE))/2)); /* one index per two entries */
align((OBJECT + (3 * REFERENCE))/2); /* one index per two entries */
REENTRANT_LOCK = align(OBJECT + (3 * REFERENCE));
@ -214,8 +302,7 @@ public class ClassSize {
private static int [] getSizeCoefficients(Class cl, boolean debug) {
int primitives = 0;
int arrays = 0;
//The number of references that a new object takes
int references = nrOfRefsPerObj;
int references = 0;
int index = 0;
for ( ; null != cl; cl = cl.getSuperclass()) {
@ -271,15 +358,14 @@ public class ClassSize {
* @return the size estimate, in bytes
*/
private static long estimateBaseFromCoefficients(int [] coeff, boolean debug) {
long prealign_size = coeff[0] + align(coeff[1] * ARRAY) + coeff[2] * REFERENCE;
long prealign_size = OBJECT + coeff[0] + coeff[2] * REFERENCE;
// Round up to a multiple of 8
long size = align(prealign_size);
if(debug) {
long size = align(prealign_size) + align(coeff[1] * ARRAY);
if (debug) {
if (LOG.isDebugEnabled()) {
LOG.debug("Primitives=" + coeff[0] + ", arrays=" + coeff[1] +
", references(includes " + nrOfRefsPerObj +
" for object overhead)=" + coeff[2] + ", refSize " + REFERENCE +
", references=" + coeff[2] + ", refSize " + REFERENCE +
", size=" + size + ", prealign_size=" + prealign_size);
}
}
@ -317,9 +403,7 @@ public class ClassSize {
* @return smallest number &gt;= input that is a multiple of 8
*/
public static long align(long num) {
// Adding 7 and then shifting right and left by 3 rounds num up to the next multiple of 8.
return ((num + 7) >> 3) << 3;
// Hands the rounding over to the selected memory layout.
return memoryLayout.align(num);
}
/**
@ -331,5 +415,9 @@ public class ClassSize {
return model != null && model.equals("32");
}
/**
 * Estimates the size of a byte array by delegating to the selected memory layout.
 * @param b the array being sized
 * @param len number of bytes to account for
 * @return the size estimate produced by the memory layout
 */
public static long sizeOf(byte[] b, int len) {
  final long estimate = memoryLayout.sizeOf(b, len);
  return estimate;
}
}

View File

@ -5334,8 +5334,8 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi
@Override
public void releaseRowLocks(List<RowLock> rowLocks) {
if (rowLocks != null) {
for (RowLock rowLock : rowLocks) {
rowLock.release();
for (int i = 0; i < rowLocks.size(); i++) {
rowLocks.get(i).release();
}
rowLocks.clear();
}

View File

@ -343,7 +343,7 @@ public class TestPartialResultsFromClientSide {
// 2. It is the first result we have seen for that row and thus may have been fetched as
// the last group of cells that fit inside the maxResultSize
assertTrue(
"Result's cell count differed from expected number. result: " + result,
"Result's cell count differed from expected number. result: " + result.rawCells().length,
result.rawCells().length == expectedNumberOfCells || !result.isPartial()
|| !Bytes.equals(prevRow, result.getRow()));
prevRow = result.getRow();
@ -362,7 +362,7 @@ public class TestPartialResultsFromClientSide {
if (CELL_HEAP_SIZE == -1) {
// Do a partial scan that will return a single result with a single cell
Scan scan = new Scan();
scan.setMaxResultSize(1);
scan.setMaxResultSize(2);
scan.setAllowPartialResults(true);
ResultScanner scanner = TABLE.getScanner(scan);
@ -372,6 +372,9 @@ public class TestPartialResultsFromClientSide {
assertTrue(result.rawCells() != null);
assertTrue(result.rawCells().length == 1);
// Estimate the cell heap size. One difference is that on the server side, the KV heap size is
// estimated differently when the cell is backed by an MSLAB byte[] (no overhead for the
// backing array). Thus the calculation below is a bit brittle.
CELL_HEAP_SIZE = CellUtil.estimatedHeapSizeOf(result.rawCells()[0]);
if (LOG.isInfoEnabled()) LOG.info("Cell heap size: " + CELL_HEAP_SIZE);
scanner.close();

View File

@ -55,6 +55,7 @@ import org.junit.Test;
import org.junit.experimental.categories.Category;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
/**
* Testing the sizing that HeapSize offers and compares to the size given by
@ -66,17 +67,17 @@ public class TestHeapSize {
// List of classes implementing HeapSize
// BatchOperation, BatchUpdate, BlockIndex, Entry, Entry<K,V>, HStoreKey
// KeyValue, LruBlockCache, LruHashMap<K,V>, Put, WALKey
/**
 * Logs details of the running JVM (name, spec name/vendor, VM name/version/vendor and the
 * system properties) once before the tests in this class run.
 */
@BeforeClass
public static void beforeClass() throws Exception {
// Print detail on jvm so we know what is different should below test fail.
RuntimeMXBean b = ManagementFactory.getRuntimeMXBean();
LOG.info("name=" + b.getName());
LOG.info("specname=" + b.getSpecName());
LOG.info("specvendor=" + b.getSpecVendor());
LOG.info("name=" + b.getName());
LOG.info("specname=" + b.getSpecName());
LOG.info("specvendor=" + b.getSpecVendor());
LOG.info("vmname=" + b.getVmName());
LOG.info("vmversion=" + b.getVmVersion());
LOG.info("vmvendor=" + b.getVmVendor());
LOG.info("vmversion=" + b.getVmVersion());
LOG.info("vmvendor=" + b.getVmVendor());
// Dump every system property as well.
Map<String, String> p = b.getSystemProperties();
LOG.info("properties=" + p);
}
@ -130,7 +131,7 @@ public class TestHeapSize {
// Object
cl = Object.class;
expected = ClassSize.estimateBase(cl, false);
actual = ClassSize.OBJECT;
actual = ClassSize.align(ClassSize.OBJECT);
if(expected != actual) {
ClassSize.estimateBase(cl, true);
assertEquals(expected, actual);
@ -391,5 +392,25 @@ public class TestHeapSize {
}
}
/** Checks that the reference size reported by ClassSize is either 4 or 8 bytes. */
@Test
public void testReferenceSize() {
  final int referenceSize = ClassSize.REFERENCE;
  LOG.info("ClassSize.REFERENCE is " + referenceSize);
  // An oop is expected to take either 4 or 8 bytes.
  final boolean plausible = referenceSize == 4 || referenceSize == 8;
  assertTrue(plausible);
}
/**
 * Checks the object header size (8 on a 32-bit JVM, otherwise 12 or 16) and that the array
 * header is exactly 4 bytes larger than the object header.
 */
@Test
public void testObjectSize() throws IOException {
  LOG.info("header:" + ClassSize.OBJECT);
  LOG.info("array header:" + ClassSize.ARRAY);

  if (ClassSize.is32BitJVM()) {
    // Expected value goes first so a failure reports expected/actual the right way round.
    assertEquals(8, ClassSize.OBJECT);
  } else {
    // 12 or 16, depending on CompressedOops.
    assertTrue("Unexpected object header size: " + ClassSize.OBJECT,
      ClassSize.OBJECT == 12 || ClassSize.OBJECT == 16);
  }
  assertEquals(ClassSize.OBJECT + 4, ClassSize.ARRAY);
}
}

View File

@ -815,9 +815,9 @@ public class TestHFileBlock {
protected void testBlockHeapSizeInternals() {
if (ClassSize.is32BitJVM()) {
assertTrue(HFileBlock.BYTE_BUFFER_HEAP_SIZE == 64);
assertEquals(HFileBlock.BYTE_BUFFER_HEAP_SIZE, 64);
} else {
assertTrue(HFileBlock.BYTE_BUFFER_HEAP_SIZE == 80);
assertEquals(HFileBlock.BYTE_BUFFER_HEAP_SIZE, 64);
}
for (int size : new int[] { 100, 256, 12345 }) {