From 650b114f062aaaf4b2d111e2b7db07f979950d5a Mon Sep 17 00:00:00 2001
From: Michael Stack
Date: Fri, 19 Jun 2009 22:00:54 +0000
Subject: [PATCH] HBASE-1387 Before release verify all object sizes using
 Ryan's instrumented JVM trick

git-svn-id: https://svn.apache.org/repos/asf/hadoop/hbase/trunk@786683 13f79535-47bb-0310-9956-ffa450edef68
---
 CHANGES.txt                                      |  2 +
 .../org/apache/hadoop/hbase/KeyValue.java        |  6 +--
 .../org/apache/hadoop/hbase/client/Put.java      | 40 +++++++++++----
 .../org/apache/hadoop/hbase/client/Scan.java     |  6 +--
 .../org/apache/hadoop/hbase/io/HeapSize.java     | 37 +++++++-------
 .../apache/hadoop/hbase/io/hfile/HFile.java      | 26 +++++++++-
 .../hadoop/hbase/io/hfile/LruBlockCache.java     | 36 ++++++-------
 .../hadoop/hbase/regionserver/HLogKey.java       |  2 +-
 .../hadoop/hbase/regionserver/LruHashMap.java    | 23 +++++----
 .../org/apache/hadoop/hbase/util/Bytes.java      | 50 ++++++++++++-------
 .../hadoop/hbase/io/hfile/TestHFile.java         | 30 +++++++++++
 11 files changed, 170 insertions(+), 88 deletions(-)

diff --git a/CHANGES.txt b/CHANGES.txt
index 4399e2f98d8..e196239296f 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -206,6 +206,8 @@ Release 0.20.0 - Unreleased
    HBASE-1549  in zookeeper.sh, use localhost instead of 127.0.0.1
    HBASE-1534  Got ZooKeeper event, state: Disconnected on HRS and then NPE
                on reinit
+   HBASE-1387  Before release verify all object sizes using Ryan's instrumented
+               JVM trick (Erik Holstad via Stack)
 
   IMPROVEMENTS
    HBASE-1089  Add count of regions on filesystem to master UI; add percentage
diff --git a/src/java/org/apache/hadoop/hbase/KeyValue.java b/src/java/org/apache/hadoop/hbase/KeyValue.java
index 74ded099a75..13b02e6b915 100644
--- a/src/java/org/apache/hadoop/hbase/KeyValue.java
+++ b/src/java/org/apache/hadoop/hbase/KeyValue.java
@@ -28,6 +28,7 @@ import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.hbase.io.HeapSize;
 import org.apache.hadoop.hbase.io.hfile.HFile;
 import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.ClassSize;
 import org.apache.hadoop.io.RawComparator;
 import org.apache.hadoop.io.Writable;
 
@@ -1783,9 +1784,8 @@ public class KeyValue implements Writable, HeapSize {
 
   // HeapSize
   public long heapSize() {
-    int dataLen = bytes.length + (bytes.length % 8);
-    return HeapSize.OBJECT + HeapSize.BYTE_ARRAY + dataLen +
-      (2 * HeapSize.INT);
+    return ClassSize.alignSize(HeapSize.OBJECT + HeapSize.REFERENCE +
+      HeapSize.BYTE_ARRAY + length + (2 * Bytes.SIZEOF_INT));
   }
 
   // Writable
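Every heapSize() in this patch leans on ClassSize.alignSize(), which is imported but not included here (no ClassSize.java appears in the diffstat). Its contract matches the private alignSize() helper the patch deletes from LruBlockCache further down, namely the smallest multiple of 8 greater than or equal to the input, since current 64-bit JVMs allocate objects in 8-byte blocks. A minimal sketch under that assumption, not the committed implementation:

    public static long alignSize(long num) {
      // Round a raw byte count up to the JVM's 8-byte allocation granularity,
      // e.g. 13 -> 16 and 16 -> 16.
      if (num % 8 == 0) return num;
      return num + (8 - (num % 8));
    }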
diff --git a/src/java/org/apache/hadoop/hbase/client/Put.java b/src/java/org/apache/hadoop/hbase/client/Put.java
index 4a9d261da55..e7666692e68 100644
--- a/src/java/org/apache/hadoop/hbase/client/Put.java
+++ b/src/java/org/apache/hadoop/hbase/client/Put.java
@@ -33,6 +33,7 @@ import org.apache.hadoop.hbase.HConstants;
 import org.apache.hadoop.hbase.KeyValue;
 import org.apache.hadoop.hbase.io.HeapSize;
 import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.ClassSize;
 
 
 /**
@@ -47,10 +48,15 @@ public class Put implements HeapSize, Writable, Comparable<Put> {
   private long timestamp = HConstants.LATEST_TIMESTAMP;
   private long lockId = -1L;
   private boolean writeToWAL = true;
+
   private Map<byte [], List<KeyValue>> familyMap =
     new TreeMap<byte [], List<KeyValue>>(Bytes.BYTES_COMPARATOR);
 
-  /** Constructor for Writable. DO NOT USE */
+  private static final long OVERHEAD = ClassSize.alignSize(HeapSize.OBJECT +
+      1 * HeapSize.REFERENCE + 1 * HeapSize.ARRAY + 2 * Bytes.SIZEOF_LONG +
+      1 * Bytes.SIZEOF_BOOLEAN + 1 * HeapSize.REFERENCE + HeapSize.TREEMAP_SIZE);
+
+  /** Constructor for Writable. DO NOT USE */
   public Put() {}
 
 /**
@@ -124,7 +130,7 @@ public class Put implements HeapSize, Writable, Comparable<Put> {
   public void add(byte [] family, byte [] qualifier, long ts, byte [] value) {
     List<KeyValue> list = familyMap.get(family);
     if(list == null) {
-      list = new ArrayList<KeyValue>();
+      list = new ArrayList<KeyValue>(0);
     }
     KeyValue kv = new KeyValue(this.row, family, qualifier, ts,
       KeyValue.Type.Put, value);
@@ -263,13 +269,29 @@ public class Put implements HeapSize, Writable, Comparable<Put> {
 
   //HeapSize
   public long heapSize() {
-    long totalSize = 0;
-    for(Map.Entry<byte [], List<KeyValue>> entry : this.familyMap.entrySet()) {
-      for(KeyValue kv : entry.getValue()) {
-        totalSize += kv.heapSize();
-      }
-    }
-    return totalSize;
+    long heapsize = OVERHEAD;
+    heapsize += ClassSize.alignSize(this.row.length);
+
+
+    for(Map.Entry<byte [], List<KeyValue>> entry : this.familyMap.entrySet()) {
+      //Adding entry overhead
+      heapsize += HeapSize.MAP_ENTRY_SIZE;
+
+      //Adding key overhead
+      heapsize += HeapSize.REFERENCE + HeapSize.ARRAY +
+        ClassSize.alignSize(entry.getKey().length);
+
+      //This part is kind of tricky since the JVM can reuse references if you
+      //store the same value, but it has a good match with SizeOf at the moment.
+      //Adding value overhead
+      heapsize += HeapSize.REFERENCE + HeapSize.ARRAYLIST_SIZE;
+      int size = entry.getValue().size();
+      heapsize += size * HeapSize.REFERENCE;
+      for(KeyValue kv : entry.getValue()) {
+        heapsize += kv.heapSize();
+      }
+    }
+    return heapsize;
   }
 
   //Writable
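With the HeapSize constants this patch introduces (OBJECT = 16, REFERENCE = 8, ARRAY = 24, TREEMAP_SIZE = 80 on the 64-bit estimates), the fixed cost above works out to OVERHEAD = alignSize(16 + 8 + 24 + 16 + 1 + 8 + 80) = alignSize(153) = 160 bytes. A usage sketch showing how the per-family terms then stack up; the comment breakdown follows the accounting above, and the exact total depends on the KeyValue's own heapSize():

    import org.apache.hadoop.hbase.client.Put;
    import org.apache.hadoop.hbase.util.Bytes;

    public class PutHeapSizeExample {
      public static void main(String[] args) {
        Put put = new Put(Bytes.toBytes("row1"));   // 4-byte row key
        put.add(Bytes.toBytes("fam"), Bytes.toBytes("q"), 1L, Bytes.toBytes("v"));

        // Expected, per the accounting above:
        //   160  OVERHEAD (fixed Put fields, aligned)
        // +   8  alignSize(4) for the row key bytes
        // +  64  MAP_ENTRY_SIZE for the TreeMap entry
        // +  40  REFERENCE + ARRAY + alignSize(3) for the family key "fam"
        // +  72  REFERENCE + ARRAYLIST_SIZE for the per-family list
        // +   8  one list slot (REFERENCE)
        // + kv.heapSize() for the KeyValue itself
        System.out.println("heapSize = " + put.heapSize());
      }
    }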
diff --git a/src/java/org/apache/hadoop/hbase/client/Scan.java b/src/java/org/apache/hadoop/hbase/client/Scan.java
index 82506e84eee..972fee1c4c6 100644
--- a/src/java/org/apache/hadoop/hbase/client/Scan.java
+++ b/src/java/org/apache/hadoop/hbase/client/Scan.java
@@ -85,7 +85,6 @@ public class Scan implements Writable {
   public Scan(byte [] startRow, Filter filter) {
     this(startRow);
     this.filter = filter;
-
   }
 
   /**
@@ -118,7 +117,6 @@ public class Scan implements Writable {
   public Scan addFamily(byte [] family) {
     familyMap.remove(family);
     familyMap.put(family, null);
-
     return this;
   }
 
@@ -204,7 +202,7 @@ public class Scan implements Writable {
    * Get all available versions.
    */
   public Scan setMaxVersions() {
-    this.maxVersions = Integer.MAX_VALUE;
+    this.maxVersions = Integer.MAX_VALUE;
     return this;
   }
 
@@ -236,7 +234,6 @@ public class Scan implements Writable {
    */
   public Scan setOldFilter(RowFilterInterface filter) {
     oldFilter = filter;
-
     return this;
   }
 
@@ -246,7 +243,6 @@ public class Scan implements Writable {
    */
   public Scan setFamilyMap(Map<byte [], NavigableSet<byte []>> familyMap) {
     this.familyMap = familyMap;
-
     return this;
   }
 
diff --git a/src/java/org/apache/hadoop/hbase/io/HeapSize.java b/src/java/org/apache/hadoop/hbase/io/HeapSize.java
index f2a1ad1c513..f2150486fa6 100644
--- a/src/java/org/apache/hadoop/hbase/io/HeapSize.java
+++ b/src/java/org/apache/hadoop/hbase/io/HeapSize.java
@@ -45,36 +45,35 @@ public interface HeapSize {
 
   /** Object overhead is minimum 2 * reference size (8 bytes on 64-bit) */
   static final int OBJECT = 2 * REFERENCE;
 
-  /**
-   * The following types are always allocated in blocks of 8 bytes (on 64bit).
-   * For example, if you have two ints in a class, it will use 8 bytes.
-   * If you have three ints in a class, it will use 16 bytes.
-   */
-  static final int SHORT = 4;
-  static final int INT = 4;
-  static final int FLOAT = 4;
-  static final int BOOLEAN = 4;
-  static final int CHAR = 4;
-  static final int BYTE = 1;
-
-  /** These types are always 8 bytes */
-  static final int DOUBLE = 8;
-  static final int LONG = 8;
-
   /** Array overhead */
   static final int ARRAY = 3 * REFERENCE;
+
+  /** Overhead for nested arrays */
   static final int MULTI_ARRAY = (4 * REFERENCE) + ARRAY;
 
   /** Byte arrays are fixed size below plus its length, 8 byte aligned */
   static final int BYTE_ARRAY = 3 * REFERENCE;
-  static final int BLOCK_SIZE_TAX = 8;
-
+
   /** Overhead for ByteBuffer */
   static final int BYTE_BUFFER = 56;
-
+
+  /** String overhead */
+  static final int STRING_SIZE = 64;
+
+  /** Overhead for ArrayList(0) */
+  static final int ARRAYLIST_SIZE = 64;
+
+  /** Overhead for TreeMap */
+  static final int TREEMAP_SIZE = 80;
+
+  /** Overhead for entry in map */
+  static final int MAP_ENTRY_SIZE = 64;
+
+
   /**
    * @return Approximate 'exclusive deep size' of implementing object. Includes
    * count of payload and hosting object sizings.
    */
   public long heapSize();
+
 }
diff --git a/src/java/org/apache/hadoop/hbase/io/hfile/HFile.java b/src/java/org/apache/hadoop/hbase/io/hfile/HFile.java
index 1a2cb5ea9a1..58b04461152 100644
--- a/src/java/org/apache/hadoop/hbase/io/hfile/HFile.java
+++ b/src/java/org/apache/hadoop/hbase/io/hfile/HFile.java
@@ -41,6 +41,7 @@ import org.apache.hadoop.hbase.io.HbaseMapWritable;
 import org.apache.hadoop.hbase.io.HeapSize;
 import org.apache.hadoop.hbase.KeyValue;
 import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.util.ClassSize;
 import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.io.IOUtils;
 import org.apache.hadoop.io.RawComparator;
@@ -1333,11 +1334,15 @@ public class HFile {
     long [] blockOffsets;
     int [] blockDataSizes;
     int size = 0;
-
+
     /* Needed when doing lookups on blocks. */
     final RawComparator<byte []> comparator;
 
+    static final int OVERHEAD = (int)ClassSize.alignSize(HeapSize.OBJECT +
+        2 * Bytes.SIZEOF_INT + 1 * HeapSize.MULTI_ARRAY + 2 * HeapSize.ARRAY +
+        4 * HeapSize.REFERENCE);
+
     /*
      * Shutdown default constructor
      */
@@ -1493,8 +1498,25 @@ public class HFile {
     }
 
     public long heapSize() {
-      return this.size;
+      long size = OVERHEAD;
+
+      //Calculating the size of blockKeys
+      if(blockKeys != null) {
+        for(byte [] bs : blockKeys) {
+          size += HeapSize.MULTI_ARRAY;
+          size += ClassSize.alignSize(bs.length);
+        }
+      }
+      if(blockOffsets != null) {
+        size += blockOffsets.length * Bytes.SIZEOF_LONG;
+      }
+      if(blockDataSizes != null) {
+        size += blockDataSizes.length * Bytes.SIZEOF_INT;
+      }
+
+      return size;
     }
+
   }
 
   /*
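Plugging in the same 64-bit estimates (OBJECT = 16, MULTI_ARRAY = 56, ARRAY = 24, REFERENCE = 8), the fixed cost of an empty BlockIndex is OVERHEAD = alignSize(16 + 2*4 + 56 + 2*24 + 4*8) = alignSize(160) = 160 bytes. Each indexed block then adds MULTI_ARRAY (56) plus alignSize(key.length) for its slot in blockKeys, SIZEOF_LONG (8) for blockOffsets, and SIZEOF_INT (4) for blockDataSizes.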
diff --git a/src/java/org/apache/hadoop/hbase/io/hfile/LruBlockCache.java b/src/java/org/apache/hadoop/hbase/io/hfile/LruBlockCache.java
index 3c01b4704fe..3d0f4e0c947 100644
--- a/src/java/org/apache/hadoop/hbase/io/hfile/LruBlockCache.java
+++ b/src/java/org/apache/hadoop/hbase/io/hfile/LruBlockCache.java
@@ -24,6 +24,8 @@ import org.apache.commons.logging.LogFactory;
 
 import org.apache.hadoop.hbase.io.HeapSize;
 import org.apache.hadoop.hbase.io.hfile.BlockCache;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.ClassSize;
 
 import java.util.ArrayList;
 import java.util.Collection;
@@ -63,10 +65,6 @@ implements HeapSize, Map<String,ByteBuffer>, BlockCache {
   /** The default load factor to use */
   public static final float DEFAULT_LOAD_FACTOR = 0.75f;
 
-  /** Memory overhead of this Object (for HeapSize) */
-  private static final int OVERHEAD = 5 * HeapSize.LONG + 2 * HeapSize.INT +
-    2 * HeapSize.FLOAT + 3 * HeapSize.REFERENCE + 1 * HeapSize.ARRAY;
-
   /** Load factor allowed (usually 75%) */
   private final float loadFactor;
   /** Number of key/vals in the map */
@@ -91,6 +89,11 @@ implements HeapSize, Map<String,ByteBuffer>, BlockCache {
   /** Number of unsuccessful (not found) get() calls */
   private long missCount = 0;
 
+  /** Memory overhead of this Object (for HeapSize) */
+  private static final int OVERHEAD = (int)ClassSize.alignSize(HeapSize.OBJECT +
+    1 * Bytes.SIZEOF_FLOAT + 2 * Bytes.SIZEOF_INT + 1 * HeapSize.ARRAY +
+    3 * HeapSize.REFERENCE + 4 * Bytes.SIZEOF_LONG);
+
   /**
    * Constructs a new, empty map with the specified initial capacity,
    * load factor, and maximum memory usage.
@@ -266,8 +269,7 @@ implements HeapSize, Map<String,ByteBuffer>, BlockCache {
    * @return hit ratio (double between 0 and 1)
    */
   public double getHitRatio() {
-    return (double)((double)hitCount/
-      ((double)(hitCount+missCount)));
+    return ((double)hitCount) / ((double)(hitCount+missCount));
   }
 
   /**
@@ -955,10 +957,6 @@ implements HeapSize, Map<String,ByteBuffer>, BlockCache {
    */
   protected static class Entry implements Map.Entry<String,ByteBuffer>, HeapSize {
 
-    /** The baseline overhead memory usage of this class */
-    static final int OVERHEAD = 1 * HeapSize.LONG + 5 * HeapSize.REFERENCE +
-      2 * HeapSize.INT;
-
     /** The key */
     protected final String key;
     /** The value */
@@ -976,6 +974,10 @@ implements HeapSize, Map<String,ByteBuffer>, BlockCache {
     /** The precomputed heap size of this entry */
     protected long heapSize;
 
+    /** The baseline overhead memory usage of this class */
+    static final int OVERHEAD = HeapSize.OBJECT + 5 * HeapSize.REFERENCE +
+      1 * Bytes.SIZEOF_INT + 1 * Bytes.SIZEOF_LONG;
+
     /**
      * Create a new entry.
      *
@@ -1137,7 +1139,8 @@ implements HeapSize, Map<String,ByteBuffer>, BlockCache {
      * @return size of String in bytes
      */
     private long heapSize(String s) {
-      return HeapSize.OBJECT + alignSize(s.length()*2);
+      return HeapSize.STRING_SIZE +
+        ClassSize.alignSize(s.length() * Bytes.SIZEOF_CHAR);
     }
 
     /**
@@ -1145,18 +1148,9 @@ implements HeapSize, Map<String,ByteBuffer>, BlockCache {
      * @return size of ByteBuffer in bytes
      */
     private long heapSize(ByteBuffer b) {
-      return HeapSize.BYTE_BUFFER + alignSize(b.capacity());
+      return HeapSize.BYTE_BUFFER + ClassSize.alignSize(b.capacity());
     }
 
-    /**
-     * Aligns a number to 8.
-     * @param num number to align to 8
-     * @return smallest number >= input that is a multiple of 8
-     */
-    private long alignSize(long num) {
-      if(num % 8 == 0) return num;
-      return (num + (8 - (num % 8)));
-    }
   }
 }
diff --git a/src/java/org/apache/hadoop/hbase/regionserver/HLogKey.java b/src/java/org/apache/hadoop/hbase/regionserver/HLogKey.java
index 99f5bf89fdb..3d30a322fed 100644
--- a/src/java/org/apache/hadoop/hbase/regionserver/HLogKey.java
+++ b/src/java/org/apache/hadoop/hbase/regionserver/HLogKey.java
@@ -43,7 +43,7 @@ public class HLogKey implements WritableComparable, HeapSize {
   // Time at which this edit was written.
   private long writeTime;
   private int HEAP_TAX = HeapSize.OBJECT + (2 * HeapSize.BYTE_ARRAY) +
-    (2 * HeapSize.LONG);
+    (2 * Bytes.SIZEOF_LONG);
 
   /** Writable Consructor -- Do not use. */
   public HLogKey() {
@@ -62,8 +66,9 @@ implements HeapSize, Map { private static final float DEFAULT_LOAD_FACTOR = 0.75f; /** Memory overhead of this Object (for HeapSize) */ - private static final int OVERHEAD = 5 * HeapSize.LONG + 2 * HeapSize.INT + - 2 * HeapSize.FLOAT + 3 * HeapSize.REFERENCE + 1 * HeapSize.ARRAY; + private static final int OVERHEAD = 5 * Bytes.SIZEOF_LONG + + 2 * Bytes.SIZEOF_INT + 2 * Bytes.SIZEOF_FLOAT + 3 * HeapSize.REFERENCE + + 1 * HeapSize.ARRAY; /** Load factor allowed (usually 75%) */ private final float loadFactor; @@ -922,8 +927,8 @@ implements HeapSize, Map { protected static class Entry implements Map.Entry, HeapSize { /** The baseline overhead memory usage of this class */ - static final int OVERHEAD = 1 * HeapSize.LONG + 5 * HeapSize.REFERENCE + - 2 * HeapSize.INT; + static final int OVERHEAD = 1 * Bytes.SIZEOF_LONG + 5 * HeapSize.REFERENCE + + 2 * Bytes.SIZEOF_INT; /** The key */ protected final K key; diff --git a/src/java/org/apache/hadoop/hbase/util/Bytes.java b/src/java/org/apache/hadoop/hbase/util/Bytes.java index b152117cda7..72c214e4a4e 100644 --- a/src/java/org/apache/hadoop/hbase/util/Bytes.java +++ b/src/java/org/apache/hadoop/hbase/util/Bytes.java @@ -41,34 +41,45 @@ import org.apache.hadoop.io.WritableUtils; public class Bytes { /** - * Size of long in bytes + * Size of boolean in bytes */ - public static final int SIZEOF_LONG = Long.SIZE/Byte.SIZE; - + public static final int SIZEOF_BOOLEAN = Byte.SIZE/Byte.SIZE; + + /** + * Size of byte in bytes + */ + public static final int SIZEOF_BYTE = SIZEOF_BOOLEAN; + + /** + * Size of char in bytes + */ + public static final int SIZEOF_CHAR = Character.SIZE/Byte.SIZE; + + /** + * Size of double in bytes + */ + public static final int SIZEOF_DOUBLE = Double.SIZE/Byte.SIZE; + + /** + * Size of float in bytes + */ + public static final int SIZEOF_FLOAT = Float.SIZE/Byte.SIZE; + /** * Size of int in bytes */ public static final int SIZEOF_INT = Integer.SIZE/Byte.SIZE; + /** + * Size of long in bytes + */ + public static final int SIZEOF_LONG = Long.SIZE/Byte.SIZE; + /** * Size of short in bytes */ public static final int SIZEOF_SHORT = Short.SIZE/Byte.SIZE; - /** - * Size of float in bytes - */ - public static final int SIZEOF_FLOAT = Float.SIZE/Byte.SIZE; - - /** - * Size of double in bytes - */ - public static final int SIZEOF_DOUBLE = Double.SIZE/Byte.SIZE; - - /** - * Size of byte in bytes - */ - public static final int SIZEOF_BYTE = 1; /** * Estimate of size cost to pay beyond payload in jvm for instance of byte []. 
diff --git a/src/java/org/apache/hadoop/hbase/util/Bytes.java b/src/java/org/apache/hadoop/hbase/util/Bytes.java
index b152117cda7..72c214e4a4e 100644
--- a/src/java/org/apache/hadoop/hbase/util/Bytes.java
+++ b/src/java/org/apache/hadoop/hbase/util/Bytes.java
@@ -41,34 +41,45 @@ import org.apache.hadoop.io.WritableUtils;
 public class Bytes {
 
   /**
-   * Size of long in bytes
+   * Size of boolean in bytes
    */
-  public static final int SIZEOF_LONG = Long.SIZE/Byte.SIZE;
-
+  public static final int SIZEOF_BOOLEAN = Byte.SIZE/Byte.SIZE;
+
+  /**
+   * Size of byte in bytes
+   */
+  public static final int SIZEOF_BYTE = SIZEOF_BOOLEAN;
+
+  /**
+   * Size of char in bytes
+   */
+  public static final int SIZEOF_CHAR = Character.SIZE/Byte.SIZE;
+
+  /**
+   * Size of double in bytes
+   */
+  public static final int SIZEOF_DOUBLE = Double.SIZE/Byte.SIZE;
+
+  /**
+   * Size of float in bytes
+   */
+  public static final int SIZEOF_FLOAT = Float.SIZE/Byte.SIZE;
+
   /**
    * Size of int in bytes
    */
   public static final int SIZEOF_INT = Integer.SIZE/Byte.SIZE;
 
+  /**
+   * Size of long in bytes
+   */
+  public static final int SIZEOF_LONG = Long.SIZE/Byte.SIZE;
+
   /**
    * Size of short in bytes
    */
   public static final int SIZEOF_SHORT = Short.SIZE/Byte.SIZE;
 
-  /**
-   * Size of float in bytes
-   */
-  public static final int SIZEOF_FLOAT = Float.SIZE/Byte.SIZE;
-
-  /**
-   * Size of double in bytes
-   */
-  public static final int SIZEOF_DOUBLE = Double.SIZE/Byte.SIZE;
-
-  /**
-   * Size of byte in bytes
-   */
-  public static final int SIZEOF_BYTE = 1;
 
   /**
    * Estimate of size cost to pay beyond payload in jvm for instance of byte [].
@@ -265,11 +276,11 @@ public class Bytes {
     return result;
   }
 
-  public static String toStringBinary(final byte []b) {
+  public static String toStringBinary(final byte [] b) {
     return toStringBinary(b, 0, b.length);
   }
 
-  public static String toStringBinary(final byte []b, int off, int len) {
+  public static String toStringBinary(final byte [] b, int off, int len) {
     String result = null;
     try {
       String first = new String(b, off, len, "ISO-8859-1");
@@ -1126,4 +1137,5 @@ public class Bytes {
     }
     return value;
   }
+
 }
diff --git a/src/test/org/apache/hadoop/hbase/io/hfile/TestHFile.java b/src/test/org/apache/hadoop/hbase/io/hfile/TestHFile.java
index a1ad2f901ad..b66bb08b879 100644
--- a/src/test/org/apache/hadoop/hbase/io/hfile/TestHFile.java
+++ b/src/test/org/apache/hadoop/hbase/io/hfile/TestHFile.java
@@ -29,9 +29,12 @@ import org.apache.hadoop.fs.FSDataInputStream;
 import org.apache.hadoop.fs.FSDataOutputStream;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hbase.HBaseTestCase;
+import org.apache.hadoop.hbase.KeyValue;
+import org.apache.hadoop.hbase.io.hfile.HFile.BlockIndex;
 import org.apache.hadoop.hbase.io.hfile.HFile.Reader;
 import org.apache.hadoop.hbase.io.hfile.HFile.Writer;
 import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.ClassSize;
 import org.apache.hadoop.io.RawComparator;
 
 /**
@@ -244,4 +247,31 @@ public class TestHFile extends HBaseTestCase {
     writer.append("1".getBytes(), "0".getBytes());
     writer.close();
   }
+
+  /**
+   * Checks if the HeapSize calculator is within reason
+   */
+  public void testHeapSizeForBlockIndex() {
+    ClassSize cs = null;
+    Class cl = null;
+    long expected = 0L;
+    long actual = 0L;
+    try {
+      cs = new ClassSize();
+    } catch(Exception e) {}
+
+    //BlockIndex
+    cl = BlockIndex.class;
+    expected = cs.estimateBase(cl, false);
+    BlockIndex bi = new BlockIndex(Bytes.BYTES_RAWCOMPARATOR);
+    actual = bi.heapSize();
+    //Since we have a byte [][] in BlockIndex and the checker only sees a
+    //byte [], we miss one MULTI_ARRAY, which is 4 * REFERENCE = 32 B
+    actual -= 32;
+    if(expected != actual) {
+      cs.estimateBase(cl, true);
+      assertEquals(expected, actual);
+    }
+  }
+
 }
\ No newline at end of file
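ClassSize itself ships separately from this patch, so the estimateBase() the test calls is not shown. As a rough illustration of what a reflection-based estimator of this kind can look like, here is a hypothetical stand-in, not the committed ClassSize (HBASE-1387's reference numbers came from Ryan's instrumented-JVM runs, which reflection can only approximate):

    import java.lang.reflect.Field;
    import java.lang.reflect.Modifier;

    /** Rough reflection-based estimate of a class's fixed heap overhead. */
    public class ClassSizeSketch {

      public long estimateBase(Class<?> cl, boolean debug) {
        int refs = 0;       // reference-typed instance fields
        int primBytes = 0;  // payload of primitive instance fields
        for (Class<?> c = cl; c != null; c = c.getSuperclass()) {
          for (Field f : c.getDeclaredFields()) {
            if (Modifier.isStatic(f.getModifiers())) {
              continue;     // statics are not part of per-instance cost
            }
            Class<?> t = f.getType();
            if (!t.isPrimitive()) {
              refs++;
            } else if (t == long.class || t == double.class) {
              primBytes += 8;
            } else if (t == int.class || t == float.class) {
              primBytes += 4;
            } else if (t == short.class || t == char.class) {
              primBytes += 2;
            } else {
              primBytes += 1; // byte, boolean
            }
          }
        }
        // Assume a 16-byte object header and 8-byte references (64-bit HotSpot),
        // then round up to the 8-byte allocation granularity.
        long size = 16 + 8L * refs + primBytes;
        long aligned = (size + 7) / 8 * 8;
        if (debug) {
          System.out.println(cl.getName() + ": refs=" + refs +
            " primBytes=" + primBytes + " estimate=" + aligned);
        }
        return aligned;
      }
    }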