diff --git a/lucene/core/src/java/org/apache/lucene/util/Constants.java b/lucene/core/src/java/org/apache/lucene/util/Constants.java
index 3ee60defc3f..d18fedaec3b 100644
--- a/lucene/core/src/java/org/apache/lucene/util/Constants.java
+++ b/lucene/core/src/java/org/apache/lucene/util/Constants.java
@@ -27,6 +27,11 @@ import org.apache.lucene.LucenePackage;
 public final class Constants {
   private Constants() {}  // can't construct
 
+  /** JVM vendor info. */
+  public static final String JVM_VENDOR = System.getProperty("java.vm.vendor");
+  public static final String JVM_VERSION = System.getProperty("java.vm.version");
+  public static final String JVM_NAME = System.getProperty("java.vm.name");
+
   /** The value of System.getProperty("java.version"). **/
   public static final String JAVA_VERSION = System.getProperty("java.version");
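The three constants added here simply cache the standard `java.vm.*` system properties; further down, RamUsageEstimator concatenates them into its new JVM_INFO_STRING. A trivial standalone sketch (not part of the patch) to see what they resolve to on a given VM:

```java
public class PrintJvmProps {
  public static void main(String[] args) {
    // The same properties Constants now caches at class-load time.
    System.out.println("java.vm.vendor  = " + System.getProperty("java.vm.vendor"));
    System.out.println("java.vm.version = " + System.getProperty("java.vm.version"));
    System.out.println("java.vm.name    = " + System.getProperty("java.vm.name"));
  }
}
```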
diff --git a/lucene/core/src/java/org/apache/lucene/util/RamUsageEstimator.java b/lucene/core/src/java/org/apache/lucene/util/RamUsageEstimator.java
index 30fddc58739..377db0f85b2 100644
--- a/lucene/core/src/java/org/apache/lucene/util/RamUsageEstimator.java
+++ b/lucene/core/src/java/org/apache/lucene/util/RamUsageEstimator.java
@@ -24,14 +24,50 @@ import java.text.DecimalFormatSymbols;
 import java.util.*;
 
 /**
- * Estimates the size of Java objects using a simple memory model
- * for primitive size information.
+ * Estimates the size (memory representation) of Java objects.
+ *
+ * @see #sizeOf(Object)
+ * @see #shallowSizeOf(Object)
+ * @see #shallowSizeOfInstance(Class)
  *
  * @lucene.internal
  */
 public final class RamUsageEstimator {
+  /**
+   * JVM diagnostic features.
+   */
+  public static enum JvmFeature {
+    OBJECT_REFERENCE_SIZE("Object reference size estimated using array index scale."),
+    ARRAY_HEADER_SIZE("Array header size estimated using array based offset."),
+    FIELD_OFFSETS("Shallow instance size based on field offsets."),
+    OBJECT_ALIGNMENT("Object alignment retrieved from HotSpotDiagnostic MX bean.");
+
+    public final String description;
+
+    private JvmFeature(String description) {
+      this.description = description;
+    }
+
+    @Override
+    public String toString() {
+      return super.name() + " (" + description + ")";
+    }
+  }
+
+  /** JVM info string for debugging and reports. */
+  public final static String JVM_INFO_STRING;
+
+  /** One kilobyte: 1024 bytes. */
+  public static final long ONE_KB = 1024;
 
-  private RamUsageEstimator() {} // no instance
+  /** One megabyte, in bytes. */
+  public static final long ONE_MB = ONE_KB * ONE_KB;
+
+  /** One gigabyte, in bytes. */
+  public static final long ONE_GB = ONE_KB * ONE_MB;
+
+  /** No instantiation. */
+  private RamUsageEstimator() {}
 
   public final static int NUM_BYTES_BOOLEAN = 1;
   public final static int NUM_BYTES_BYTE = 1;
@@ -42,9 +78,19 @@ public final class RamUsageEstimator {
   public final static int NUM_BYTES_LONG = 8;
   public final static int NUM_BYTES_DOUBLE = 8;
 
+  /**
+   * Number of bytes this JVM uses to represent an object reference.
+   */
   public final static int NUM_BYTES_OBJECT_REF;
-
+
+  /**
+   * Number of bytes to represent an object header (no fields, no alignments).
+   */
   public final static int NUM_BYTES_OBJECT_HEADER;
+
+  /**
+   * Number of bytes to represent an array header (no content, but with alignments).
+   */
   public final static int NUM_BYTES_ARRAY_HEADER;
 
   /**
@@ -69,9 +115,20 @@ public final class RamUsageEstimator {
     primitiveSizes.put(long.class, Integer.valueOf(NUM_BYTES_LONG));
   }
 
+  /**
+   * A handle to sun.misc.Unsafe.
+   */
   private final static Object theUnsafe;
+
+  /**
+   * A handle to sun.misc.Unsafe#objectFieldOffset(Field).
+   */
   private final static Method objectFieldOffsetMethod;
-  private final static boolean useUnsafe, isSupportedJVM;
+
+  /**
+   * All the supported "internal" JVM features detected at clinit.
+   */
+  private final static EnumSet<JvmFeature> supportedFeatures;
 
   /**
    * Initialize constants and try to collect information about the JVM internals.
@@ -85,80 +142,71 @@ public final class RamUsageEstimator {
     // so on 64 bit JVMs it'll be align(16 + 4, @8) = 24.
     int arrayHeader = Constants.JRE_IS_64BIT ? 24 : 12;
 
-    Object unsafe = null;
-    Method objectFieldOffsetM = null;
-    boolean supportedJvm = true;
+    supportedFeatures = EnumSet.noneOf(JvmFeature.class);
+
+    Class<?> unsafeClass = null;
+    Object tempTheUnsafe = null;
     try {
-      final Class<?> unsafeClass = Class.forName("sun.misc.Unsafe");
+      unsafeClass = Class.forName("sun.misc.Unsafe");
       final Field unsafeField = unsafeClass.getDeclaredField("theUnsafe");
       unsafeField.setAccessible(true);
-      unsafe = unsafeField.get(null);
-
-      // get object reference size by getting scale factor of Object[] arrays:
-      try {
-        final Method arrayIndexScaleM = unsafeClass.getMethod("arrayIndexScale", Class.class);
-        referenceSize = ((Number) arrayIndexScaleM.invoke(unsafe, Object[].class)).intValue();
-      } catch (Exception e) {
-        // ignore
-        supportedJvm = false;
-      }
-
-      // updated best guess based on reference size:
-      objectHeader = Constants.JRE_IS_64BIT ? (8 + referenceSize) : 8;
-      arrayHeader = Constants.JRE_IS_64BIT ? (8 + 2 * referenceSize) : 12;
-
-      // get the object header size:
-      // - first try out if the field offsets are not scaled (see warning in Unsafe docs)
-      // - get the object header size by getting the field offset of the first field of a dummy object
-      // If the scaling is byte-wise and unsafe is available, enable dynamic size measurement for
-      // estimateRamUsage().
-      try {
-        objectFieldOffsetM = unsafeClass.getMethod("objectFieldOffset", Field.class);
-        final Field dummy1Field = DummyTwoLongObject.class.getDeclaredField("dummy1");
-        final int ofs1 = ((Number) objectFieldOffsetM.invoke(unsafe, dummy1Field)).intValue();
-        final Field dummy2Field = DummyTwoLongObject.class.getDeclaredField("dummy2");
-        final int ofs2 = ((Number) objectFieldOffsetM.invoke(unsafe, dummy2Field)).intValue();
-        if (Math.abs(ofs2 - ofs1) == NUM_BYTES_LONG) {
-          final Field baseField = DummyOneFieldObject.class.getDeclaredField("base");
-          objectHeader = ((Number) objectFieldOffsetM.invoke(unsafe, baseField)).intValue();
-        } else {
-          // it is not safe to use Unsafe.objectFieldOffset(),
-          // as it may be scaled (see "cookie" comment in Unsafe), better use defaults
-          // and conventional size estimation:
-          objectFieldOffsetM = null;
-          supportedJvm = false;
-        }
-      } catch (Exception e) {
-        // on exception ensure useUnsafe will be set to false later:
-        objectFieldOffsetM = null;
-        supportedJvm = false;
-      }
-
-      // Get the array header size by retrieving the array base offset
-      // (offset of the first element of an array).
-      try {
-        final Method arrayBaseOffsetM = unsafeClass.getMethod("arrayBaseOffset", Class.class);
-        // we calculate that only for byte[] arrays, it's actually the same for all types:
-        arrayHeader = ((Number) arrayBaseOffsetM.invoke(unsafe, byte[].class)).intValue();
-      } catch (Exception e) {
-        // ignore
-        supportedJvm = false;
-      }
+      tempTheUnsafe = unsafeField.get(null);
     } catch (Exception e) {
-      // ignore
-      supportedJvm = false;
+      // Ignore.
+    }
+    theUnsafe = tempTheUnsafe;
+
+    // get object reference size by getting scale factor of Object[] arrays:
+    try {
+      final Method arrayIndexScaleM = unsafeClass.getMethod("arrayIndexScale", Class.class);
+      referenceSize = ((Number) arrayIndexScaleM.invoke(theUnsafe, Object[].class)).intValue();
+      supportedFeatures.add(JvmFeature.OBJECT_REFERENCE_SIZE);
+    } catch (Exception e) {
+      // Ignore.
+    }
+
+    // "best guess" based on reference size. We will attempt to modify
+    // these to exact values if there is supported infrastructure.
+    objectHeader = Constants.JRE_IS_64BIT ? (8 + referenceSize) : 8;
+    arrayHeader = Constants.JRE_IS_64BIT ? (8 + 2 * referenceSize) : 12;
+
+    // get the object header size:
+    // - first try out if the field offsets are not scaled (see warning in Unsafe docs)
+    // - get the object header size by getting the field offset of the first field of a dummy object
+    // If the scaling is byte-wise and unsafe is available, enable dynamic size measurement for
+    // estimateRamUsage().
+    Method tempObjectFieldOffsetMethod = null;
+    try {
+      final Method objectFieldOffsetM = unsafeClass.getMethod("objectFieldOffset", Field.class);
+      final Field dummy1Field = DummyTwoLongObject.class.getDeclaredField("dummy1");
+      final int ofs1 = ((Number) objectFieldOffsetM.invoke(theUnsafe, dummy1Field)).intValue();
+      final Field dummy2Field = DummyTwoLongObject.class.getDeclaredField("dummy2");
+      final int ofs2 = ((Number) objectFieldOffsetM.invoke(theUnsafe, dummy2Field)).intValue();
+      if (Math.abs(ofs2 - ofs1) == NUM_BYTES_LONG) {
+        final Field baseField = DummyOneFieldObject.class.getDeclaredField("base");
+        objectHeader = ((Number) objectFieldOffsetM.invoke(theUnsafe, baseField)).intValue();
+        supportedFeatures.add(JvmFeature.FIELD_OFFSETS);
+        tempObjectFieldOffsetMethod = objectFieldOffsetM;
+      }
+    } catch (Exception e) {
+      // Ignore.
+    }
+    objectFieldOffsetMethod = tempObjectFieldOffsetMethod;
+
+    // Get the array header size by retrieving the array base offset
+    // (offset of the first element of an array).
+    try {
+      final Method arrayBaseOffsetM = unsafeClass.getMethod("arrayBaseOffset", Class.class);
+      // we calculate that only for byte[] arrays, it's actually the same for all types:
+      arrayHeader = ((Number) arrayBaseOffsetM.invoke(theUnsafe, byte[].class)).intValue();
+      supportedFeatures.add(JvmFeature.ARRAY_HEADER_SIZE);
+    } catch (Exception e) {
+      // Ignore.
     }
 
     NUM_BYTES_OBJECT_REF = referenceSize;
     NUM_BYTES_OBJECT_HEADER = objectHeader;
     NUM_BYTES_ARRAY_HEADER = arrayHeader;
-    useUnsafe = (unsafe != null && objectFieldOffsetM != null);
-    if (useUnsafe) {
-      theUnsafe = unsafe;
-      objectFieldOffsetMethod = objectFieldOffsetM;
-    } else {
-      theUnsafe = objectFieldOffsetMethod = null;
-    }
 
     // Try to get the object alignment (the default seems to be 8 on Hotspot,
     // regardless of the architecture).
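The next hunk is the tail of the object-alignment probe: the patch asks HotSpot's diagnostic MXBean for the ObjectAlignmentInBytes VM option, entirely through reflection, so non-HotSpot JVMs simply fall through to the default of 8. A minimal standalone sketch of the same idea follows; the bean and option names are HotSpot-specific assumptions, and every call here may throw on other VMs:

```java
import java.lang.management.ManagementFactory;
import java.lang.reflect.Method;

public class ObjectAlignmentProbe {
  public static void main(String[] args) throws Exception {
    // HotSpot-only: com.sun.management.HotSpotDiagnosticMXBean exposes VM options.
    final Class<?> beanClazz = Class.forName("com.sun.management.HotSpotDiagnosticMXBean");
    final Object hotSpotBean = ManagementFactory.newPlatformMXBeanProxy(
        ManagementFactory.getPlatformMBeanServer(),
        "com.sun.management:type=HotSpotDiagnostic",
        beanClazz);
    final Method getVMOptionMethod = beanClazz.getMethod("getVMOption", String.class);
    final Object vmOption = getVMOptionMethod.invoke(hotSpotBean, "ObjectAlignmentInBytes");
    // Same reflective getValue() call as in the hunk below.
    System.out.println("Object alignment: "
        + vmOption.getClass().getMethod("getValue").invoke(vmOption) + " bytes");
  }
}
```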
@@ -176,18 +224,34 @@ public final class RamUsageEstimator {
         objectAlignment = Integer.parseInt(
             vmOption.getClass().getMethod("getValue").invoke(vmOption).toString()
         );
+        supportedFeatures.add(JvmFeature.OBJECT_ALIGNMENT);
       } catch (InvocationTargetException ite) {
         if (!(ite.getCause() instanceof IllegalArgumentException))
           throw ite; // ignore the error completely and use default of 8 (32 bit JVMs).
       }
     } catch (Exception e) {
-      // ignore
-      supportedJvm = false;
+      // Ignore.
     }
+
     NUM_BYTES_OBJECT_ALIGNMENT = objectAlignment;
 
-    isSupportedJVM = supportedJvm;
+    JVM_INFO_STRING = "[JVM: " +
+        Constants.JVM_NAME + ", " + Constants.JVM_VERSION + ", " + Constants.JVM_VENDOR + ", " +
+        Constants.JAVA_VENDOR + ", " + Constants.JAVA_VERSION + "]";
+  }
+
+  /**
+   * Cached information about a given class.
+   */
+  private static final class ClassCache {
+    public final long alignedShallowInstanceSize;
+    public final Field[] referenceFields;
+
+    public ClassCache(long alignedShallowInstanceSize, Field[] referenceFields) {
+      this.alignedShallowInstanceSize = alignedShallowInstanceSize;
+      this.referenceFields = referenceFields;
+    }
   }
 
   // Object with just one field to determine the object header size by getting the offset of the dummy field:
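ClassCache is the memoization half of the new traversal: the reflective field scan and the shallow-size computation happen once per class, not once per object. The sketch below isolates that lookup idiom against a hypothetical Layout record (the names LayoutCacheDemo, Layout, layoutOf, and scan are illustrative, not from the patch):

```java
import java.lang.reflect.Field;
import java.util.IdentityHashMap;

final class LayoutCacheDemo {
  static final class Layout {
    final long shallowSize;
    final Field[] referenceFields;
    Layout(long shallowSize, Field[] referenceFields) {
      this.shallowSize = shallowSize;
      this.referenceFields = referenceFields;
    }
  }

  // Class objects have identity semantics, so an IdentityHashMap is a cheap key map,
  // matching the classCache the patch builds inside measureObjectSize.
  private final IdentityHashMap<Class<?>, Layout> cache = new IdentityHashMap<Class<?>, Layout>();

  Layout layoutOf(Class<?> clazz) {
    Layout cached = cache.get(clazz);
    if (cached == null) {
      // The expensive reflective scan runs only on the first encounter of a class.
      cache.put(clazz, cached = scan(clazz));
    }
    return cached;
  }

  private Layout scan(Class<?> clazz) {
    // Placeholder: a real scan would walk getDeclaredFields() up the type hierarchy.
    return new Layout(16, new Field[0]);
  }
}
```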
@@ -204,14 +268,14 @@ public final class RamUsageEstimator {
   }
 
   /**
-   * Returns true, if the current JVM is supported by {@code RamUsageEstimator}.
+   * Returns true, if the current JVM is fully supported by {@code RamUsageEstimator}.
    * If this method returns {@code false} you are maybe using a 3rd party Java VM
    * that is not supporting Oracle/Sun private APIs. The memory estimates can be
    * imprecise then (no way of detecting compressed references, alignments, etc.).
    * Lucene still tries to use sensible defaults.
    */
   public static boolean isSupportedJVM() {
-    return isSupportedJVM;
+    return supportedFeatures.size() == JvmFeature.values().length;
  }
 
  /**
@@ -272,13 +336,7 @@ public final class RamUsageEstimator {
    * should be GCed.
    */
   public static long sizeOf(Object obj) {
-    final Set<Object> seen = Collections.newSetFromMap(new IdentityHashMap<Object,Boolean>(64));
-    try {
-      return measureObjectSize(obj, seen);
-    } finally {
-      // Help the GC.
-      seen.clear();
-    }
+    return measureObjectSize(obj);
   }
 
   /**
@@ -292,7 +350,7 @@ public final class RamUsageEstimator {
     if (obj == null) return 0;
     final Class<?> clz = obj.getClass();
     if (clz.isArray()) {
-      return measureArraySize(obj, null);
+      return shallowSizeOfArray(obj);
     } else {
       return shallowSizeOfInstance(clz);
     }
@@ -302,8 +360,8 @@ public final class RamUsageEstimator {
    * Returns the shallow instance size in bytes an instance of the given class would occupy.
    * This works with all conventional classes and primitive types, but not with arrays
    * (the size then depends on the number of elements and varies from object to object).
-   * Use the array-instance methods instead.
    *
+   * @see #shallowSizeOf(Object)
    * @throws IllegalArgumentException if {@code clazz} is an array class.
    */
   public static long shallowSizeOfInstance(Class<?> clazz) {
@@ -313,87 +371,163 @@ public final class RamUsageEstimator {
       return primitiveSizes.get(clazz);
 
     long size = NUM_BYTES_OBJECT_HEADER;
-
-    // Walk type hierarchy
-    while (clazz != null) {
-      final Field[] fields = clazz.getDeclaredFields();
-      boolean fieldFound = false;
-      for (final Field f : fields) {
-        if (Modifier.isStatic(f.getModifiers())) {
-          continue;
-        }
-        size = reflectFieldSize(size, f);
-        fieldFound = true;
-      }
-      if (useUnsafe && fieldFound) {
-        // no need to recurse to superclasses, as all fields are
-        // added at the end, so we won't find any larger offset
-        break;
-      }
-      clazz = clazz.getSuperclass();
-    }
+    // Walk type hierarchy
+    for (; clazz != null; clazz = clazz.getSuperclass()) {
+      final Field[] fields = clazz.getDeclaredFields();
+      for (Field f : fields) {
+        if (!Modifier.isStatic(f.getModifiers())) {
+          size = adjustForField(size, f);
+        }
+      }
+    }
     return alignObjectSize(size);
   }
 
   /**
-   * Recursive descend into an object.
+   * Return shallow size of any array.
    */
-  private static long measureObjectSize(Object obj, Set<Object> seen) {
-    if (obj == null) {
-      return 0;
-    }
-
-    // skip if we have seen before
-    if (seen.contains(obj)) {
-      return 0;
-    }
-
-    // add to seen
-    seen.add(obj);
-
-    Class<?> clazz = obj.getClass();
-    if (clazz.isArray()) {
-      return measureArraySize(obj, seen);
-    }
-
-    long size = NUM_BYTES_OBJECT_HEADER;
-    long innerSize = 0L;
-
-    // walk type hierarchy
-    while (clazz != null) {
-      final Field[] fields = clazz.getDeclaredFields();
-      for (final Field f : fields) {
-        if (Modifier.isStatic(f.getModifiers())) {
-          continue;
-        }
-
-        size = reflectFieldSize(size, f);
-
-        if (!f.getType().isPrimitive()) {
-          try {
-            f.setAccessible(true);
-            innerSize += measureObjectSize(f.get(obj), seen);
-          } catch (IllegalAccessException ex) {
-            // this should never happen as we enable setAccessible()!
-            throw new RuntimeException("Cannot reflect instance field: " +
-                f.getDeclaringClass().getName() + "#" + f.getName(), ex);
-          }
-        }
-      }
-      clazz = clazz.getSuperclass();
-    }
-    return alignObjectSize(size) + innerSize;
-  }
+  private static long shallowSizeOfArray(Object array) {
+    long size = NUM_BYTES_ARRAY_HEADER;
+    final int len = Array.getLength(array);
+    if (len > 0) {
+      Class<?> arrayElementClazz = array.getClass().getComponentType();
+      if (arrayElementClazz.isPrimitive()) {
+        size += (long) len * primitiveSizes.get(arrayElementClazz);
+      } else {
+        size += (long) NUM_BYTES_OBJECT_REF * len;
+      }
+    }
+    return alignObjectSize(size);
+  }
+
+  /*
+   * Non-recursive version of object descent. This consumes more memory than recursive in-depth
+   * traversal but prevents stack overflows on long chains of objects
+   * or complex graphs (max recursion depth on my machine was ~5000 objects linked in a chain,
+   * so not much at all).
+   */
+  private static long measureObjectSize(Object root) {
+    // Objects seen so far.
+    final IdentityHashSet<Object> seen = new IdentityHashSet<Object>();
+    // Class cache with reference Fields and precalculated shallow size.
+    final IdentityHashMap<Class<?>, ClassCache> classCache = new IdentityHashMap<Class<?>, ClassCache>();
+    // Stack of objects pending traversal. Recursion caused stack overflows.
+    final ArrayList<Object> stack = new ArrayList<Object>();
+    stack.add(root);
+
+    long totalSize = 0;
+    while (!stack.isEmpty()) {
+      final Object ob = stack.remove(stack.size() - 1);
+
+      if (ob == null || seen.contains(ob)) {
+        continue;
+      }
+      seen.add(ob);
+
+      final Class<?> obClazz = ob.getClass();
+      if (obClazz.isArray()) {
+        /*
+         * Consider an array, possibly of primitive types. Push any of its references to
+         * the processing stack and accumulate this array's shallow size.
+         */
+        long size = NUM_BYTES_ARRAY_HEADER;
+        final int len = Array.getLength(ob);
+        if (len > 0) {
+          Class<?> componentClazz = obClazz.getComponentType();
+          if (componentClazz.isPrimitive()) {
+            size += (long) len * primitiveSizes.get(componentClazz);
+          } else {
+            size += (long) NUM_BYTES_OBJECT_REF * len;
+
+            // Push refs for traversal later.
+            for (int i = len; --i >= 0;) {
+              final Object o = Array.get(ob, i);
+              if (o != null && !seen.contains(o)) {
+                stack.add(o);
+              }
+            }
+          }
+        }
+        totalSize += alignObjectSize(size);
+      } else {
+        /*
+         * Consider an object. Push any references it has to the processing stack
+         * and accumulate this object's shallow size.
+         */
+        try {
+          ClassCache cachedInfo = classCache.get(obClazz);
+          if (cachedInfo == null) {
+            classCache.put(obClazz, cachedInfo = createCacheEntry(obClazz));
+          }
+
+          for (Field f : cachedInfo.referenceFields) {
+            // Fast path to eliminate redundancies.
+            final Object o = f.get(ob);
+            if (o != null && !seen.contains(o)) {
+              stack.add(o);
+            }
+          }
+
+          totalSize += cachedInfo.alignedShallowInstanceSize;
+        } catch (IllegalAccessException e) {
+          // this should never happen as we enabled setAccessible().
+          throw new RuntimeException("Reflective field access failed?", e);
+        }
+      }
+    }
+
+    // Help the GC (?).
+    seen.clear();
+    stack.clear();
+    classCache.clear();
+
+    return totalSize;
+  }
+
+  /**
+   * Create cached information about the shallow size and reference fields of
+   * a given class.
+   */
+  private static ClassCache createCacheEntry(final Class<?> clazz) {
+    ClassCache cachedInfo;
+    long shallowInstanceSize = NUM_BYTES_OBJECT_HEADER;
+    final ArrayList<Field> referenceFields = new ArrayList<Field>(32);
+    for (Class<?> c = clazz; c != null; c = c.getSuperclass()) {
+      final Field[] fields = c.getDeclaredFields();
+      for (final Field f : fields) {
+        if (!Modifier.isStatic(f.getModifiers())) {
+          shallowInstanceSize = adjustForField(shallowInstanceSize, f);
+
+          if (!f.getType().isPrimitive()) {
+            f.setAccessible(true);
+            referenceFields.add(f);
+          }
+        }
+      }
+    }
+
+    cachedInfo = new ClassCache(
+        alignObjectSize(shallowInstanceSize),
+        referenceFields.toArray(new Field[referenceFields.size()]));
+    return cachedInfo;
+  }
 
-  private static long reflectFieldSize(long size, final Field f) {
+  /**
+   * This method returns the maximum representation size of an object. sizeSoFar
+   * is the object's size measured so far. f is the field being probed.
+   *
+   * <p>The returned offset will be the maximum of whatever was measured so far and
+   * f's offset plus its representation size (unaligned).
+   */
+  private static long adjustForField(long sizeSoFar, final Field f) {
     final Class<?> type = f.getType();
     final int fsize = type.isPrimitive() ? primitiveSizes.get(type) : NUM_BYTES_OBJECT_REF;
-    if (useUnsafe) {
+    if (objectFieldOffsetMethod != null) {
       try {
         final long offsetPlusSize = ((Number) objectFieldOffsetMethod.invoke(theUnsafe, f)).longValue() + fsize;
-        return Math.max(size, offsetPlusSize);
+        return Math.max(sizeSoFar, offsetPlusSize);
       } catch (IllegalAccessException ex) {
         throw new RuntimeException("Access problem with sun.misc.Unsafe", ex);
       } catch (InvocationTargetException ite) {
@@ -409,40 +543,22 @@ public final class RamUsageEstimator {
             f.getDeclaringClass().getName() + "#" + f.getName(), cause);
       }
     } else {
-      return size + fsize;
+      // TODO: No alignments based on field type/ subclass fields alignments?
+      return sizeSoFar + fsize;
     }
   }
 
-  /**
-   * Return the deep size of an array, including
-   * sub-objects if there are any.
-   *
-   * @param seen A set of already seen objects. If null no references
-   * are followed and this method returns shallow size.
-   */
-  private static long measureArraySize(Object array, Set<Object> seen) {
-    long size = NUM_BYTES_ARRAY_HEADER;
-    final int len = Array.getLength(array);
-    if (len > 0) {
-      Class<?> arrayElementClazz = array.getClass().getComponentType();
-      if (arrayElementClazz.isPrimitive()) {
-        size += (long) len * primitiveSizes.get(arrayElementClazz);
-      } else {
-        size += (long) NUM_BYTES_OBJECT_REF * len;
-        if (seen != null) {
-          for (int i = 0; i < len; i++) {
-            size += measureObjectSize(Array.get(array, i), seen);
-          }
-        }
-      }
-    }
-
-    return alignObjectSize(size);
+  /** Return the set of JVM features that would improve the estimate but are unsupported on this JVM. */
+  public static EnumSet<JvmFeature> getUnsupportedFeatures() {
+    EnumSet<JvmFeature> unsupported = EnumSet.allOf(JvmFeature.class);
+    unsupported.removeAll(supportedFeatures);
+    return unsupported;
   }
 
-  public static final long ONE_KB = 1024;
-  public static final long ONE_MB = ONE_KB * ONE_KB;
-  public static final long ONE_GB = ONE_KB * ONE_MB;
+  /** Return the set of supported JVM features that improve the estimate. */
+  public static EnumSet<JvmFeature> getSupportedFeatures() {
+    return EnumSet.copyOf(supportedFeatures);
+  }
 
   /**
    * Returns size in human-readable units (GB, MB, KB or bytes).
@@ -466,4 +582,252 @@ public final class RamUsageEstimator {
       return bytes + " bytes";
     }
   }
+
+  /**
+   * Return a human-readable size of a given object.
+   * @see #sizeOf(Object)
+   * @see #humanReadableUnits(long)
+   */
+  public static String humanSizeOf(Object object) {
+    return humanReadableUnits(sizeOf(object));
+  }
+
+  /**
+   * An identity hash set implemented using open addressing. No null keys are allowed.
+   *
+   * TODO: If this is useful outside this class, make it public - needs some work
+   */
+  static final class IdentityHashSet<KType> implements Iterable<KType> {
+    /**
+     * Default load factor.
+     */
+    public final static float DEFAULT_LOAD_FACTOR = 0.75f;
+
+    /**
+     * Minimum capacity for the set.
+     */
+    public final static int MIN_CAPACITY = 4;
+
+    /**
+     * All of the set's entries. Always a power of two in length.
+     */
+    public Object[] keys;
+
+    /**
+     * Cached number of assigned slots.
+     */
+    public int assigned;
+
+    /**
+     * The load factor for this set (fraction of allocated or deleted slots before
+     * the buffers must be rehashed or reallocated).
+     */
+    public final float loadFactor;
+
+    /**
+     * Cached capacity threshold at which we must resize the buffers.
+     */
+    private int resizeThreshold;
+
+    /**
+     * Creates a hash set with the default capacity of 16 and a
+     * load factor of {@value #DEFAULT_LOAD_FACTOR}.
+     */
+    public IdentityHashSet() {
+      this(16, DEFAULT_LOAD_FACTOR);
+    }
+
+    /**
+     * Creates a hash set with the given capacity and a load factor of
+     * {@value #DEFAULT_LOAD_FACTOR}.
+     */
+    public IdentityHashSet(int initialCapacity) {
+      this(initialCapacity, DEFAULT_LOAD_FACTOR);
+    }
+
+    /**
+     * Creates a hash set with the given capacity and load factor.
+     */
+    public IdentityHashSet(int initialCapacity, float loadFactor) {
+      initialCapacity = Math.max(MIN_CAPACITY, initialCapacity);
+
+      assert initialCapacity > 0 : "Initial capacity must be between (0, "
+          + Integer.MAX_VALUE + "].";
+      assert loadFactor > 0 && loadFactor < 1 : "Load factor must be between (0, 1).";
+      this.loadFactor = loadFactor;
+      allocateBuffers(roundCapacity(initialCapacity));
+    }
+
+    /**
+     * Adds a reference to the set. Null keys are not allowed.
+     */
+    public boolean add(KType e) {
+      assert e != null : "Null keys not allowed.";
+
+      if (assigned >= resizeThreshold) expandAndRehash();
+
+      final int mask = keys.length - 1;
+      int slot = rehash(e) & mask;
+      Object existing;
+      while ((existing = keys[slot]) != null) {
+        if (e == existing) {
+          return false; // already found.
+        }
+        slot = (slot + 1) & mask;
+      }
+      assigned++;
+      keys[slot] = e;
+      return true;
+    }
+
+    /**
+     * Checks if the set contains a given ref.
+     */
+    public boolean contains(KType e) {
+      final int mask = keys.length - 1;
+      int slot = rehash(e) & mask;
+      Object existing;
+      while ((existing = keys[slot]) != null) {
+        if (e == existing) {
+          return true;
+        }
+        slot = (slot + 1) & mask;
+      }
+      return false;
+    }
+
+    /**
+     * Rehash via MurmurHash.
+     *
+     * <p>The implementation is based on the
+     * finalization step from Austin Appleby's
+     * MurmurHash3.
+     *
+     * @see "http://sites.google.com/site/murmurhash/"
+     */
+    private static int rehash(Object o) {
+      int k = System.identityHashCode(o);
+      k ^= k >>> 16;
+      k *= 0x85ebca6b;
+      k ^= k >>> 13;
+      k *= 0xc2b2ae35;
+      k ^= k >>> 16;
+      return k;
+    }
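Because the table is a power of two and the probe mask is keys.length - 1, only the low bits of the hash select a slot; System.identityHashCode values can cluster, so the set runs them through MurmurHash3's 32-bit finalizer first to spread entropy into those low bits. A small demonstration of the same mixing constants on sample inputs (the inputs are just consecutive ints, not real identity hashes):

```java
public class MixDemo {
  // MurmurHash3's 32-bit finalization step, same constants as rehash() above.
  static int mix(int k) {
    k ^= k >>> 16;
    k *= 0x85ebca6b;
    k ^= k >>> 13;
    k *= 0xc2b2ae35;
    k ^= k >>> 16;
    return k;
  }

  public static void main(String[] args) {
    final int mask = 15; // a 16-slot table, as in keys.length - 1
    for (int i = 0; i < 4; i++) {
      // Unmixed, consecutive inputs land on consecutive slots; mixed, they scatter.
      System.out.println("raw slot " + (i & mask) + "  mixed slot " + (mix(i) & mask));
    }
  }
}
```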
+
+    /**
+     * Expand the internal storage buffers (capacity) or rehash current keys and
+     * values if there are a lot of deleted slots.
+     */
+    private void expandAndRehash() {
+      final Object[] oldKeys = this.keys;
+
+      assert assigned >= resizeThreshold;
+      allocateBuffers(nextCapacity(keys.length));
+
+      /*
+       * Rehash all assigned slots from the old hash table.
+       */
+      final int mask = keys.length - 1;
+      for (int i = 0; i < oldKeys.length; i++) {
+        final Object key = oldKeys[i];
+        if (key != null) {
+          int slot = rehash(key) & mask;
+          while (keys[slot] != null) {
+            slot = (slot + 1) & mask;
+          }
+          keys[slot] = key;
+        }
+      }
+      Arrays.fill(oldKeys, null);
+    }
+
+    /**
+     * Allocate internal buffers for a given capacity.
+     *
+     * @param capacity
+     *          New capacity (must be a power of two).
+     */
+    private void allocateBuffers(int capacity) {
+      this.keys = new Object[capacity];
+      this.resizeThreshold = (int) (capacity * this.loadFactor);
+    }
+
+    /**
+     * Return the next possible capacity, counting from the current buffers' size.
+     */
+    protected int nextCapacity(int current) {
+      assert current > 0 && Long.bitCount(current) == 1 : "Capacity must be a power of two.";
+      assert ((current << 1) > 0) : "Maximum capacity exceeded ("
+          + (0x80000000 >>> 1) + ").";
+
+      if (current < MIN_CAPACITY / 2) current = MIN_CAPACITY / 2;
+      return current << 1;
+    }
+
+    /**
+     * Round the capacity to the next allowed value.
+     */
+    protected int roundCapacity(int requestedCapacity) {
+      // Maximum positive integer that is a power of two.
+      if (requestedCapacity > (0x80000000 >>> 1)) return (0x80000000 >>> 1);
+
+      int capacity = MIN_CAPACITY;
+      while (capacity < requestedCapacity) {
+        capacity <<= 1;
+      }
+
+      return capacity;
+    }
+
+    public void clear() {
+      assigned = 0;
+      Arrays.fill(keys, null);
+    }
+
+    public int size() {
+      return assigned;
+    }
+
+    public boolean isEmpty() {
+      return size() == 0;
+    }
+
+    @Override
+    public Iterator<KType> iterator() {
+      return new Iterator<KType>() {
+        int pos = -1;
+        Object nextElement = fetchNext();
+
+        @Override
+        public boolean hasNext() {
+          return nextElement != null;
+        }
+
+        @SuppressWarnings("unchecked")
+        @Override
+        public KType next() {
+          Object r = this.nextElement;
+          if (r == null) {
+            throw new NoSuchElementException();
+          }
+          this.nextElement = fetchNext();
+          return (KType) r;
+        }
+
+        private Object fetchNext() {
+          pos++;
+          while (pos < keys.length && keys[pos] == null) {
+            pos++;
+          }
+
+          return (pos >= keys.length ? null : keys[pos]);
+        }
+
+        @Override
+        public void remove() {
+          throw new UnsupportedOperationException();
+        }
+      };
+    }
+  }
 }
diff --git a/lucene/core/src/test/org/apache/lucene/util/TestIdentityHashSet.java b/lucene/core/src/test/org/apache/lucene/util/TestIdentityHashSet.java
new file mode 100644
index 00000000000..89f53a1ef1c
--- /dev/null
+++ b/lucene/core/src/test/org/apache/lucene/util/TestIdentityHashSet.java
@@ -0,0 +1,39 @@
+package org.apache.lucene.util;
+
+import java.util.*;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+public class TestIdentityHashSet extends LuceneTestCase {
+  @Test
+  public void testCheck() {
+    Random rnd = random;
+    Set<Object> jdk = Collections.newSetFromMap(
+        new IdentityHashMap<Object,Boolean>());
+    RamUsageEstimator.IdentityHashSet<Object> us = new RamUsageEstimator.IdentityHashSet<Object>();
+
+    int max = 100000;
+    int threshold = 256;
+    for (int i = 0; i < max; i++) {
+      // some of these will be interned and some will not so there will be collisions.
+      Integer v = rnd.nextInt(threshold);
+
+      boolean e1 = jdk.contains(v);
+      boolean e2 = us.contains(v);
+      Assert.assertEquals(e1, e2);
+
+      e1 = jdk.add(v);
+      e2 = us.add(v);
+      Assert.assertEquals(e1, e2);
+    }
+
+    Set<Object> collected = Collections.newSetFromMap(
+        new IdentityHashMap<Object,Boolean>());
+    for (Object o : us) {
+      collected.add(o);
+    }
+
+    Assert.assertEquals(collected, jdk);
+  }
+}
diff --git a/lucene/core/src/test/org/apache/lucene/util/TestRamUsageEstimator.java b/lucene/core/src/test/org/apache/lucene/util/TestRamUsageEstimator.java
index 47814071c41..206d929463d 100644
--- a/lucene/core/src/test/org/apache/lucene/util/TestRamUsageEstimator.java
+++ b/lucene/core/src/test/org/apache/lucene/util/TestRamUsageEstimator.java
@@ -22,86 +22,86 @@ import java.util.Random;
  */
 public class TestRamUsageEstimator extends LuceneTestCase {
+  public void testSanity() {
+    assertTrue(sizeOf(new String("test string")) > shallowSizeOfInstance(String.class));
 
-  public void testBasic() {
-    assertTrue(sizeOf(new String("test strin")) > shallowSizeOfInstance(String.class));
-
     Holder holder = new Holder();
     holder.holder = new Holder("string2", 5000L);
     assertTrue(sizeOf(holder) > shallowSizeOfInstance(Holder.class));
     assertTrue(sizeOf(holder) > sizeOf(holder.holder));
 
-    assertTrue(shallowSizeOfInstance(HolderSubclass.class) >= shallowSizeOfInstance(Holder.class));
-    assertEquals(shallowSizeOfInstance(Holder.class), shallowSizeOfInstance(HolderSubclass2.class));
+    assertTrue(
+        shallowSizeOfInstance(HolderSubclass.class) >= shallowSizeOfInstance(Holder.class));
+    assertTrue(
+        shallowSizeOfInstance(Holder.class) == shallowSizeOfInstance(HolderSubclass2.class));
 
-    String[] strings = new String[]{new String("test strin"), new String("hollow"), new String("catchmaster")};
+    String[] strings = new String[] {
+        new String("test string"),
+        new String("hollow"),
+        new String("catchmaster")
+    };
     assertTrue(sizeOf(strings) > shallowSizeOf(strings));
   }
 
   public void testStaticOverloads() {
     Random rnd = random;
     {
-      byte[] array = new byte [rnd.nextInt(1024)];
+      byte[] array = new byte[rnd.nextInt(1024)];
       assertEquals(sizeOf(array), sizeOf((Object) array));
     }
-
+
     {
-      boolean[] array = new boolean [rnd.nextInt(1024)];
+      boolean[] array = new boolean[rnd.nextInt(1024)];
       assertEquals(sizeOf(array), sizeOf((Object) array));
     }
-
+
     {
-      char[] array = new char [rnd.nextInt(1024)];
+      char[] array = new char[rnd.nextInt(1024)];
       assertEquals(sizeOf(array), sizeOf((Object) array));
     }
-
+
     {
-      short[] array = new short [rnd.nextInt(1024)];
+      short[] array = new short[rnd.nextInt(1024)];
       assertEquals(sizeOf(array), sizeOf((Object) array));
     }
-
+
     {
-      int[] array = new int [rnd.nextInt(1024)];
+      int[] array = new int[rnd.nextInt(1024)];
       assertEquals(sizeOf(array), sizeOf((Object) array));
     }
-
+
     {
-      float[] array = new float [rnd.nextInt(1024)];
+      float[] array = new float[rnd.nextInt(1024)];
      assertEquals(sizeOf(array), sizeOf((Object) array));
     }
-
+
     {
-      long[] array = new long [rnd.nextInt(1024)];
+      long[] array = new long[rnd.nextInt(1024)];
       assertEquals(sizeOf(array), sizeOf((Object) array));
     }
-
+
     {
-      double[] array = new double [rnd.nextInt(1024)];
+      double[] array = new double[rnd.nextInt(1024)];
       assertEquals(sizeOf(array), sizeOf((Object) array));
     }
   }
-
-  public void testReferenceSize() {
-    if (!isSupportedJVM()) {
-      System.err.println("WARN: Your JVM does not support the Oracle/Sun extensions (Hotspot diagnostics, sun.misc.Unsafe),");
-      System.err.println("so the memory estimates may be inprecise.");
-      System.err.println("Please report this to the Lucene mailing list, noting your JVM version: " +
-          Constants.JAVA_VENDOR + " " + Constants.JAVA_VERSION);
-    }
-    if (VERBOSE) {
-      System.out.println("This JVM is 64bit: " + Constants.JRE_IS_64BIT);
-      System.out.println("Reference size in this JVM: " + NUM_BYTES_OBJECT_REF);
-      System.out.println("Object header size in this JVM: " + NUM_BYTES_OBJECT_HEADER);
-      System.out.println("Array header size in this JVM: " + NUM_BYTES_ARRAY_HEADER);
-      System.out.println("Object alignment in this JVM: " + NUM_BYTES_OBJECT_ALIGNMENT);
-    }
-    assertTrue(NUM_BYTES_OBJECT_REF == 4 || NUM_BYTES_OBJECT_REF == 8);
-    if (!Constants.JRE_IS_64BIT) {
-      assertEquals("For 32bit JVMs, reference size must always be 4", 4, NUM_BYTES_OBJECT_REF);
-    }
-  }
+  public void testReferenceSize() {
+    if (!isSupportedJVM()) {
+      System.err.println("WARN: Your JVM does not support certain Oracle/Sun extensions.");
+      System.err.println("      Memory estimates may be inaccurate.");
+      System.err.println("      Please report this to the Lucene mailing list. JVM version: " + RamUsageEstimator.JVM_INFO_STRING);
+      for (JvmFeature f : RamUsageEstimator.getUnsupportedFeatures()) {
+        System.err.println("      - " + f.toString());
+      }
+    }
+
+    assertTrue(NUM_BYTES_OBJECT_REF == 4 || NUM_BYTES_OBJECT_REF == 8);
+    if (!Constants.JRE_IS_64BIT) {
+      assertEquals("For 32bit JVMs, reference size must always be 4.", 4, NUM_BYTES_OBJECT_REF);
+    }
+  }
 
   @SuppressWarnings("unused")
   private static class Holder {
     long field1 = 5000L;
@@ -109,8 +109,7 @@ public class TestRamUsageEstimator extends LuceneTestCase {
     Holder holder;
     long field2, field3, field4;
 
-    Holder() {
-    }
+    Holder() {}
 
     Holder(String name, long field1) {
       this.name = name;
@@ -123,7 +122,7 @@ public class TestRamUsageEstimator extends LuceneTestCase {
     byte foo;
     int bar;
   }
-
+
   private static class HolderSubclass2 extends Holder {
     // empty, only inherits all fields -> size should be identical to superclass
   }
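The rewritten testReferenceSize prints exactly which probes failed instead of a blanket warning. Code embedding Lucene can perform the same introspection; a short sketch using only the API surface this patch adds (JVM_INFO_STRING, isSupportedJVM, getUnsupportedFeatures):

```java
import org.apache.lucene.util.RamUsageEstimator;

public class CheckEstimatorSupport {
  public static void main(String[] args) {
    System.out.println(RamUsageEstimator.JVM_INFO_STRING);
    if (!RamUsageEstimator.isSupportedJVM()) {
      // Each missing feature degrades the estimate in a specific, documented way.
      for (RamUsageEstimator.JvmFeature f : RamUsageEstimator.getUnsupportedFeatures()) {
        System.err.println("Unsupported: " + f);
      }
    }
  }
}
```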
+ */ +public class TestRamUsageEstimatorOnWildAnimals extends LuceneTestCase { + public static class ListElement { + ListElement next; + } + + public void testOverflowMaxChainLength() { + int UPPERLIMIT = 100000; + int lower = 0; + int upper = UPPERLIMIT; + + while (lower + 1 < upper) { + int mid = (lower + upper) / 2; + try { + ListElement first = new ListElement(); + ListElement last = first; + for (int i = 0; i < mid; i++) { + last = (last.next = new ListElement()); + } + RamUsageEstimator.sizeOf(first); // cause SOE or pass. + lower = mid; + } catch (StackOverflowError e) { + upper = mid; + } + } + + if (lower + 1 < UPPERLIMIT) { + Assert.fail("Max object chain length till stack overflow: " + lower); + } + } +}