LUCENE-3867: Refactor RamUsageEstimator. CHANGES.txt will be added once backported to 3.x.

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1302133 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Uwe Schindler 2012-03-18 14:59:10 +00:00
parent cd05c6f0c3
commit c429736260
16 changed files with 669 additions and 389 deletions

View File

@ -59,6 +59,7 @@ import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.Constants; // for javadocs
import org.apache.lucene.util.RamUsageEstimator;
/**
* High-performance single-document main memory Apache Lucene fulltext search index.
@ -473,39 +474,12 @@ public class MemoryIndex {
/**
* Returns a reasonable approximation of the main memory [bytes] consumed by
* this instance. Useful for smart memory sensitive caches/pools. Assumes
* fieldNames are interned, whereas tokenized terms are memory-overlaid.
*
* this instance. Useful for smart memory sensitive caches/pools.
* @return the main memory consumption
*/
public int getMemorySize() {
// for example usage in a smart cache see nux.xom.pool.Pool
int PTR = VM.PTR;
int INT = VM.INT;
int size = 0;
size += VM.sizeOfObject(2*PTR + INT); // memory index
if (sortedFields != null) size += VM.sizeOfObjectArray(sortedFields.length);
size += VM.sizeOfHashMap(fields.size());
for (Map.Entry<String, Info> entry : fields.entrySet()) { // for each Field Info
Info info = entry.getValue();
size += VM.sizeOfObject(2*INT + 3*PTR); // Info instance vars
if (info.sortedTerms != null) size += VM.sizeOfObjectArray(info.sortedTerms.length);
int len = info.terms.size();
size += VM.sizeOfHashMap(len);
Iterator<Map.Entry<BytesRef,ArrayIntList>> iter2 = info.terms.entrySet().iterator();
while (--len >= 0) { // for each term
Map.Entry<BytesRef,ArrayIntList> e = iter2.next();
// FIXME: this calculation is probably not correct since we use bytes now.
size += VM.sizeOfObject(PTR + 3*INT); // assumes substring() memory overlay
// size += STR + 2 * ((String) e.getKey()).length();
ArrayIntList positions = e.getValue();
size += VM.sizeOfArrayIntList(positions.size());
}
}
return size;
}
public long getMemorySize() {
return RamUsageEstimator.sizeOf(this);
}
private int numPositions(ArrayIntList positions) {
return positions.size() / stride;
@ -1126,61 +1100,4 @@ public class MemoryIndex {
return norms;
}
}
///////////////////////////////////////////////////////////////////////////////
// Nested classes:
///////////////////////////////////////////////////////////////////////////////
private static final class VM {

  /** Size of a reference in bytes: 8 when {@code Constants.JRE_IS_64BIT} is set, 4 otherwise. */
  public static final int PTR = Constants.JRE_IS_64BIT ? 8 : 4;

  /** Size of an {@code int} in bytes. */
  public static final int INT = 4;

  /** Base-2 logarithm of {@link #PTR}; shift amount used to round sizes to a multiple of PTR. */
  private static final int LOG_PTR = (int) Math.round(log2(PTR));

  /**
   * Header of any heap allocated Java object (pointer to the class plus
   * bookkeeping for monitor, gc, hash, etc.), modelled as two pointers.
   */
  private static final int OBJECT_HEADER = 2 * PTR;

  private VM() {
    // static helpers only, not instantiable
  }

  /**
   * Rounds {@code n} up to the next multiple of {@link #PTR}. Assumes n &gt; 0.
   * With PTR == 8: 1..8 gives 8, 9..16 gives 16.
   */
  private static int sizeOf(int n) {
    final int words = ((n - 1) >> LOG_PTR) + 1;
    return words << LOG_PTR;
  }

  /** Size of an object carrying {@code n} bytes of instance data in addition to its header. */
  public static int sizeOfObject(int n) {
    final int raw = OBJECT_HEADER + n;
    return sizeOf(raw);
  }

  /** Size of a reference array with {@code len} slots (one int plus one pointer per slot). */
  public static int sizeOfObjectArray(int len) {
    final int payload = INT + PTR * len;
    return sizeOfObject(payload);
  }

  /** Size of an {@code int[]} with {@code len} elements (one int plus one int per element). */
  public static int sizeOfIntArray(int len) {
    final int payload = INT + INT * len;
    return sizeOfObject(payload);
  }

  /** Size of a hash map with {@code len} entries: the map object, a reference array and the entries. */
  public static int sizeOfHashMap(int len) {
    final int mapObject = sizeOfObject(4 * PTR + 4 * INT);
    final int table = sizeOfObjectArray(len);
    final int entries = len * sizeOfObject(3 * PTR + INT); // entries
    return mapObject + table + entries;
  }

  /** Size of an ArrayIntList holding {@code len} ints: the list object plus its int array. */
  public static int sizeOfArrayIntList(int len) {
    final int listObject = sizeOfObject(PTR + INT);
    return listObject + sizeOfIntArray(len);
  }

  /** Logarithm to the base 2. Example: log2(4) == 2, log2(8) == 3 */
  private static double log2(double value) {
    final double ln2 = Math.log(2);
    return Math.log(value) / ln2;
  }
}
}

View File

@ -631,22 +631,16 @@ public interface FieldCache {
protected final void setEstimatedSize(String size) {
this.size = size;
}
/**
* @see #estimateSize(RamUsageEstimator)
*/
public void estimateSize() {
estimateSize(new RamUsageEstimator(false)); // doesn't check for interned
}
/**
* Computes (and stores) the estimated size of the cache Value
* @see #getEstimatedSize
*/
public void estimateSize(RamUsageEstimator ramCalc) {
long size = ramCalc.estimateRamUsage(getValue());
setEstimatedSize(RamUsageEstimator.humanReadableUnits
(size, new DecimalFormat("0.#")));
public void estimateSize() {
long size = RamUsageEstimator.sizeOf(getValue());
setEstimatedSize(RamUsageEstimator.humanReadableUnits(size));
}
/**
* The most recently estimated size of the value, null unless
* estimateSize has been called.

View File

@ -1,78 +0,0 @@
package org.apache.lucene.util;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.util.IdentityHashMap;
import java.util.Map;
/**
* An average, best guess, MemoryModel that should work okay on most systems.
*
*/
public class AverageGuessMemoryModel extends MemoryModel {

  // Best guess primitive sizes in bytes. The table is built once and shared by
  // all instances (it is never modified after class initialization), instead of
  // creating an anonymous IdentityHashMap subclass per instance.
  private static final Map<Class<?>,Integer> SIZES = new IdentityHashMap<Class<?>,Integer>();
  static {
    SIZES.put(boolean.class, Integer.valueOf(1));
    SIZES.put(byte.class, Integer.valueOf(1));
    SIZES.put(char.class, Integer.valueOf(2));
    SIZES.put(short.class, Integer.valueOf(2));
    SIZES.put(int.class, Integer.valueOf(4));
    SIZES.put(float.class, Integer.valueOf(4));
    SIZES.put(double.class, Integer.valueOf(8));
    SIZES.put(long.class, Integer.valueOf(8));
  }

  /*
   * (non-Javadoc)
   *
   * @see org.apache.lucene.util.MemoryModel#getArraySize()
   */
  @Override
  public int getArraySize() {
    return 16;
  }

  /*
   * (non-Javadoc)
   *
   * @see org.apache.lucene.util.MemoryModel#getClassSize()
   */
  @Override
  public int getClassSize() {
    return 8;
  }

  /**
   * Returns the best-guess size in bytes of the given primitive class.
   *
   * @param clazz one of the eight primitive classes (boolean, byte, char,
   *        short, int, long, float, double)
   * @return the size in bytes
   * @throws IllegalArgumentException if {@code clazz} has no entry in the size
   *         table (for example {@code void.class} or a non-primitive class);
   *         previously this surfaced as a bare NullPointerException
   * @see org.apache.lucene.util.MemoryModel#getPrimitiveSize(java.lang.Class)
   */
  @Override
  public int getPrimitiveSize(Class<?> clazz) {
    final Integer size = SIZES.get(clazz);
    if (size == null) {
      throw new IllegalArgumentException("No primitive size known for class: " + clazz);
    }
    return size.intValue();
  }

  /* (non-Javadoc)
   * @see org.apache.lucene.util.MemoryModel#getReferenceSize()
   */
  @Override
  public int getReferenceSize() {
    return 4;
  }
}

View File

@ -17,6 +17,7 @@ package org.apache.lucene.util;
* limitations under the License.
*/
import java.lang.reflect.Field;
import org.apache.lucene.LucenePackage;
/**
@ -48,22 +49,36 @@ public final class Constants {
@Deprecated
public static final boolean JRE_IS_MINIMUM_JAVA6 =
new Boolean(true).booleanValue(); // prevent inlining in foreign class files
public static final boolean JRE_IS_64BIT;
public static final boolean JRE_IS_MINIMUM_JAVA7;
/** True iff running on a 64bit JVM */
public static final boolean JRE_IS_64BIT;
static {
// NOTE: this logic may not be correct; if you know of a
// more reliable approach please raise it on java-dev!
final String x = System.getProperty("sun.arch.data.model");
if (x != null) {
JRE_IS_64BIT = x.indexOf("64") != -1;
} else {
if (OS_ARCH != null && OS_ARCH.indexOf("64") != -1) {
JRE_IS_64BIT = true;
boolean is64Bit = false;
try {
final Class<?> unsafeClass = Class.forName("sun.misc.Unsafe");
final Field unsafeField = unsafeClass.getDeclaredField("theUnsafe");
unsafeField.setAccessible(true);
final Object unsafe = unsafeField.get(null);
final int addressSize = ((Number) unsafeClass.getMethod("addressSize")
.invoke(unsafe)).intValue();
//System.out.println("Address size: " + addressSize);
is64Bit = addressSize >= 8;
} catch (Exception e) {
final String x = System.getProperty("sun.arch.data.model");
if (x != null) {
is64Bit = x.indexOf("64") != -1;
} else {
JRE_IS_64BIT = false;
if (OS_ARCH != null && OS_ARCH.indexOf("64") != -1) {
is64Bit = true;
} else {
is64Bit = false;
}
}
}
JRE_IS_64BIT = is64Bit;
// this method only exists in Java 7:
boolean v7 = true;

View File

@ -52,16 +52,17 @@ import org.apache.lucene.search.FieldCache.CacheEntry;
*/
public final class FieldCacheSanityChecker {
private RamUsageEstimator ramCalc = null;
private boolean estimateRam;
public FieldCacheSanityChecker() {
/* NOOP */
}
/**
* If set, will be used to estimate size for all CacheEntry objects
* dealt with.
* If set, the estimated size of all CacheEntry objects will be calculated.
*/
public void setRamUsageEstimator(RamUsageEstimator r) {
ramCalc = r;
public void setRamUsageEstimator(boolean flag) {
estimateRam = flag;
}
@ -80,8 +81,7 @@ public final class FieldCacheSanityChecker {
*/
public static Insanity[] checkSanity(CacheEntry... cacheEntries) {
FieldCacheSanityChecker sanityChecker = new FieldCacheSanityChecker();
// doesn't check for interned
sanityChecker.setRamUsageEstimator(new RamUsageEstimator(false));
sanityChecker.setRamUsageEstimator(true);
return sanityChecker.check(cacheEntries);
}
@ -97,9 +97,9 @@ public final class FieldCacheSanityChecker {
if (null == cacheEntries || 0 == cacheEntries.length)
return new Insanity[0];
if (null != ramCalc) {
if (estimateRam) {
for (int i = 0; i < cacheEntries.length; i++) {
cacheEntries[i].estimateSize(ramCalc);
cacheEntries[i].estimateSize();
}
}

View File

@ -1,48 +0,0 @@
package org.apache.lucene.util;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with this
* work for additional information regarding copyright ownership. The ASF
* licenses this file to You under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
/**
* Returns primitive memory sizes for estimating RAM usage.
* All values are supplied by concrete subclasses; this class only declares
* the queries.
*
*/
public abstract class MemoryModel {
/**
* @return size of array beyond contents, i.e. the fixed per-array overhead
* (presumably in bytes, like {@link #getPrimitiveSize(Class)} -- confirm
* against the implementations)
*/
public abstract int getArraySize();
/**
* @return Class size overhead (presumably in bytes -- confirm against the
* implementations)
*/
public abstract int getClassSize();
/**
* @param clazz a primitive Class - boolean, byte, char, short, int, long,
* float or double
* @return the size in bytes of given primitive Class
*/
public abstract int getPrimitiveSize(Class<?> clazz);
/**
* @return size of reference (presumably in bytes -- confirm against the
* implementations)
*/
public abstract int getReferenceSize();
}

View File

@ -17,194 +17,453 @@ package org.apache.lucene.util;
* limitations under the License.
*/
import java.lang.management.ManagementFactory;
import java.lang.reflect.*;
import java.text.DecimalFormat;
import java.text.DecimalFormatSymbols;
import java.util.*;
/**
* Estimates the size of a given Object using a given MemoryModel for primitive
* size information.
*
* Resource Usage:
*
* Internally uses a Map to temporally hold a reference to every
* object seen.
*
* If checkInterned, all Strings checked will be interned, but those
* that were not already interned will be released for GC when the
* estimate is complete.
* Estimates the size of Java objects using a simple memory model
* for primitive size information.
*
* @lucene.internal
*/
public final class RamUsageEstimator {
private RamUsageEstimator() {} // no instance
public final static int NUM_BYTES_BOOLEAN = 1;
public final static int NUM_BYTES_BYTE = 1;
public final static int NUM_BYTES_CHAR = 2;
public final static int NUM_BYTES_SHORT = 2;
public final static int NUM_BYTES_INT = 4;
public final static int NUM_BYTES_LONG = 8;
public final static int NUM_BYTES_FLOAT = 4;
public final static int NUM_BYTES_LONG = 8;
public final static int NUM_BYTES_DOUBLE = 8;
public final static int NUM_BYTES_CHAR = 2;
public final static int NUM_BYTES_OBJECT_HEADER = 8;
public final static int NUM_BYTES_OBJECT_REF = Constants.JRE_IS_64BIT ? 8 : 4;
public final static int NUM_BYTES_ARRAY_HEADER = NUM_BYTES_OBJECT_HEADER + NUM_BYTES_INT + NUM_BYTES_OBJECT_REF;
private MemoryModel memoryModel;
private final Map<Object,Object> seen;
private int refSize;
private int arraySize;
private int classSize;
private boolean checkInterned;
public final static int NUM_BYTES_OBJECT_REF;
public final static int NUM_BYTES_OBJECT_HEADER;
public final static int NUM_BYTES_ARRAY_HEADER;
/**
* A constant specifying the object alignment boundary inside the JVM. Objects will
* always take a full multiple of this constant, possibly wasting some space.
*/
public final static int NUM_BYTES_OBJECT_ALIGNMENT;
/**
* Constructs this object with an AverageGuessMemoryModel and
* checkInterned = true.
* Sizes of primitive classes.
*/
public RamUsageEstimator() {
this(new AverageGuessMemoryModel());
private static final Map<Class<?>,Integer> primitiveSizes;
static {
primitiveSizes = new IdentityHashMap<Class<?>,Integer>();
primitiveSizes.put(boolean.class, Integer.valueOf(NUM_BYTES_BOOLEAN));
primitiveSizes.put(byte.class, Integer.valueOf(NUM_BYTES_BYTE));
primitiveSizes.put(char.class, Integer.valueOf(NUM_BYTES_CHAR));
primitiveSizes.put(short.class, Integer.valueOf(NUM_BYTES_SHORT));
primitiveSizes.put(int.class, Integer.valueOf(NUM_BYTES_INT));
primitiveSizes.put(float.class, Integer.valueOf(NUM_BYTES_FLOAT));
primitiveSizes.put(double.class, Integer.valueOf(NUM_BYTES_DOUBLE));
primitiveSizes.put(long.class, Integer.valueOf(NUM_BYTES_LONG));
}
private final static Object theUnsafe;
private final static Method objectFieldOffsetMethod;
private final static boolean useUnsafe, isSupportedJVM;
/**
* @param checkInterned check if Strings are interned and don't add to size
* if they are. Defaults to true but if you know the objects you are checking
* won't likely contain many interned Strings, it will be faster to turn off
* intern checking.
* Initialize constants and try to collect information about the JVM internals.
*/
public RamUsageEstimator(boolean checkInterned) {
this(new AverageGuessMemoryModel(), checkInterned);
}
static {
// Initialize empirically measured defaults. We'll modify them to the current
// JVM settings later on if possible.
int referenceSize = Constants.JRE_IS_64BIT ? 8 : 4;
int objectHeader = Constants.JRE_IS_64BIT ? 16 : 8;
// The following is objectHeader + NUM_BYTES_INT, but aligned (object alignment)
// so on 64 bit JVMs it'll be align(16 + 4, @8) = 24.
int arrayHeader = Constants.JRE_IS_64BIT ? 24 : 12;
/**
* @param memoryModel MemoryModel to use for primitive object sizes.
*/
public RamUsageEstimator(MemoryModel memoryModel) {
this(memoryModel, true);
}
Object unsafe = null;
Method objectFieldOffsetM = null;
boolean supportedJvm = true;
try {
final Class<?> unsafeClass = Class.forName("sun.misc.Unsafe");
final Field unsafeField = unsafeClass.getDeclaredField("theUnsafe");
unsafeField.setAccessible(true);
unsafe = unsafeField.get(null);
// get object reference size by getting scale factor of Object[] arrays:
try {
final Method arrayIndexScaleM = unsafeClass.getMethod("arrayIndexScale", Class.class);
referenceSize = ((Number) arrayIndexScaleM.invoke(unsafe, Object[].class)).intValue();
} catch (Exception e) {
// ignore
supportedJvm = false;
}
// updated best guess based on reference size:
objectHeader = Constants.JRE_IS_64BIT ? (8 + referenceSize) : 8;
arrayHeader = Constants.JRE_IS_64BIT ? (8 + 2 * referenceSize) : 12;
// get the object header size:
// - first try out if the field offsets are not scaled (see warning in Unsafe docs)
// - get the object header size by getting the field offset of the first field of a dummy object
// If the scaling is byte-wise and unsafe is available, enable dynamic size measurement for
// estimateRamUsage().
try {
objectFieldOffsetM = unsafeClass.getMethod("objectFieldOffset", Field.class);
final Field dummy1Field = DummyTwoLongObject.class.getDeclaredField("dummy1");
final int ofs1 = ((Number) objectFieldOffsetM.invoke(unsafe, dummy1Field)).intValue();
final Field dummy2Field = DummyTwoLongObject.class.getDeclaredField("dummy2");
final int ofs2 = ((Number) objectFieldOffsetM.invoke(unsafe, dummy2Field)).intValue();
if (Math.abs(ofs2 - ofs1) == NUM_BYTES_LONG) {
final Field baseField = DummyOneFieldObject.class.getDeclaredField("base");
objectHeader = ((Number) objectFieldOffsetM.invoke(unsafe, baseField)).intValue();
} else {
// it is not safe to use Unsafe.objectFieldOffset(),
// as it may be scaled (see "cookie" comment in Unsafe), better use defaults
// and conventional size estimation:
objectFieldOffsetM = null;
supportedJvm = false;
}
} catch (Exception e) {
// on exception ensure useUnsafe will be set to false later:
objectFieldOffsetM = null;
supportedJvm = false;
}
/**
* @param memoryModel MemoryModel to use for primitive object sizes.
* @param checkInterned check if Strings are interned and don't add to size
* if they are. Defaults to true but if you know the objects you are checking
* won't likely contain many interned Strings, it will be faster to turn off
* intern checking.
*/
public RamUsageEstimator(MemoryModel memoryModel, boolean checkInterned) {
this.memoryModel = memoryModel;
this.checkInterned = checkInterned;
// Use Map rather than Set so that we can use an IdentityHashMap - not
// seeing an IdentityHashSet
seen = new IdentityHashMap<Object,Object>(64);
this.refSize = memoryModel.getReferenceSize();
this.arraySize = memoryModel.getArraySize();
this.classSize = memoryModel.getClassSize();
}
public long estimateRamUsage(Object obj) {
long size = size(obj);
seen.clear();
return size;
}
private long size(Object obj) {
if (obj == null) {
return 0;
// Get the array header size by retrieving the array base offset
// (offset of the first element of an array).
try {
final Method arrayBaseOffsetM = unsafeClass.getMethod("arrayBaseOffset", Class.class);
// we calculate that only for byte[] arrays, it's actually the same for all types:
arrayHeader = ((Number) arrayBaseOffsetM.invoke(unsafe, byte[].class)).intValue();
} catch (Exception e) {
// ignore
supportedJvm = false;
}
} catch (Exception e) {
// ignore
supportedJvm = false;
}
// interned not part of this object
if (checkInterned && obj instanceof String
&& obj == ((String) obj).intern()) { // interned string will be eligible
// for GC on
// estimateRamUsage(Object) return
NUM_BYTES_OBJECT_REF = referenceSize;
NUM_BYTES_OBJECT_HEADER = objectHeader;
NUM_BYTES_ARRAY_HEADER = arrayHeader;
useUnsafe = (unsafe != null && objectFieldOffsetM != null);
if (useUnsafe) {
theUnsafe = unsafe;
objectFieldOffsetMethod = objectFieldOffsetM;
} else {
theUnsafe = objectFieldOffsetMethod = null;
}
// Try to get the object alignment (the default seems to be 8 on Hotspot,
// regardless of the architecture).
int objectAlignment = 8;
try {
final Class<?> beanClazz = Class.forName("com.sun.management.HotSpotDiagnosticMXBean");
final Object hotSpotBean = ManagementFactory.newPlatformMXBeanProxy(
ManagementFactory.getPlatformMBeanServer(),
"com.sun.management:type=HotSpotDiagnostic",
beanClazz
);
final Method getVMOptionMethod = beanClazz.getMethod("getVMOption", String.class);
try {
final Object vmOption = getVMOptionMethod.invoke(hotSpotBean, "ObjectAlignmentInBytes");
objectAlignment = Integer.parseInt(
vmOption.getClass().getMethod("getValue").invoke(vmOption).toString()
);
} catch (InvocationTargetException ite) {
if (!(ite.getCause() instanceof IllegalArgumentException))
throw ite;
// ignore the error completely and use default of 8 (32 bit JVMs).
}
} catch (Exception e) {
// ignore
supportedJvm = false;
}
NUM_BYTES_OBJECT_ALIGNMENT = objectAlignment;
isSupportedJVM = supportedJvm;
}
// Probe object with just one field: the object header size is determined by
// asking Unsafe for the offset of that field. The field is never read or
// written directly (hence the "unused" suppression); it is looked up
// reflectively by its name "base", so it must not be renamed.
@SuppressWarnings("unused")
private static final class DummyOneFieldObject {
public byte base;
}
// Second probe object, used to check whether the difference between the
// offsets of dummy1 and dummy2 is 8 bytes (the size of a long).
// Only then we can be sure that those are real, unscaled offsets.
// Both fields are looked up reflectively by name ("dummy1", "dummy2"),
// so they must not be renamed.
@SuppressWarnings("unused")
private static final class DummyTwoLongObject {
public long dummy1, dummy2;
}
/**
* Returns true, if the current JVM is supported by {@code RamUsageEstimator}.
* If this method returns {@code false} you are maybe using a 3rd party Java VM
* that is not supporting Oracle/Sun private APIs. The memory estimates can be
* imprecise then (no way of detecting compressed references, alignments, etc.).
* Lucene still tries to use sensible defaults.
*/
public static boolean isSupportedJVM() {
return isSupportedJVM;
}
/**
 * Rounds the given object size up to the next multiple of
 * {@link #NUM_BYTES_OBJECT_ALIGNMENT}; sizes that already are a multiple
 * are returned unchanged.
 */
public static long alignObjectSize(long size) {
  final long padded = size + ((long) NUM_BYTES_OBJECT_ALIGNMENT - 1L);
  final long excess = padded % NUM_BYTES_OBJECT_ALIGNMENT;
  return padded - excess;
}
/** Returns the aligned size in bytes of the given byte[] (array header plus one byte per element). */
public static long sizeOf(byte[] arr) {
  final long unaligned = (long) NUM_BYTES_ARRAY_HEADER + arr.length;
  return alignObjectSize(unaligned);
}

/** Returns the aligned size in bytes of the given boolean[] (array header plus one byte per element). */
public static long sizeOf(boolean[] arr) {
  final long unaligned = (long) NUM_BYTES_ARRAY_HEADER + arr.length;
  return alignObjectSize(unaligned);
}

/** Returns the aligned size in bytes of the given char[] (array header plus elements). */
public static long sizeOf(char[] arr) {
  final long elements = (long) NUM_BYTES_CHAR * arr.length;
  return alignObjectSize((long) NUM_BYTES_ARRAY_HEADER + elements);
}

/** Returns the aligned size in bytes of the given short[] (array header plus elements). */
public static long sizeOf(short[] arr) {
  final long elements = (long) NUM_BYTES_SHORT * arr.length;
  return alignObjectSize((long) NUM_BYTES_ARRAY_HEADER + elements);
}

/** Returns the aligned size in bytes of the given int[] (array header plus elements). */
public static long sizeOf(int[] arr) {
  final long elements = (long) NUM_BYTES_INT * arr.length;
  return alignObjectSize((long) NUM_BYTES_ARRAY_HEADER + elements);
}

/** Returns the aligned size in bytes of the given float[] (array header plus elements). */
public static long sizeOf(float[] arr) {
  final long elements = (long) NUM_BYTES_FLOAT * arr.length;
  return alignObjectSize((long) NUM_BYTES_ARRAY_HEADER + elements);
}

/** Returns the aligned size in bytes of the given long[] (array header plus elements). */
public static long sizeOf(long[] arr) {
  final long elements = (long) NUM_BYTES_LONG * arr.length;
  return alignObjectSize((long) NUM_BYTES_ARRAY_HEADER + elements);
}

/** Returns the aligned size in bytes of the given double[] (array header plus elements). */
public static long sizeOf(double[] arr) {
  final long elements = (long) NUM_BYTES_DOUBLE * arr.length;
  return alignObjectSize((long) NUM_BYTES_ARRAY_HEADER + elements);
}
/**
* Estimates the RAM usage by the given object. It will
* walk the object tree and sum up all referenced objects.
*
* <p><b>Resource Usage:</b> This method internally uses a set of
* every object seen during traversals so it does allocate memory
* (it isn't side-effect free). After the method exits, this memory
* should be GCed.</p>
*/
public static long sizeOf(Object obj) {
final Set<Object> seen = Collections.newSetFromMap(new IdentityHashMap<Object,Boolean>(64));
try {
return measureObjectSize(obj, seen);
} finally {
// Help the GC.
seen.clear();
}
}
/**
 * Estimates the "shallow" memory usage of the given object, i.e. without
 * following any references. Arrays are measured through
 * {@code measureArraySize} with no "seen" set, all other objects through
 * {@link #shallowSizeOfInstance(Class)} of their runtime class.
 * {@code null} counts as 0 bytes.
 *
 * JVM object alignments are also applied.
 */
public static long shallowSizeOf(Object obj) {
  if (obj == null) {
    return 0;
  }
  final Class<?> type = obj.getClass();
  return type.isArray()
      ? measureArraySize(obj, null)
      : shallowSizeOfInstance(type);
}
/**
 * Returns the shallow instance size in bytes an instance of the given class would occupy.
 * This works with all conventional classes and primitive types, but not with arrays
 * (the size then depends on the number of elements and varies from object to object).
 * Use the array-instance methods instead.
 *
 * <p>For a primitive type the plain size of the primitive is returned (no
 * header, no alignment). For any other class the non-static fields of the
 * class hierarchy are accumulated on top of the object header and the result
 * is aligned.
 *
 * @param clazz the class to measure
 * @return the shallow size in bytes
 * @throws IllegalArgumentException if {@code clazz} is an array class, or is
 *         reported as primitive but has no known size ({@code void.class})
 */
public static long shallowSizeOfInstance(Class<?> clazz) {
  if (clazz.isArray())
    throw new IllegalArgumentException("This method does not work with array classes.");
  if (clazz.isPrimitive()) {
    // Class#isPrimitive() is also true for void.class, which has no entry in
    // the size table; fail with a clear message instead of a
    // NullPointerException from auto-unboxing.
    final Integer primitiveSize = primitiveSizes.get(clazz);
    if (primitiveSize == null) {
      throw new IllegalArgumentException(
          "This method does not work with " + clazz.getName() + ".");
    }
    return primitiveSize.intValue();
  }

  long size = NUM_BYTES_OBJECT_HEADER;

  // Walk type hierarchy
  while (clazz != null) {
    final Field[] fields = clazz.getDeclaredFields();
    boolean fieldFound = false;
    for (final Field f : fields) {
      if (Modifier.isStatic(f.getModifiers())) {
        continue;
      }

      size = reflectFieldSize(size, f);
      fieldFound = true;
    }
    if (useUnsafe && fieldFound) {
      // no need to recurse to superclasses, as all fields are
      // added at the end, so we won't find any larger offset
      break;
    }
    clazz = clazz.getSuperclass();
  }
  return alignObjectSize(size);
}
/**
* Recursive descend into an object.
*/
private static long measureObjectSize(Object obj, Set<Object> seen) {
if (obj == null) {
return 0;
}
// skip if we have seen before
if (seen.containsKey(obj)) {
if (seen.contains(obj)) {
return 0;
}
// add to seen
seen.put(obj, null);
seen.add(obj);
Class<?> clazz = obj.getClass();
if (clazz.isArray()) {
return sizeOfArray(obj);
return measureArraySize(obj, seen);
}
long size = 0;
long size = NUM_BYTES_OBJECT_HEADER;
long innerSize = 0L;
// walk type hierarchy
while (clazz != null) {
Field[] fields = clazz.getDeclaredFields();
for (int i = 0; i < fields.length; i++) {
if (Modifier.isStatic(fields[i].getModifiers())) {
final Field[] fields = clazz.getDeclaredFields();
for (final Field f : fields) {
if (Modifier.isStatic(f.getModifiers())) {
continue;
}
if (fields[i].getType().isPrimitive()) {
size += memoryModel.getPrimitiveSize(fields[i].getType());
} else {
size += refSize;
fields[i].setAccessible(true);
size = reflectFieldSize(size, f);
if (!f.getType().isPrimitive()) {
try {
Object value = fields[i].get(obj);
if (value != null) {
size += size(value);
}
f.setAccessible(true);
innerSize += measureObjectSize(f.get(obj), seen);
} catch (IllegalAccessException ex) {
// ignore for now?
// this should never happen as we enable setAccessible()!
throw new RuntimeException("Cannot reflect instance field: " +
f.getDeclaringClass().getName() + "#" + f.getName(), ex);
}
}
}
clazz = clazz.getSuperclass();
}
size += classSize;
return size;
return alignObjectSize(size) + innerSize;
}
/**
 * Accounts for one instance field in a running shallow size.
 *
 * <p>Without Unsafe the field's size (primitive size or reference size) is
 * simply added to {@code size}. With Unsafe the field's offset plus its size
 * is computed and the larger of that value and {@code size} is returned.
 *
 * @param size the size accumulated so far
 * @param f the instance field to account for
 * @return the updated size
 */
private static long reflectFieldSize(long size, final Field f) {
  final Class<?> fieldType = f.getType();
  final int fieldBytes;
  if (fieldType.isPrimitive()) {
    fieldBytes = primitiveSizes.get(fieldType);
  } else {
    fieldBytes = NUM_BYTES_OBJECT_REF;
  }

  if (!useUnsafe) {
    return size + fieldBytes;
  }

  try {
    final Number offset = (Number) objectFieldOffsetMethod.invoke(theUnsafe, f);
    final long endOfField = offset.longValue() + fieldBytes;
    return Math.max(size, endOfField);
  } catch (IllegalAccessException ex) {
    throw new RuntimeException("Access problem with sun.misc.Unsafe", ex);
  } catch (InvocationTargetException ite) {
    // unwrap and rethrow unchecked problems raised by Unsafe itself
    final Throwable cause = ite.getCause();
    if (cause instanceof RuntimeException) {
      throw (RuntimeException) cause;
    }
    if (cause instanceof Error) {
      throw (Error) cause;
    }
    // this should never happen (Unsafe does not declare
    // checked Exceptions for this method), but who knows?
    throw new RuntimeException("Call to Unsafe's objectFieldOffset() throwed "+
        "checked Exception when accessing field " +
        f.getDeclaringClass().getName() + "#" + f.getName(), cause);
  }
}
private long sizeOfArray(Object obj) {
int len = Array.getLength(obj);
if (len == 0) {
return 0;
}
long size = arraySize;
Class<?> arrayElementClazz = obj.getClass().getComponentType();
if (arrayElementClazz.isPrimitive()) {
size += len * memoryModel.getPrimitiveSize(arrayElementClazz);
} else {
for (int i = 0; i < len; i++) {
size += refSize + size(Array.get(obj, i));
/**
* Return the deep size of an <code>array</code>, including
* sub-objects if there are any.
*
* @param seen A set of already seen objects. If <code>null</code> no references
* are followed and this method returns shallow size.
*/
private static long measureArraySize(Object array, Set<Object> seen) {
long size = NUM_BYTES_ARRAY_HEADER;
final int len = Array.getLength(array);
if (len > 0) {
Class<?> arrayElementClazz = array.getClass().getComponentType();
if (arrayElementClazz.isPrimitive()) {
size += (long) len * primitiveSizes.get(arrayElementClazz);
} else {
size += (long) NUM_BYTES_OBJECT_REF * len;
if (seen != null) {
for (int i = 0; i < len; i++) {
size += measureObjectSize(Array.get(array, i), seen);
}
}
}
}
return size;
return alignObjectSize(size);
}
private static final long ONE_KB = 1024;
private static final long ONE_MB = ONE_KB * ONE_KB;
private static final long ONE_GB = ONE_KB * ONE_MB;
public static final long ONE_KB = 1024;
public static final long ONE_MB = ONE_KB * ONE_KB;
public static final long ONE_GB = ONE_KB * ONE_MB;
/**
* Return good default units based on byte size.
* Returns <code>size</code> in human-readable units (GB, MB, KB or bytes).
*/
public static String humanReadableUnits(long bytes) {
return humanReadableUnits(bytes,
new DecimalFormat("0.#", DecimalFormatSymbols.getInstance(Locale.ENGLISH)));
}
/**
* Returns <code>size</code> in human-readable units (GB, MB, KB or bytes).
*/
public static String humanReadableUnits(long bytes, DecimalFormat df) {
String newSizeAndUnits;
if (bytes / ONE_GB > 0) {
newSizeAndUnits = String.valueOf(df.format((float) bytes / ONE_GB))
+ " GB";
return df.format((float) bytes / ONE_GB) + " GB";
} else if (bytes / ONE_MB > 0) {
newSizeAndUnits = String.valueOf(df.format((float) bytes / ONE_MB))
+ " MB";
return df.format((float) bytes / ONE_MB) + " MB";
} else if (bytes / ONE_KB > 0) {
newSizeAndUnits = String.valueOf(df.format((float) bytes / ONE_KB))
+ " KB";
return df.format((float) bytes / ONE_KB) + " KB";
} else {
newSizeAndUnits = String.valueOf(bytes) + " bytes";
return bytes + " bytes";
}
return newSizeAndUnits;
}
}

View File

@ -78,8 +78,7 @@ class Direct16 extends PackedInts.ReaderImpl
}
public long ramBytesUsed() {
return RamUsageEstimator.NUM_BYTES_ARRAY_HEADER +
values.length * RamUsageEstimator.NUM_BYTES_SHORT;
return RamUsageEstimator.sizeOf(values);
}
public void clear() {

View File

@ -74,8 +74,7 @@ class Direct32 extends PackedInts.ReaderImpl
}
public long ramBytesUsed() {
return RamUsageEstimator.NUM_BYTES_ARRAY_HEADER +
values.length * RamUsageEstimator.NUM_BYTES_INT;
return RamUsageEstimator.sizeOf(values);
}
public void clear() {

View File

@ -70,8 +70,7 @@ class Direct64 extends PackedInts.ReaderImpl
}
public long ramBytesUsed() {
return RamUsageEstimator.NUM_BYTES_ARRAY_HEADER +
values.length * RamUsageEstimator.NUM_BYTES_LONG;
return RamUsageEstimator.sizeOf(values);
}
public void clear() {

View File

@ -79,7 +79,7 @@ class Direct8 extends PackedInts.ReaderImpl
}
public long ramBytesUsed() {
return RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + values.length;
return RamUsageEstimator.sizeOf(values);
}
public void clear() {

View File

@ -222,7 +222,6 @@ class Packed32 extends PackedInts.ReaderImpl implements PackedInts.Mutable {
}
public long ramBytesUsed() {
return RamUsageEstimator.NUM_BYTES_ARRAY_HEADER
+ blocks.length * RamUsageEstimator.NUM_BYTES_INT;
return RamUsageEstimator.sizeOf(blocks);
}
}

View File

@ -207,8 +207,7 @@ class Packed64 extends PackedInts.ReaderImpl implements PackedInts.Mutable {
}
public long ramBytesUsed() {
return RamUsageEstimator.NUM_BYTES_ARRAY_HEADER
+ blocks.length * RamUsageEstimator.NUM_BYTES_LONG;
return RamUsageEstimator.sizeOf(blocks);
}
public void clear() {

View File

@ -0,0 +1,148 @@
package org.apache.lucene.util;
import java.lang.management.GarbageCollectorMXBean;
import java.lang.management.ManagementFactory;
import java.lang.management.MemoryMXBean;
import java.lang.management.MemoryUsage;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Locale;
import java.util.Random;
import org.junit.Ignore;
/**
* Checks how well {@link RamUsageEstimator} estimates the physical memory
* consumption of Java objects.
*/
public class StressRamUsageEstimator extends LuceneTestCase {
static class Entry {
Object o;
Entry next;
public Entry createNext(Object o) {
Entry e = new Entry();
e.o = o;
e.next = next;
this.next = e;
return e;
}
}
// This shows an easy stack overflow because we're counting recursively.
@Ignore
public void testChainedEstimation() {
MemoryMXBean memoryMXBean = ManagementFactory.getMemoryMXBean();
Random rnd = random;
Entry first = new Entry();
try {
while (true) {
// Check the current memory consumption and provide the estimate.
long jvmUsed = memoryMXBean.getHeapMemoryUsage().getUsed();
long estimated = RamUsageEstimator.sizeOf(first);
System.out.println(String.format(Locale.ENGLISH, "%10d, %10d",
jvmUsed, estimated));
// Make a batch of objects.
for (int i = 0; i < 5000; i++) {
first.createNext(new byte[rnd.nextInt(1024)]);
}
}
} catch (OutOfMemoryError e) {
// Release and quit.
}
}
volatile Object guard;
// This shows an easy stack overflow because we're counting recursively.
public void testLargeSetOfByteArrays() {
MemoryMXBean memoryMXBean = ManagementFactory.getMemoryMXBean();
causeGc();
long before = memoryMXBean.getHeapMemoryUsage().getUsed();
Object [] all = new Object [1000000];
for (int i = 0; i < all.length; i++) {
all[i] = new byte[random.nextInt(3)];
}
causeGc();
long after = memoryMXBean.getHeapMemoryUsage().getUsed();
System.out.println("mx: " + RamUsageEstimator.humanReadableUnits(after - before));
System.out.println("rue: " + RamUsageEstimator.humanReadableUnits(shallowSizeOf(all)));
guard = all;
}
private long shallowSizeOf(Object[] all) {
long s = RamUsageEstimator.shallowSizeOf(all);
for (Object o : all) {
s+= RamUsageEstimator.shallowSizeOf(o);
}
return s;
}
private long shallowSizeOf(Object[][] all) {
long s = RamUsageEstimator.shallowSizeOf(all);
for (Object[] o : all) {
s += RamUsageEstimator.shallowSizeOf(o);
for (Object o2 : o) {
s += RamUsageEstimator.shallowSizeOf(o2);
}
}
return s;
}
public void testSimpleByteArrays() {
MemoryMXBean memoryMXBean = ManagementFactory.getMemoryMXBean();
Object [][] all = new Object [0][];
try {
while (true) {
// Check the current memory consumption and provide the estimate.
causeGc();
MemoryUsage mu = memoryMXBean.getHeapMemoryUsage();
long estimated = shallowSizeOf(all);
if (estimated > 50 * RamUsageEstimator.ONE_MB) {
break;
}
System.out.println(String.format(Locale.ENGLISH, "%10s\t%10s\t%10s",
RamUsageEstimator.humanReadableUnits(mu.getUsed()),
RamUsageEstimator.humanReadableUnits(mu.getMax()),
RamUsageEstimator.humanReadableUnits(estimated)));
// Make another batch of objects.
Object[] seg = new Object[10000];
all = Arrays.copyOf(all, all.length + 1);
all[all.length - 1] = seg;
for (int i = 0; i < seg.length; i++) {
seg[i] = new byte[random.nextInt(7)];
}
}
} catch (OutOfMemoryError e) {
// Release and quit.
}
}
/**
* Very hacky, very crude, but (sometimes) works.
* Don't look, it will burn your eyes out.
*/
private void causeGc() {
List<GarbageCollectorMXBean> garbageCollectorMXBeans = ManagementFactory.getGarbageCollectorMXBeans();
List<Long> ccounts = new ArrayList<Long>();
for (GarbageCollectorMXBean g : garbageCollectorMXBeans) {
ccounts.add(g.getCollectionCount());
}
List<Long> ccounts2 = new ArrayList<Long>();
do {
System.gc();
ccounts.clear();
for (GarbageCollectorMXBean g : garbageCollectorMXBeans) {
ccounts2.add(g.getCollectionCount());
}
} while (ccounts2.equals(ccounts));
}
}

View File

@ -1,5 +1,9 @@
package org.apache.lucene.util;
import static org.apache.lucene.util.RamUsageEstimator.*;
import java.util.Random;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@ -17,28 +21,93 @@ package org.apache.lucene.util;
* limitations under the License.
*/
import org.apache.lucene.util.LuceneTestCase;
public class TestRamUsageEstimator extends LuceneTestCase {
/**
 * Sanity checks on relative sizes: estimates that follow references must exceed
 * the shallow size of the same type, and a subclass without fields of its own
 * must have the same shallow instance size as its superclass.
 */
public void testBasic() {
  final RamUsageEstimator estimator = new RamUsageEstimator();
  estimator.estimateRamUsage("test str");
  estimator.estimateRamUsage("test strin");
  assertTrue(sizeOf(new String("test strin")) > shallowSizeOfInstance(String.class));

  // A holder that references a second holder.
  final Holder outer = new Holder();
  final Holder inner = new Holder("string2", 5000L);
  outer.holder = inner;
  estimator.estimateRamUsage(outer);
  assertTrue(sizeOf(outer) > shallowSizeOfInstance(Holder.class));
  assertTrue(sizeOf(outer) > sizeOf(outer.holder));

  // Subclass with extra fields vs. subclass that only inherits.
  assertTrue(shallowSizeOfInstance(HolderSubclass.class) >= shallowSizeOfInstance(Holder.class));
  assertEquals(shallowSizeOfInstance(Holder.class), shallowSizeOfInstance(HolderSubclass2.class));

  // An array of strings must be estimated larger than the bare array.
  final String[] samples = {
      new String("test strin"),
      new String("hollow"),
      new String("catchmaster")
  };
  estimator.estimateRamUsage(samples);
  assertTrue(sizeOf(samples) > shallowSizeOf(samples));
}
/**
 * For every primitive array type, checks that {@code sizeOf} called with the
 * array's own static type returns the same value as {@code sizeOf} called with
 * the array cast to {@code Object}. Array lengths are random, below 1024.
 */
public void testStaticOverloads() {
  final Random lengths = random;

  final byte[] bytes = new byte[lengths.nextInt(1024)];
  assertEquals(sizeOf(bytes), sizeOf((Object) bytes));

  final boolean[] booleans = new boolean[lengths.nextInt(1024)];
  assertEquals(sizeOf(booleans), sizeOf((Object) booleans));

  final char[] chars = new char[lengths.nextInt(1024)];
  assertEquals(sizeOf(chars), sizeOf((Object) chars));

  final short[] shorts = new short[lengths.nextInt(1024)];
  assertEquals(sizeOf(shorts), sizeOf((Object) shorts));

  final int[] ints = new int[lengths.nextInt(1024)];
  assertEquals(sizeOf(ints), sizeOf((Object) ints));

  final float[] floats = new float[lengths.nextInt(1024)];
  assertEquals(sizeOf(floats), sizeOf((Object) floats));

  final long[] longs = new long[lengths.nextInt(1024)];
  assertEquals(sizeOf(longs), sizeOf((Object) longs));

  final double[] doubles = new double[lengths.nextInt(1024)];
  assertEquals(sizeOf(doubles), sizeOf((Object) doubles));
}
/**
 * Checks that the detected object reference size is 4 or 8 bytes, and exactly 4
 * on a JVM that is not 64 bit. Prints a warning when the JVM is reported as
 * unsupported and, in verbose mode, the detected layout constants.
 */
public void testReferenceSize() {
  final boolean supported = isSupportedJVM();
  if (!supported) {
    System.err.println("WARN: Your JVM does not support the Oracle/Sun extensions (Hotspot diagnostics, sun.misc.Unsafe),");
    System.err.println("so the memory estimates may be inprecise.");
    System.err.println("Please report this to the Lucene mailing list, noting your JVM version: " +
        Constants.JAVA_VENDOR + " " + Constants.JAVA_VERSION);
  }

  if (VERBOSE) {
    System.out.println("This JVM is 64bit: " + Constants.JRE_IS_64BIT);
    System.out.println("Reference size in this JVM: " + NUM_BYTES_OBJECT_REF);
    System.out.println("Object header size in this JVM: " + NUM_BYTES_OBJECT_HEADER);
    System.out.println("Array header size in this JVM: " + NUM_BYTES_ARRAY_HEADER);
    System.out.println("Object alignment in this JVM: " + NUM_BYTES_OBJECT_ALIGNMENT);
  }

  assertTrue(NUM_BYTES_OBJECT_REF == 4 || NUM_BYTES_OBJECT_REF == 8);

  // Only a JVM that is not 64 bit has a fixed expectation for the reference size.
  if (Constants.JRE_IS_64BIT) {
    return;
  }
  assertEquals("For 32bit JVMs, reference size must always be 4", 4, NUM_BYTES_OBJECT_REF);
}
private static final class Holder {
@SuppressWarnings("unused")
private static class Holder {
long field1 = 5000L;
String name = "name";
Holder holder;
long field2, field3, field4;
Holder() {
}
@ -48,4 +117,14 @@ public class TestRamUsageEstimator extends LuceneTestCase {
this.field1 = field1;
}
}
// Test fixture: extends Holder with two primitive fields of its own (a byte and an int).
// testBasic expects its shallow instance size to be at least that of Holder.
@SuppressWarnings("unused")
private static class HolderSubclass extends Holder {
byte foo;
int bar;
}
// Test fixture: extends Holder without declaring any field; testBasic expects its
// shallow instance size to equal Holder's.
private static class HolderSubclass2 extends Holder {
// empty, only inherits all fields -> size should be identical to superclass
}
}

View File

@ -131,13 +131,12 @@ public class LookupBenchmarkTest extends LuceneTestCase {
*/
public void testStorageNeeds() throws Exception {
System.err.println("-- RAM consumption");
// NOTE(review): the String.format call below carries two alternative final
// arguments -- the instance call rue.estimateRamUsage(lookup) and the static call
// RamUsageEstimator.sizeOf(lookup). Presumably the static call supersedes the
// instance one (making `rue` unnecessary) -- confirm and keep only one.
final RamUsageEstimator rue = new RamUsageEstimator();
for (Class<? extends Lookup> cls : benchmarkClasses) {
// Build each benchmarked Lookup from the shared dictionary input, then print its
// simple class name and its estimated size in bytes to stderr.
Lookup lookup = buildLookup(cls, dictionaryInput);
System.err.println(
String.format(Locale.ENGLISH, "%-15s size[B]:%,13d",
lookup.getClass().getSimpleName(),
rue.estimateRamUsage(lookup)));
RamUsageEstimator.sizeOf(lookup)));
}
}