diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 2446566ba8a..b6dd3d091e2 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -164,6 +164,12 @@ Other
* LUCENE-8879: Improve BKDRadixSelector tests. (Ignacio Vera)
+======================= Lucene 8.1.2 =======================
+
+Improvements
+
+* LUCENE-8855: Add Accountable to some Query implementations (ab, Adrien Grand)
+
======================= Lucene 8.1.1 =======================
(No Changes)
diff --git a/lucene/core/src/java/org/apache/lucene/index/Term.java b/lucene/core/src/java/org/apache/lucene/index/Term.java
index feb31e080eb..4ee8b42dd09 100644
--- a/lucene/core/src/java/org/apache/lucene/index/Term.java
+++ b/lucene/core/src/java/org/apache/lucene/index/Term.java
@@ -23,8 +23,10 @@ import java.nio.charset.CharsetDecoder;
import java.nio.charset.CodingErrorAction;
import java.nio.charset.StandardCharsets;
+import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
+import org.apache.lucene.util.RamUsageEstimator;
/**
A Term represents a word from text. This is the unit of search. It is
@@ -34,7 +36,10 @@ import org.apache.lucene.util.BytesRefBuilder;
Note that terms may represent more than words from text fields, but also
things like dates, email addresses, urls, etc. */
-public final class Term implements Comparable {
+public final class Term implements Comparable, Accountable {
+ private static final long BASE_RAM_BYTES = RamUsageEstimator.shallowSizeOfInstance(Term.class) +
+ RamUsageEstimator.shallowSizeOfInstance(BytesRef.class);
+
String field;
BytesRef bytes;
@@ -162,4 +167,11 @@ public final class Term implements Comparable {
@Override
public final String toString() { return field + ":" + text(); }
+
+ @Override
+ public long ramBytesUsed() {
+ // size the byte[] via sizeOf(byte[]) so header + payload are rounded up to object alignment
+ return BASE_RAM_BYTES + RamUsageEstimator.sizeOfObject(field) +
+ (bytes != null ? RamUsageEstimator.sizeOf(bytes.bytes) : 0L);
+ }
}
diff --git a/lucene/core/src/java/org/apache/lucene/search/AutomatonQuery.java b/lucene/core/src/java/org/apache/lucene/search/AutomatonQuery.java
index a4a53442ea4..ed71c4a0dbe 100644
--- a/lucene/core/src/java/org/apache/lucene/search/AutomatonQuery.java
+++ b/lucene/core/src/java/org/apache/lucene/search/AutomatonQuery.java
@@ -22,7 +22,9 @@ import java.io.IOException;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.AttributeSource;
+import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.automaton.Automaton;
import org.apache.lucene.util.automaton.CompiledAutomaton;
import org.apache.lucene.util.automaton.Operations;
@@ -45,7 +47,9 @@ import org.apache.lucene.util.automaton.Operations;
*
* @lucene.experimental
*/
-public class AutomatonQuery extends MultiTermQuery {
+public class AutomatonQuery extends MultiTermQuery implements Accountable {
+ private static final long BASE_RAM_BYTES = RamUsageEstimator.shallowSizeOfInstance(AutomatonQuery.class);
+
/** the automaton to match index terms against */
protected final Automaton automaton;
protected final CompiledAutomaton compiled;
@@ -53,6 +57,8 @@ public class AutomatonQuery extends MultiTermQuery {
protected final Term term;
protected final boolean automatonIsBinary;
+ private final long ramBytesUsed; // cache
+
/**
* Create a new AutomatonQuery from an {@link Automaton}.
*
@@ -102,6 +108,8 @@ public class AutomatonQuery extends MultiTermQuery {
this.automatonIsBinary = isBinary;
// TODO: we could take isFinite too, to save a bit of CPU in CompiledAutomaton ctor?:
this.compiled = new CompiledAutomaton(automaton, null, true, maxDeterminizedStates, isBinary);
+
+ this.ramBytesUsed = BASE_RAM_BYTES + term.ramBytesUsed() + automaton.ramBytesUsed() + compiled.ramBytesUsed();
}
@Override
@@ -168,4 +176,9 @@ public class AutomatonQuery extends MultiTermQuery {
public boolean isAutomatonBinary() {
return automatonIsBinary;
}
+
+ @Override
+ public long ramBytesUsed() {
+ return ramBytesUsed; // estimate cached in the final field when the query is constructed
+ }
}
diff --git a/lucene/core/src/java/org/apache/lucene/search/LRUQueryCache.java b/lucene/core/src/java/org/apache/lucene/search/LRUQueryCache.java
index ae62c574ab0..f84afc20e37 100644
--- a/lucene/core/src/java/org/apache/lucene/search/LRUQueryCache.java
+++ b/lucene/core/src/java/org/apache/lucene/search/LRUQueryCache.java
@@ -88,7 +88,7 @@ public class LRUQueryCache implements QueryCache, Accountable {
// approximate memory usage that we assign to all queries
// this maps roughly to a BooleanQuery with a couple term clauses
- static final long QUERY_DEFAULT_RAM_BYTES_USED = 1024;
+ static final long QUERY_DEFAULT_RAM_BYTES_USED = RamUsageEstimator.QUERY_DEFAULT_RAM_BYTES_USED;
static final long HASHTABLE_RAM_BYTES_PER_ENTRY =
2 * RamUsageEstimator.NUM_BYTES_OBJECT_REF // key + value
diff --git a/lucene/core/src/java/org/apache/lucene/search/PointInSetQuery.java b/lucene/core/src/java/org/apache/lucene/search/PointInSetQuery.java
index 1bb5e43263e..be1585b93d5 100644
--- a/lucene/core/src/java/org/apache/lucene/search/PointInSetQuery.java
+++ b/lucene/core/src/java/org/apache/lucene/search/PointInSetQuery.java
@@ -31,10 +31,12 @@ import org.apache.lucene.index.PointValues.IntersectVisitor;
import org.apache.lucene.index.PointValues.Relation;
import org.apache.lucene.index.PrefixCodedTerms;
import org.apache.lucene.index.PrefixCodedTerms.TermIterator;
+import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.BytesRefIterator;
import org.apache.lucene.util.DocIdSetBuilder;
+import org.apache.lucene.util.RamUsageEstimator;
/**
* Abstract query class to find all documents whose single or multi-dimensional point values, previously indexed with e.g. {@link IntPoint},
@@ -48,13 +50,16 @@ import org.apache.lucene.util.DocIdSetBuilder;
* @see PointValues
* @lucene.experimental */
-public abstract class PointInSetQuery extends Query {
+public abstract class PointInSetQuery extends Query implements Accountable {
+ protected static final long BASE_RAM_BYTES = RamUsageEstimator.shallowSizeOfInstance(PointInSetQuery.class);
+
// A little bit overkill for us, since all of our "terms" are always in the same field:
final PrefixCodedTerms sortedPackedPoints;
final int sortedPackedPointsHashCode;
final String field;
final int numDims;
final int bytesPerDim;
+ final long ramBytesUsed; // cache
/**
* Iterator of encoded point values.
@@ -102,6 +107,10 @@ public abstract class PointInSetQuery extends Query {
}
sortedPackedPoints = builder.finish();
sortedPackedPointsHashCode = sortedPackedPoints.hashCode();
+ ramBytesUsed = BASE_RAM_BYTES +
+ RamUsageEstimator.sizeOfObject(field) +
+ RamUsageEstimator.sizeOfObject(sortedPackedPoints);
+
}
@Override
@@ -422,4 +431,9 @@ public abstract class PointInSetQuery extends Query {
* @return human readable value for debugging
*/
protected abstract String toString(byte[] value);
+
+ @Override
+ public long ramBytesUsed() {
+ return ramBytesUsed; // estimate cached in the final field once the packed points are built
+ }
}
diff --git a/lucene/core/src/java/org/apache/lucene/util/ByteBlockPool.java b/lucene/core/src/java/org/apache/lucene/util/ByteBlockPool.java
index bd0b8ea4343..b543886a959 100644
--- a/lucene/core/src/java/org/apache/lucene/util/ByteBlockPool.java
+++ b/lucene/core/src/java/org/apache/lucene/util/ByteBlockPool.java
@@ -41,7 +41,9 @@ import static org.apache.lucene.util.RamUsageEstimator.NUM_BYTES_OBJECT_REF;
*
* @lucene.internal
**/
-public final class ByteBlockPool {
+public final class ByteBlockPool implements Accountable {
+ private static final long BASE_RAM_BYTES = RamUsageEstimator.shallowSizeOfInstance(ByteBlockPool.class);
+
public final static int BYTE_BLOCK_SHIFT = 15;
public final static int BYTE_BLOCK_SIZE = 1 << BYTE_BLOCK_SHIFT;
public final static int BYTE_BLOCK_MASK = BYTE_BLOCK_SIZE - 1;
@@ -392,5 +394,14 @@ public final class ByteBlockPool {
byte[] buffer = buffers[bufferIndex];
return buffer[pos];
}
+
+ @Override
+ public long ramBytesUsed() {
+ long size = BASE_RAM_BYTES + RamUsageEstimator.shallowSizeOf(buffers); // include the byte[][] holder itself
+ for (byte[] block : buffers) {
+ size += RamUsageEstimator.sizeOfObject(block); // a null slot contributes 0
+ }
+ return size;
+ }
}
diff --git a/lucene/core/src/java/org/apache/lucene/util/BytesRef.java b/lucene/core/src/java/org/apache/lucene/util/BytesRef.java
index eb42b159408..3bd05e28ce9 100644
--- a/lucene/core/src/java/org/apache/lucene/util/BytesRef.java
+++ b/lucene/core/src/java/org/apache/lucene/util/BytesRef.java
@@ -33,9 +33,9 @@ import java.util.Arrays;
* are sorted lexicographically, numerically treating elements as unsigned.
* This is identical to Unicode codepoint order.
*/
-public final class BytesRef implements Comparable,Cloneable {
+public final class BytesRef implements Comparable, Cloneable {
/** An empty byte array for convenience */
- public static final byte[] EMPTY_BYTES = new byte[0];
+ public static final byte[] EMPTY_BYTES = new byte[0];
/** The contents of the BytesRef. Should never be {@code null}. */
public byte[] bytes;
diff --git a/lucene/core/src/java/org/apache/lucene/util/BytesRefHash.java b/lucene/core/src/java/org/apache/lucene/util/BytesRefHash.java
index 6200fc59f73..f017ccbd2b2 100644
--- a/lucene/core/src/java/org/apache/lucene/util/BytesRefHash.java
+++ b/lucene/core/src/java/org/apache/lucene/util/BytesRefHash.java
@@ -42,7 +42,12 @@ import static org.apache.lucene.util.ByteBlockPool.BYTE_BLOCK_SIZE;
*
* @lucene.internal
*/
-public final class BytesRefHash {
+public final class BytesRefHash implements Accountable {
+ private static final long BASE_RAM_BYTES = RamUsageEstimator.shallowSizeOfInstance(BytesRefHash.class) +
+ // size of scratch1: measure a BytesRef instance, not the java.lang.Class object
+ RamUsageEstimator.shallowSizeOfInstance(BytesRef.class) +
+ // size of Counter - NOTE(review): a concrete Counter subclass may add fields; confirm
+ RamUsageEstimator.shallowSizeOfInstance(Counter.class);
public static final int DEFAULT_CAPACITY = 16;
@@ -472,6 +477,15 @@ public final class BytesRefHash {
return bytesStart[bytesID];
}
+ @Override
+ public long ramBytesUsed() {
+ // fixed per-instance overhead plus bytesStart, ids and the pool
+ return BASE_RAM_BYTES +
+ RamUsageEstimator.sizeOfObject(bytesStart) +
+ RamUsageEstimator.sizeOfObject(ids) +
+ RamUsageEstimator.sizeOfObject(pool);
+ }
+
/**
* Thrown if a {@link BytesRef} exceeds the {@link BytesRefHash} limit of
* {@link ByteBlockPool#BYTE_BLOCK_SIZE}-2.
diff --git a/lucene/core/src/java/org/apache/lucene/util/IntsRef.java b/lucene/core/src/java/org/apache/lucene/util/IntsRef.java
index 95cd9994ce6..b3fdbe31e6b 100644
--- a/lucene/core/src/java/org/apache/lucene/util/IntsRef.java
+++ b/lucene/core/src/java/org/apache/lucene/util/IntsRef.java
@@ -117,7 +117,7 @@ public final class IntsRef implements Comparable, Cloneable {
sb.append(']');
return sb.toString();
}
-
+
/**
* Creates a new IntsRef that points to a copy of the ints from
* other
diff --git a/lucene/core/src/java/org/apache/lucene/util/LongBitSet.java b/lucene/core/src/java/org/apache/lucene/util/LongBitSet.java
index 89b47658a1b..c1ddc96ccfd 100644
--- a/lucene/core/src/java/org/apache/lucene/util/LongBitSet.java
+++ b/lucene/core/src/java/org/apache/lucene/util/LongBitSet.java
@@ -26,7 +26,8 @@ import java.util.Arrays;
*
* @lucene.internal
*/
-public final class LongBitSet {
+public final class LongBitSet implements Accountable {
+ private static final long BASE_RAM_BYTES = RamUsageEstimator.shallowSizeOfInstance(LongBitSet.class);
private final long[] bits; // Array of longs holding the bits
private final long numBits; // The number of bits in use
@@ -428,4 +429,10 @@ public final class LongBitSet {
// empty sets from returning 0, which is too common.
return (int) ((h>>32) ^ h) + 0x98761234;
}
+
+ @Override
+ public long ramBytesUsed() {
+ // shallow instance size plus the long[] that holds the bits
+ return BASE_RAM_BYTES + RamUsageEstimator.sizeOfObject(bits);
+ }
}
diff --git a/lucene/core/src/java/org/apache/lucene/util/RamUsageEstimator.java b/lucene/core/src/java/org/apache/lucene/util/RamUsageEstimator.java
index 0d72e5e233c..d2a043a56aa 100644
--- a/lucene/core/src/java/org/apache/lucene/util/RamUsageEstimator.java
+++ b/lucene/core/src/java/org/apache/lucene/util/RamUsageEstimator.java
@@ -25,10 +25,17 @@ import java.security.AccessController;
import java.security.PrivilegedAction;
import java.text.DecimalFormat;
import java.text.DecimalFormatSymbols;
+import java.util.Collection;
+import java.util.Collections;
import java.util.IdentityHashMap;
import java.util.Locale;
import java.util.Map;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.BooleanClause;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.QueryVisitor;
+
/**
* Estimates the size (memory representation) of Java objects.
*
@@ -81,26 +88,42 @@ public final class RamUsageEstimator {
*/
public final static int NUM_BYTES_OBJECT_ALIGNMENT;
+ /**
+ * Approximate memory usage that we assign to all unknown queries -
+ * this maps roughly to a BooleanQuery with a couple term clauses.
+ */
+ public static final int QUERY_DEFAULT_RAM_BYTES_USED = 1024;
+
+ /**
+ * Approximate memory usage that we assign to all unknown objects -
+ * this maps roughly to a few primitive fields and a couple short String-s.
+ */
+ public static final int UNKNOWN_DEFAULT_RAM_BYTES_USED = 256;
+
/**
* Sizes of primitive classes.
*/
- private static final Map,Integer> primitiveSizes = new IdentityHashMap<>();
+ public static final Map,Integer> primitiveSizes;
+
static {
- primitiveSizes.put(boolean.class, 1);
- primitiveSizes.put(byte.class, 1);
- primitiveSizes.put(char.class, Integer.valueOf(Character.BYTES));
- primitiveSizes.put(short.class, Integer.valueOf(Short.BYTES));
- primitiveSizes.put(int.class, Integer.valueOf(Integer.BYTES));
- primitiveSizes.put(float.class, Integer.valueOf(Float.BYTES));
- primitiveSizes.put(double.class, Integer.valueOf(Double.BYTES));
- primitiveSizes.put(long.class, Integer.valueOf(Long.BYTES));
+ Map, Integer> primitiveSizesMap = new IdentityHashMap<>();
+ primitiveSizesMap.put(boolean.class, 1);
+ primitiveSizesMap.put(byte.class, 1);
+ primitiveSizesMap.put(char.class, Integer.valueOf(Character.BYTES));
+ primitiveSizesMap.put(short.class, Integer.valueOf(Short.BYTES));
+ primitiveSizesMap.put(int.class, Integer.valueOf(Integer.BYTES));
+ primitiveSizesMap.put(float.class, Integer.valueOf(Float.BYTES));
+ primitiveSizesMap.put(double.class, Integer.valueOf(Double.BYTES));
+ primitiveSizesMap.put(long.class, Integer.valueOf(Long.BYTES));
+
+ primitiveSizes = Collections.unmodifiableMap(primitiveSizesMap);
}
/**
* JVMs typically cache small longs. This tries to find out what the range is.
*/
static final long LONG_CACHE_MIN_VALUE, LONG_CACHE_MAX_VALUE;
- static final int LONG_SIZE;
+ static final int LONG_SIZE, STRING_SIZE;
/** For testing only */
static final boolean JVM_IS_HOTSPOT_64BIT;
@@ -181,8 +204,9 @@ public final class RamUsageEstimator {
LONG_CACHE_MIN_VALUE = longCacheMinValue;
LONG_CACHE_MAX_VALUE = longCacheMaxValue;
LONG_SIZE = (int) shallowSizeOfInstance(Long.class);
+ STRING_SIZE = (int) shallowSizeOfInstance(String.class);
}
-
+
/**
* Aligns an object size to be the next multiple of {@link #NUM_BYTES_OBJECT_ALIGNMENT}.
*/
@@ -242,6 +266,234 @@ public final class RamUsageEstimator {
return alignObjectSize((long) NUM_BYTES_ARRAY_HEADER + (long) Double.BYTES * arr.length);
}
+ /** Returns the size in bytes of the String[] object, including each non-null element. */
+ public static long sizeOf(String[] arr) {
+ // begin with the shallow size of the array itself
+ long total = shallowSizeOf(arr);
+ for (String element : arr) {
+ if (element != null) {
+ total += sizeOf(element);
+ }
+ }
+ return total;
+ }
+
+ /** Recurse only into immediate descendants. */
+ public static final int MAX_DEPTH = 1;
+
+ /** Returns the size in bytes of a Map object, including sizes of its keys and values, supplying
+ * {@link #UNKNOWN_DEFAULT_RAM_BYTES_USED} when object type is not well known.
+ * This method recurses up to {@link #MAX_DEPTH}. A {@code null} map is reported as 0.
+ */
+ public static long sizeOfMap(Map<?, ?> map) {
+ return sizeOfMap(map, 0, UNKNOWN_DEFAULT_RAM_BYTES_USED);
+ }
+
+ /** Returns the size in bytes of a Map object, including sizes of its keys and values, supplying
+ * default object size when object type is not well known.
+ * This method recurses up to {@link #MAX_DEPTH}.
+ */
+ public static long sizeOfMap(Map, ?> map, long defSize) {
+ return sizeOfMap(map, 0, defSize);
+ }
+
+ private static long sizeOfMap(Map<?, ?> map, int depth, long defSize) {
+ if (map == null) {
+ return 0;
+ }
+ long size = shallowSizeOf(map);
+ if (depth > MAX_DEPTH) {
+ return size; // too deep: charge only the map object itself, not its entries
+ }
+ long sizeOfEntry = -1; // shallow size of one entry, measured on the first entry and reused
+ for (Map.Entry<?, ?> entry : map.entrySet()) {
+ if (sizeOfEntry == -1) {
+ sizeOfEntry = shallowSizeOf(entry);
+ }
+ size += sizeOfEntry;
+ size += sizeOfObject(entry.getKey(), depth, defSize);
+ size += sizeOfObject(entry.getValue(), depth, defSize);
+ }
+ return alignObjectSize(size);
+ }
+
+ /** Returns the size in bytes of a Collection object, including sizes of its values, supplying
+ * {@link #UNKNOWN_DEFAULT_RAM_BYTES_USED} when object type is not well known.
+ * This method recurses up to {@link #MAX_DEPTH}. A {@code null} collection is reported as 0.
+ */
+ public static long sizeOfCollection(Collection<?> collection) {
+ return sizeOfCollection(collection, 0, UNKNOWN_DEFAULT_RAM_BYTES_USED);
+ }
+
+ /** Returns the size in bytes of a Collection object, including sizes of its values, supplying
+ * default object size when object type is not well known.
+ * This method recurses up to {@link #MAX_DEPTH}.
+ */
+ public static long sizeOfCollection(Collection> collection, long defSize) {
+ return sizeOfCollection(collection, 0, defSize);
+ }
+
+ private static long sizeOfCollection(Collection<?> collection, int depth, long defSize) {
+ if (collection == null) {
+ return 0;
+ }
+ long size = shallowSizeOf(collection);
+ if (depth > MAX_DEPTH) {
+ return size; // too deep: charge only the collection object, not its elements
+ }
+ // assume array-backed collection: one array header plus one reference slot per element
+ size += NUM_BYTES_ARRAY_HEADER + (long) collection.size() * NUM_BYTES_OBJECT_REF;
+ for (Object o : collection) {
+ size += sizeOfObject(o, depth, defSize); // null elements add 0; defSize is the fallback estimate
+ }
+ return alignObjectSize(size);
+ }
+
+ /** Query visitor that adds up an estimated RAM footprint for the queries and terms it is shown. */
+ private static final class RamUsageQueryVisitor extends QueryVisitor {
+ // running sum, in bytes, of everything visited so far
+ long total = 0;
+ // fallback charge per query; when not positive the query's shallow size is used instead
+ long defSize;
+
+ RamUsageQueryVisitor(long defSize) {
+ this.defSize = defSize;
+ }
+
+ @Override
+ public void consumeTerms(Query query, Term... terms) {
+ // charge the query itself first, then add each of its terms
+ total += (defSize > 0) ? defSize : shallowSizeOf(query);
+ if (terms == null) {
+ return;
+ }
+ for (Term term : terms) {
+ total += sizeOf(term);
+ }
+ }
+
+ @Override
+ public void visitLeaf(Query query) {
+ if (query instanceof Accountable) {
+ // the query reports its own footprint
+ total += ((Accountable) query).ramBytesUsed();
+ return;
+ }
+ // otherwise fall back to defSize, or to the shallow size when defSize is not positive
+ total += (defSize > 0) ? defSize : shallowSizeOf(query);
+ }
+
+ @Override
+ public QueryVisitor getSubVisitor(BooleanClause.Occur occur, Query parent) {
+ // sub-clauses are visited by this same instance, so all sizes land in one total
+ return this;
+ }
+ }
+
+ /**
+ * Returns the size in bytes of a Query object, delegating to {@link #sizeOf(Query, long)}
+ * with {@link #QUERY_DEFAULT_RAM_BYTES_USED} as the estimate for unknown query types.
+ */
+ public static long sizeOf(Query q) {
+ return sizeOf(q, QUERY_DEFAULT_RAM_BYTES_USED);
+ }
+
+ /**
+ * Returns the size in bytes of a Query object. A query that implements {@link Accountable}
+ * reports its own size; any other query is estimated by visiting it, charging unknown
+ * query types {@link #shallowSizeOf(Object)}, or defSize if its value is greater than 0.
+ */
+ public static long sizeOf(Query q, long defSize) {
+ if (q instanceof Accountable) {
+ return ((Accountable) q).ramBytesUsed();
+ }
+ // not self-reporting: let a visitor accumulate the estimate
+ RamUsageQueryVisitor collector = new RamUsageQueryVisitor(defSize);
+ q.visit(collector);
+ return collector.total;
+ }
+
+ /** Best effort attempt to estimate the size in bytes of an undetermined object. Known types
+ * will be estimated according to their formulas, and all other object sizes will be estimated
+ * as {@link #UNKNOWN_DEFAULT_RAM_BYTES_USED}. A {@code null} argument is reported as 0.
+ */
+ public static long sizeOfObject(Object o) {
+ return sizeOfObject(o, 0, UNKNOWN_DEFAULT_RAM_BYTES_USED);
+ }
+
+ /** Best effort attempt to estimate the size in bytes of an undetermined object. Known types
+ * will be estimated according to their formulas, and all other object sizes will be estimated
+ * using {@link #shallowSizeOf(Object)}, or using the supplied defSize parameter if
+ * its value is greater than 0. A {@code null} argument is reported as 0.
+ */
+ public static long sizeOfObject(Object o, long defSize) {
+ return sizeOfObject(o, 0, defSize);
+ }
+
+ private static long sizeOfObject(Object o, int depth, long defSize) {
+ if (o == null) {
+ return 0; // nothing to measure
+ }
+ long size;
+ if (o instanceof Accountable) { // self-reporting objects are checked before every other rule
+ size = ((Accountable)o).ramBytesUsed();
+ } else if (o instanceof String) {
+ size = sizeOf((String)o);
+ } else if (o instanceof boolean[]) {
+ size = sizeOf((boolean[])o);
+ } else if (o instanceof byte[]) {
+ size = sizeOf((byte[])o);
+ } else if (o instanceof char[]) {
+ size = sizeOf((char[])o);
+ } else if (o instanceof double[]) {
+ size = sizeOf((double[])o);
+ } else if (o instanceof float[]) {
+ size = sizeOf((float[])o);
+ } else if (o instanceof int[]) {
+ size = sizeOf((int[])o);
+ } else if (o instanceof Long) {
+ size = sizeOf((Long)o);
+ } else if (o instanceof long[]) {
+ size = sizeOf((long[])o);
+ } else if (o instanceof short[]) {
+ size = sizeOf((short[])o);
+ } else if (o instanceof String[]) {
+ size = sizeOf((String[]) o);
+ } else if (o instanceof Query) { // only reached by queries that are not Accountable
+ size = sizeOf((Query)o, defSize);
+ } else if (o instanceof Map) { // pre-increment: the nested container is sized one level deeper
+ size = sizeOfMap((Map) o, ++depth, defSize);
+ } else if (o instanceof Collection) { // same depth bump as for maps
+ size = sizeOfCollection((Collection)o, ++depth, defSize);
+ } else { // unknown type: the caller's default if positive, otherwise the shallow size
+ if (defSize > 0) {
+ size = defSize;
+ } else {
+ size = shallowSizeOf(o);
+ }
+ }
+ return size;
+ }
+
+ /** Returns the size in bytes of the {@link Accountable} object, using its
+ * {@link Accountable#ramBytesUsed()} method. The argument is dereferenced without a null check.
+ */
+ public static long sizeOf(Accountable accountable) {
+ return accountable.ramBytesUsed();
+ }
+
+ /** Returns the estimated size in bytes of the String object, or 0 for {@code null}. */
+ public static long sizeOf(String s) {
+ if (s == null) {
+ return 0;
+ }
+ // Assumes a char[]-backed String (Character.BYTES per character). This may not be true in
+ // Java 9+ and CompactStrings - but we have no way to determine this.
+ final long charArrayBytes = (long) NUM_BYTES_ARRAY_HEADER + (long) Character.BYTES * s.length();
+ // shallow String instance (which includes its hashCode field) plus the backing array
+ return alignObjectSize(STRING_SIZE + charArrayBytes);
+ }
+
/** Returns the shallow size in bytes of the Object[] object. */
// Use this method instead of #shallowSizeOf(Object) to avoid costly reflection
public static long shallowSizeOf(Object[] arr) {
diff --git a/lucene/core/src/java/org/apache/lucene/util/automaton/CompiledAutomaton.java b/lucene/core/src/java/org/apache/lucene/util/automaton/CompiledAutomaton.java
index 41acad4d81a..e3ad2661dbf 100644
--- a/lucene/core/src/java/org/apache/lucene/util/automaton/CompiledAutomaton.java
+++ b/lucene/core/src/java/org/apache/lucene/util/automaton/CompiledAutomaton.java
@@ -24,9 +24,11 @@ import java.util.List;
import org.apache.lucene.index.SingleTermsEnum;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.IntsRef;
+import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.StringHelper;
import org.apache.lucene.util.UnicodeUtil;
@@ -37,7 +39,9 @@ import org.apache.lucene.util.UnicodeUtil;
*
* @lucene.experimental
*/
-public class CompiledAutomaton {
+public class CompiledAutomaton implements Accountable {
+ private static final long BASE_RAM_BYTES = RamUsageEstimator.shallowSizeOfInstance(CompiledAutomaton.class);
+
/**
* Automata are compiled into different internal forms for the
* most efficient execution depending upon the language they accept.
@@ -461,4 +465,15 @@ public class CompiledAutomaton {
return true;
}
+
+ @Override
+ public long ramBytesUsed() {
+ // shallow size of this instance plus every structure it references;
+ // RamUsageEstimator.sizeOfObject contributes 0 for a reference that is null
+ return BASE_RAM_BYTES +
+ RamUsageEstimator.sizeOfObject(automaton) + RamUsageEstimator.sizeOfObject(commonSuffixRef) +
+ RamUsageEstimator.sizeOfObject(runAutomaton) + RamUsageEstimator.sizeOfObject(term) +
+ RamUsageEstimator.sizeOfObject(transition);
+ }
+
}
diff --git a/lucene/core/src/java/org/apache/lucene/util/automaton/RunAutomaton.java b/lucene/core/src/java/org/apache/lucene/util/automaton/RunAutomaton.java
index 0b1b71f2d55..a42588759f6 100644
--- a/lucene/core/src/java/org/apache/lucene/util/automaton/RunAutomaton.java
+++ b/lucene/core/src/java/org/apache/lucene/util/automaton/RunAutomaton.java
@@ -31,12 +31,17 @@ package org.apache.lucene.util.automaton;
import java.util.Arrays;
+import org.apache.lucene.util.Accountable;
+import org.apache.lucene.util.RamUsageEstimator;
+
/**
* Finite-state automaton with fast run operation. The initial state is always 0.
*
* @lucene.experimental
*/
-public abstract class RunAutomaton {
+public abstract class RunAutomaton implements Accountable {
+ private static final long BASE_RAM_BYTES = RamUsageEstimator.shallowSizeOfInstance(RunAutomaton.class);
+
final Automaton automaton;
final int alphabetSize;
final int size;
@@ -204,4 +209,14 @@ public abstract class RunAutomaton {
if (!Arrays.equals(transitions, other.transitions)) return false;
return true;
}
+
+ @Override
+ public long ramBytesUsed() {
+ // shallow size of this instance plus accept, automaton, classmap, points and transitions;
+ // RamUsageEstimator.sizeOfObject contributes 0 for a reference that is null
+ return BASE_RAM_BYTES +
+ RamUsageEstimator.sizeOfObject(accept) + RamUsageEstimator.sizeOfObject(automaton) +
+ RamUsageEstimator.sizeOfObject(classmap) + RamUsageEstimator.sizeOfObject(points) +
+ RamUsageEstimator.sizeOfObject(transitions);
+ }
}
diff --git a/lucene/core/src/test/org/apache/lucene/util/TestRamUsageEstimator.java b/lucene/core/src/test/org/apache/lucene/util/TestRamUsageEstimator.java
index 34128ad4d26..273574ff44e 100644
--- a/lucene/core/src/test/org/apache/lucene/util/TestRamUsageEstimator.java
+++ b/lucene/core/src/test/org/apache/lucene/util/TestRamUsageEstimator.java
@@ -20,9 +20,30 @@ package org.apache.lucene.util;
import static org.apache.lucene.util.RamUsageEstimator.*;
import static org.apache.lucene.util.RamUsageTester.sizeOf;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
import java.util.Random;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.BooleanClause;
+import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.DisjunctionMaxQuery;
+import org.apache.lucene.search.FuzzyQuery;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.search.WildcardQuery;
+
public class TestRamUsageEstimator extends LuceneTestCase {
+
+ static final String[] strings = new String[] {
+ "test string",
+ "hollow",
+ "catchmaster"
+ };
+
public void testSanity() {
assertTrue(sizeOf("test string") > shallowSizeOfInstance(String.class));
@@ -36,11 +57,6 @@ public class TestRamUsageEstimator extends LuceneTestCase {
assertTrue(
shallowSizeOfInstance(Holder.class) == shallowSizeOfInstance(HolderSubclass2.class));
- String[] strings = new String[] {
- "test string",
- "hollow",
- "catchmaster"
- };
assertTrue(sizeOf(strings) > shallowSizeOf(strings));
}
@@ -86,7 +102,73 @@ public class TestRamUsageEstimator extends LuceneTestCase {
assertEquals(sizeOf(array), sizeOf((Object) array));
}
}
-
+
+ public void testStrings() {
+ // the unqualified sizeOf is RamUsageTester's (single static import); it must match the estimator
+ long measured = sizeOf(strings);
+ assertEquals(measured, RamUsageEstimator.sizeOf(strings));
+ }
+
+ public void testBytesRefHash() {
+ // add 200 distinct short keys, then compare the measured size with the estimate
+ BytesRefHash hash = new BytesRefHash();
+ for (int n = 0; n < 100; n++) {
+ hash.add(new BytesRef("foo bar " + n));
+ hash.add(new BytesRef("baz bam " + n));
+ }
+ long measured = sizeOf(hash);
+ assertEquals(measured, RamUsageEstimator.sizeOf(hash));
+ }
+
+ public void testMap() {
+ Map<String, Object> map = new HashMap<>();
+ map.put("primitive", 1234L);
+ map.put("string", "string");
+ long actual = sizeOf(map);
+ long estimated = RamUsageEstimator.sizeOfObject(map);
+ assertTrue(estimated > actual); // RamUsageTester under-estimates the size of map
+
+ // test recursion: nested containers plus a self-reference that the depth limit must cut off
+ map.clear();
+ map.put("string[]", new String[]{"foo", "bar"});
+ map.put("map", Collections.singletonMap("foo", "bar"));
+ map.put("self", map);
+ actual = sizeOf(map);
+ estimated = RamUsageEstimator.sizeOfObject(map);
+ assertTrue(estimated > actual);
+ }
+
+ public void testCollection() {
+ List