LUCENE-8855: Add Accountable to some Query implementations.

This commit is contained in:
Andrzej Bialecki 2019-06-26 15:26:44 +02:00
parent 48fb45e614
commit a76c962ee6
27 changed files with 665 additions and 42 deletions

View File

@ -164,6 +164,12 @@ Other
* LUCENE-8879: Improve BKDRadixSelector tests. (Ignacio Vera) * LUCENE-8879: Improve BKDRadixSelector tests. (Ignacio Vera)
======================= Lucene 8.1.2 =======================
Improvements
* LUCENE-8855: Add Accountable to some Query implementations (ab, Adrien Grand)
======================= Lucene 8.1.1 ======================= ======================= Lucene 8.1.1 =======================
(No Changes) (No Changes)

View File

@ -23,8 +23,10 @@ import java.nio.charset.CharsetDecoder;
import java.nio.charset.CodingErrorAction; import java.nio.charset.CodingErrorAction;
import java.nio.charset.StandardCharsets; import java.nio.charset.StandardCharsets;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder; import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.RamUsageEstimator;
/** /**
A Term represents a word from text. This is the unit of search. It is A Term represents a word from text. This is the unit of search. It is
@ -34,7 +36,10 @@ import org.apache.lucene.util.BytesRefBuilder;
Note that terms may represent more than words from text fields, but also Note that terms may represent more than words from text fields, but also
things like dates, email addresses, urls, etc. */ things like dates, email addresses, urls, etc. */
public final class Term implements Comparable<Term> { public final class Term implements Comparable<Term>, Accountable {
private static final long BASE_RAM_BYTES = RamUsageEstimator.shallowSizeOfInstance(Term.class) +
RamUsageEstimator.shallowSizeOfInstance(BytesRef.class);
String field; String field;
BytesRef bytes; BytesRef bytes;
@ -162,4 +167,11 @@ public final class Term implements Comparable<Term> {
@Override @Override
public final String toString() { return field + ":" + text(); } public final String toString() { return field + ":" + text(); }
/**
 * Returns an estimate of the heap used by this Term: the shallow Term and BytesRef
 * objects (folded into BASE_RAM_BYTES), the field String, and the backing byte array
 * of {@code bytes} (array header plus payload length).
 * NOTE(review): the array term is not passed through object alignment, so this is a
 * slight under-estimate for the byte[] — confirm intentional.
 */
@Override
public long ramBytesUsed() {
return BASE_RAM_BYTES +
RamUsageEstimator.sizeOfObject(field) +
(bytes != null ? bytes.bytes.length + RamUsageEstimator.NUM_BYTES_ARRAY_HEADER : 0L);
}
} }

View File

@ -22,7 +22,9 @@ import java.io.IOException;
import org.apache.lucene.index.Term; import org.apache.lucene.index.Term;
import org.apache.lucene.index.Terms; import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum; import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.AttributeSource; import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.automaton.Automaton; import org.apache.lucene.util.automaton.Automaton;
import org.apache.lucene.util.automaton.CompiledAutomaton; import org.apache.lucene.util.automaton.CompiledAutomaton;
import org.apache.lucene.util.automaton.Operations; import org.apache.lucene.util.automaton.Operations;
@ -45,7 +47,9 @@ import org.apache.lucene.util.automaton.Operations;
* </p> * </p>
* @lucene.experimental * @lucene.experimental
*/ */
public class AutomatonQuery extends MultiTermQuery { public class AutomatonQuery extends MultiTermQuery implements Accountable {
private static final long BASE_RAM_BYTES = RamUsageEstimator.shallowSizeOfInstance(AutomatonQuery.class);
/** the automaton to match index terms against */ /** the automaton to match index terms against */
protected final Automaton automaton; protected final Automaton automaton;
protected final CompiledAutomaton compiled; protected final CompiledAutomaton compiled;
@ -53,6 +57,8 @@ public class AutomatonQuery extends MultiTermQuery {
protected final Term term; protected final Term term;
protected final boolean automatonIsBinary; protected final boolean automatonIsBinary;
private final long ramBytesUsed; // cache
/** /**
* Create a new AutomatonQuery from an {@link Automaton}. * Create a new AutomatonQuery from an {@link Automaton}.
* *
@ -102,6 +108,8 @@ public class AutomatonQuery extends MultiTermQuery {
this.automatonIsBinary = isBinary; this.automatonIsBinary = isBinary;
// TODO: we could take isFinite too, to save a bit of CPU in CompiledAutomaton ctor?: // TODO: we could take isFinite too, to save a bit of CPU in CompiledAutomaton ctor?:
this.compiled = new CompiledAutomaton(automaton, null, true, maxDeterminizedStates, isBinary); this.compiled = new CompiledAutomaton(automaton, null, true, maxDeterminizedStates, isBinary);
this.ramBytesUsed = BASE_RAM_BYTES + term.ramBytesUsed() + automaton.ramBytesUsed() + compiled.ramBytesUsed();
} }
@Override @Override
@ -168,4 +176,9 @@ public class AutomatonQuery extends MultiTermQuery {
public boolean isAutomatonBinary() { public boolean isAutomatonBinary() {
return automatonIsBinary; return automatonIsBinary;
} }
/**
 * Returns the heap usage computed once in the constructor (shallow query + term +
 * automaton + compiled automaton) and cached in {@code ramBytesUsed}.
 */
@Override
public long ramBytesUsed() {
return ramBytesUsed;
}
} }

View File

@ -88,7 +88,7 @@ public class LRUQueryCache implements QueryCache, Accountable {
// approximate memory usage that we assign to all queries // approximate memory usage that we assign to all queries
// this maps roughly to a BooleanQuery with a couple term clauses // this maps roughly to a BooleanQuery with a couple term clauses
static final long QUERY_DEFAULT_RAM_BYTES_USED = 1024; static final long QUERY_DEFAULT_RAM_BYTES_USED = RamUsageEstimator.QUERY_DEFAULT_RAM_BYTES_USED;
static final long HASHTABLE_RAM_BYTES_PER_ENTRY = static final long HASHTABLE_RAM_BYTES_PER_ENTRY =
2 * RamUsageEstimator.NUM_BYTES_OBJECT_REF // key + value 2 * RamUsageEstimator.NUM_BYTES_OBJECT_REF // key + value

View File

@ -31,10 +31,12 @@ import org.apache.lucene.index.PointValues.IntersectVisitor;
import org.apache.lucene.index.PointValues.Relation; import org.apache.lucene.index.PointValues.Relation;
import org.apache.lucene.index.PrefixCodedTerms; import org.apache.lucene.index.PrefixCodedTerms;
import org.apache.lucene.index.PrefixCodedTerms.TermIterator; import org.apache.lucene.index.PrefixCodedTerms.TermIterator;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder; import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.BytesRefIterator; import org.apache.lucene.util.BytesRefIterator;
import org.apache.lucene.util.DocIdSetBuilder; import org.apache.lucene.util.DocIdSetBuilder;
import org.apache.lucene.util.RamUsageEstimator;
/** /**
* Abstract query class to find all documents whose single or multi-dimensional point values, previously indexed with e.g. {@link IntPoint}, * Abstract query class to find all documents whose single or multi-dimensional point values, previously indexed with e.g. {@link IntPoint},
@ -48,13 +50,16 @@ import org.apache.lucene.util.DocIdSetBuilder;
* @see PointValues * @see PointValues
* @lucene.experimental */ * @lucene.experimental */
public abstract class PointInSetQuery extends Query { public abstract class PointInSetQuery extends Query implements Accountable {
protected static final long BASE_RAM_BYTES = RamUsageEstimator.shallowSizeOfInstance(PointInSetQuery.class);
// A little bit overkill for us, since all of our "terms" are always in the same field: // A little bit overkill for us, since all of our "terms" are always in the same field:
final PrefixCodedTerms sortedPackedPoints; final PrefixCodedTerms sortedPackedPoints;
final int sortedPackedPointsHashCode; final int sortedPackedPointsHashCode;
final String field; final String field;
final int numDims; final int numDims;
final int bytesPerDim; final int bytesPerDim;
final long ramBytesUsed; // cache
/** /**
* Iterator of encoded point values. * Iterator of encoded point values.
@ -102,6 +107,10 @@ public abstract class PointInSetQuery extends Query {
} }
sortedPackedPoints = builder.finish(); sortedPackedPoints = builder.finish();
sortedPackedPointsHashCode = sortedPackedPoints.hashCode(); sortedPackedPointsHashCode = sortedPackedPoints.hashCode();
ramBytesUsed = BASE_RAM_BYTES +
RamUsageEstimator.sizeOfObject(field) +
RamUsageEstimator.sizeOfObject(sortedPackedPoints);
} }
@Override @Override
@ -422,4 +431,9 @@ public abstract class PointInSetQuery extends Query {
* @return human readable value for debugging * @return human readable value for debugging
*/ */
protected abstract String toString(byte[] value); protected abstract String toString(byte[] value);
/**
 * Returns the heap usage computed once in the constructor (shallow query + field name
 * + sorted packed points) and cached in {@code ramBytesUsed}.
 */
@Override
public long ramBytesUsed() {
return ramBytesUsed;
}
} }

View File

@ -41,7 +41,9 @@ import static org.apache.lucene.util.RamUsageEstimator.NUM_BYTES_OBJECT_REF;
* *
* @lucene.internal * @lucene.internal
**/ **/
public final class ByteBlockPool { public final class ByteBlockPool implements Accountable {
private static final long BASE_RAM_BYTES = RamUsageEstimator.shallowSizeOfInstance(ByteBlockPool.class);
public final static int BYTE_BLOCK_SHIFT = 15; public final static int BYTE_BLOCK_SHIFT = 15;
public final static int BYTE_BLOCK_SIZE = 1 << BYTE_BLOCK_SHIFT; public final static int BYTE_BLOCK_SIZE = 1 << BYTE_BLOCK_SHIFT;
public final static int BYTE_BLOCK_MASK = BYTE_BLOCK_SIZE - 1; public final static int BYTE_BLOCK_MASK = BYTE_BLOCK_SIZE - 1;
@ -392,5 +394,14 @@ public final class ByteBlockPool {
byte[] buffer = buffers[bufferIndex]; byte[] buffer = buffers[bufferIndex];
return buffer[pos]; return buffer[pos];
} }
/**
 * Estimates the heap used by this pool: the shallow pool object plus every
 * allocated block. Unallocated slots in {@code buffers} are null and contribute
 * nothing ({@code sizeOfObject(null)} returns 0).
 */
@Override
public long ramBytesUsed() {
  long bytes = BASE_RAM_BYTES;
  for (int i = 0; i < buffers.length; i++) {
    bytes += RamUsageEstimator.sizeOfObject(buffers[i]);
  }
  return bytes;
}
} }

View File

@ -42,7 +42,12 @@ import static org.apache.lucene.util.ByteBlockPool.BYTE_BLOCK_SIZE;
* *
* @lucene.internal * @lucene.internal
*/ */
public final class BytesRefHash { public final class BytesRefHash implements Accountable {
// Shallow cost of this hash plus the two helper objects it always owns.
// NOTE: shallowSizeOfInstance(X.class) estimates the size of an *instance* of X;
// the previous shallowSizeOf(X.class) measured the java.lang.Class object itself,
// which is not what the "size of scratch1" / "size of Counter" comments intend.
private static final long BASE_RAM_BYTES = RamUsageEstimator.shallowSizeOfInstance(BytesRefHash.class) +
// size of scratch1
RamUsageEstimator.shallowSizeOfInstance(BytesRef.class) +
// size of Counter
RamUsageEstimator.shallowSizeOfInstance(Counter.class);
public static final int DEFAULT_CAPACITY = 16; public static final int DEFAULT_CAPACITY = 16;
@ -472,6 +477,15 @@ public final class BytesRefHash {
return bytesStart[bytesID]; return bytesStart[bytesID];
} }
/**
 * Estimates the heap used by this hash: the base object plus the three heap
 * structures it owns (the start offsets, the id table, and the byte pool).
 */
@Override
public long ramBytesUsed() {
  return BASE_RAM_BYTES
      + RamUsageEstimator.sizeOfObject(bytesStart)
      + RamUsageEstimator.sizeOfObject(ids)
      + RamUsageEstimator.sizeOfObject(pool);
}
/** /**
* Thrown if a {@link BytesRef} exceeds the {@link BytesRefHash} limit of * Thrown if a {@link BytesRef} exceeds the {@link BytesRefHash} limit of
* {@link ByteBlockPool#BYTE_BLOCK_SIZE}-2. * {@link ByteBlockPool#BYTE_BLOCK_SIZE}-2.

View File

@ -26,7 +26,8 @@ import java.util.Arrays;
* *
* @lucene.internal * @lucene.internal
*/ */
public final class LongBitSet { public final class LongBitSet implements Accountable {
private static final long BASE_RAM_BYTES = RamUsageEstimator.shallowSizeOfInstance(LongBitSet.class);
private final long[] bits; // Array of longs holding the bits private final long[] bits; // Array of longs holding the bits
private final long numBits; // The number of bits in use private final long numBits; // The number of bits in use
@ -428,4 +429,10 @@ public final class LongBitSet {
// empty sets from returning 0, which is too common. // empty sets from returning 0, which is too common.
return (int) ((h>>32) ^ h) + 0x98761234; return (int) ((h>>32) ^ h) + 0x98761234;
} }
/** Returns the heap used by this bit set: the shallow object plus the backing long[] words. */
@Override
public long ramBytesUsed() {
return BASE_RAM_BYTES +
RamUsageEstimator.sizeOfObject(bits);
}
} }

View File

@ -25,10 +25,17 @@ import java.security.AccessController;
import java.security.PrivilegedAction; import java.security.PrivilegedAction;
import java.text.DecimalFormat; import java.text.DecimalFormat;
import java.text.DecimalFormatSymbols; import java.text.DecimalFormatSymbols;
import java.util.Collection;
import java.util.Collections;
import java.util.IdentityHashMap; import java.util.IdentityHashMap;
import java.util.Locale; import java.util.Locale;
import java.util.Map; import java.util.Map;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryVisitor;
/** /**
* Estimates the size (memory representation) of Java objects. * Estimates the size (memory representation) of Java objects.
* <p> * <p>
@ -81,26 +88,42 @@ public final class RamUsageEstimator {
*/ */
public final static int NUM_BYTES_OBJECT_ALIGNMENT; public final static int NUM_BYTES_OBJECT_ALIGNMENT;
/**
* Approximate memory usage that we assign to all unknown queries -
* this maps roughly to a BooleanQuery with a couple term clauses.
*/
public static final int QUERY_DEFAULT_RAM_BYTES_USED = 1024;
/**
* Approximate memory usage that we assign to all unknown objects -
* this maps roughly to a few primitive fields and a couple short String-s.
*/
public static final int UNKNOWN_DEFAULT_RAM_BYTES_USED = 256;
/** /**
* Sizes of primitive classes. * Sizes of primitive classes.
*/ */
private static final Map<Class<?>,Integer> primitiveSizes = new IdentityHashMap<>(); public static final Map<Class<?>,Integer> primitiveSizes;
static { static {
primitiveSizes.put(boolean.class, 1); Map<Class<?>, Integer> primitiveSizesMap = new IdentityHashMap<>();
primitiveSizes.put(byte.class, 1); primitiveSizesMap.put(boolean.class, 1);
primitiveSizes.put(char.class, Integer.valueOf(Character.BYTES)); primitiveSizesMap.put(byte.class, 1);
primitiveSizes.put(short.class, Integer.valueOf(Short.BYTES)); primitiveSizesMap.put(char.class, Integer.valueOf(Character.BYTES));
primitiveSizes.put(int.class, Integer.valueOf(Integer.BYTES)); primitiveSizesMap.put(short.class, Integer.valueOf(Short.BYTES));
primitiveSizes.put(float.class, Integer.valueOf(Float.BYTES)); primitiveSizesMap.put(int.class, Integer.valueOf(Integer.BYTES));
primitiveSizes.put(double.class, Integer.valueOf(Double.BYTES)); primitiveSizesMap.put(float.class, Integer.valueOf(Float.BYTES));
primitiveSizes.put(long.class, Integer.valueOf(Long.BYTES)); primitiveSizesMap.put(double.class, Integer.valueOf(Double.BYTES));
primitiveSizesMap.put(long.class, Integer.valueOf(Long.BYTES));
primitiveSizes = Collections.unmodifiableMap(primitiveSizesMap);
} }
/** /**
* JVMs typically cache small longs. This tries to find out what the range is. * JVMs typically cache small longs. This tries to find out what the range is.
*/ */
static final long LONG_CACHE_MIN_VALUE, LONG_CACHE_MAX_VALUE; static final long LONG_CACHE_MIN_VALUE, LONG_CACHE_MAX_VALUE;
static final int LONG_SIZE; static final int LONG_SIZE, STRING_SIZE;
/** For testing only */ /** For testing only */
static final boolean JVM_IS_HOTSPOT_64BIT; static final boolean JVM_IS_HOTSPOT_64BIT;
@ -181,6 +204,7 @@ public final class RamUsageEstimator {
LONG_CACHE_MIN_VALUE = longCacheMinValue; LONG_CACHE_MIN_VALUE = longCacheMinValue;
LONG_CACHE_MAX_VALUE = longCacheMaxValue; LONG_CACHE_MAX_VALUE = longCacheMaxValue;
LONG_SIZE = (int) shallowSizeOfInstance(Long.class); LONG_SIZE = (int) shallowSizeOfInstance(Long.class);
STRING_SIZE = (int) shallowSizeOfInstance(String.class);
} }
/** /**
@ -242,6 +266,234 @@ public final class RamUsageEstimator {
return alignObjectSize((long) NUM_BYTES_ARRAY_HEADER + (long) Double.BYTES * arr.length); return alignObjectSize((long) NUM_BYTES_ARRAY_HEADER + (long) Double.BYTES * arr.length);
} }
/**
 * Returns the size in bytes of the String[] object, including each non-null
 * String it references. Throws NullPointerException if {@code arr} itself is null
 * (consistent with the other array overloads).
 */
public static long sizeOf(String[] arr) {
  long total = shallowSizeOf(arr);
  for (String value : arr) {
    if (value != null) {
      total += sizeOf(value);
    }
  }
  return total;
}
/** Recurse only into immediate descendants. */
public static final int MAX_DEPTH = 1;
/** Returns the size in bytes of a Map object, including sizes of its keys and values, supplying
 * {@link #UNKNOWN_DEFAULT_RAM_BYTES_USED} when object type is not well known.
 * This method recurses up to {@link #MAX_DEPTH}.
 * @param map the map to measure; null yields 0
 */
public static long sizeOfMap(Map<?, ?> map) {
return sizeOfMap(map, 0, UNKNOWN_DEFAULT_RAM_BYTES_USED);
}
/** Returns the size in bytes of a Map object, including sizes of its keys and values, supplying
 * default object size when object type is not well known.
 * This method recurses up to {@link #MAX_DEPTH}.
 * @param map the map to measure; null yields 0
 * @param defSize fallback size for unrecognized entry objects; if not positive,
 *                {@code shallowSizeOf(Object)} is used instead
 */
public static long sizeOfMap(Map<?, ?> map, long defSize) {
return sizeOfMap(map, 0, defSize);
}
// Internal worker: depth counts how many map/collection levels we have descended;
// sizeOfObject(..) increments it when it recurses into a nested Map or Collection.
private static long sizeOfMap(Map<?, ?> map, int depth, long defSize) {
if (map == null) {
return 0;
}
long size = shallowSizeOf(map);
if (depth > MAX_DEPTH) {
// too deep: count only the map object itself, not its contents
return size;
}
// shallow size of a Map.Entry is measured once and reused for every entry
// (assumes all entries are the same implementation class, which holds for
// typical Map implementations)
long sizeOfEntry = -1;
for (Map.Entry<?, ?> entry : map.entrySet()) {
if (sizeOfEntry == -1) {
sizeOfEntry = shallowSizeOf(entry);
}
size += sizeOfEntry;
size += sizeOfObject(entry.getKey(), depth, defSize);
size += sizeOfObject(entry.getValue(), depth, defSize);
}
return alignObjectSize(size);
}
/** Returns the size in bytes of a Collection object, including sizes of its values, supplying
 * {@link #UNKNOWN_DEFAULT_RAM_BYTES_USED} when object type is not well known.
 * This method recurses up to {@link #MAX_DEPTH}.
 * @param collection the collection to measure; null yields 0
 */
public static long sizeOfCollection(Collection<?> collection) {
return sizeOfCollection(collection, 0, UNKNOWN_DEFAULT_RAM_BYTES_USED);
}
/** Returns the size in bytes of a Collection object, including sizes of its values, supplying
 * default object size when object type is not well known.
 * This method recurses up to {@link #MAX_DEPTH}.
 * @param collection the collection to measure; null yields 0
 * @param defSize fallback size for unrecognized element objects; if not positive,
 *                {@code shallowSizeOf(Object)} is used instead
 */
public static long sizeOfCollection(Collection<?> collection, long defSize) {
return sizeOfCollection(collection, 0, defSize);
}
// Internal worker; see sizeOfMap(Map,int,long) for the depth convention.
private static long sizeOfCollection(Collection<?> collection, int depth, long defSize) {
if (collection == null) {
return 0;
}
long size = shallowSizeOf(collection);
if (depth > MAX_DEPTH) {
return size;
}
// assume array-backed collection and add per-object references
// NOTE(review): "* 2" presumably covers the backing array header plus growth
// slack; this is a heuristic, not an exact model — confirm against RamUsageTester.
size += NUM_BYTES_ARRAY_HEADER * 2;
for (Object o : collection) {
size += sizeOfObject(o, depth, defSize);
}
return alignObjectSize(size);
}
// Walks a Query tree and accumulates an estimated heap size for each leaf.
private static final class RamUsageQueryVisitor extends QueryVisitor {
// running total in bytes across all visited leaves
long total = 0;
// fallback per-query size; if not positive, shallowSizeOf(query) is used instead
long defSize;
RamUsageQueryVisitor(long defSize) {
this.defSize = defSize;
}
@Override
public void consumeTerms(Query query, Term... terms) {
// count the query object itself, then each of its terms
if (defSize > 0) {
total += defSize;
} else {
total += shallowSizeOf(query);
}
if (terms != null) {
for (Term t : terms) {
total += sizeOf(t);
}
}
}
@Override
public void visitLeaf(Query query) {
// Accountable queries report their own exact usage; others fall back to
// defSize or a shallow reflective estimate
if (query instanceof Accountable) {
total += ((Accountable)query).ramBytesUsed();
} else {
if (defSize > 0) {
total += defSize;
} else {
total += shallowSizeOf(query);
}
}
}
@Override
public QueryVisitor getSubVisitor(BooleanClause.Occur occur, Query parent) {
// reuse this visitor for sub-clauses so the total accumulates across the whole tree
return this;
}
}
/**
 * Returns the size in bytes of a Query object. Unknown query types will be estimated
 * as {@link #QUERY_DEFAULT_RAM_BYTES_USED}.
 * @param q the query to measure
 */
public static long sizeOf(Query q) {
return sizeOf(q, QUERY_DEFAULT_RAM_BYTES_USED);
}
/**
 * Returns the size in bytes of a Query object. Unknown query types will be estimated
 * using {@link #shallowSizeOf(Object)}, or using the supplied <code>defSize</code> parameter
 * if its value is greater than 0.
 */
public static long sizeOf(Query q, long defSize) {
  // Accountable queries know their own exact usage — short-circuit.
  if (q instanceof Accountable) {
    return ((Accountable) q).ramBytesUsed();
  }
  // Otherwise estimate by visiting every leaf of the query tree.
  RamUsageQueryVisitor accumulator = new RamUsageQueryVisitor(defSize);
  q.visit(accumulator);
  return accumulator.total;
}
/** Best effort attempt to estimate the size in bytes of an undetermined object. Known types
 * will be estimated according to their formulas, and all other object sizes will be estimated
 * as {@link #UNKNOWN_DEFAULT_RAM_BYTES_USED}.
 * @param o the object to measure; null yields 0
 */
public static long sizeOfObject(Object o) {
return sizeOfObject(o, 0, UNKNOWN_DEFAULT_RAM_BYTES_USED);
}
/** Best effort attempt to estimate the size in bytes of an undetermined object. Known types
 * will be estimated according to their formulas, and all other object sizes will be estimated
 * using {@link #shallowSizeOf(Object)}, or using the supplied <code>defSize</code> parameter if
 * its value is greater than 0.
 * @param o the object to measure; null yields 0
 * @param defSize fallback size for unrecognized types; if not positive,
 *                {@code shallowSizeOf(Object)} is used instead
 */
public static long sizeOfObject(Object o, long defSize) {
return sizeOfObject(o, 0, defSize);
}
/**
 * Internal dispatch by runtime type. Accountable wins first so self-reporting
 * objects are exact; then well-known primitives/arrays/String; then Query,
 * Map and Collection (which recurse with an incremented depth); anything else
 * falls back to {@code defSize} or a shallow reflective estimate.
 */
private static long sizeOfObject(Object o, int depth, long defSize) {
  if (o == null) {
    return 0;
  }
  long size;
  if (o instanceof Accountable) {
    size = ((Accountable) o).ramBytesUsed();
  } else if (o instanceof String) {
    size = sizeOf((String) o);
  } else if (o instanceof boolean[]) {
    size = sizeOf((boolean[]) o);
  } else if (o instanceof byte[]) {
    size = sizeOf((byte[]) o);
  } else if (o instanceof char[]) {
    size = sizeOf((char[]) o);
  } else if (o instanceof double[]) {
    size = sizeOf((double[]) o);
  } else if (o instanceof float[]) {
    size = sizeOf((float[]) o);
  } else if (o instanceof int[]) {
    size = sizeOf((int[]) o);
  } else if (o instanceof Long) {
    size = sizeOf((Long) o);
  } else if (o instanceof long[]) {
    size = sizeOf((long[]) o);
  } else if (o instanceof short[]) {
    size = sizeOf((short[]) o);
  } else if (o instanceof String[]) {
    size = sizeOf((String[]) o);
  } else if (o instanceof Query) {
    size = sizeOf((Query) o, defSize);
  } else if (o instanceof Map) {
    // wildcard cast instead of raw type; descend one level deeper
    size = sizeOfMap((Map<?, ?>) o, depth + 1, defSize);
  } else if (o instanceof Collection) {
    size = sizeOfCollection((Collection<?>) o, depth + 1, defSize);
  } else {
    if (defSize > 0) {
      size = defSize;
    } else {
      size = shallowSizeOf(o);
    }
  }
  return size;
}
/** Returns the size in bytes of the {@link Accountable} object, using its
 * {@link Accountable#ramBytesUsed()} method.
 * @param accountable must not be null
 */
public static long sizeOf(Accountable accountable) {
return accountable.ramBytesUsed();
}
/** Returns the size in bytes of the String object, or 0 for null. */
public static long sizeOf(String s) {
  if (s == null) {
    return 0;
  }
  // Assumes a UTF-16 char[] backing store (2 bytes per char) plus the cached
  // hashCode already counted in STRING_SIZE. With Java 9+ compact strings a
  // Latin-1 String uses half the char payload, but we have no way to detect that.
  long charPayload = (long) Character.BYTES * s.length();
  return alignObjectSize(STRING_SIZE + (long) NUM_BYTES_ARRAY_HEADER + charPayload);
}
/** Returns the shallow size in bytes of the Object[] object. */ /** Returns the shallow size in bytes of the Object[] object. */
// Use this method instead of #shallowSizeOf(Object) to avoid costly reflection // Use this method instead of #shallowSizeOf(Object) to avoid costly reflection
public static long shallowSizeOf(Object[] arr) { public static long shallowSizeOf(Object[] arr) {

View File

@ -24,9 +24,11 @@ import java.util.List;
import org.apache.lucene.index.SingleTermsEnum; import org.apache.lucene.index.SingleTermsEnum;
import org.apache.lucene.index.Terms; import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum; import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder; import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.IntsRef; import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.StringHelper; import org.apache.lucene.util.StringHelper;
import org.apache.lucene.util.UnicodeUtil; import org.apache.lucene.util.UnicodeUtil;
@ -37,7 +39,9 @@ import org.apache.lucene.util.UnicodeUtil;
* *
* @lucene.experimental * @lucene.experimental
*/ */
public class CompiledAutomaton { public class CompiledAutomaton implements Accountable {
private static final long BASE_RAM_BYTES = RamUsageEstimator.shallowSizeOfInstance(CompiledAutomaton.class);
/** /**
* Automata are compiled into different internal forms for the * Automata are compiled into different internal forms for the
* most efficient execution depending upon the language they accept. * most efficient execution depending upon the language they accept.
@ -461,4 +465,15 @@ public class CompiledAutomaton {
return true; return true;
} }
/**
 * Estimates the heap used by this compiled form: the shallow object plus every
 * owned structure. Fields that are null for this automaton's type contribute 0.
 */
@Override
public long ramBytesUsed() {
return BASE_RAM_BYTES +
RamUsageEstimator.sizeOfObject(automaton) +
RamUsageEstimator.sizeOfObject(commonSuffixRef) +
RamUsageEstimator.sizeOfObject(runAutomaton) +
RamUsageEstimator.sizeOfObject(term) +
RamUsageEstimator.sizeOfObject(transition);
}
} }

View File

@ -31,12 +31,17 @@ package org.apache.lucene.util.automaton;
import java.util.Arrays; import java.util.Arrays;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.RamUsageEstimator;
/** /**
* Finite-state automaton with fast run operation. The initial state is always 0. * Finite-state automaton with fast run operation. The initial state is always 0.
* *
* @lucene.experimental * @lucene.experimental
*/ */
public abstract class RunAutomaton { public abstract class RunAutomaton implements Accountable {
private static final long BASE_RAM_BYTES = RamUsageEstimator.shallowSizeOfInstance(RunAutomaton.class);
final Automaton automaton; final Automaton automaton;
final int alphabetSize; final int alphabetSize;
final int size; final int size;
@ -204,4 +209,14 @@ public abstract class RunAutomaton {
if (!Arrays.equals(transitions, other.transitions)) return false; if (!Arrays.equals(transitions, other.transitions)) return false;
return true; return true;
} }
/**
 * Estimates the heap used by this run automaton: the shallow object plus the
 * accept/classmap/points/transitions tables and the source automaton.
 */
@Override
public long ramBytesUsed() {
return BASE_RAM_BYTES +
RamUsageEstimator.sizeOfObject(accept) +
RamUsageEstimator.sizeOfObject(automaton) +
RamUsageEstimator.sizeOfObject(classmap) +
RamUsageEstimator.sizeOfObject(points) +
RamUsageEstimator.sizeOfObject(transitions);
}
} }

View File

@ -20,9 +20,30 @@ package org.apache.lucene.util;
import static org.apache.lucene.util.RamUsageEstimator.*; import static org.apache.lucene.util.RamUsageEstimator.*;
import static org.apache.lucene.util.RamUsageTester.sizeOf; import static org.apache.lucene.util.RamUsageTester.sizeOf;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Random; import java.util.Random;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.DisjunctionMaxQuery;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.WildcardQuery;
public class TestRamUsageEstimator extends LuceneTestCase { public class TestRamUsageEstimator extends LuceneTestCase {
// Shared fixture, used by both testSanity() and testStrings().
static final String[] strings = new String[] {
"test string",
"hollow",
"catchmaster"
};
public void testSanity() { public void testSanity() {
assertTrue(sizeOf("test string") > shallowSizeOfInstance(String.class)); assertTrue(sizeOf("test string") > shallowSizeOfInstance(String.class));
@ -36,11 +57,6 @@ public class TestRamUsageEstimator extends LuceneTestCase {
assertTrue( assertTrue(
shallowSizeOfInstance(Holder.class) == shallowSizeOfInstance(HolderSubclass2.class)); shallowSizeOfInstance(Holder.class) == shallowSizeOfInstance(HolderSubclass2.class));
String[] strings = new String[] {
"test string",
"hollow",
"catchmaster"
};
assertTrue(sizeOf(strings) > shallowSizeOf(strings)); assertTrue(sizeOf(strings) > shallowSizeOf(strings));
} }
@ -87,6 +103,72 @@ public class TestRamUsageEstimator extends LuceneTestCase {
} }
} }
// Estimator must agree exactly with the reflective RamUsageTester for String[].
public void testStrings() {
long actual = sizeOf(strings);
long estimated = RamUsageEstimator.sizeOf(strings);
assertEquals(actual, estimated);
}
// BytesRefHash implements Accountable; its self-reported usage must match the
// reflective measurement after a non-trivial number of insertions.
public void testBytesRefHash() {
BytesRefHash bytes = new BytesRefHash();
for (int i = 0; i < 100; i++) {
bytes.add(new BytesRef("foo bar " + i));
bytes.add(new BytesRef("baz bam " + i));
}
long actual = sizeOf(bytes);
long estimated = RamUsageEstimator.sizeOf(bytes);
assertEquals(actual, estimated);
}
// Map estimation is a heuristic: it should over-estimate relative to the
// reflective tester, and must terminate on self-referential maps (MAX_DEPTH).
public void testMap() {
Map<String, Object> map = new HashMap<>();
map.put("primitive", 1234L);
map.put("string", "string");
long actual = sizeOf(map);
long estimated = RamUsageEstimator.sizeOfObject(map);
assertTrue(estimated > actual); // RamUsageTester under-estimates the size of map
// test recursion
map.clear();
map.put("string[]", new String[]{"foo", "bar"});
map.put("map", Collections.singletonMap("foo", "bar"));
map.put("self", map); // cycle: must not loop forever thanks to the depth cutoff
actual = sizeOf(map);
estimated = RamUsageEstimator.sizeOfObject(map);
assertTrue(estimated > actual);
}
// Collection estimation should match the reflective tester for simple contents,
// and a self-referential list adds exactly one extra shallow list (depth cutoff).
public void testCollection() {
List<Object> list = new ArrayList<>();
list.add(1234L);
list.add("string");
list.add(new Term("foo", "bar"));
long actual = sizeOf(list);
long estimated = RamUsageEstimator.sizeOfObject(list);
assertEquals(actual, estimated);
// test recursion
list.clear();
list.add(1234L);
list.add(list); // cycle
actual = sizeOf(list);
estimated = RamUsageEstimator.sizeOfObject(list);
assertEquals(actual + RamUsageEstimator.shallowSizeOf(list), estimated);
}
// Mixed query tree (Accountable leaves + unknown types estimated via the
// visitor): the estimate must exceed the reflective shallow measurement.
public void testQuery() {
DisjunctionMaxQuery dismax = new DisjunctionMaxQuery(
Arrays.asList(new TermQuery(new Term("foo", "bar")), new TermQuery(new Term("baz", "bam"))), 1.0f);
BooleanQuery bq = new BooleanQuery.Builder()
.add(new TermQuery(new Term("foo", "bar")), BooleanClause.Occur.SHOULD)
.add(new FuzzyQuery(new Term("foo", "baz")), BooleanClause.Occur.MUST_NOT)
.add(dismax, BooleanClause.Occur.MUST)
.build();
long actual = sizeOf(bq);
long estimated = RamUsageEstimator.sizeOfObject(bq);
assertTrue(actual < estimated);
}
public void testReferenceSize() { public void testReferenceSize() {
assertTrue(NUM_BYTES_OBJECT_REF == 4 || NUM_BYTES_OBJECT_REF == 8); assertTrue(NUM_BYTES_OBJECT_REF == 4 || NUM_BYTES_OBJECT_REF == 8);
if (Constants.JRE_IS_64BIT) { if (Constants.JRE_IS_64BIT) {

View File

@ -32,11 +32,14 @@ import org.apache.lucene.search.QueryVisitor;
import org.apache.lucene.search.Scorer; import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.TwoPhaseIterator; import org.apache.lucene.search.TwoPhaseIterator;
import org.apache.lucene.search.Weight; import org.apache.lucene.search.Weight;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LongBitSet; import org.apache.lucene.util.LongBitSet;
import org.apache.lucene.util.LongValues; import org.apache.lucene.util.LongValues;
import org.apache.lucene.util.RamUsageEstimator;
final class GlobalOrdinalsQuery extends Query { final class GlobalOrdinalsQuery extends Query implements Accountable {
private static final long BASE_RAM_BYTES = RamUsageEstimator.shallowSizeOfInstance(GlobalOrdinalsQuery.class);
// All the ords of matching docs found with OrdinalsCollector. // All the ords of matching docs found with OrdinalsCollector.
private final LongBitSet foundOrds; private final LongBitSet foundOrds;
@ -50,6 +53,8 @@ final class GlobalOrdinalsQuery extends Query {
// id of the context rather than the context itself in order not to hold references to index readers // id of the context rather than the context itself in order not to hold references to index readers
private final Object indexReaderContextId; private final Object indexReaderContextId;
private final long ramBytesUsed; // cache
GlobalOrdinalsQuery(LongBitSet foundOrds, String joinField, OrdinalMap globalOrds, Query toQuery, GlobalOrdinalsQuery(LongBitSet foundOrds, String joinField, OrdinalMap globalOrds, Query toQuery,
Query fromQuery, Object indexReaderContextId) { Query fromQuery, Object indexReaderContextId) {
this.foundOrds = foundOrds; this.foundOrds = foundOrds;
@ -58,6 +63,13 @@ final class GlobalOrdinalsQuery extends Query {
this.toQuery = toQuery; this.toQuery = toQuery;
this.fromQuery = fromQuery; this.fromQuery = fromQuery;
this.indexReaderContextId = indexReaderContextId; this.indexReaderContextId = indexReaderContextId;
this.ramBytesUsed = BASE_RAM_BYTES +
RamUsageEstimator.sizeOfObject(this.foundOrds) +
RamUsageEstimator.sizeOfObject(this.globalOrds) +
RamUsageEstimator.sizeOfObject(this.joinField) +
RamUsageEstimator.sizeOfObject(this.fromQuery, RamUsageEstimator.QUERY_DEFAULT_RAM_BYTES_USED) +
RamUsageEstimator.sizeOfObject(this.toQuery, RamUsageEstimator.QUERY_DEFAULT_RAM_BYTES_USED);
} }
@Override @Override
@ -103,6 +115,11 @@ final class GlobalOrdinalsQuery extends Query {
'}'; '}';
} }
@Override
public long ramBytesUsed() {
return ramBytesUsed;
}
final class W extends ConstantScoreWeight { final class W extends ConstantScoreWeight {
private final Weight approximationWeight; private final Weight approximationWeight;

View File

@ -32,10 +32,13 @@ import org.apache.lucene.search.QueryVisitor;
import org.apache.lucene.search.Scorer; import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.TwoPhaseIterator; import org.apache.lucene.search.TwoPhaseIterator;
import org.apache.lucene.search.Weight; import org.apache.lucene.search.Weight;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LongValues; import org.apache.lucene.util.LongValues;
import org.apache.lucene.util.RamUsageEstimator;
final class GlobalOrdinalsWithScoreQuery extends Query { final class GlobalOrdinalsWithScoreQuery extends Query implements Accountable {
private static final long BASE_RAM_BYTES = RamUsageEstimator.shallowSizeOfInstance(GlobalOrdinalsWithScoreQuery.class);
private final GlobalOrdinalsWithScoreCollector collector; private final GlobalOrdinalsWithScoreCollector collector;
private final String joinField; private final String joinField;
@ -51,6 +54,8 @@ final class GlobalOrdinalsWithScoreQuery extends Query {
// id of the context rather than the context itself in order not to hold references to index readers // id of the context rather than the context itself in order not to hold references to index readers
private final Object indexReaderContextId; private final Object indexReaderContextId;
private final long ramBytesUsed; // cache
GlobalOrdinalsWithScoreQuery(GlobalOrdinalsWithScoreCollector collector, ScoreMode scoreMode, String joinField, GlobalOrdinalsWithScoreQuery(GlobalOrdinalsWithScoreCollector collector, ScoreMode scoreMode, String joinField,
OrdinalMap globalOrds, Query toQuery, Query fromQuery, int min, int max, OrdinalMap globalOrds, Query toQuery, Query fromQuery, int min, int max,
Object indexReaderContextId) { Object indexReaderContextId) {
@ -63,6 +68,12 @@ final class GlobalOrdinalsWithScoreQuery extends Query {
this.min = min; this.min = min;
this.max = max; this.max = max;
this.indexReaderContextId = indexReaderContextId; this.indexReaderContextId = indexReaderContextId;
this.ramBytesUsed = BASE_RAM_BYTES +
RamUsageEstimator.sizeOfObject(this.fromQuery, RamUsageEstimator.QUERY_DEFAULT_RAM_BYTES_USED) +
RamUsageEstimator.sizeOfObject(this.globalOrds) +
RamUsageEstimator.sizeOfObject(this.joinField) +
RamUsageEstimator.sizeOfObject(this.toQuery, RamUsageEstimator.QUERY_DEFAULT_RAM_BYTES_USED);
} }
@Override @Override
@ -124,6 +135,11 @@ final class GlobalOrdinalsWithScoreQuery extends Query {
'}'; '}';
} }
@Override
public long ramBytesUsed() {
return ramBytesUsed;
}
final class W extends FilterWeight { final class W extends FilterWeight {
W(Query query, Weight approximationWeight) { W(Query query, Weight approximationWeight) {

View File

@ -43,13 +43,16 @@ import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryVisitor; import org.apache.lucene.search.QueryVisitor;
import org.apache.lucene.search.Scorer; import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Weight; import org.apache.lucene.search.Weight;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.BitSetIterator; import org.apache.lucene.util.BitSetIterator;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder; import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.FixedBitSet; import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.RamUsageEstimator;
// A TermsIncludingScoreQuery variant for point values: // A TermsIncludingScoreQuery variant for point values:
abstract class PointInSetIncludingScoreQuery extends Query { abstract class PointInSetIncludingScoreQuery extends Query implements Accountable {
protected static final long BASE_RAM_BYTES = RamUsageEstimator.shallowSizeOfInstance(PointInSetIncludingScoreQuery.class);
static BiFunction<byte[], Class<? extends Number>, String> toString = (value, numericType) -> { static BiFunction<byte[], Class<? extends Number>, String> toString = (value, numericType) -> {
if (Integer.class.equals(numericType)) { if (Integer.class.equals(numericType)) {
@ -75,6 +78,8 @@ abstract class PointInSetIncludingScoreQuery extends Query {
final List<Float> aggregatedJoinScores; final List<Float> aggregatedJoinScores;
private final long ramBytesUsed; // cache
static abstract class Stream extends PointInSetQuery.Stream { static abstract class Stream extends PointInSetQuery.Stream {
float score; float score;
@ -116,6 +121,11 @@ abstract class PointInSetIncludingScoreQuery extends Query {
} }
sortedPackedPoints = builder.finish(); sortedPackedPoints = builder.finish();
sortedPackedPointsHashCode = sortedPackedPoints.hashCode(); sortedPackedPointsHashCode = sortedPackedPoints.hashCode();
this.ramBytesUsed = BASE_RAM_BYTES +
RamUsageEstimator.sizeOfObject(this.field) +
RamUsageEstimator.sizeOfObject(this.originalQuery, RamUsageEstimator.QUERY_DEFAULT_RAM_BYTES_USED) +
RamUsageEstimator.sizeOfObject(this.sortedPackedPoints);
} }
@Override @Override
@ -335,4 +345,9 @@ abstract class PointInSetIncludingScoreQuery extends Query {
} }
protected abstract String toString(byte[] value); protected abstract String toString(byte[] value);
@Override
public long ramBytesUsed() {
return ramBytesUsed;
}
} }

View File

@ -31,12 +31,15 @@ import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryVisitor; import org.apache.lucene.search.QueryVisitor;
import org.apache.lucene.search.Scorer; import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Weight; import org.apache.lucene.search.Weight;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.BitSetIterator; import org.apache.lucene.util.BitSetIterator;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefHash; import org.apache.lucene.util.BytesRefHash;
import org.apache.lucene.util.FixedBitSet; import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.RamUsageEstimator;
class TermsIncludingScoreQuery extends Query { class TermsIncludingScoreQuery extends Query implements Accountable {
protected static final long BASE_RAM_BYTES = RamUsageEstimator.shallowSizeOfInstance(TermsIncludingScoreQuery.class);
private final ScoreMode scoreMode; private final ScoreMode scoreMode;
private final String toField; private final String toField;
@ -51,6 +54,8 @@ class TermsIncludingScoreQuery extends Query {
// id of the context rather than the context itself in order not to hold references to index readers // id of the context rather than the context itself in order not to hold references to index readers
private final Object topReaderContextId; private final Object topReaderContextId;
private final long ramBytesUsed; // cache
TermsIncludingScoreQuery(ScoreMode scoreMode, String toField, boolean multipleValuesPerDocument, BytesRefHash terms, float[] scores, TermsIncludingScoreQuery(ScoreMode scoreMode, String toField, boolean multipleValuesPerDocument, BytesRefHash terms, float[] scores,
String fromField, Query fromQuery, Object indexReaderContextId) { String fromField, Query fromQuery, Object indexReaderContextId) {
this.scoreMode = scoreMode; this.scoreMode = scoreMode;
@ -63,6 +68,14 @@ class TermsIncludingScoreQuery extends Query {
this.fromField = fromField; this.fromField = fromField;
this.fromQuery = fromQuery; this.fromQuery = fromQuery;
this.topReaderContextId = indexReaderContextId; this.topReaderContextId = indexReaderContextId;
this.ramBytesUsed = BASE_RAM_BYTES +
RamUsageEstimator.sizeOfObject(fromField) +
RamUsageEstimator.sizeOfObject(fromQuery, RamUsageEstimator.QUERY_DEFAULT_RAM_BYTES_USED) +
RamUsageEstimator.sizeOfObject(ords) +
RamUsageEstimator.sizeOfObject(scores) +
RamUsageEstimator.sizeOfObject(terms) +
RamUsageEstimator.sizeOfObject(toField);
} }
@Override @Override
@ -96,6 +109,11 @@ class TermsIncludingScoreQuery extends Query {
return classHash() + Objects.hash(scoreMode, toField, fromField, fromQuery, topReaderContextId); return classHash() + Objects.hash(scoreMode, toField, fromField, fromQuery, topReaderContextId);
} }
@Override
public long ramBytesUsed() {
return ramBytesUsed;
}
@Override @Override
public Weight createWeight(IndexSearcher searcher, org.apache.lucene.search.ScoreMode scoreMode, float boost) throws IOException { public Weight createWeight(IndexSearcher searcher, org.apache.lucene.search.ScoreMode scoreMode, float boost) throws IOException {
if (scoreMode.needsScores() == false) { if (scoreMode.needsScores() == false) {

View File

@ -24,8 +24,10 @@ import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.MultiTermQuery; import org.apache.lucene.search.MultiTermQuery;
import org.apache.lucene.search.Query; import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryVisitor; import org.apache.lucene.search.QueryVisitor;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.AttributeSource; import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.BytesRefHash; import org.apache.lucene.util.BytesRefHash;
import org.apache.lucene.util.RamUsageEstimator;
/** /**
* A query that has an array of terms from a specific field. This query will match documents have one or more terms in * A query that has an array of terms from a specific field. This query will match documents have one or more terms in
@ -33,7 +35,8 @@ import org.apache.lucene.util.BytesRefHash;
* *
* @lucene.experimental * @lucene.experimental
*/ */
class TermsQuery extends MultiTermQuery { class TermsQuery extends MultiTermQuery implements Accountable {
private static final long BASE_RAM_BYTES = RamUsageEstimator.shallowSizeOfInstance(TermsQuery.class);
private final BytesRefHash terms; private final BytesRefHash terms;
private final int[] ords; private final int[] ords;
@ -44,6 +47,8 @@ class TermsQuery extends MultiTermQuery {
// id of the context rather than the context itself in order not to hold references to index readers // id of the context rather than the context itself in order not to hold references to index readers
private final Object indexReaderContextId; private final Object indexReaderContextId;
private final long ramBytesUsed; // cache
/** /**
* @param toField The field that should contain terms that are specified in the next parameter. * @param toField The field that should contain terms that are specified in the next parameter.
* @param terms The terms that matching documents should have. The terms must be sorted by natural order. * @param terms The terms that matching documents should have. The terms must be sorted by natural order.
@ -56,6 +61,13 @@ class TermsQuery extends MultiTermQuery {
this.fromField = fromField; this.fromField = fromField;
this.fromQuery = fromQuery; this.fromQuery = fromQuery;
this.indexReaderContextId = indexReaderContextId; this.indexReaderContextId = indexReaderContextId;
this.ramBytesUsed = BASE_RAM_BYTES +
RamUsageEstimator.sizeOfObject(field) +
RamUsageEstimator.sizeOfObject(fromField) +
RamUsageEstimator.sizeOfObject(fromQuery, RamUsageEstimator.QUERY_DEFAULT_RAM_BYTES_USED) +
RamUsageEstimator.sizeOfObject(ords) +
RamUsageEstimator.sizeOfObject(terms);
} }
@Override @Override
@ -102,4 +114,8 @@ class TermsQuery extends MultiTermQuery {
return classHash() + Objects.hash(field, fromField, fromQuery, indexReaderContextId); return classHash() + Objects.hash(field, fromField, fromQuery, indexReaderContextId);
} }
@Override
public long ramBytesUsed() {
return ramBytesUsed;
}
} }

View File

@ -37,7 +37,9 @@ import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.similarities.BM25Similarity; import org.apache.lucene.search.similarities.BM25Similarity;
import org.apache.lucene.search.similarities.Similarity; import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.search.similarities.SimilarityBase; import org.apache.lucene.search.similarities.SimilarityBase;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.RamUsageEstimator;
/** /**
* A {@link Query} that treats multiple fields as a single stream and scores * A {@link Query} that treats multiple fields as a single stream and scores
@ -53,7 +55,8 @@ import org.apache.lucene.util.BytesRef;
* *
* @lucene.experimental * @lucene.experimental
*/ */
public final class BM25FQuery extends Query { public final class BM25FQuery extends Query implements Accountable {
private static final long BASE_RAM_BYTES = RamUsageEstimator.shallowSizeOfInstance(BM25FQuery.class);
/** /**
* A builder for {@link BM25FQuery}. * A builder for {@link BM25FQuery}.
@ -143,6 +146,8 @@ public final class BM25FQuery extends Query {
// array of terms per field, sorted // array of terms per field, sorted
private final Term fieldTerms[]; private final Term fieldTerms[];
private final long ramBytesUsed;
private BM25FQuery(BM25Similarity similarity, TreeMap<String, FieldAndWeight> fieldAndWeights, BytesRef[] terms) { private BM25FQuery(BM25Similarity similarity, TreeMap<String, FieldAndWeight> fieldAndWeights, BytesRef[] terms) {
this.similarity = similarity; this.similarity = similarity;
this.fieldAndWeights = fieldAndWeights; this.fieldAndWeights = fieldAndWeights;
@ -159,6 +164,11 @@ public final class BM25FQuery extends Query {
fieldTerms[pos++] = new Term(field, term); fieldTerms[pos++] = new Term(field, term);
} }
} }
this.ramBytesUsed = BASE_RAM_BYTES +
RamUsageEstimator.sizeOfObject(fieldAndWeights) +
RamUsageEstimator.sizeOfObject(fieldTerms) +
RamUsageEstimator.sizeOfObject(terms);
} }
public List<Term> getTerms() { public List<Term> getTerms() {
@ -202,6 +212,11 @@ public final class BM25FQuery extends Query {
Arrays.equals(terms, ((BM25FQuery) other).terms); Arrays.equals(terms, ((BM25FQuery) other).terms);
} }
@Override
public long ramBytesUsed() {
return ramBytesUsed;
}
@Override @Override
public Query rewrite(IndexReader reader) throws IOException { public Query rewrite(IndexReader reader) throws IOException {
// optimize zero and single field cases // optimize zero and single field cases

View File

@ -25,17 +25,21 @@ import java.util.stream.Collectors;
import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.RamUsageEstimator;
/** A {@link Query} that allows to have a configurable number or required /** A {@link Query} that allows to have a configurable number or required
* matches per document. This is typically useful in order to build queries * matches per document. This is typically useful in order to build queries
* whose query terms must all appear in documents. * whose query terms must all appear in documents.
* @lucene.experimental * @lucene.experimental
*/ */
public final class CoveringQuery extends Query { public final class CoveringQuery extends Query implements Accountable {
private static final long BASE_RAM_BYTES = RamUsageEstimator.shallowSizeOfInstance(CoveringQuery.class);
private final Collection<Query> queries; private final Collection<Query> queries;
private final LongValuesSource minimumNumberMatch; private final LongValuesSource minimumNumberMatch;
private final int hashCode; private final int hashCode;
private final long ramBytesUsed;
/** /**
* Sole constructor. * Sole constructor.
@ -58,6 +62,9 @@ public final class CoveringQuery extends Query {
this.queries.addAll(queries); this.queries.addAll(queries);
this.minimumNumberMatch = Objects.requireNonNull(minimumNumberMatch); this.minimumNumberMatch = Objects.requireNonNull(minimumNumberMatch);
this.hashCode = computeHashCode(); this.hashCode = computeHashCode();
this.ramBytesUsed = BASE_RAM_BYTES +
RamUsageEstimator.sizeOfObject(this.queries, RamUsageEstimator.QUERY_DEFAULT_RAM_BYTES_USED);
} }
@Override @Override
@ -92,6 +99,11 @@ public final class CoveringQuery extends Query {
return hashCode; return hashCode;
} }
@Override
public long ramBytesUsed() {
return ramBytesUsed;
}
@Override @Override
public Query rewrite(IndexReader reader) throws IOException { public Query rewrite(IndexReader reader) throws IOException {
Multiset<Query> rewritten = new Multiset<>(); Multiset<Query> rewritten = new Multiset<>();

View File

@ -28,6 +28,8 @@ import org.apache.lucene.document.SortedNumericDocValuesField;
import org.apache.lucene.index.DocValues; import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.SortedNumericDocValues; import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.RamUsageEstimator;
/** /**
* Like {@link DocValuesTermsQuery}, but this query only * Like {@link DocValuesTermsQuery}, but this query only
@ -43,7 +45,8 @@ import org.apache.lucene.index.SortedNumericDocValues;
* *
* @lucene.experimental * @lucene.experimental
*/ */
public class DocValuesNumbersQuery extends Query { public class DocValuesNumbersQuery extends Query implements Accountable {
private static final long BASE_RAM_BYTES = RamUsageEstimator.shallowSizeOfInstance(DocValuesNumbersQuery.class);
private final String field; private final String field;
private final LongHashSet numbers; private final LongHashSet numbers;
@ -102,6 +105,13 @@ public class DocValuesNumbersQuery extends Query {
.toString(); .toString();
} }
@Override
public long ramBytesUsed() {
return BASE_RAM_BYTES +
RamUsageEstimator.sizeOfObject(field) +
RamUsageEstimator.sizeOfObject(numbers);
}
@Override @Override
public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException {
return new ConstantScoreWeight(this, boost) { return new ConstantScoreWeight(this, boost) {

View File

@ -29,10 +29,12 @@ import org.apache.lucene.index.PrefixCodedTerms;
import org.apache.lucene.index.PrefixCodedTerms.TermIterator; import org.apache.lucene.index.PrefixCodedTerms.TermIterator;
import org.apache.lucene.index.SortedSetDocValues; import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.index.Term; import org.apache.lucene.index.Term;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.FixedBitSet; import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.LongBitSet; import org.apache.lucene.util.LongBitSet;
import org.apache.lucene.util.RamUsageEstimator;
/** /**
* A {@link Query} that only accepts documents whose * A {@link Query} that only accepts documents whose
@ -91,7 +93,8 @@ import org.apache.lucene.util.LongBitSet;
* *
* @lucene.experimental * @lucene.experimental
*/ */
public class DocValuesTermsQuery extends Query { public class DocValuesTermsQuery extends Query implements Accountable {
private static final long BASE_RAM_BYTES = RamUsageEstimator.shallowSizeOfInstance(DocValuesTermsQuery.class);
private final String field; private final String field;
private final PrefixCodedTerms termData; private final PrefixCodedTerms termData;
@ -164,6 +167,13 @@ public class DocValuesTermsQuery extends Query {
return builder.toString(); return builder.toString();
} }
@Override
public long ramBytesUsed() {
return BASE_RAM_BYTES +
RamUsageEstimator.sizeOfObject(field) +
RamUsageEstimator.sizeOfObject(termData);
}
@Override @Override
public void visit(QueryVisitor visitor) { public void visit(QueryVisitor visitor) {
if (visitor.acceptField(field)) { if (visitor.acceptField(field)) {

View File

@ -21,9 +21,12 @@ import java.util.Arrays;
import java.util.Iterator; import java.util.Iterator;
import java.util.NoSuchElementException; import java.util.NoSuchElementException;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.packed.PackedInts; import org.apache.lucene.util.packed.PackedInts;
final class LongHashSet extends AbstractSet<Long> { final class LongHashSet extends AbstractSet<Long> implements Accountable {
private static final long BASE_RAM_BYTES = RamUsageEstimator.shallowSizeOfInstance(LongHashSet.class);
private static final long MISSING = Long.MIN_VALUE; private static final long MISSING = Long.MIN_VALUE;
@ -114,6 +117,12 @@ final class LongHashSet extends AbstractSet<Long> {
return super.equals(obj); return super.equals(obj);
} }
@Override
public long ramBytesUsed() {
return BASE_RAM_BYTES +
RamUsageEstimator.sizeOfObject(table);
}
@Override @Override
public boolean contains(Object o) { public boolean contains(Object o) {
return o instanceof Long && contains(((Long) o).longValue()); return o instanceof Long && contains(((Long) o).longValue());

View File

@ -33,8 +33,10 @@ import org.apache.lucene.index.TermStates;
import org.apache.lucene.index.TermsEnum; import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.similarities.Similarity; import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.search.spans.SpanNearQuery; import org.apache.lucene.search.spans.SpanNearQuery;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRef; import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.automaton.Automaton; import org.apache.lucene.util.automaton.Automaton;
import org.apache.lucene.util.automaton.Operations; import org.apache.lucene.util.automaton.Operations;
import org.apache.lucene.util.automaton.Transition; import org.apache.lucene.util.automaton.Transition;
@ -66,7 +68,9 @@ import static org.apache.lucene.util.automaton.Operations.DEFAULT_MAX_DETERMINIZ
* *
* @lucene.experimental */ * @lucene.experimental */
public class TermAutomatonQuery extends Query { public class TermAutomatonQuery extends Query implements Accountable {
private static final long BASE_RAM_BYTES = RamUsageEstimator.shallowSizeOfInstance(TermAutomatonQuery.class);
private final String field; private final String field;
private final Automaton.Builder builder; private final Automaton.Builder builder;
Automaton det; Automaton det;
@ -264,6 +268,16 @@ public class TermAutomatonQuery extends Query {
return System.identityHashCode(this); return System.identityHashCode(this);
} }
@Override
public long ramBytesUsed() {
return BASE_RAM_BYTES +
RamUsageEstimator.sizeOfObject(builder) +
RamUsageEstimator.sizeOfObject(det) +
RamUsageEstimator.sizeOfObject(field) +
RamUsageEstimator.sizeOfObject(idToTerm) +
RamUsageEstimator.sizeOfObject(termToID);
}
/** Returns the dot (graphviz) representation of this automaton. /** Returns the dot (graphviz) representation of this automaton.
* This is extremely useful for visualizing the automaton. */ * This is extremely useful for visualizing the automaton. */
public String toDot() { public String toDot() {

View File

@ -33,7 +33,9 @@ import org.apache.lucene.spatial3d.geom.BasePlanetObject;
import org.apache.lucene.spatial3d.geom.GeoShape; import org.apache.lucene.spatial3d.geom.GeoShape;
import org.apache.lucene.spatial3d.geom.PlanetModel; import org.apache.lucene.spatial3d.geom.PlanetModel;
import org.apache.lucene.spatial3d.geom.XYZBounds; import org.apache.lucene.spatial3d.geom.XYZBounds;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.DocIdSetBuilder; import org.apache.lucene.util.DocIdSetBuilder;
import org.apache.lucene.util.RamUsageEstimator;
/** Finds all previously indexed points that fall within the specified polygon. /** Finds all previously indexed points that fall within the specified polygon.
* *
@ -41,7 +43,9 @@ import org.apache.lucene.util.DocIdSetBuilder;
* *
* @lucene.experimental */ * @lucene.experimental */
final class PointInGeo3DShapeQuery extends Query { final class PointInGeo3DShapeQuery extends Query implements Accountable {
private static final long BASE_RAM_BYTES = RamUsageEstimator.shallowSizeOfInstance(PointInGeo3DShapeQuery.class);
final String field; final String field;
final GeoShape shape; final GeoShape shape;
final XYZBounds shapeBounds; final XYZBounds shapeBounds;
@ -162,4 +166,12 @@ final class PointInGeo3DShapeQuery extends Query {
sb.append(shape); sb.append(shape);
return sb.toString(); return sb.toString();
} }
@Override
public long ramBytesUsed() {
return BASE_RAM_BYTES +
RamUsageEstimator.sizeOfObject(field) +
RamUsageEstimator.sizeOfObject(shape) +
RamUsageEstimator.sizeOfObject(shapeBounds);
}
} }

View File

@ -27,10 +27,12 @@ import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.QueryVisitor; import org.apache.lucene.search.QueryVisitor;
import org.apache.lucene.search.ScoreMode; import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.Weight; import org.apache.lucene.search.Weight;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder; import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.IntsRef; import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.IntsRefBuilder; import org.apache.lucene.util.IntsRefBuilder;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.automaton.Automata; import org.apache.lucene.util.automaton.Automata;
import org.apache.lucene.util.automaton.Automaton; import org.apache.lucene.util.automaton.Automaton;
import org.apache.lucene.util.automaton.Operations; import org.apache.lucene.util.automaton.Operations;
@ -76,13 +78,17 @@ import org.apache.lucene.util.fst.Util;
* *
* @lucene.experimental * @lucene.experimental
*/ */
public class ContextQuery extends CompletionQuery { public class ContextQuery extends CompletionQuery implements Accountable {
private static final long BASE_RAM_BYTES = RamUsageEstimator.shallowSizeOfInstance(ContextQuery.class);
private IntsRefBuilder scratch = new IntsRefBuilder(); private IntsRefBuilder scratch = new IntsRefBuilder();
private Map<IntsRef, ContextMetaData> contexts; private Map<IntsRef, ContextMetaData> contexts;
private boolean matchAllContexts = false; private boolean matchAllContexts = false;
/** Inner completion query */ /** Inner completion query */
protected CompletionQuery innerQuery; protected CompletionQuery innerQuery;
private long ramBytesUsed;
/** /**
* Constructs a context completion query that matches * Constructs a context completion query that matches
* documents specified by <code>query</code>. * documents specified by <code>query</code>.
@ -98,6 +104,13 @@ public class ContextQuery extends CompletionQuery {
} }
this.innerQuery = query; this.innerQuery = query;
contexts = new HashMap<>(); contexts = new HashMap<>();
updateRamBytesUsed();
}
private void updateRamBytesUsed() {
ramBytesUsed = BASE_RAM_BYTES +
RamUsageEstimator.sizeOfObject(contexts) +
RamUsageEstimator.sizeOfObject(innerQuery, RamUsageEstimator.QUERY_DEFAULT_RAM_BYTES_USED);
} }
/** /**
@ -129,6 +142,7 @@ public class ContextQuery extends CompletionQuery {
} }
} }
contexts.put(IntsRef.deepCopyOf(Util.toIntsRef(new BytesRef(context), scratch)), new ContextMetaData(boost, exact)); contexts.put(IntsRef.deepCopyOf(Util.toIntsRef(new BytesRef(context), scratch)), new ContextMetaData(boost, exact));
updateRamBytesUsed();
} }
/** /**
@ -342,4 +356,8 @@ public class ContextQuery extends CompletionQuery {
visitor.visitLeaf(this); visitor.visitLeaf(this);
} }
@Override
public long ramBytesUsed() {
return ramBytesUsed;
}
} }