diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index e1785ebebcc..ad47c76f7d8 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -120,6 +120,8 @@ New Features * LUCENE-6089, LUCENE-6090: Tune CompressionMode.HIGH_COMPRESSION for better compression and less cpu usage. (Adrien Grand, Robert Muir) +* LUCENE-6088: TermsFilter implements Accountable. (Adrien Grand) + API Changes * LUCENE-5900: Deprecated more constructors taking Version in *InfixSuggester and diff --git a/lucene/queries/src/java/org/apache/lucene/queries/TermsFilter.java b/lucene/queries/src/java/org/apache/lucene/queries/TermsFilter.java index 6ef3890a0ae..b9b7e0f74c3 100644 --- a/lucene/queries/src/java/org/apache/lucene/queries/TermsFilter.java +++ b/lucene/queries/src/java/org/apache/lucene/queries/TermsFilter.java @@ -33,10 +33,12 @@ import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; import org.apache.lucene.search.DocIdSet; import org.apache.lucene.search.Filter; +import org.apache.lucene.util.Accountable; import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.BitDocIdSet; import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.RamUsageEstimator; /** * Constructs a filter for docs matching any of the terms added to this class. @@ -45,7 +47,9 @@ import org.apache.lucene.util.BytesRef; * a choice of "category" labels picked by the end user. As a filter, this is much faster than the * equivalent query (a BooleanQuery with many "should" TermQueries) */ -public final class TermsFilter extends Filter { +public final class TermsFilter extends Filter implements Accountable { + + private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(TermsFilter.class); /* * this class is often used for large number of terms in a single field. @@ -178,7 +182,14 @@ public final class TermsFilter extends Filter { this.hashCode = hash; } - + + @Override + public long ramBytesUsed() { + return BASE_RAM_BYTES_USED + + RamUsageEstimator.sizeOf(termsAndFields) + + RamUsageEstimator.sizeOf(termsBytes) + + RamUsageEstimator.sizeOf(offsets); + } @Override public DocIdSet getDocIdSet(LeafReaderContext context, Bits acceptDocs) throws IOException { @@ -254,7 +265,13 @@ public final class TermsFilter extends Filter { return builder.toString(); } - private static final class TermsAndField { + private static final class TermsAndField implements Accountable { + + private static final long BASE_RAM_BYTES_USED = + RamUsageEstimator.shallowSizeOfInstance(TermsAndField.class) + + RamUsageEstimator.shallowSizeOfInstance(String.class) + + RamUsageEstimator.NUM_BYTES_ARRAY_HEADER; // header of the array held by the String + final int start; final int end; final String field; @@ -267,6 +284,13 @@ public final class TermsFilter extends Filter { this.field = field; } + @Override + public long ramBytesUsed() { + // this is an approximation since we don't actually know how strings store + // their data, which can be JVM-dependent + return BASE_RAM_BYTES_USED + field.length() * RamUsageEstimator.NUM_BYTES_CHAR; + } + @Override public int hashCode() { final int prime = 31; @@ -317,4 +341,5 @@ public final class TermsFilter extends Filter { Collections.sort(toSort); return toSort; } + } diff --git a/lucene/queries/src/test/org/apache/lucene/queries/TermsFilterTest.java b/lucene/queries/src/test/org/apache/lucene/queries/TermsFilterTest.java index 8ff17468d59..c275233cb89 100644 --- a/lucene/queries/src/test/org/apache/lucene/queries/TermsFilterTest.java +++ b/lucene/queries/src/test/org/apache/lucene/queries/TermsFilterTest.java @@ -46,12 +46,14 @@ import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.TopDocs; import org.apache.lucene.store.Directory; -import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BitDocIdSet; -import org.apache.lucene.util.FixedBitSet; +import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util.RamUsageTester; import org.apache.lucene.util.TestUtil; +import com.carrotsearch.randomizedtesting.generators.RandomStrings; + public class TermsFilterTest extends LuceneTestCase { public void testCachability() throws Exception { @@ -336,4 +338,18 @@ public class TermsFilterTest extends LuceneTestCase { new Term("field1", "c")); assertEquals("field1:a field1:b field1:c", termsFilter.toString()); } + + public void testRamBytesUsed() { + List terms = new ArrayList<>(); + final int numTerms = 1000 + random().nextInt(1000); + for (int i = 0; i < numTerms; ++i) { + terms.add(new Term("f", RandomStrings.randomUnicodeOfLength(random(), 10))); + } + TermsFilter filter = new TermsFilter(terms); + final long actualRamBytesUsed = RamUsageTester.sizeOf(filter); + final long expectedRamBytesUsed = filter.ramBytesUsed(); + // error margin within 1% + assertEquals(actualRamBytesUsed, expectedRamBytesUsed, actualRamBytesUsed / 100); + } + }