LUCENE-6088: TermsFilter implements Accountable.

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1643079 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Adrien Grand 2014-12-03 10:26:36 +00:00
parent 8e1d811e09
commit 07cf3638ec
3 changed files with 48 additions and 5 deletions

View File

@ -120,6 +120,8 @@ New Features
* LUCENE-6089, LUCENE-6090: Tune CompressionMode.HIGH_COMPRESSION for
better compression and less cpu usage. (Adrien Grand, Robert Muir)
* LUCENE-6088: TermsFilter implements Accountable. (Adrien Grand)
API Changes
* LUCENE-5900: Deprecated more constructors taking Version in *InfixSuggester and

View File

@ -33,10 +33,12 @@ import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.Filter;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BitDocIdSet;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.RamUsageEstimator;
/**
* Constructs a filter for docs matching any of the terms added to this class.
@ -45,7 +47,9 @@ import org.apache.lucene.util.BytesRef;
* a choice of "category" labels picked by the end user. As a filter, this is much faster than the
* equivalent query (a BooleanQuery with many "should" TermQueries)
*/
public final class TermsFilter extends Filter {
public final class TermsFilter extends Filter implements Accountable {
private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(TermsFilter.class);
/*
* this class is often used for large number of terms in a single field.
@ -178,7 +182,14 @@ public final class TermsFilter extends Filter {
this.hashCode = hash;
}
@Override
public long ramBytesUsed() {
return BASE_RAM_BYTES_USED
+ RamUsageEstimator.sizeOf(termsAndFields)
+ RamUsageEstimator.sizeOf(termsBytes)
+ RamUsageEstimator.sizeOf(offsets);
}
@Override
public DocIdSet getDocIdSet(LeafReaderContext context, Bits acceptDocs) throws IOException {
@ -254,7 +265,13 @@ public final class TermsFilter extends Filter {
return builder.toString();
}
private static final class TermsAndField {
private static final class TermsAndField implements Accountable {
private static final long BASE_RAM_BYTES_USED =
RamUsageEstimator.shallowSizeOfInstance(TermsAndField.class)
+ RamUsageEstimator.shallowSizeOfInstance(String.class)
+ RamUsageEstimator.NUM_BYTES_ARRAY_HEADER; // header of the array held by the String
final int start;
final int end;
final String field;
@ -267,6 +284,13 @@ public final class TermsFilter extends Filter {
this.field = field;
}
@Override
public long ramBytesUsed() {
// this is an approximation since we don't actually know how strings store
// their data, which can be JVM-dependent
return BASE_RAM_BYTES_USED + field.length() * RamUsageEstimator.NUM_BYTES_CHAR;
}
@Override
public int hashCode() {
final int prime = 31;
@ -317,4 +341,5 @@ public final class TermsFilter extends Filter {
Collections.sort(toSort);
return toSort;
}
}

View File

@ -46,12 +46,14 @@ import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BitDocIdSet;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.RamUsageTester;
import org.apache.lucene.util.TestUtil;
import com.carrotsearch.randomizedtesting.generators.RandomStrings;
public class TermsFilterTest extends LuceneTestCase {
public void testCachability() throws Exception {
@ -336,4 +338,18 @@ public class TermsFilterTest extends LuceneTestCase {
new Term("field1", "c"));
assertEquals("field1:a field1:b field1:c", termsFilter.toString());
}
public void testRamBytesUsed() {
List<Term> terms = new ArrayList<>();
final int numTerms = 1000 + random().nextInt(1000);
for (int i = 0; i < numTerms; ++i) {
terms.add(new Term("f", RandomStrings.randomUnicodeOfLength(random(), 10)));
}
TermsFilter filter = new TermsFilter(terms);
final long actualRamBytesUsed = RamUsageTester.sizeOf(filter);
final long expectedRamBytesUsed = filter.ramBytesUsed();
// error margin within 1%
assertEquals(actualRamBytesUsed, expectedRamBytesUsed, actualRamBytesUsed / 100);
}
}