mirror of https://github.com/apache/lucene.git
LUCENE-6088: TermsFilter implements Accountable.
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1643079 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
8e1d811e09
commit
07cf3638ec
|
@ -120,6 +120,8 @@ New Features
|
|||
* LUCENE-6089, LUCENE-6090: Tune CompressionMode.HIGH_COMPRESSION for
|
||||
better compression and less cpu usage. (Adrien Grand, Robert Muir)
|
||||
|
||||
* LUCENE-6088: TermsFilter implements Accountable. (Adrien Grand)
|
||||
|
||||
API Changes
|
||||
|
||||
* LUCENE-5900: Deprecated more constructors taking Version in *InfixSuggester and
|
||||
|
|
|
@ -33,10 +33,12 @@ import org.apache.lucene.index.Terms;
|
|||
import org.apache.lucene.index.TermsEnum;
|
||||
import org.apache.lucene.search.DocIdSet;
|
||||
import org.apache.lucene.search.Filter;
|
||||
import org.apache.lucene.util.Accountable;
|
||||
import org.apache.lucene.util.ArrayUtil;
|
||||
import org.apache.lucene.util.BitDocIdSet;
|
||||
import org.apache.lucene.util.Bits;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.RamUsageEstimator;
|
||||
|
||||
/**
|
||||
* Constructs a filter for docs matching any of the terms added to this class.
|
||||
|
@ -45,7 +47,9 @@ import org.apache.lucene.util.BytesRef;
|
|||
* a choice of "category" labels picked by the end user. As a filter, this is much faster than the
|
||||
* equivalent query (a BooleanQuery with many "should" TermQueries)
|
||||
*/
|
||||
public final class TermsFilter extends Filter {
|
||||
public final class TermsFilter extends Filter implements Accountable {
|
||||
|
||||
private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(TermsFilter.class);
|
||||
|
||||
/*
|
||||
* this class is often used for a large number of terms in a single field.
|
||||
|
@ -178,7 +182,14 @@ public final class TermsFilter extends Filter {
|
|||
this.hashCode = hash;
|
||||
|
||||
}
|
||||
|
||||
|
||||
/**
 * Estimates the memory held by this filter.
 *
 * <p>The result is the sum of the precomputed shallow instance size
 * ({@code BASE_RAM_BYTES_USED}) and the sizes reported by
 * {@link RamUsageEstimator} for {@code termsAndFields}, {@code termsBytes}
 * and {@code offsets}.
 *
 * @return the estimated number of bytes used by this filter
 */
@Override
|
||||
public long ramBytesUsed() {
|
||||
return BASE_RAM_BYTES_USED
|
||||
+ RamUsageEstimator.sizeOf(termsAndFields)
|
||||
+ RamUsageEstimator.sizeOf(termsBytes)
|
||||
+ RamUsageEstimator.sizeOf(offsets);
|
||||
}
|
||||
|
||||
@Override
|
||||
public DocIdSet getDocIdSet(LeafReaderContext context, Bits acceptDocs) throws IOException {
|
||||
|
@ -254,7 +265,13 @@ public final class TermsFilter extends Filter {
|
|||
return builder.toString();
|
||||
}
|
||||
|
||||
private static final class TermsAndField {
|
||||
private static final class TermsAndField implements Accountable {
|
||||
|
||||
private static final long BASE_RAM_BYTES_USED =
|
||||
RamUsageEstimator.shallowSizeOfInstance(TermsAndField.class)
|
||||
+ RamUsageEstimator.shallowSizeOfInstance(String.class)
|
||||
+ RamUsageEstimator.NUM_BYTES_ARRAY_HEADER; // header of the array held by the String
|
||||
|
||||
final int start;
|
||||
final int end;
|
||||
final String field;
|
||||
|
@ -267,6 +284,13 @@ public final class TermsFilter extends Filter {
|
|||
this.field = field;
|
||||
}
|
||||
|
||||
/**
 * Estimates the memory held by this entry, including its {@code field} string.
 *
 * <p>The result is {@code BASE_RAM_BYTES_USED} plus
 * {@code RamUsageEstimator.NUM_BYTES_CHAR} bytes for every char of {@code field}.
 *
 * @return the approximate number of bytes used by this instance
 */
@Override
|
||||
public long ramBytesUsed() {
|
||||
// this is an approximation since we don't actually know how strings store
|
||||
// their data, which can be JVM-dependent
|
||||
// NOTE(review): the per-char product looks like it is computed in int before
|
||||
// being added to the long base -- confirm the type of NUM_BYTES_CHAR
|
||||
return BASE_RAM_BYTES_USED + field.length() * RamUsageEstimator.NUM_BYTES_CHAR;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
final int prime = 31;
|
||||
|
@ -317,4 +341,5 @@ public final class TermsFilter extends Filter {
|
|||
Collections.sort(toSort);
|
||||
return toSort;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -46,12 +46,14 @@ import org.apache.lucene.search.ScoreDoc;
|
|||
import org.apache.lucene.search.TermQuery;
|
||||
import org.apache.lucene.search.TopDocs;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.BitDocIdSet;
|
||||
import org.apache.lucene.util.FixedBitSet;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util.RamUsageTester;
|
||||
import org.apache.lucene.util.TestUtil;
|
||||
|
||||
import com.carrotsearch.randomizedtesting.generators.RandomStrings;
|
||||
|
||||
public class TermsFilterTest extends LuceneTestCase {
|
||||
|
||||
public void testCachability() throws Exception {
|
||||
|
@ -336,4 +338,18 @@ public class TermsFilterTest extends LuceneTestCase {
|
|||
new Term("field1", "c"));
|
||||
assertEquals("field1:a field1:b field1:c", termsFilter.toString());
|
||||
}
|
||||
|
||||
/**
 * Checks that {@code TermsFilter.ramBytesUsed()} agrees with the size measured by
 * {@code RamUsageTester.sizeOf} for a filter built from many random terms.
 */
public void testRamBytesUsed() {
|
||||
List<Term> terms = new ArrayList<>();
|
||||
// between 1000 and 1999 terms, all on the same field "f"
final int numTerms = 1000 + random().nextInt(1000);
|
||||
for (int i = 0; i < numTerms; ++i) {
|
||||
terms.add(new Term("f", RandomStrings.randomUnicodeOfLength(random(), 10)));
|
||||
}
|
||||
TermsFilter filter = new TermsFilter(terms);
|
||||
// size measured by RamUsageTester
final long actualRamBytesUsed = RamUsageTester.sizeOf(filter);
|
||||
// size reported by the filter itself
final long expectedRamBytesUsed = filter.ramBytesUsed();
|
||||
// error margin within 1%
|
||||
// the allowed difference is 1% of the measured size
assertEquals(actualRamBytesUsed, expectedRamBytesUsed, actualRamBytesUsed / 100);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue