Lazily initialize the delegate in BloomFilterPostingsFormat to prevent unnecessary loading if the bloom filter terminates early
This commit is contained in:
parent
747ce36915
commit
4705eb2959
|
@ -62,13 +62,7 @@ public class UidField extends Field {
|
||||||
if (terms == null) {
|
if (terms == null) {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
// hack to break early if we have a bloom filter...
|
final TermsEnum termsEnum = terms.iterator(null);
|
||||||
if (terms instanceof BloomFilterPostingsFormat.BloomFilteredFieldsProducer.BloomFilteredTerms) {
|
|
||||||
if (!((BloomFilterPostingsFormat.BloomFilteredFieldsProducer.BloomFilteredTerms) terms).getFilter().mightContain(term.bytes())) {
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
TermsEnum termsEnum = terms.iterator(null);
|
|
||||||
if (termsEnum == null) {
|
if (termsEnum == null) {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
@ -110,13 +104,7 @@ public class UidField extends Field {
|
||||||
if (terms == null) {
|
if (terms == null) {
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
// hack to break early if we have a bloom filter...
|
final TermsEnum termsEnum = terms.iterator(null);
|
||||||
if (terms instanceof BloomFilterPostingsFormat.BloomFilteredFieldsProducer.BloomFilteredTerms) {
|
|
||||||
if (!((BloomFilterPostingsFormat.BloomFilteredFieldsProducer.BloomFilteredTerms) terms).getFilter().mightContain(term.bytes())) {
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
TermsEnum termsEnum = terms.iterator(null);
|
|
||||||
if (termsEnum == null) {
|
if (termsEnum == null) {
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
|
@ -180,167 +180,190 @@ public final class BloomFilterPostingsFormat extends PostingsFormat {
|
||||||
return delegateFieldsProducer.getUniqueTermCount();
|
return delegateFieldsProducer.getUniqueTermCount();
|
||||||
}
|
}
|
||||||
|
|
||||||
public class BloomFilteredTerms extends Terms {
|
|
||||||
private Terms delegateTerms;
|
|
||||||
private BloomFilter filter;
|
|
||||||
|
|
||||||
public BloomFilteredTerms(Terms terms, BloomFilter filter) {
|
|
||||||
this.delegateTerms = terms;
|
|
||||||
this.filter = filter;
|
|
||||||
}
|
|
||||||
|
|
||||||
public BloomFilter getFilter() {
|
|
||||||
return filter;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public TermsEnum intersect(CompiledAutomaton compiled,
|
|
||||||
final BytesRef startTerm) throws IOException {
|
|
||||||
return delegateTerms.intersect(compiled, startTerm);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public TermsEnum iterator(TermsEnum reuse) throws IOException {
|
|
||||||
TermsEnum result;
|
|
||||||
if ((reuse != null) && (reuse instanceof BloomFilteredTermsEnum)) {
|
|
||||||
// recycle the existing BloomFilteredTermsEnum by asking the delegate
|
|
||||||
// to recycle its contained TermsEnum
|
|
||||||
BloomFilteredTermsEnum bfte = (BloomFilteredTermsEnum) reuse;
|
|
||||||
if (bfte.filter == filter) {
|
|
||||||
bfte.delegateTermsEnum = delegateTerms.iterator(bfte.delegateTermsEnum);
|
|
||||||
return bfte;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// We have been handed something we cannot reuse (either null, wrong
|
|
||||||
// class or wrong filter) so allocate a new object
|
|
||||||
result = new BloomFilteredTermsEnum(delegateTerms.iterator(reuse), filter);
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public Comparator<BytesRef> getComparator() {
|
|
||||||
return delegateTerms.getComparator();
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public long size() throws IOException {
|
|
||||||
return delegateTerms.size();
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public long getSumTotalTermFreq() throws IOException {
|
|
||||||
return delegateTerms.getSumTotalTermFreq();
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public long getSumDocFreq() throws IOException {
|
|
||||||
return delegateTerms.getSumDocFreq();
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public int getDocCount() throws IOException {
|
|
||||||
return delegateTerms.getDocCount();
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public boolean hasOffsets() {
|
|
||||||
return delegateTerms.hasOffsets();
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public boolean hasPositions() {
|
|
||||||
return delegateTerms.hasPositions();
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public boolean hasPayloads() {
|
|
||||||
return delegateTerms.hasPayloads();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
class BloomFilteredTermsEnum extends TermsEnum {
|
|
||||||
|
|
||||||
TermsEnum delegateTermsEnum;
|
|
||||||
private BloomFilter filter;
|
|
||||||
|
|
||||||
public BloomFilteredTermsEnum(TermsEnum iterator, BloomFilter filter) {
|
|
||||||
this.delegateTermsEnum = iterator;
|
|
||||||
this.filter = filter;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public final BytesRef next() throws IOException {
|
|
||||||
return delegateTermsEnum.next();
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public final Comparator<BytesRef> getComparator() {
|
|
||||||
return delegateTermsEnum.getComparator();
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public final boolean seekExact(BytesRef text, boolean useCache)
|
|
||||||
throws IOException {
|
|
||||||
// The magical fail-fast speed up that is the entire point of all of
|
|
||||||
// this code - save a disk seek if there is a match on an in-memory
|
|
||||||
// structure
|
|
||||||
// that may occasionally give a false positive but guaranteed no false
|
|
||||||
// negatives
|
|
||||||
if (!filter.mightContain(text)) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
return delegateTermsEnum.seekExact(text, useCache);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public final SeekStatus seekCeil(BytesRef text, boolean useCache)
|
|
||||||
throws IOException {
|
|
||||||
return delegateTermsEnum.seekCeil(text, useCache);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public final void seekExact(long ord) throws IOException {
|
|
||||||
delegateTermsEnum.seekExact(ord);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public final BytesRef term() throws IOException {
|
|
||||||
return delegateTermsEnum.term();
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public final long ord() throws IOException {
|
|
||||||
return delegateTermsEnum.ord();
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public final int docFreq() throws IOException {
|
|
||||||
return delegateTermsEnum.docFreq();
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public final long totalTermFreq() throws IOException {
|
|
||||||
return delegateTermsEnum.totalTermFreq();
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public DocsAndPositionsEnum docsAndPositions(Bits liveDocs,
|
|
||||||
DocsAndPositionsEnum reuse, int flags) throws IOException {
|
|
||||||
return delegateTermsEnum.docsAndPositions(liveDocs, reuse, flags);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public DocsEnum docs(Bits liveDocs, DocsEnum reuse, int flags)
|
|
||||||
throws IOException {
|
|
||||||
return delegateTermsEnum.docs(liveDocs, reuse, flags);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static final class BloomFilteredTerms extends Terms {
|
||||||
|
private Terms delegateTerms;
|
||||||
|
private BloomFilter filter;
|
||||||
|
|
||||||
|
public BloomFilteredTerms(Terms terms, BloomFilter filter) {
|
||||||
|
this.delegateTerms = terms;
|
||||||
|
this.filter = filter;
|
||||||
|
}
|
||||||
|
|
||||||
|
public BloomFilter getFilter() {
|
||||||
|
return filter;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public TermsEnum intersect(CompiledAutomaton compiled,
|
||||||
|
final BytesRef startTerm) throws IOException {
|
||||||
|
return delegateTerms.intersect(compiled, startTerm);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public TermsEnum iterator(TermsEnum reuse) throws IOException {
|
||||||
|
TermsEnum result;
|
||||||
|
if ((reuse != null) && (reuse instanceof BloomFilteredTermsEnum)) {
|
||||||
|
// recycle the existing BloomFilteredTermsEnum by asking the delegate
|
||||||
|
// to recycle its contained TermsEnum
|
||||||
|
BloomFilteredTermsEnum bfte = (BloomFilteredTermsEnum) reuse;
|
||||||
|
if (bfte.filter == filter) {
|
||||||
|
bfte.reset(delegateTerms);
|
||||||
|
return bfte;
|
||||||
|
}
|
||||||
|
reuse = bfte.reuse;
|
||||||
|
}
|
||||||
|
// We have been handed something we cannot reuse (either null, wrong
|
||||||
|
// class or wrong filter) so allocate a new object
|
||||||
|
result = new BloomFilteredTermsEnum(delegateTerms, reuse, filter);
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Comparator<BytesRef> getComparator() {
|
||||||
|
return delegateTerms.getComparator();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public long size() throws IOException {
|
||||||
|
return delegateTerms.size();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public long getSumTotalTermFreq() throws IOException {
|
||||||
|
return delegateTerms.getSumTotalTermFreq();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public long getSumDocFreq() throws IOException {
|
||||||
|
return delegateTerms.getSumDocFreq();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int getDocCount() throws IOException {
|
||||||
|
return delegateTerms.getDocCount();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean hasOffsets() {
|
||||||
|
return delegateTerms.hasOffsets();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean hasPositions() {
|
||||||
|
return delegateTerms.hasPositions();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean hasPayloads() {
|
||||||
|
return delegateTerms.hasPayloads();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static final class BloomFilteredTermsEnum extends TermsEnum {
|
||||||
|
|
||||||
|
private Terms delegateTerms;
|
||||||
|
private TermsEnum delegateTermsEnum;
|
||||||
|
private TermsEnum reuse;
|
||||||
|
private BloomFilter filter;
|
||||||
|
|
||||||
|
public BloomFilteredTermsEnum(Terms other, TermsEnum reuse, BloomFilter filter) {
|
||||||
|
this.delegateTerms = other;
|
||||||
|
this.reuse = reuse;
|
||||||
|
this.filter = filter;
|
||||||
|
}
|
||||||
|
|
||||||
|
void reset(Terms others) {
|
||||||
|
reuse = this.delegateTermsEnum;
|
||||||
|
this.delegateTermsEnum = null;
|
||||||
|
this.delegateTerms = others;
|
||||||
|
}
|
||||||
|
|
||||||
|
private TermsEnum getDelegate() throws IOException {
|
||||||
|
if (delegateTermsEnum == null) {
|
||||||
|
/* pull the iterator only if we really need it -
|
||||||
|
* this can be a relatively heavy operation depending on the
|
||||||
|
* delegate postings format and the underlying directory
|
||||||
|
* (clone IndexInput) */
|
||||||
|
delegateTermsEnum = delegateTerms.iterator(reuse);
|
||||||
|
}
|
||||||
|
return delegateTermsEnum;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public final BytesRef next() throws IOException {
|
||||||
|
return getDelegate().next();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public final Comparator<BytesRef> getComparator() {
|
||||||
|
return delegateTerms.getComparator();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public final boolean seekExact(BytesRef text, boolean useCache)
|
||||||
|
throws IOException {
|
||||||
|
// The magical fail-fast speed up that is the entire point of all of
|
||||||
|
// this code - save a disk seek if there is a match on an in-memory
|
||||||
|
// structure
|
||||||
|
// that may occasionally give a false positive but guaranteed no false
|
||||||
|
// negatives
|
||||||
|
if (!filter.mightContain(text)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return getDelegate().seekExact(text, useCache);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public final SeekStatus seekCeil(BytesRef text, boolean useCache)
|
||||||
|
throws IOException {
|
||||||
|
return getDelegate().seekCeil(text, useCache);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public final void seekExact(long ord) throws IOException {
|
||||||
|
getDelegate().seekExact(ord);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public final BytesRef term() throws IOException {
|
||||||
|
return getDelegate().term();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public final long ord() throws IOException {
|
||||||
|
return getDelegate().ord();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public final int docFreq() throws IOException {
|
||||||
|
return getDelegate().docFreq();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public final long totalTermFreq() throws IOException {
|
||||||
|
return getDelegate().totalTermFreq();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public DocsAndPositionsEnum docsAndPositions(Bits liveDocs,
|
||||||
|
DocsAndPositionsEnum reuse, int flags) throws IOException {
|
||||||
|
return getDelegate().docsAndPositions(liveDocs, reuse, flags);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public DocsEnum docs(Bits liveDocs, DocsEnum reuse, int flags)
|
||||||
|
throws IOException {
|
||||||
|
return getDelegate().docs(liveDocs, reuse, flags);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
final class BloomFilteredFieldsConsumer extends FieldsConsumer {
|
final class BloomFilteredFieldsConsumer extends FieldsConsumer {
|
||||||
private FieldsConsumer delegateFieldsConsumer;
|
private FieldsConsumer delegateFieldsConsumer;
|
||||||
private Map<FieldInfo, BloomFilter> bloomFilters = new HashMap<FieldInfo, BloomFilter>();
|
private Map<FieldInfo, BloomFilter> bloomFilters = new HashMap<FieldInfo, BloomFilter>();
|
||||||
|
@ -381,10 +404,7 @@ public final class BloomFilterPostingsFormat extends PostingsFormat {
|
||||||
List<Entry<FieldInfo, BloomFilter>> nonSaturatedBlooms = new ArrayList<Map.Entry<FieldInfo, BloomFilter>>();
|
List<Entry<FieldInfo, BloomFilter>> nonSaturatedBlooms = new ArrayList<Map.Entry<FieldInfo, BloomFilter>>();
|
||||||
|
|
||||||
for (Entry<FieldInfo, BloomFilter> entry : bloomFilters.entrySet()) {
|
for (Entry<FieldInfo, BloomFilter> entry : bloomFilters.entrySet()) {
|
||||||
BloomFilter bloomFilter = entry.getValue();
|
|
||||||
//if (!bloomFilterFactory.isSaturated(bloomFilter, entry.getKey())) {
|
|
||||||
nonSaturatedBlooms.add(entry);
|
nonSaturatedBlooms.add(entry);
|
||||||
//}
|
|
||||||
}
|
}
|
||||||
String bloomFileName = IndexFileNames.segmentFileName(
|
String bloomFileName = IndexFileNames.segmentFileName(
|
||||||
state.segmentInfo.name, state.segmentSuffix, BLOOM_EXTENSION);
|
state.segmentInfo.name, state.segmentSuffix, BLOOM_EXTENSION);
|
||||||
|
|
|
@ -82,8 +82,8 @@ public class DefaultPostingsFormatTests {
|
||||||
Terms uidTerms = ar.terms(UidFieldMapper.NAME);
|
Terms uidTerms = ar.terms(UidFieldMapper.NAME);
|
||||||
|
|
||||||
assertThat(terms.size(), equalTo(1l));
|
assertThat(terms.size(), equalTo(1l));
|
||||||
assertThat(terms, not(instanceOf(BloomFilterPostingsFormat.BloomFilteredFieldsProducer.BloomFilteredTerms.class)));
|
assertThat(terms, not(instanceOf(BloomFilterPostingsFormat.BloomFilteredTerms.class)));
|
||||||
assertThat(uidTerms, instanceOf(BloomFilterPostingsFormat.BloomFilteredFieldsProducer.BloomFilteredTerms.class));
|
assertThat(uidTerms, instanceOf(BloomFilterPostingsFormat.BloomFilteredTerms.class));
|
||||||
|
|
||||||
reader.close();
|
reader.close();
|
||||||
writer.close();
|
writer.close();
|
||||||
|
@ -112,8 +112,8 @@ public class DefaultPostingsFormatTests {
|
||||||
Terms some_other_field = ar.terms("some_other_field");
|
Terms some_other_field = ar.terms("some_other_field");
|
||||||
|
|
||||||
assertThat(terms.size(), equalTo(2l));
|
assertThat(terms.size(), equalTo(2l));
|
||||||
assertThat(terms, not(instanceOf(BloomFilterPostingsFormat.BloomFilteredFieldsProducer.BloomFilteredTerms.class)));
|
assertThat(terms, not(instanceOf(BloomFilterPostingsFormat.BloomFilteredTerms.class)));
|
||||||
assertThat(some_other_field, not(instanceOf(BloomFilterPostingsFormat.BloomFilteredFieldsProducer.BloomFilteredTerms.class)));
|
assertThat(some_other_field, not(instanceOf(BloomFilterPostingsFormat.BloomFilteredTerms.class)));
|
||||||
TermsEnum iterator = terms.iterator(null);
|
TermsEnum iterator = terms.iterator(null);
|
||||||
Set<String> expected = new HashSet<String>();
|
Set<String> expected = new HashSet<String>();
|
||||||
expected.add("foo");
|
expected.add("foo");
|
||||||
|
|
Loading…
Reference in New Issue