diff --git a/src/main/java/org/elasticsearch/common/lucene/uid/UidField.java b/src/main/java/org/elasticsearch/common/lucene/uid/UidField.java index 6c29f160597..513b5a3c96a 100644 --- a/src/main/java/org/elasticsearch/common/lucene/uid/UidField.java +++ b/src/main/java/org/elasticsearch/common/lucene/uid/UidField.java @@ -62,13 +62,7 @@ public class UidField extends Field { if (terms == null) { return null; } - // hack to break early if we have a bloom filter... - if (terms instanceof BloomFilterPostingsFormat.BloomFilteredFieldsProducer.BloomFilteredTerms) { - if (!((BloomFilterPostingsFormat.BloomFilteredFieldsProducer.BloomFilteredTerms) terms).getFilter().mightContain(term.bytes())) { - return null; - } - } - TermsEnum termsEnum = terms.iterator(null); + final TermsEnum termsEnum = terms.iterator(null); if (termsEnum == null) { return null; } @@ -110,13 +104,7 @@ public class UidField extends Field { if (terms == null) { return -1; } - // hack to break early if we have a bloom filter... 
- if (terms instanceof BloomFilterPostingsFormat.BloomFilteredFieldsProducer.BloomFilteredTerms) { - if (!((BloomFilterPostingsFormat.BloomFilteredFieldsProducer.BloomFilteredTerms) terms).getFilter().mightContain(term.bytes())) { - return -1; - } - } - TermsEnum termsEnum = terms.iterator(null); + final TermsEnum termsEnum = terms.iterator(null); if (termsEnum == null) { return -1; } diff --git a/src/main/java/org/elasticsearch/index/codec/postingsformat/BloomFilterPostingsFormat.java b/src/main/java/org/elasticsearch/index/codec/postingsformat/BloomFilterPostingsFormat.java index 6372305a4c0..cfbf383f3f7 100644 --- a/src/main/java/org/elasticsearch/index/codec/postingsformat/BloomFilterPostingsFormat.java +++ b/src/main/java/org/elasticsearch/index/codec/postingsformat/BloomFilterPostingsFormat.java @@ -180,167 +180,190 @@ public final class BloomFilterPostingsFormat extends PostingsFormat { return delegateFieldsProducer.getUniqueTermCount(); } - public class BloomFilteredTerms extends Terms { - private Terms delegateTerms; - private BloomFilter filter; + + } + + public static final class BloomFilteredTerms extends Terms { + private Terms delegateTerms; + private BloomFilter filter; - public BloomFilteredTerms(Terms terms, BloomFilter filter) { - this.delegateTerms = terms; - this.filter = filter; - } - - public BloomFilter getFilter() { - return filter; - } - - @Override - public TermsEnum intersect(CompiledAutomaton compiled, - final BytesRef startTerm) throws IOException { - return delegateTerms.intersect(compiled, startTerm); - } - - @Override - public TermsEnum iterator(TermsEnum reuse) throws IOException { - TermsEnum result; - if ((reuse != null) && (reuse instanceof BloomFilteredTermsEnum)) { - // recycle the existing BloomFilteredTermsEnum by asking the delegate - // to recycle its contained TermsEnum - BloomFilteredTermsEnum bfte = (BloomFilteredTermsEnum) reuse; - if (bfte.filter == filter) { - bfte.delegateTermsEnum = 
delegateTerms.iterator(bfte.delegateTermsEnum); - return bfte; - } - } - // We have been handed something we cannot reuse (either null, wrong - // class or wrong filter) so allocate a new object - result = new BloomFilteredTermsEnum(delegateTerms.iterator(reuse), filter); - return result; - } - - @Override - public Comparator getComparator() { - return delegateTerms.getComparator(); - } - - @Override - public long size() throws IOException { - return delegateTerms.size(); - } - - @Override - public long getSumTotalTermFreq() throws IOException { - return delegateTerms.getSumTotalTermFreq(); - } - - @Override - public long getSumDocFreq() throws IOException { - return delegateTerms.getSumDocFreq(); - } - - @Override - public int getDocCount() throws IOException { - return delegateTerms.getDocCount(); - } - - @Override - public boolean hasOffsets() { - return delegateTerms.hasOffsets(); - } - - @Override - public boolean hasPositions() { - return delegateTerms.hasPositions(); - } - - @Override - public boolean hasPayloads() { - return delegateTerms.hasPayloads(); - } + public BloomFilteredTerms(Terms terms, BloomFilter filter) { + this.delegateTerms = terms; + this.filter = filter; } - class BloomFilteredTermsEnum extends TermsEnum { - - TermsEnum delegateTermsEnum; - private BloomFilter filter; - - public BloomFilteredTermsEnum(TermsEnum iterator, BloomFilter filter) { - this.delegateTermsEnum = iterator; - this.filter = filter; - } - - @Override - public final BytesRef next() throws IOException { - return delegateTermsEnum.next(); - } - - @Override - public final Comparator getComparator() { - return delegateTermsEnum.getComparator(); - } - - @Override - public final boolean seekExact(BytesRef text, boolean useCache) - throws IOException { - // The magical fail-fast speed up that is the entire point of all of - // this code - save a disk seek if there is a match on an in-memory - // structure - // that may occasionally give a false positive but guaranteed no false 
- // negatives - if (!filter.mightContain(text)) { - return false; - } - return delegateTermsEnum.seekExact(text, useCache); - } - - @Override - public final SeekStatus seekCeil(BytesRef text, boolean useCache) - throws IOException { - return delegateTermsEnum.seekCeil(text, useCache); - } - - @Override - public final void seekExact(long ord) throws IOException { - delegateTermsEnum.seekExact(ord); - } - - @Override - public final BytesRef term() throws IOException { - return delegateTermsEnum.term(); - } - - @Override - public final long ord() throws IOException { - return delegateTermsEnum.ord(); - } - - @Override - public final int docFreq() throws IOException { - return delegateTermsEnum.docFreq(); - } - - @Override - public final long totalTermFreq() throws IOException { - return delegateTermsEnum.totalTermFreq(); - } - - - @Override - public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, - DocsAndPositionsEnum reuse, int flags) throws IOException { - return delegateTermsEnum.docsAndPositions(liveDocs, reuse, flags); - } - - @Override - public DocsEnum docs(Bits liveDocs, DocsEnum reuse, int flags) - throws IOException { - return delegateTermsEnum.docs(liveDocs, reuse, flags); - } - - + public BloomFilter getFilter() { + return filter; } + @Override + public TermsEnum intersect(CompiledAutomaton compiled, + final BytesRef startTerm) throws IOException { + return delegateTerms.intersect(compiled, startTerm); + } + + @Override + public TermsEnum iterator(TermsEnum reuse) throws IOException { + TermsEnum result; + if ((reuse != null) && (reuse instanceof BloomFilteredTermsEnum)) { + // recycle the existing BloomFilteredTermsEnum by asking the delegate + // to recycle its contained TermsEnum + BloomFilteredTermsEnum bfte = (BloomFilteredTermsEnum) reuse; + if (bfte.filter == filter) { + bfte.reset(delegateTerms); + return bfte; + } + reuse = bfte.reuse; + } + // We have been handed something we cannot reuse (either null, wrong + // class or wrong filter) so 
allocate a new object + result = new BloomFilteredTermsEnum(delegateTerms, reuse, filter); + return result; + } + + @Override + public Comparator getComparator() { + return delegateTerms.getComparator(); + } + + @Override + public long size() throws IOException { + return delegateTerms.size(); + } + + @Override + public long getSumTotalTermFreq() throws IOException { + return delegateTerms.getSumTotalTermFreq(); + } + + @Override + public long getSumDocFreq() throws IOException { + return delegateTerms.getSumDocFreq(); + } + + @Override + public int getDocCount() throws IOException { + return delegateTerms.getDocCount(); + } + + @Override + public boolean hasOffsets() { + return delegateTerms.hasOffsets(); + } + + @Override + public boolean hasPositions() { + return delegateTerms.hasPositions(); + } + + @Override + public boolean hasPayloads() { + return delegateTerms.hasPayloads(); + } + } + + static final class BloomFilteredTermsEnum extends TermsEnum { + + private Terms delegateTerms; + private TermsEnum delegateTermsEnum; + private TermsEnum reuse; + private BloomFilter filter; + + public BloomFilteredTermsEnum(Terms other, TermsEnum reuse, BloomFilter filter) { + this.delegateTerms = other; + this.reuse = reuse; + this.filter = filter; + } + + void reset(Terms others) { + reuse = this.delegateTermsEnum; + this.delegateTermsEnum = null; + this.delegateTerms = others; + } + + private TermsEnum getDelegate() throws IOException { + if (delegateTermsEnum == null) { + /* pull the iterator only if we really need it - + * this can be a relatively heavy operation depending on the + * delegate postings format and the underlying directory + * (clone IndexInput) */ + delegateTermsEnum = delegateTerms.iterator(reuse); + } + return delegateTermsEnum; + } + + @Override + public final BytesRef next() throws IOException { + return getDelegate().next(); + } + + @Override + public final Comparator getComparator() { + return delegateTerms.getComparator(); + } + + @Override + 
public final boolean seekExact(BytesRef text, boolean useCache) + throws IOException { + // The magical fail-fast speed up that is the entire point of all of + // this code - save a disk seek if there is a match on an in-memory + // structure + // that may occasionally give a false positive but guaranteed no false + // negatives + if (!filter.mightContain(text)) { + return false; + } + return getDelegate().seekExact(text, useCache); + } + + @Override + public final SeekStatus seekCeil(BytesRef text, boolean useCache) + throws IOException { + return getDelegate().seekCeil(text, useCache); + } + + @Override + public final void seekExact(long ord) throws IOException { + getDelegate().seekExact(ord); + } + + @Override + public final BytesRef term() throws IOException { + return getDelegate().term(); + } + + @Override + public final long ord() throws IOException { + return getDelegate().ord(); + } + + @Override + public final int docFreq() throws IOException { + return getDelegate().docFreq(); + } + + @Override + public final long totalTermFreq() throws IOException { + return getDelegate().totalTermFreq(); + } + + + @Override + public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, + DocsAndPositionsEnum reuse, int flags) throws IOException { + return getDelegate().docsAndPositions(liveDocs, reuse, flags); + } + + @Override + public DocsEnum docs(Bits liveDocs, DocsEnum reuse, int flags) + throws IOException { + return getDelegate().docs(liveDocs, reuse, flags); + } + + } + final class BloomFilteredFieldsConsumer extends FieldsConsumer { private FieldsConsumer delegateFieldsConsumer; private Map bloomFilters = new HashMap(); @@ -381,10 +404,7 @@ public final class BloomFilterPostingsFormat extends PostingsFormat { List> nonSaturatedBlooms = new ArrayList>(); for (Entry entry : bloomFilters.entrySet()) { - BloomFilter bloomFilter = entry.getValue(); - //if (!bloomFilterFactory.isSaturated(bloomFilter, entry.getKey())) { nonSaturatedBlooms.add(entry); - //} } String 
bloomFileName = IndexFileNames.segmentFileName( state.segmentInfo.name, state.segmentSuffix, BLOOM_EXTENSION); diff --git a/src/test/java/org/elasticsearch/test/unit/index/codec/postingformat/DefaultPostingsFormatTests.java b/src/test/java/org/elasticsearch/test/unit/index/codec/postingformat/DefaultPostingsFormatTests.java index f430e9d21f7..fabb5bc480c 100644 --- a/src/test/java/org/elasticsearch/test/unit/index/codec/postingformat/DefaultPostingsFormatTests.java +++ b/src/test/java/org/elasticsearch/test/unit/index/codec/postingformat/DefaultPostingsFormatTests.java @@ -82,8 +82,8 @@ public class DefaultPostingsFormatTests { Terms uidTerms = ar.terms(UidFieldMapper.NAME); assertThat(terms.size(), equalTo(1l)); - assertThat(terms, not(instanceOf(BloomFilterPostingsFormat.BloomFilteredFieldsProducer.BloomFilteredTerms.class))); - assertThat(uidTerms, instanceOf(BloomFilterPostingsFormat.BloomFilteredFieldsProducer.BloomFilteredTerms.class)); + assertThat(terms, not(instanceOf(BloomFilterPostingsFormat.BloomFilteredTerms.class))); + assertThat(uidTerms, instanceOf(BloomFilterPostingsFormat.BloomFilteredTerms.class)); reader.close(); writer.close(); @@ -112,8 +112,8 @@ public class DefaultPostingsFormatTests { Terms some_other_field = ar.terms("some_other_field"); assertThat(terms.size(), equalTo(2l)); - assertThat(terms, not(instanceOf(BloomFilterPostingsFormat.BloomFilteredFieldsProducer.BloomFilteredTerms.class))); - assertThat(some_other_field, not(instanceOf(BloomFilterPostingsFormat.BloomFilteredFieldsProducer.BloomFilteredTerms.class))); + assertThat(terms, not(instanceOf(BloomFilterPostingsFormat.BloomFilteredTerms.class))); + assertThat(some_other_field, not(instanceOf(BloomFilterPostingsFormat.BloomFilteredTerms.class))); TermsEnum iterator = terms.iterator(null); Set expected = new HashSet(); expected.add("foo");