mirror of https://github.com/apache/lucene.git
LUCENE-4687: Lazily initialize TermsEnum in BloomFilterPostingsFormat
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1434664 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
49432e0724
commit
b27bd106d4
|
@ -40,6 +40,11 @@ Changes in backwards compatibility policy
|
|||
support in-memory caching, CategoryListCache was removed too.
|
||||
(Shai Erera, Michael McCandless)
|
||||
|
||||
Optimizations
|
||||
|
||||
* LUCENE-4687: BloomFilterPostingsFormat now lazily initializes delegate
|
||||
TermsEnum only if needed to do a seek or get a DocsEnum. (Simon Willnauer)
|
||||
|
||||
New Features
|
||||
|
||||
* LUCENE-4686: New specialized DGapVInt8IntEncoder for facets (now the
|
||||
|
|
|
@ -236,26 +236,22 @@ public final class BloomFilteringPostingsFormat extends PostingsFormat {
|
|||
|
||||
@Override
|
||||
public TermsEnum iterator(TermsEnum reuse) throws IOException {
|
||||
TermsEnum result;
|
||||
if ((reuse != null) && (reuse instanceof BloomFilteredTermsEnum)) {
|
||||
// recycle the existing BloomFilteredTermsEnum by asking the delegate
|
||||
// to recycle its contained TermsEnum
|
||||
BloomFilteredTermsEnum bfte = (BloomFilteredTermsEnum) reuse;
|
||||
if (bfte.filter == filter) {
|
||||
bfte.delegateTermsEnum = delegateTerms
|
||||
.iterator(bfte.delegateTermsEnum);
|
||||
bfte.reset(delegateTerms, bfte.delegateTermsEnum);
|
||||
return bfte;
|
||||
}
|
||||
}
|
||||
// We have been handed something we cannot reuse (either null, wrong
|
||||
// class or wrong filter) so allocate a new object
|
||||
result = new BloomFilteredTermsEnum(delegateTerms.iterator(reuse),
|
||||
filter);
|
||||
return result;
|
||||
return new BloomFilteredTermsEnum(delegateTerms, reuse, filter);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Comparator<BytesRef> getComparator() throws IOException {
|
||||
public Comparator<BytesRef> getComparator() {
|
||||
return delegateTerms.getComparator();
|
||||
}
|
||||
|
||||
|
@ -295,24 +291,43 @@ public final class BloomFilteringPostingsFormat extends PostingsFormat {
|
|||
}
|
||||
}
|
||||
|
||||
class BloomFilteredTermsEnum extends TermsEnum {
|
||||
final class BloomFilteredTermsEnum extends TermsEnum {
|
||||
private Terms delegateTerms;
|
||||
private TermsEnum delegateTermsEnum;
|
||||
private TermsEnum reuseDelegate;
|
||||
private final FuzzySet filter;
|
||||
|
||||
TermsEnum delegateTermsEnum;
|
||||
private FuzzySet filter;
|
||||
|
||||
public BloomFilteredTermsEnum(TermsEnum iterator, FuzzySet filter) {
|
||||
this.delegateTermsEnum = iterator;
|
||||
public BloomFilteredTermsEnum(Terms delegateTerms, TermsEnum reuseDelegate, FuzzySet filter) throws IOException {
|
||||
this.delegateTerms = delegateTerms;
|
||||
this.reuseDelegate = reuseDelegate;
|
||||
this.filter = filter;
|
||||
}
|
||||
|
||||
void reset(Terms delegateTerms, TermsEnum reuseDelegate) throws IOException {
|
||||
this.delegateTerms = delegateTerms;
|
||||
this.reuseDelegate = reuseDelegate;
|
||||
this.delegateTermsEnum = null;
|
||||
}
|
||||
|
||||
private final TermsEnum delegate() throws IOException {
|
||||
if (delegateTermsEnum == null) {
|
||||
/* pull the iterator only if we really need it -
|
||||
* this can be a relativly heavy operation depending on the
|
||||
* delegate postings format and they underlying directory
|
||||
* (clone IndexInput) */
|
||||
delegateTermsEnum = delegateTerms.iterator(reuseDelegate);
|
||||
}
|
||||
return delegateTermsEnum;
|
||||
}
|
||||
|
||||
@Override
|
||||
public final BytesRef next() throws IOException {
|
||||
return delegateTermsEnum.next();
|
||||
return delegate().next();
|
||||
}
|
||||
|
||||
@Override
|
||||
public final Comparator<BytesRef> getComparator() {
|
||||
return delegateTermsEnum.getComparator();
|
||||
return delegateTerms.getComparator();
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -326,51 +341,51 @@ public final class BloomFilteringPostingsFormat extends PostingsFormat {
|
|||
if (filter.contains(text) == ContainsResult.NO) {
|
||||
return false;
|
||||
}
|
||||
return delegateTermsEnum.seekExact(text, useCache);
|
||||
return delegate().seekExact(text, useCache);
|
||||
}
|
||||
|
||||
@Override
|
||||
public final SeekStatus seekCeil(BytesRef text, boolean useCache)
|
||||
throws IOException {
|
||||
return delegateTermsEnum.seekCeil(text, useCache);
|
||||
return delegate().seekCeil(text, useCache);
|
||||
}
|
||||
|
||||
@Override
|
||||
public final void seekExact(long ord) throws IOException {
|
||||
delegateTermsEnum.seekExact(ord);
|
||||
delegate().seekExact(ord);
|
||||
}
|
||||
|
||||
@Override
|
||||
public final BytesRef term() throws IOException {
|
||||
return delegateTermsEnum.term();
|
||||
return delegate().term();
|
||||
}
|
||||
|
||||
@Override
|
||||
public final long ord() throws IOException {
|
||||
return delegateTermsEnum.ord();
|
||||
return delegate().ord();
|
||||
}
|
||||
|
||||
@Override
|
||||
public final int docFreq() throws IOException {
|
||||
return delegateTermsEnum.docFreq();
|
||||
return delegate().docFreq();
|
||||
}
|
||||
|
||||
@Override
|
||||
public final long totalTermFreq() throws IOException {
|
||||
return delegateTermsEnum.totalTermFreq();
|
||||
return delegate().totalTermFreq();
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public DocsAndPositionsEnum docsAndPositions(Bits liveDocs,
|
||||
DocsAndPositionsEnum reuse, int flags) throws IOException {
|
||||
return delegateTermsEnum.docsAndPositions(liveDocs, reuse, flags);
|
||||
return delegate().docsAndPositions(liveDocs, reuse, flags);
|
||||
}
|
||||
|
||||
@Override
|
||||
public DocsEnum docs(Bits liveDocs, DocsEnum reuse, int flags)
|
||||
throws IOException {
|
||||
return delegateTermsEnum.docs(liveDocs, reuse, flags);
|
||||
return delegate().docs(liveDocs, reuse, flags);
|
||||
}
|
||||
|
||||
|
||||
|
@ -383,12 +398,10 @@ public final class BloomFilteringPostingsFormat extends PostingsFormat {
|
|||
private Map<FieldInfo,FuzzySet> bloomFilters = new HashMap<FieldInfo,FuzzySet>();
|
||||
private SegmentWriteState state;
|
||||
|
||||
// private PostingsFormat delegatePostingsFormat;
|
||||
|
||||
public BloomFilteredFieldsConsumer(FieldsConsumer fieldsConsumer,
|
||||
SegmentWriteState state, PostingsFormat delegatePostingsFormat) {
|
||||
this.delegateFieldsConsumer = fieldsConsumer;
|
||||
// this.delegatePostingsFormat=delegatePostingsFormat;
|
||||
this.state = state;
|
||||
}
|
||||
|
||||
|
|
|
@ -272,7 +272,7 @@ public class SimpleTextTermVectorsReader extends TermVectorsReader {
|
|||
}
|
||||
|
||||
@Override
|
||||
public Comparator<BytesRef> getComparator() throws IOException {
|
||||
public Comparator<BytesRef> getComparator() {
|
||||
return BytesRef.getUTF8SortedAsUnicodeComparator();
|
||||
}
|
||||
|
||||
|
|
|
@ -87,7 +87,7 @@ public class FilterAtomicReader extends AtomicReader {
|
|||
}
|
||||
|
||||
@Override
|
||||
public Comparator<BytesRef> getComparator() throws IOException {
|
||||
public Comparator<BytesRef> getComparator() {
|
||||
return in.getComparator();
|
||||
}
|
||||
|
||||
|
|
|
@ -80,7 +80,7 @@ public abstract class Terms {
|
|||
* if there are no terms. This method may be invoked
|
||||
* many times; it's best to cache a single instance &
|
||||
* reuse it. */
|
||||
public abstract Comparator<BytesRef> getComparator() throws IOException;
|
||||
public abstract Comparator<BytesRef> getComparator();
|
||||
|
||||
/** Returns the number of terms for this field, or -1 if this
|
||||
* measure isn't stored by the codec. Note that, just like
|
||||
|
|
Loading…
Reference in New Issue