LUCENE-4687: Lazily initialize TermsEnum in BloomFilterPostingsFormat

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1434664 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Simon Willnauer 2013-01-17 13:27:33 +00:00
parent 49432e0724
commit b27bd106d4
5 changed files with 47 additions and 29 deletions

View File

@ -40,6 +40,11 @@ Changes in backwards compatibility policy
support in-memory caching, CategoryListCache was removed too.
(Shai Erera, Michael McCandless)
Optimizations
* LUCENE-4687: BloomFilterPostingsFormat now lazily initializes delegate
TermsEnum only if needed to do a seek or get a DocsEnum. (Simon Willnauer)
New Features
* LUCENE-4686: New specialized DGapVInt8IntEncoder for facets (now the

View File

@ -236,26 +236,22 @@ public final class BloomFilteringPostingsFormat extends PostingsFormat {
@Override
public TermsEnum iterator(TermsEnum reuse) throws IOException {
TermsEnum result;
if ((reuse != null) && (reuse instanceof BloomFilteredTermsEnum)) {
// recycle the existing BloomFilteredTermsEnum by asking the delegate
// to recycle its contained TermsEnum
BloomFilteredTermsEnum bfte = (BloomFilteredTermsEnum) reuse;
if (bfte.filter == filter) {
bfte.delegateTermsEnum = delegateTerms
.iterator(bfte.delegateTermsEnum);
bfte.reset(delegateTerms, bfte.delegateTermsEnum);
return bfte;
}
}
// We have been handed something we cannot reuse (either null, wrong
// class or wrong filter) so allocate a new object
result = new BloomFilteredTermsEnum(delegateTerms.iterator(reuse),
filter);
return result;
return new BloomFilteredTermsEnum(delegateTerms, reuse, filter);
}
@Override
public Comparator<BytesRef> getComparator() throws IOException {
public Comparator<BytesRef> getComparator() {
return delegateTerms.getComparator();
}
@ -295,24 +291,43 @@ public final class BloomFilteringPostingsFormat extends PostingsFormat {
}
}
class BloomFilteredTermsEnum extends TermsEnum {
final class BloomFilteredTermsEnum extends TermsEnum {
private Terms delegateTerms;
private TermsEnum delegateTermsEnum;
private TermsEnum reuseDelegate;
private final FuzzySet filter;
TermsEnum delegateTermsEnum;
private FuzzySet filter;
public BloomFilteredTermsEnum(TermsEnum iterator, FuzzySet filter) {
this.delegateTermsEnum = iterator;
public BloomFilteredTermsEnum(Terms delegateTerms, TermsEnum reuseDelegate, FuzzySet filter) throws IOException {
this.delegateTerms = delegateTerms;
this.reuseDelegate = reuseDelegate;
this.filter = filter;
}
void reset(Terms delegateTerms, TermsEnum reuseDelegate) throws IOException {
this.delegateTerms = delegateTerms;
this.reuseDelegate = reuseDelegate;
this.delegateTermsEnum = null;
}
private final TermsEnum delegate() throws IOException {
if (delegateTermsEnum == null) {
/* pull the iterator only if we really need it -
* this can be a relativly heavy operation depending on the
* delegate postings format and they underlying directory
* (clone IndexInput) */
delegateTermsEnum = delegateTerms.iterator(reuseDelegate);
}
return delegateTermsEnum;
}
@Override
public final BytesRef next() throws IOException {
return delegateTermsEnum.next();
return delegate().next();
}
@Override
public final Comparator<BytesRef> getComparator() {
return delegateTermsEnum.getComparator();
return delegateTerms.getComparator();
}
@Override
@ -326,51 +341,51 @@ public final class BloomFilteringPostingsFormat extends PostingsFormat {
if (filter.contains(text) == ContainsResult.NO) {
return false;
}
return delegateTermsEnum.seekExact(text, useCache);
return delegate().seekExact(text, useCache);
}
@Override
public final SeekStatus seekCeil(BytesRef text, boolean useCache)
throws IOException {
return delegateTermsEnum.seekCeil(text, useCache);
return delegate().seekCeil(text, useCache);
}
@Override
public final void seekExact(long ord) throws IOException {
delegateTermsEnum.seekExact(ord);
delegate().seekExact(ord);
}
@Override
public final BytesRef term() throws IOException {
return delegateTermsEnum.term();
return delegate().term();
}
@Override
public final long ord() throws IOException {
return delegateTermsEnum.ord();
return delegate().ord();
}
@Override
public final int docFreq() throws IOException {
return delegateTermsEnum.docFreq();
return delegate().docFreq();
}
@Override
public final long totalTermFreq() throws IOException {
return delegateTermsEnum.totalTermFreq();
return delegate().totalTermFreq();
}
@Override
public DocsAndPositionsEnum docsAndPositions(Bits liveDocs,
DocsAndPositionsEnum reuse, int flags) throws IOException {
return delegateTermsEnum.docsAndPositions(liveDocs, reuse, flags);
return delegate().docsAndPositions(liveDocs, reuse, flags);
}
@Override
public DocsEnum docs(Bits liveDocs, DocsEnum reuse, int flags)
throws IOException {
return delegateTermsEnum.docs(liveDocs, reuse, flags);
return delegate().docs(liveDocs, reuse, flags);
}
@ -383,12 +398,10 @@ public final class BloomFilteringPostingsFormat extends PostingsFormat {
private Map<FieldInfo,FuzzySet> bloomFilters = new HashMap<FieldInfo,FuzzySet>();
private SegmentWriteState state;
// private PostingsFormat delegatePostingsFormat;
public BloomFilteredFieldsConsumer(FieldsConsumer fieldsConsumer,
SegmentWriteState state, PostingsFormat delegatePostingsFormat) {
this.delegateFieldsConsumer = fieldsConsumer;
// this.delegatePostingsFormat=delegatePostingsFormat;
this.state = state;
}

View File

@ -272,7 +272,7 @@ public class SimpleTextTermVectorsReader extends TermVectorsReader {
}
@Override
public Comparator<BytesRef> getComparator() throws IOException {
public Comparator<BytesRef> getComparator() {
return BytesRef.getUTF8SortedAsUnicodeComparator();
}

View File

@ -87,7 +87,7 @@ public class FilterAtomicReader extends AtomicReader {
}
@Override
public Comparator<BytesRef> getComparator() throws IOException {
public Comparator<BytesRef> getComparator() {
return in.getComparator();
}

View File

@ -80,7 +80,7 @@ public abstract class Terms {
* if there are no terms. This method may be invoked
* many times; it's best to cache a single instance &
* reuse it. */
public abstract Comparator<BytesRef> getComparator() throws IOException;
public abstract Comparator<BytesRef> getComparator();
/** Returns the number of terms for this field, or -1 if this
* measure isn't stored by the codec. Note that, just like