From b27bd106d47e13d44cc63d1ce8722075a9b43a20 Mon Sep 17 00:00:00 2001 From: Simon Willnauer Date: Thu, 17 Jan 2013 13:27:33 +0000 Subject: [PATCH] LUCENE-4687: Lazily initialize TermsEnum in BloomFilterPostingsFormat git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1434664 13f79535-47bb-0310-9956-ffa450edef68 --- lucene/CHANGES.txt | 5 ++ .../bloom/BloomFilteringPostingsFormat.java | 65 +++++++++++-------- .../SimpleTextTermVectorsReader.java | 2 +- .../lucene/index/FilterAtomicReader.java | 2 +- .../java/org/apache/lucene/index/Terms.java | 2 +- 5 files changed, 47 insertions(+), 29 deletions(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 68296db5d17..d95b8346da9 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -40,6 +40,11 @@ Changes in backwards compatibility policy support in-memory caching, CategoryListCache was removed too. (Shai Erera, Michael McCandless) +Optimizations + +* LUCENE-4687: BloomFilterPostingsFormat now lazily initializes delegate + TermsEnum only if needed to do a seek or get a DocsEnum. (Simon Willnauer) + New Features * LUCENE-4686: New specialized DGapVInt8IntEncoder for facets (now the diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/bloom/BloomFilteringPostingsFormat.java b/lucene/codecs/src/java/org/apache/lucene/codecs/bloom/BloomFilteringPostingsFormat.java index 03885935eeb..d43994479f2 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/bloom/BloomFilteringPostingsFormat.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/bloom/BloomFilteringPostingsFormat.java @@ -236,26 +236,22 @@ public final class BloomFilteringPostingsFormat extends PostingsFormat { @Override public TermsEnum iterator(TermsEnum reuse) throws IOException { - TermsEnum result; if ((reuse != null) && (reuse instanceof BloomFilteredTermsEnum)) { // recycle the existing BloomFilteredTermsEnum by asking the delegate // to recycle its contained TermsEnum BloomFilteredTermsEnum bfte = (BloomFilteredTermsEnum) reuse; if (bfte.filter == filter) { - bfte.delegateTermsEnum = delegateTerms - .iterator(bfte.delegateTermsEnum); + bfte.reset(delegateTerms, bfte.delegateTermsEnum); return bfte; } } // We have been handed something we cannot reuse (either null, wrong // class or wrong filter) so allocate a new object - result = new BloomFilteredTermsEnum(delegateTerms.iterator(reuse), - filter); - return result; + return new BloomFilteredTermsEnum(delegateTerms, reuse, filter); } @Override - public Comparator getComparator() throws IOException { + public Comparator getComparator() { return delegateTerms.getComparator(); } @@ -295,24 +291,43 @@ public final class BloomFilteringPostingsFormat extends PostingsFormat { } } - class BloomFilteredTermsEnum extends TermsEnum { + final class BloomFilteredTermsEnum extends TermsEnum { + private Terms delegateTerms; + private TermsEnum delegateTermsEnum; + private TermsEnum reuseDelegate; + private final FuzzySet filter; - TermsEnum delegateTermsEnum; - private FuzzySet filter; - - public BloomFilteredTermsEnum(TermsEnum iterator, FuzzySet filter) { - this.delegateTermsEnum = iterator; + public BloomFilteredTermsEnum(Terms delegateTerms, TermsEnum reuseDelegate, FuzzySet filter) throws IOException { + this.delegateTerms = delegateTerms; + this.reuseDelegate = reuseDelegate; this.filter = filter; } + void reset(Terms delegateTerms, TermsEnum reuseDelegate) throws IOException { + this.delegateTerms = delegateTerms; + this.reuseDelegate = reuseDelegate; + this.delegateTermsEnum = null; + } + + private final TermsEnum delegate() throws IOException { + if (delegateTermsEnum == null) { + /* pull the iterator only if we really need it - + * this can be a relativly heavy operation depending on the + * delegate postings format and they underlying directory + * (clone IndexInput) */ + delegateTermsEnum = delegateTerms.iterator(reuseDelegate); + } + return delegateTermsEnum; + } + @Override public final BytesRef next() throws IOException { - return delegateTermsEnum.next(); + return delegate().next(); } @Override public final Comparator getComparator() { - return delegateTermsEnum.getComparator(); + return delegateTerms.getComparator(); } @Override @@ -326,51 +341,51 @@ public final class BloomFilteringPostingsFormat extends PostingsFormat { if (filter.contains(text) == ContainsResult.NO) { return false; } - return delegateTermsEnum.seekExact(text, useCache); + return delegate().seekExact(text, useCache); } @Override public final SeekStatus seekCeil(BytesRef text, boolean useCache) throws IOException { - return delegateTermsEnum.seekCeil(text, useCache); + return delegate().seekCeil(text, useCache); } @Override public final void seekExact(long ord) throws IOException { - delegateTermsEnum.seekExact(ord); + delegate().seekExact(ord); } @Override public final BytesRef term() throws IOException { - return delegateTermsEnum.term(); + return delegate().term(); } @Override public final long ord() throws IOException { - return delegateTermsEnum.ord(); + return delegate().ord(); } @Override public final int docFreq() throws IOException { - return delegateTermsEnum.docFreq(); + return delegate().docFreq(); } @Override public final long totalTermFreq() throws IOException { - return delegateTermsEnum.totalTermFreq(); + return delegate().totalTermFreq(); } @Override public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, int flags) throws IOException { - return delegateTermsEnum.docsAndPositions(liveDocs, reuse, flags); + return delegate().docsAndPositions(liveDocs, reuse, flags); } @Override public DocsEnum docs(Bits liveDocs, DocsEnum reuse, int flags) throws IOException { - return delegateTermsEnum.docs(liveDocs, reuse, flags); + return delegate().docs(liveDocs, reuse, flags); } @@ -383,12 +398,10 @@ public final class BloomFilteringPostingsFormat extends PostingsFormat { private Map bloomFilters = new HashMap(); private SegmentWriteState state; - // private PostingsFormat delegatePostingsFormat; public BloomFilteredFieldsConsumer(FieldsConsumer fieldsConsumer, SegmentWriteState state, PostingsFormat delegatePostingsFormat) { this.delegateFieldsConsumer = fieldsConsumer; - // this.delegatePostingsFormat=delegatePostingsFormat; this.state = state; } diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java index 2b105de27d2..1a0cde63097 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java @@ -272,7 +272,7 @@ public class SimpleTextTermVectorsReader extends TermVectorsReader { } @Override - public Comparator getComparator() throws IOException { + public Comparator getComparator() { return BytesRef.getUTF8SortedAsUnicodeComparator(); } diff --git a/lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java b/lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java index 54edfc7d92b..d403f131df4 100644 --- a/lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java +++ b/lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java @@ -87,7 +87,7 @@ public class FilterAtomicReader extends AtomicReader { } @Override - public Comparator getComparator() throws IOException { + public Comparator getComparator() { return in.getComparator(); } diff --git a/lucene/core/src/java/org/apache/lucene/index/Terms.java b/lucene/core/src/java/org/apache/lucene/index/Terms.java index 62925c30770..45924d40d0d 100644 --- a/lucene/core/src/java/org/apache/lucene/index/Terms.java +++ b/lucene/core/src/java/org/apache/lucene/index/Terms.java @@ -80,7 +80,7 @@ public abstract class Terms { * if there are no terms. This method may be invoked * many times; it's best to cache a single instance & * reuse it. */ - public abstract Comparator getComparator() throws IOException; + public abstract Comparator getComparator(); /** Returns the number of terms for this field, or -1 if this * measure isn't stored by the codec. Note that, just like