Changed the TermFilter to return a native DocIdSetIterator instead of the FixedBitSet's implementation.

This has two advantages in the case term filter is *not* cached: * We iterate only once over the matching docs. Before this fix we iterated once to create the FBS and another time the consume the matching docs from the FBS. * The DocIdSetIterator#cost method of a DocIdSetIterator from the DocsEnum is accurate, because it based on the document frequency whereas the cost method of the FBS' iterator impl is based on the total number of bits (which is based on maxDoc). This will make this filter execute faster when it is included in a filtered query, because the filtered query can base its decision on what strategy to pick on an accurate heuristic. This change doesn't have any negative implications in the case a filter is cached (which is the default). The FBS is now created lazily in the DocIdSets#toCacheable method, which is always invoked when the term filter needs to be cached.
2013-07-04 13:02:19 +02:00 · 2013-07-04 13:02:19 +02:00 · 953dda2aee
parent 018ca58cdb
commit 953dda2aee
2 changed files with 10 additions and 17 deletions
--- a/src/main/java/org/elasticsearch/common/lucene/docset/DocIdSets.java
+++ b/src/main/java/org/elasticsearch/common/lucene/docset/DocIdSets.java
@ -73,10 +73,7 @@ public class DocIdSets {
     * always either return {@link DocIdSet#EMPTY_DOCIDSET} or {@link FixedBitSet}.
     */
    public static DocIdSet toCacheable(AtomicReader reader, @Nullable DocIdSet set) throws IOException {
-        if (set == null) {
-            return DocIdSet.EMPTY_DOCIDSET;
-        }
-        if (set == DocIdSet.EMPTY_DOCIDSET) {
+        if (set == null || set == DocIdSet.EMPTY_DOCIDSET) {
            return DocIdSet.EMPTY_DOCIDSET;
        }
        DocIdSetIterator it = set.iterator();
--- a/src/main/java/org/elasticsearch/common/lucene/search/TermFilter.java
+++ b/src/main/java/org/elasticsearch/common/lucene/search/TermFilter.java
@ -21,9 +21,9 @@ package org.elasticsearch.common.lucene.search;

 import org.apache.lucene.index.*;
 import org.apache.lucene.search.DocIdSet;
+import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.search.Filter;
 import org.apache.lucene.util.Bits;
-import org.apache.lucene.util.FixedBitSet;

 import java.io.IOException;

@ -43,27 +43,23 @@ public class TermFilter extends Filter {
    }

    @Override
-    public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException {
+    public DocIdSet getDocIdSet(AtomicReaderContext context, final Bits acceptDocs) throws IOException {
        Terms terms = context.reader().terms(term.field());
        if (terms == null) {
            return null;
        }

-        TermsEnum termsEnum = terms.iterator(null);
+        final TermsEnum termsEnum = terms.iterator(null);
        if (!termsEnum.seekExact(term.bytes(), false)) {
            return null;
        }
-        DocsEnum docsEnum = termsEnum.docs(acceptDocs, null, DocsEnum.FLAG_NONE);
-        int docId = docsEnum.nextDoc();
-        if (docId == DocsEnum.NO_MORE_DOCS) {
-            return null;
+        return new DocIdSet() {
+            @Override
+            public DocIdSetIterator iterator() throws IOException {
+                return termsEnum.docs(acceptDocs, null, DocsEnum.FLAG_NONE);
            }

-        final FixedBitSet result = new FixedBitSet(context.reader().maxDoc());
-        for (; docId < DocsEnum.NO_MORE_DOCS; docId = docsEnum.nextDoc()) {
-            result.set(docId);
-        }
-        return result;
+        };
    }

    @Override