From 953dda2aee70df657faa7353e43e3f61b192ce62 Mon Sep 17 00:00:00 2001 From: Martijn van Groningen Date: Thu, 4 Jul 2013 13:02:19 +0200 Subject: [PATCH] Changed the TermFilter to return a native DocIdSetIterator instead of the FixedBitSet's implementation. This has two advantages in the case term filter is *not* cached: * We iterate only once over the matching docs. Before this fix we iterated once to create the FBS and another time the consume the matching docs from the FBS. * The DocIdSetIterator#cost method of a DocIdSetIterator from the DocsEnum is accurate, because it based on the document frequency whereas the cost method of the FBS' iterator impl is based on the total number of bits (which is based on maxDoc). This will make this filter execute faster when it is included in a filtered query, because the filtered query can base its decision on what strategy to pick on an accurate heuristic. This change doesn't have any negative implications in the case a filter is cached (which is the default). The FBS is now created lazily in the DocIdSets#toCacheable method, which is always invoked when the term filter needs to be cached. --- .../common/lucene/docset/DocIdSets.java | 5 +---- .../common/lucene/search/TermFilter.java | 22 ++++++++----------- 2 files changed, 10 insertions(+), 17 deletions(-) diff --git a/src/main/java/org/elasticsearch/common/lucene/docset/DocIdSets.java b/src/main/java/org/elasticsearch/common/lucene/docset/DocIdSets.java index e3dc7c8a016..aa4bde3f72a 100644 --- a/src/main/java/org/elasticsearch/common/lucene/docset/DocIdSets.java +++ b/src/main/java/org/elasticsearch/common/lucene/docset/DocIdSets.java @@ -73,10 +73,7 @@ public class DocIdSets { * always either return {@link DocIdSet#EMPTY_DOCIDSET} or {@link FixedBitSet}. */ public static DocIdSet toCacheable(AtomicReader reader, @Nullable DocIdSet set) throws IOException { - if (set == null) { - return DocIdSet.EMPTY_DOCIDSET; - } - if (set == DocIdSet.EMPTY_DOCIDSET) { + if (set == null || set == DocIdSet.EMPTY_DOCIDSET) { return DocIdSet.EMPTY_DOCIDSET; } DocIdSetIterator it = set.iterator(); diff --git a/src/main/java/org/elasticsearch/common/lucene/search/TermFilter.java b/src/main/java/org/elasticsearch/common/lucene/search/TermFilter.java index 88f1349fc5a..9b0ef7e03ea 100644 --- a/src/main/java/org/elasticsearch/common/lucene/search/TermFilter.java +++ b/src/main/java/org/elasticsearch/common/lucene/search/TermFilter.java @@ -21,9 +21,9 @@ package org.elasticsearch.common.lucene.search; import org.apache.lucene.index.*; import org.apache.lucene.search.DocIdSet; +import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.Filter; import org.apache.lucene.util.Bits; -import org.apache.lucene.util.FixedBitSet; import java.io.IOException; @@ -43,27 +43,23 @@ public class TermFilter extends Filter { } @Override - public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException { + public DocIdSet getDocIdSet(AtomicReaderContext context, final Bits acceptDocs) throws IOException { Terms terms = context.reader().terms(term.field()); if (terms == null) { return null; } - TermsEnum termsEnum = terms.iterator(null); + final TermsEnum termsEnum = terms.iterator(null); if (!termsEnum.seekExact(term.bytes(), false)) { return null; } - DocsEnum docsEnum = termsEnum.docs(acceptDocs, null, DocsEnum.FLAG_NONE); - int docId = docsEnum.nextDoc(); - if (docId == DocsEnum.NO_MORE_DOCS) { - return null; - } + return new DocIdSet() { + @Override + public DocIdSetIterator iterator() throws IOException { + return termsEnum.docs(acceptDocs, null, DocsEnum.FLAG_NONE); + } - final FixedBitSet result = new FixedBitSet(context.reader().maxDoc()); - for (; docId < DocsEnum.NO_MORE_DOCS; docId = docsEnum.nextDoc()) { - result.set(docId); - } - return result; + }; } @Override