From 5bfbdc5325aa331549e9700734042c9b582fc3fc Mon Sep 17 00:00:00 2001 From: David Smiley Date: Thu, 2 Apr 2020 23:53:04 -0400 Subject: [PATCH] SOLR-14376: optimize SolrIndexSearcher.getDocSet when matches everything * getProcessedFilter now returns null filter if it's all docs more reliably * getProcessedFilter now documented clearly as an internal method * getDocSet detects all-docs and exits early with getLiveDocs * small refactoring to getDocSetBits/makeDocSetBits Closes #1399 --- solr/CHANGES.txt | 2 + .../apache/solr/search/SolrIndexSearcher.java | 96 ++++++++++++------- 2 files changed, 66 insertions(+), 32 deletions(-) diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index 2daa3648bac..30c7f0e444b 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -82,6 +82,8 @@ Improvements * SOLR-14364: LTR's SolrFeature "fq" now supports PostFilters (e.g. collapse). (David Smiley) +* SOLR-14376: Optimize filter queries that match all docs. (David Smiley) + Optimizations --------------------- * SOLR-8306: Do not collect expand documents when expand.rows=0 (Marshall Sanders, Amelia Henderson) diff --git a/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java b/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java index d639b891039..0deb7cfe039 100644 --- a/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java +++ b/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java @@ -760,6 +760,9 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI private BitDocSet makeBitDocSet(DocSet answer) { // TODO: this should be implemented in DocSet, most likely with a getBits method that takes a maxDoc argument // or make DocSet instances remember maxDoc + if (answer instanceof BitDocSet) { + return (BitDocSet) answer; + } FixedBitSet bs = new FixedBitSet(maxDoc()); DocIterator iter = answer.iterator(); while (iter.hasNext()) { @@ -771,11 +774,8 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI public BitDocSet getDocSetBits(Query q) throws IOException { DocSet answer = getDocSet(q); - if (answer instanceof BitDocSet) { - return (BitDocSet) answer; - } BitDocSet answerBits = makeBitDocSet(answer); - if (filterCache != null) { + if (answerBits != answer && filterCache != null) { filterCache.put(q, answerBits); } return answerBits; @@ -879,18 +879,7 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI public void setLiveDocs(DocSet docs) { // a few places currently expect BitDocSet assert docs.size() == numDocs(); - if (docs instanceof BitDocSet) { - this.liveDocs = (BitDocSet)docs; - } else { - this.liveDocs = makeBitDocSet(docs); - } - } - - public static class ProcessedFilter { - public DocSet answer; // the answer, if non-null - public Filter filter; - public DelegatingCollector postFilter; - public boolean hasDeletedDocs; // true if it's possible that filter may match deleted docs + this.liveDocs = makeBitDocSet(docs); } private static Comparator sortByCost = (q1, q2) -> ((ExtendedQuery) q1).getCost() - ((ExtendedQuery) q2).getCost(); @@ -923,6 +912,7 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI * Returns the set of document ids matching all queries. This method is cache-aware and attempts to retrieve the * answer from the cache if possible. If the answer was not cached, it may have been inserted into the cache as a * result of this call. This method can handle negative queries. + * A null/empty list results in {@link #getLiveDocSet()}. *

* The DocSet returned should not be modified. */ @@ -937,7 +927,14 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI } ProcessedFilter pf = getProcessedFilter(null, queries); - if (pf.answer != null) return pf.answer; + + if (pf.postFilter == null) { + if (pf.answer != null) { + return pf.answer; + } else if (pf.filter == null) { + return getLiveDocSet(); // note: this is what happens when queries is an empty list + } + } DocSetCollector setCollector = new DocSetCollector(maxDoc()); Collector collector = setCollector; @@ -991,13 +988,36 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI return DocSetUtil.getDocSet(setCollector, this); } + /** + * INTERNAL: The response object from {@link #getProcessedFilter(DocSet, List)}. + * Holds a filter and postFilter pair that together match a set of documents. + * Either of them may be null, in which case the semantics are to match everything. + * @see #getProcessedFilter(DocSet, List) + */ + public static class ProcessedFilter { + public DocSet answer; // maybe null. Sometimes we have a docSet answer that represents the complete answer / result. + public Filter filter; // maybe null + public DelegatingCollector postFilter; // maybe null + public boolean hasDeletedDocs; // true if it's possible that filter may match deleted docs + } + + /** + * INTERNAL: Processes conjunction (AND) of both args into a {@link ProcessedFilter} result. + * Either arg may be null/empty thus doesn't restrict the matching docs. + * Queries typically are resolved against the filter cache, and populate it. + */ public ProcessedFilter getProcessedFilter(DocSet setFilter, List queries) throws IOException { ProcessedFilter pf = new ProcessedFilter(); if (queries == null || queries.size() == 0) { - if (setFilter != null) pf.filter = setFilter.getTopFilter(); + if (setFilter != null) { + pf.answer = setFilter; + pf.filter = setFilter.getTopFilter(); + } return pf; } + // We combine all the filter queries that come from the filter cache & setFilter into "answer". + // This might become pf.filterAsDocSet but not if there are any non-cached filters DocSet answer = null; boolean[] neg = new boolean[queries.size() + 1]; @@ -1011,7 +1031,7 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI if (setFilter != null) { answer = sets[end++] = setFilter; smallestIndex = end; - } + } // we are done with setFilter at this point int smallestCount = Integer.MAX_VALUE; for (Query q : queries) { @@ -1073,7 +1093,30 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI if (!neg[i] && i != smallestIndex) answer = answer.intersection(sets[i]); } - if (notCached != null) { + // ignore "answer" if it simply matches all docs + if (answer != null && answer.size() == numDocs()) { + answer = null; + } + + // answer is done. + + // If no notCached nor postFilters, we can return now. + if (notCached == null && postFilters == null) { + // "answer" is the only part of the filter, so set it. + if (answer != null) { + pf.answer = answer; + pf.filter = answer.getTopFilter(); + } + return pf; + } + // pf.answer will remain null ... (our local "answer" var is not the complete answer) + + // Set pf.filter based on combining "answer" and "notCached" + if (notCached == null) { + if (answer != null) { + pf.filter = answer.getTopFilter(); + } + } else { Collections.sort(notCached, sortByCost); List weights = new ArrayList<>(notCached.size()); for (Query q : notCached) { @@ -1082,20 +1125,9 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI } pf.filter = new FilterImpl(answer, weights); pf.hasDeletedDocs = (answer == null); // if all clauses were uncached, the resulting filter may match deleted docs - } else { - if (postFilters == null) { - if (answer == null) { - answer = getLiveDocSet(); - } - // "answer" is the only part of the filter, so set it. - pf.answer = answer; - } - - if (answer != null) { - pf.filter = answer.getTopFilter(); - } } + // Set pf.postFilter if (postFilters != null) { Collections.sort(postFilters, sortByCost); for (int i = postFilters.size() - 1; i >= 0; i--) {