SOLR-14376: optimize SolrIndexSearcher.getDocSet when matches everything

* getProcessedFilter now returns null filter if it's all docs more reliably * getProcessedFilter now documented clearly as an internal method * getDocSet detects all-docs and exits early with getLiveDocs * small refactoring to getDocSetBits/makeDocSetBits Closes #1399
2020-04-02 23:53:04 -04:00 · 2020-04-02 23:53:04 -04:00 · 5bfbdc5325
parent de6233976a
commit 5bfbdc5325
2 changed files with 66 additions and 32 deletions
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@ -82,6 +82,8 @@ Improvements
 * SOLR-14364: LTR's SolrFeature "fq" now supports PostFilters (e.g. collapse). (David Smiley)
 * SOLR-14376: Optimize filter queries that match all docs. (David Smiley)
 Optimizations
 ---------------------
 * SOLR-8306: Do not collect expand documents when expand.rows=0 (Marshall Sanders, Amelia Henderson)
--- a/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java
+++ b/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java
@ -760,6 +760,9 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
  private BitDocSet makeBitDocSet(DocSet answer) {
    // TODO: this should be implemented in DocSet, most likely with a getBits method that takes a maxDoc argument
    // or make DocSet instances remember maxDoc
    if (answer instanceof BitDocSet) {
      return (BitDocSet) answer;
    }
    FixedBitSet bs = new FixedBitSet(maxDoc());
    DocIterator iter = answer.iterator();
    while (iter.hasNext()) {
@ -771,11 +774,8 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
  public BitDocSet getDocSetBits(Query q) throws IOException {
    DocSet answer = getDocSet(q);
    if (answer instanceof BitDocSet) {
      return (BitDocSet) answer;
    }
    BitDocSet answerBits = makeBitDocSet(answer);
-    if (filterCache != null) {
+    if (answerBits != answer && filterCache != null) {
      filterCache.put(q, answerBits);
    }
    return answerBits;
@ -879,19 +879,8 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
  public void setLiveDocs(DocSet docs) {
    // a few places currently expect BitDocSet
    assert docs.size() == numDocs();
    if (docs instanceof BitDocSet) {
      this.liveDocs = (BitDocSet)docs;
    } else {
    this.liveDocs = makeBitDocSet(docs);
  }
  }
  public static class ProcessedFilter {
    public DocSet answer; // the answer, if non-null
    public Filter filter;
    public DelegatingCollector postFilter;
    public boolean hasDeletedDocs;  // true if it's possible that filter may match deleted docs
  }
  private static Comparator<Query> sortByCost = (q1, q2) -> ((ExtendedQuery) q1).getCost() - ((ExtendedQuery) q2).getCost();
@ -923,6 +912,7 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
   * Returns the set of document ids matching all queries. This method is cache-aware and attempts to retrieve the
   * answer from the cache if possible. If the answer was not cached, it may have been inserted into the cache as a
   * result of this call. This method can handle negative queries.
   * A null/empty list results in {@link #getLiveDocSet()}.
   * <p>
   * The DocSet returned should <b>not</b> be modified.
   */
@ -937,7 +927,14 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
    }
    ProcessedFilter pf = getProcessedFilter(null, queries);
-    if (pf.answer != null) return pf.answer;
+
    if (pf.postFilter == null) {
      if (pf.answer != null) {
        return pf.answer;
      } else if (pf.filter == null) {
        return getLiveDocSet(); // note: this is what happens when queries is an empty list
      }
    }
    DocSetCollector setCollector = new DocSetCollector(maxDoc());
    Collector collector = setCollector;
@ -991,13 +988,36 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
    return DocSetUtil.getDocSet(setCollector, this);
  }
  /**
   * INTERNAL: The response object from {@link #getProcessedFilter(DocSet, List)}.
   * Holds a filter and postFilter pair that together match a set of documents.
   * Either of them may be null, in which case the semantics are to match everything.
   * @see #getProcessedFilter(DocSet, List)
   */
  public static class ProcessedFilter {
    public DocSet answer; // maybe null. Sometimes we have a docSet answer that represents the complete answer / result.
    public Filter filter; // maybe null
    public DelegatingCollector postFilter; // maybe null
    public boolean hasDeletedDocs;  // true if it's possible that filter may match deleted docs
  }
  /**
   * INTERNAL: Processes conjunction (AND) of both args into a {@link ProcessedFilter} result.
   * Either arg may be null/empty thus doesn't restrict the matching docs.
   * Queries typically are resolved against the filter cache, and populate it.
   */
  public ProcessedFilter getProcessedFilter(DocSet setFilter, List<Query> queries) throws IOException {
    ProcessedFilter pf = new ProcessedFilter();
    if (queries == null || queries.size() == 0) {
-      if (setFilter != null) pf.filter = setFilter.getTopFilter();
+      if (setFilter != null) {
        pf.answer = setFilter;
        pf.filter = setFilter.getTopFilter();
      }
      return pf;
    }
    // We combine all the filter queries that come from the filter cache & setFilter into "answer".
    // This might become pf.filterAsDocSet but not if there are any non-cached filters
    DocSet answer = null;
    boolean[] neg = new boolean[queries.size() + 1];
@ -1011,7 +1031,7 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
    if (setFilter != null) {
      answer = sets[end++] = setFilter;
      smallestIndex = end;
-    }
+    } // we are done with setFilter at this point
    int smallestCount = Integer.MAX_VALUE;
    for (Query q : queries) {
@ -1073,7 +1093,30 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
      if (!neg[i] && i != smallestIndex) answer = answer.intersection(sets[i]);
    }
-    if (notCached != null) {
+    // ignore "answer" if it simply matches all docs
    if (answer != null && answer.size() == numDocs()) {
      answer = null;
    }
    // answer is done.
    // If no notCached nor postFilters, we can return now.
    if (notCached == null && postFilters == null) {
      // "answer" is the only part of the filter, so set it.
      if (answer != null) {
        pf.answer = answer;
        pf.filter = answer.getTopFilter();
      }
      return pf;
    }
    // pf.answer will remain null ...  (our local "answer" var is not the complete answer)
    // Set pf.filter based on combining "answer" and "notCached"
    if (notCached == null) {
      if (answer != null) {
        pf.filter = answer.getTopFilter();
      }
    } else {
      Collections.sort(notCached, sortByCost);
      List<Weight> weights = new ArrayList<>(notCached.size());
      for (Query q : notCached) {
@ -1082,20 +1125,9 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
      }
      pf.filter = new FilterImpl(answer, weights);
      pf.hasDeletedDocs = (answer == null);  // if all clauses were uncached, the resulting filter may match deleted docs
    } else {
      if (postFilters == null) {
        if (answer == null) {
          answer = getLiveDocSet();
        }
        // "answer" is the only part of the filter, so set it.
        pf.answer = answer;
      }
      if (answer != null) {
        pf.filter = answer.getTopFilter();
      }
    }
    // Set pf.postFilter
    if (postFilters != null) {
      Collections.sort(postFilters, sortByCost);
      for (int i = postFilters.size() - 1; i >= 0; i--) {