SOLR-14376: optimize SolrIndexSearcher.getDocSet when matches everything

* getProcessedFilter now more reliably returns a null filter when the filter matches all docs
* getProcessedFilter now documented clearly as an internal method
* getDocSet detects the all-docs case and exits early with getLiveDocSet
* small refactoring to getDocSetBits/makeBitDocSet
Closes #1399
This commit is contained in:
David Smiley 2020-04-02 23:53:04 -04:00
parent de6233976a
commit 5bfbdc5325
2 changed files with 66 additions and 32 deletions

View File

@ -82,6 +82,8 @@ Improvements
* SOLR-14364: LTR's SolrFeature "fq" now supports PostFilters (e.g. collapse). (David Smiley) * SOLR-14364: LTR's SolrFeature "fq" now supports PostFilters (e.g. collapse). (David Smiley)
* SOLR-14376: Optimize filter queries that match all docs. (David Smiley)
Optimizations Optimizations
--------------------- ---------------------
* SOLR-8306: Do not collect expand documents when expand.rows=0 (Marshall Sanders, Amelia Henderson) * SOLR-8306: Do not collect expand documents when expand.rows=0 (Marshall Sanders, Amelia Henderson)

View File

@ -760,6 +760,9 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
private BitDocSet makeBitDocSet(DocSet answer) { private BitDocSet makeBitDocSet(DocSet answer) {
// TODO: this should be implemented in DocSet, most likely with a getBits method that takes a maxDoc argument // TODO: this should be implemented in DocSet, most likely with a getBits method that takes a maxDoc argument
// or make DocSet instances remember maxDoc // or make DocSet instances remember maxDoc
if (answer instanceof BitDocSet) {
return (BitDocSet) answer;
}
FixedBitSet bs = new FixedBitSet(maxDoc()); FixedBitSet bs = new FixedBitSet(maxDoc());
DocIterator iter = answer.iterator(); DocIterator iter = answer.iterator();
while (iter.hasNext()) { while (iter.hasNext()) {
@ -771,11 +774,8 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
public BitDocSet getDocSetBits(Query q) throws IOException { public BitDocSet getDocSetBits(Query q) throws IOException {
DocSet answer = getDocSet(q); DocSet answer = getDocSet(q);
if (answer instanceof BitDocSet) {
return (BitDocSet) answer;
}
BitDocSet answerBits = makeBitDocSet(answer); BitDocSet answerBits = makeBitDocSet(answer);
if (filterCache != null) { if (answerBits != answer && filterCache != null) {
filterCache.put(q, answerBits); filterCache.put(q, answerBits);
} }
return answerBits; return answerBits;
@ -879,19 +879,8 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
public void setLiveDocs(DocSet docs) { public void setLiveDocs(DocSet docs) {
// a few places currently expect BitDocSet // a few places currently expect BitDocSet
assert docs.size() == numDocs(); assert docs.size() == numDocs();
if (docs instanceof BitDocSet) {
this.liveDocs = (BitDocSet)docs;
} else {
this.liveDocs = makeBitDocSet(docs); this.liveDocs = makeBitDocSet(docs);
} }
}
public static class ProcessedFilter {
public DocSet answer; // the answer, if non-null
public Filter filter;
public DelegatingCollector postFilter;
public boolean hasDeletedDocs; // true if it's possible that filter may match deleted docs
}
private static Comparator<Query> sortByCost = (q1, q2) -> ((ExtendedQuery) q1).getCost() - ((ExtendedQuery) q2).getCost(); private static Comparator<Query> sortByCost = (q1, q2) -> ((ExtendedQuery) q1).getCost() - ((ExtendedQuery) q2).getCost();
@ -923,6 +912,7 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
* Returns the set of document ids matching all queries. This method is cache-aware and attempts to retrieve the * Returns the set of document ids matching all queries. This method is cache-aware and attempts to retrieve the
* answer from the cache if possible. If the answer was not cached, it may have been inserted into the cache as a * answer from the cache if possible. If the answer was not cached, it may have been inserted into the cache as a
* result of this call. This method can handle negative queries. * result of this call. This method can handle negative queries.
* A null/empty list results in {@link #getLiveDocSet()}.
* <p> * <p>
* The DocSet returned should <b>not</b> be modified. * The DocSet returned should <b>not</b> be modified.
*/ */
@ -937,7 +927,14 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
} }
ProcessedFilter pf = getProcessedFilter(null, queries); ProcessedFilter pf = getProcessedFilter(null, queries);
if (pf.answer != null) return pf.answer;
if (pf.postFilter == null) {
if (pf.answer != null) {
return pf.answer;
} else if (pf.filter == null) {
return getLiveDocSet(); // note: this is what happens when queries is an empty list
}
}
DocSetCollector setCollector = new DocSetCollector(maxDoc()); DocSetCollector setCollector = new DocSetCollector(maxDoc());
Collector collector = setCollector; Collector collector = setCollector;
@ -991,13 +988,36 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
return DocSetUtil.getDocSet(setCollector, this); return DocSetUtil.getDocSet(setCollector, this);
} }
/**
* INTERNAL: The response object from {@link #getProcessedFilter(DocSet, List)}.
* Holds a filter and postFilter pair that together match a set of documents.
* Either of them may be null, in which case the semantics are to match everything.
* @see #getProcessedFilter(DocSet, List)
*/
public static class ProcessedFilter {
public DocSet answer; // maybe null. Sometimes we have a docSet answer that represents the complete answer / result.
public Filter filter; // maybe null
public DelegatingCollector postFilter; // maybe null
public boolean hasDeletedDocs; // true if it's possible that filter may match deleted docs
}
/**
* INTERNAL: Processes conjunction (AND) of both args into a {@link ProcessedFilter} result.
* Either arg may be null/empty, in which case it does not restrict the matching docs.
* Queries typically are resolved against the filter cache, and populate it.
*/
public ProcessedFilter getProcessedFilter(DocSet setFilter, List<Query> queries) throws IOException { public ProcessedFilter getProcessedFilter(DocSet setFilter, List<Query> queries) throws IOException {
ProcessedFilter pf = new ProcessedFilter(); ProcessedFilter pf = new ProcessedFilter();
if (queries == null || queries.size() == 0) { if (queries == null || queries.size() == 0) {
if (setFilter != null) pf.filter = setFilter.getTopFilter(); if (setFilter != null) {
pf.answer = setFilter;
pf.filter = setFilter.getTopFilter();
}
return pf; return pf;
} }
// We combine all the filter queries that come from the filter cache & setFilter into "answer".
// This might become pf.answer, but not if there are any non-cached filters or post filters
DocSet answer = null; DocSet answer = null;
boolean[] neg = new boolean[queries.size() + 1]; boolean[] neg = new boolean[queries.size() + 1];
@ -1011,7 +1031,7 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
if (setFilter != null) { if (setFilter != null) {
answer = sets[end++] = setFilter; answer = sets[end++] = setFilter;
smallestIndex = end; smallestIndex = end;
} } // we are done with setFilter at this point
int smallestCount = Integer.MAX_VALUE; int smallestCount = Integer.MAX_VALUE;
for (Query q : queries) { for (Query q : queries) {
@ -1073,7 +1093,30 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
if (!neg[i] && i != smallestIndex) answer = answer.intersection(sets[i]); if (!neg[i] && i != smallestIndex) answer = answer.intersection(sets[i]);
} }
if (notCached != null) { // ignore "answer" if it simply matches all docs
if (answer != null && answer.size() == numDocs()) {
answer = null;
}
// answer is done.
// If no notCached nor postFilters, we can return now.
if (notCached == null && postFilters == null) {
// "answer" is the only part of the filter, so set it.
if (answer != null) {
pf.answer = answer;
pf.filter = answer.getTopFilter();
}
return pf;
}
// pf.answer will remain null ... (our local "answer" var is not the complete answer)
// Set pf.filter based on combining "answer" and "notCached"
if (notCached == null) {
if (answer != null) {
pf.filter = answer.getTopFilter();
}
} else {
Collections.sort(notCached, sortByCost); Collections.sort(notCached, sortByCost);
List<Weight> weights = new ArrayList<>(notCached.size()); List<Weight> weights = new ArrayList<>(notCached.size());
for (Query q : notCached) { for (Query q : notCached) {
@ -1082,20 +1125,9 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
} }
pf.filter = new FilterImpl(answer, weights); pf.filter = new FilterImpl(answer, weights);
pf.hasDeletedDocs = (answer == null); // if all clauses were uncached, the resulting filter may match deleted docs pf.hasDeletedDocs = (answer == null); // if all clauses were uncached, the resulting filter may match deleted docs
} else {
if (postFilters == null) {
if (answer == null) {
answer = getLiveDocSet();
}
// "answer" is the only part of the filter, so set it.
pf.answer = answer;
}
if (answer != null) {
pf.filter = answer.getTopFilter();
}
} }
// Set pf.postFilter
if (postFilters != null) { if (postFilters != null) {
Collections.sort(postFilters, sortByCost); Collections.sort(postFilters, sortByCost);
for (int i = postFilters.size() - 1; i >= 0; i--) { for (int i = postFilters.size() - 1; i >= 0; i--) {