mirror of https://github.com/apache/lucene.git
SOLR-14376: optimize SolrIndexSearcher.getDocSet when matches everything
* getProcessedFilter now returns null filter if it's all docs more reliably * getProcessedFilter now documented clearly as an internal method * getDocSet detects all-docs and exits early with getLiveDocs * small refactoring to getDocSetBits/makeDocSetBits Closes #1399
This commit is contained in:
parent
de6233976a
commit
5bfbdc5325
|
@ -82,6 +82,8 @@ Improvements
|
|||
|
||||
* SOLR-14364: LTR's SolrFeature "fq" now supports PostFilters (e.g. collapse). (David Smiley)
|
||||
|
||||
* SOLR-14376: Optimize filter queries that match all docs. (David Smiley)
|
||||
|
||||
Optimizations
|
||||
---------------------
|
||||
* SOLR-8306: Do not collect expand documents when expand.rows=0 (Marshall Sanders, Amelia Henderson)
|
||||
|
|
|
@ -760,6 +760,9 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
|
|||
private BitDocSet makeBitDocSet(DocSet answer) {
|
||||
// TODO: this should be implemented in DocSet, most likely with a getBits method that takes a maxDoc argument
|
||||
// or make DocSet instances remember maxDoc
|
||||
if (answer instanceof BitDocSet) {
|
||||
return (BitDocSet) answer;
|
||||
}
|
||||
FixedBitSet bs = new FixedBitSet(maxDoc());
|
||||
DocIterator iter = answer.iterator();
|
||||
while (iter.hasNext()) {
|
||||
|
@ -771,11 +774,8 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
|
|||
|
||||
public BitDocSet getDocSetBits(Query q) throws IOException {
|
||||
DocSet answer = getDocSet(q);
|
||||
if (answer instanceof BitDocSet) {
|
||||
return (BitDocSet) answer;
|
||||
}
|
||||
BitDocSet answerBits = makeBitDocSet(answer);
|
||||
if (filterCache != null) {
|
||||
if (answerBits != answer && filterCache != null) {
|
||||
filterCache.put(q, answerBits);
|
||||
}
|
||||
return answerBits;
|
||||
|
@ -879,18 +879,7 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
|
|||
public void setLiveDocs(DocSet docs) {
|
||||
// a few places currently expect BitDocSet
|
||||
assert docs.size() == numDocs();
|
||||
if (docs instanceof BitDocSet) {
|
||||
this.liveDocs = (BitDocSet)docs;
|
||||
} else {
|
||||
this.liveDocs = makeBitDocSet(docs);
|
||||
}
|
||||
}
|
||||
|
||||
public static class ProcessedFilter {
|
||||
public DocSet answer; // the answer, if non-null
|
||||
public Filter filter;
|
||||
public DelegatingCollector postFilter;
|
||||
public boolean hasDeletedDocs; // true if it's possible that filter may match deleted docs
|
||||
this.liveDocs = makeBitDocSet(docs);
|
||||
}
|
||||
|
||||
private static Comparator<Query> sortByCost = (q1, q2) -> ((ExtendedQuery) q1).getCost() - ((ExtendedQuery) q2).getCost();
|
||||
|
@ -923,6 +912,7 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
|
|||
* Returns the set of document ids matching all queries. This method is cache-aware and attempts to retrieve the
|
||||
* answer from the cache if possible. If the answer was not cached, it may have been inserted into the cache as a
|
||||
* result of this call. This method can handle negative queries.
|
||||
* A null/empty list results in {@link #getLiveDocSet()}.
|
||||
* <p>
|
||||
* The DocSet returned should <b>not</b> be modified.
|
||||
*/
|
||||
|
@ -937,7 +927,14 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
|
|||
}
|
||||
|
||||
ProcessedFilter pf = getProcessedFilter(null, queries);
|
||||
if (pf.answer != null) return pf.answer;
|
||||
|
||||
if (pf.postFilter == null) {
|
||||
if (pf.answer != null) {
|
||||
return pf.answer;
|
||||
} else if (pf.filter == null) {
|
||||
return getLiveDocSet(); // note: this is what happens when queries is an empty list
|
||||
}
|
||||
}
|
||||
|
||||
DocSetCollector setCollector = new DocSetCollector(maxDoc());
|
||||
Collector collector = setCollector;
|
||||
|
@ -991,13 +988,36 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
|
|||
return DocSetUtil.getDocSet(setCollector, this);
|
||||
}
|
||||
|
||||
/**
|
||||
* INTERNAL: The response object from {@link #getProcessedFilter(DocSet, List)}.
|
||||
* Holds a filter and postFilter pair that together match a set of documents.
|
||||
* Either of them may be null, in which case the semantics are to match everything.
|
||||
* @see #getProcessedFilter(DocSet, List)
|
||||
*/
|
||||
public static class ProcessedFilter {
|
||||
public DocSet answer; // maybe null. Sometimes we have a docSet answer that represents the complete answer / result.
|
||||
public Filter filter; // maybe null
|
||||
public DelegatingCollector postFilter; // maybe null
|
||||
public boolean hasDeletedDocs; // true if it's possible that filter may match deleted docs
|
||||
}
|
||||
|
||||
/**
|
||||
* INTERNAL: Processes conjunction (AND) of both args into a {@link ProcessedFilter} result.
|
||||
* Either arg may be null/empty thus doesn't restrict the matching docs.
|
||||
* Queries typically are resolved against the filter cache, and populate it.
|
||||
*/
|
||||
public ProcessedFilter getProcessedFilter(DocSet setFilter, List<Query> queries) throws IOException {
|
||||
ProcessedFilter pf = new ProcessedFilter();
|
||||
if (queries == null || queries.size() == 0) {
|
||||
if (setFilter != null) pf.filter = setFilter.getTopFilter();
|
||||
if (setFilter != null) {
|
||||
pf.answer = setFilter;
|
||||
pf.filter = setFilter.getTopFilter();
|
||||
}
|
||||
return pf;
|
||||
}
|
||||
|
||||
// We combine all the filter queries that come from the filter cache & setFilter into "answer".
|
||||
// This might become pf.filterAsDocSet but not if there are any non-cached filters
|
||||
DocSet answer = null;
|
||||
|
||||
boolean[] neg = new boolean[queries.size() + 1];
|
||||
|
@ -1011,7 +1031,7 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
|
|||
if (setFilter != null) {
|
||||
answer = sets[end++] = setFilter;
|
||||
smallestIndex = end;
|
||||
}
|
||||
} // we are done with setFilter at this point
|
||||
|
||||
int smallestCount = Integer.MAX_VALUE;
|
||||
for (Query q : queries) {
|
||||
|
@ -1073,7 +1093,30 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
|
|||
if (!neg[i] && i != smallestIndex) answer = answer.intersection(sets[i]);
|
||||
}
|
||||
|
||||
if (notCached != null) {
|
||||
// ignore "answer" if it simply matches all docs
|
||||
if (answer != null && answer.size() == numDocs()) {
|
||||
answer = null;
|
||||
}
|
||||
|
||||
// answer is done.
|
||||
|
||||
// If no notCached nor postFilters, we can return now.
|
||||
if (notCached == null && postFilters == null) {
|
||||
// "answer" is the only part of the filter, so set it.
|
||||
if (answer != null) {
|
||||
pf.answer = answer;
|
||||
pf.filter = answer.getTopFilter();
|
||||
}
|
||||
return pf;
|
||||
}
|
||||
// pf.answer will remain null ... (our local "answer" var is not the complete answer)
|
||||
|
||||
// Set pf.filter based on combining "answer" and "notCached"
|
||||
if (notCached == null) {
|
||||
if (answer != null) {
|
||||
pf.filter = answer.getTopFilter();
|
||||
}
|
||||
} else {
|
||||
Collections.sort(notCached, sortByCost);
|
||||
List<Weight> weights = new ArrayList<>(notCached.size());
|
||||
for (Query q : notCached) {
|
||||
|
@ -1082,20 +1125,9 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
|
|||
}
|
||||
pf.filter = new FilterImpl(answer, weights);
|
||||
pf.hasDeletedDocs = (answer == null); // if all clauses were uncached, the resulting filter may match deleted docs
|
||||
} else {
|
||||
if (postFilters == null) {
|
||||
if (answer == null) {
|
||||
answer = getLiveDocSet();
|
||||
}
|
||||
// "answer" is the only part of the filter, so set it.
|
||||
pf.answer = answer;
|
||||
}
|
||||
|
||||
if (answer != null) {
|
||||
pf.filter = answer.getTopFilter();
|
||||
}
|
||||
}
|
||||
|
||||
// Set pf.postFilter
|
||||
if (postFilters != null) {
|
||||
Collections.sort(postFilters, sortByCost);
|
||||
for (int i = postFilters.size() - 1; i >= 0; i--) {
|
||||
|
|
Loading…
Reference in New Issue