mirror of https://github.com/apache/lucene.git
SOLR-14376: optimize SolrIndexSearcher.getDocSet when matches everything
* getProcessedFilter now more reliably returns a null filter when it matches all docs
* getProcessedFilter is now clearly documented as an internal method
* getDocSet detects the all-docs case and exits early with getLiveDocSet
* small refactoring of getDocSetBits/makeBitDocSet
Closes #1399
This commit is contained in:
parent
de6233976a
commit
5bfbdc5325
|
@ -82,6 +82,8 @@ Improvements
|
||||||
|
|
||||||
* SOLR-14364: LTR's SolrFeature "fq" now supports PostFilters (e.g. collapse). (David Smiley)
|
* SOLR-14364: LTR's SolrFeature "fq" now supports PostFilters (e.g. collapse). (David Smiley)
|
||||||
|
|
||||||
|
* SOLR-14376: Optimize filter queries that match all docs. (David Smiley)
|
||||||
|
|
||||||
Optimizations
|
Optimizations
|
||||||
---------------------
|
---------------------
|
||||||
* SOLR-8306: Do not collect expand documents when expand.rows=0 (Marshall Sanders, Amelia Henderson)
|
* SOLR-8306: Do not collect expand documents when expand.rows=0 (Marshall Sanders, Amelia Henderson)
|
||||||
|
|
|
@ -760,6 +760,9 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
|
||||||
private BitDocSet makeBitDocSet(DocSet answer) {
|
private BitDocSet makeBitDocSet(DocSet answer) {
|
||||||
// TODO: this should be implemented in DocSet, most likely with a getBits method that takes a maxDoc argument
|
// TODO: this should be implemented in DocSet, most likely with a getBits method that takes a maxDoc argument
|
||||||
// or make DocSet instances remember maxDoc
|
// or make DocSet instances remember maxDoc
|
||||||
|
if (answer instanceof BitDocSet) {
|
||||||
|
return (BitDocSet) answer;
|
||||||
|
}
|
||||||
FixedBitSet bs = new FixedBitSet(maxDoc());
|
FixedBitSet bs = new FixedBitSet(maxDoc());
|
||||||
DocIterator iter = answer.iterator();
|
DocIterator iter = answer.iterator();
|
||||||
while (iter.hasNext()) {
|
while (iter.hasNext()) {
|
||||||
|
@ -771,11 +774,8 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
|
||||||
|
|
||||||
public BitDocSet getDocSetBits(Query q) throws IOException {
|
public BitDocSet getDocSetBits(Query q) throws IOException {
|
||||||
DocSet answer = getDocSet(q);
|
DocSet answer = getDocSet(q);
|
||||||
if (answer instanceof BitDocSet) {
|
|
||||||
return (BitDocSet) answer;
|
|
||||||
}
|
|
||||||
BitDocSet answerBits = makeBitDocSet(answer);
|
BitDocSet answerBits = makeBitDocSet(answer);
|
||||||
if (filterCache != null) {
|
if (answerBits != answer && filterCache != null) {
|
||||||
filterCache.put(q, answerBits);
|
filterCache.put(q, answerBits);
|
||||||
}
|
}
|
||||||
return answerBits;
|
return answerBits;
|
||||||
|
@ -879,19 +879,8 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
|
||||||
public void setLiveDocs(DocSet docs) {
|
public void setLiveDocs(DocSet docs) {
|
||||||
// a few places currently expect BitDocSet
|
// a few places currently expect BitDocSet
|
||||||
assert docs.size() == numDocs();
|
assert docs.size() == numDocs();
|
||||||
if (docs instanceof BitDocSet) {
|
|
||||||
this.liveDocs = (BitDocSet)docs;
|
|
||||||
} else {
|
|
||||||
this.liveDocs = makeBitDocSet(docs);
|
this.liveDocs = makeBitDocSet(docs);
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
public static class ProcessedFilter {
|
|
||||||
public DocSet answer; // the answer, if non-null
|
|
||||||
public Filter filter;
|
|
||||||
public DelegatingCollector postFilter;
|
|
||||||
public boolean hasDeletedDocs; // true if it's possible that filter may match deleted docs
|
|
||||||
}
|
|
||||||
|
|
||||||
private static Comparator<Query> sortByCost = (q1, q2) -> ((ExtendedQuery) q1).getCost() - ((ExtendedQuery) q2).getCost();
|
private static Comparator<Query> sortByCost = (q1, q2) -> ((ExtendedQuery) q1).getCost() - ((ExtendedQuery) q2).getCost();
|
||||||
|
|
||||||
|
@ -923,6 +912,7 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
|
||||||
* Returns the set of document ids matching all queries. This method is cache-aware and attempts to retrieve the
|
* Returns the set of document ids matching all queries. This method is cache-aware and attempts to retrieve the
|
||||||
* answer from the cache if possible. If the answer was not cached, it may have been inserted into the cache as a
|
* answer from the cache if possible. If the answer was not cached, it may have been inserted into the cache as a
|
||||||
* result of this call. This method can handle negative queries.
|
* result of this call. This method can handle negative queries.
|
||||||
|
* A null/empty list results in {@link #getLiveDocSet()}.
|
||||||
* <p>
|
* <p>
|
||||||
* The DocSet returned should <b>not</b> be modified.
|
* The DocSet returned should <b>not</b> be modified.
|
||||||
*/
|
*/
|
||||||
|
@ -937,7 +927,14 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
|
||||||
}
|
}
|
||||||
|
|
||||||
ProcessedFilter pf = getProcessedFilter(null, queries);
|
ProcessedFilter pf = getProcessedFilter(null, queries);
|
||||||
if (pf.answer != null) return pf.answer;
|
|
||||||
|
if (pf.postFilter == null) {
|
||||||
|
if (pf.answer != null) {
|
||||||
|
return pf.answer;
|
||||||
|
} else if (pf.filter == null) {
|
||||||
|
return getLiveDocSet(); // note: this is what happens when queries is an empty list
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
DocSetCollector setCollector = new DocSetCollector(maxDoc());
|
DocSetCollector setCollector = new DocSetCollector(maxDoc());
|
||||||
Collector collector = setCollector;
|
Collector collector = setCollector;
|
||||||
|
@ -991,13 +988,36 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
|
||||||
return DocSetUtil.getDocSet(setCollector, this);
|
return DocSetUtil.getDocSet(setCollector, this);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* INTERNAL: The response object from {@link #getProcessedFilter(DocSet, List)}.
|
||||||
|
* Holds a filter and postFilter pair that together match a set of documents.
|
||||||
|
* Either of them may be null, in which case the semantics are to match everything.
|
||||||
|
* @see #getProcessedFilter(DocSet, List)
|
||||||
|
*/
|
||||||
|
public static class ProcessedFilter {
|
||||||
|
public DocSet answer; // maybe null. Sometimes we have a docSet answer that represents the complete answer / result.
|
||||||
|
public Filter filter; // maybe null
|
||||||
|
public DelegatingCollector postFilter; // maybe null
|
||||||
|
public boolean hasDeletedDocs; // true if it's possible that filter may match deleted docs
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* INTERNAL: Processes conjunction (AND) of both args into a {@link ProcessedFilter} result.
|
||||||
|
* Either arg may be null/empty, in which case it doesn't restrict the matching docs.
|
||||||
|
* Queries typically are resolved against the filter cache, and populate it.
|
||||||
|
*/
|
||||||
public ProcessedFilter getProcessedFilter(DocSet setFilter, List<Query> queries) throws IOException {
|
public ProcessedFilter getProcessedFilter(DocSet setFilter, List<Query> queries) throws IOException {
|
||||||
ProcessedFilter pf = new ProcessedFilter();
|
ProcessedFilter pf = new ProcessedFilter();
|
||||||
if (queries == null || queries.size() == 0) {
|
if (queries == null || queries.size() == 0) {
|
||||||
if (setFilter != null) pf.filter = setFilter.getTopFilter();
|
if (setFilter != null) {
|
||||||
|
pf.answer = setFilter;
|
||||||
|
pf.filter = setFilter.getTopFilter();
|
||||||
|
}
|
||||||
return pf;
|
return pf;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// We combine all the filter queries that come from the filter cache & setFilter into "answer".
|
||||||
|
// This might become pf.answer, but not if there are any non-cached filters or post filters
|
||||||
DocSet answer = null;
|
DocSet answer = null;
|
||||||
|
|
||||||
boolean[] neg = new boolean[queries.size() + 1];
|
boolean[] neg = new boolean[queries.size() + 1];
|
||||||
|
@ -1011,7 +1031,7 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
|
||||||
if (setFilter != null) {
|
if (setFilter != null) {
|
||||||
answer = sets[end++] = setFilter;
|
answer = sets[end++] = setFilter;
|
||||||
smallestIndex = end;
|
smallestIndex = end;
|
||||||
}
|
} // we are done with setFilter at this point
|
||||||
|
|
||||||
int smallestCount = Integer.MAX_VALUE;
|
int smallestCount = Integer.MAX_VALUE;
|
||||||
for (Query q : queries) {
|
for (Query q : queries) {
|
||||||
|
@ -1073,7 +1093,30 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
|
||||||
if (!neg[i] && i != smallestIndex) answer = answer.intersection(sets[i]);
|
if (!neg[i] && i != smallestIndex) answer = answer.intersection(sets[i]);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (notCached != null) {
|
// ignore "answer" if it simply matches all docs
|
||||||
|
if (answer != null && answer.size() == numDocs()) {
|
||||||
|
answer = null;
|
||||||
|
}
|
||||||
|
|
||||||
|
// answer is done.
|
||||||
|
|
||||||
|
// If there are neither notCached queries nor postFilters, we can return now.
|
||||||
|
if (notCached == null && postFilters == null) {
|
||||||
|
// "answer" is the only part of the filter, so set it.
|
||||||
|
if (answer != null) {
|
||||||
|
pf.answer = answer;
|
||||||
|
pf.filter = answer.getTopFilter();
|
||||||
|
}
|
||||||
|
return pf;
|
||||||
|
}
|
||||||
|
// pf.answer will remain null ... (our local "answer" var is not the complete answer)
|
||||||
|
|
||||||
|
// Set pf.filter based on combining "answer" and "notCached"
|
||||||
|
if (notCached == null) {
|
||||||
|
if (answer != null) {
|
||||||
|
pf.filter = answer.getTopFilter();
|
||||||
|
}
|
||||||
|
} else {
|
||||||
Collections.sort(notCached, sortByCost);
|
Collections.sort(notCached, sortByCost);
|
||||||
List<Weight> weights = new ArrayList<>(notCached.size());
|
List<Weight> weights = new ArrayList<>(notCached.size());
|
||||||
for (Query q : notCached) {
|
for (Query q : notCached) {
|
||||||
|
@ -1082,20 +1125,9 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
|
||||||
}
|
}
|
||||||
pf.filter = new FilterImpl(answer, weights);
|
pf.filter = new FilterImpl(answer, weights);
|
||||||
pf.hasDeletedDocs = (answer == null); // if all clauses were uncached, the resulting filter may match deleted docs
|
pf.hasDeletedDocs = (answer == null); // if all clauses were uncached, the resulting filter may match deleted docs
|
||||||
} else {
|
|
||||||
if (postFilters == null) {
|
|
||||||
if (answer == null) {
|
|
||||||
answer = getLiveDocSet();
|
|
||||||
}
|
|
||||||
// "answer" is the only part of the filter, so set it.
|
|
||||||
pf.answer = answer;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (answer != null) {
|
|
||||||
pf.filter = answer.getTopFilter();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Set pf.postFilter
|
||||||
if (postFilters != null) {
|
if (postFilters != null) {
|
||||||
Collections.sort(postFilters, sortByCost);
|
Collections.sort(postFilters, sortByCost);
|
||||||
for (int i = postFilters.size() - 1; i >= 0; i--) {
|
for (int i = postFilters.size() - 1; i >= 0; i--) {
|
||||||
|
|
Loading…
Reference in New Issue