mirror of https://github.com/apache/lucene.git
SOLR-8306: Optimize expand.rows=0 to compute only total hits (#1334)
* When expand.rows=0, expanded documents are not returned, so collecting them can be skipped and only the total hit count of each group needs to be computed.
This commit is contained in:
parent
bf25e6566d
commit
fda935482b
|
@ -57,7 +57,7 @@ Improvements
|
|||
|
||||
Optimizations
|
||||
---------------------
|
||||
(No changes)
|
||||
* SOLR-8306: Do not collect expand documents when expand.rows=0 (Marshall Sanders, Amelia Henderson)
|
||||
|
||||
Bug Fixes
|
||||
---------------------
|
||||
|
|
|
@ -53,6 +53,7 @@ import org.apache.lucene.search.TopDocs;
|
|||
import org.apache.lucene.search.TopDocsCollector;
|
||||
import org.apache.lucene.search.TopFieldCollector;
|
||||
import org.apache.lucene.search.TopScoreDocCollector;
|
||||
import org.apache.lucene.search.TotalHitCountCollector;
|
||||
import org.apache.lucene.search.TotalHits;
|
||||
import org.apache.lucene.util.BitSetIterator;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
|
@ -420,30 +421,30 @@ public class ExpandComponent extends SearchComponent implements PluginInfoInitia
|
|||
CharsRefBuilder charsRef = new CharsRefBuilder();
|
||||
for (LongObjectCursor<Collector> cursor : groups) {
|
||||
long groupValue = cursor.key;
|
||||
TopDocsCollector<?> topDocsCollector = TopDocsCollector.class.cast(cursor.value);
|
||||
TopDocs topDocs = topDocsCollector.topDocs();
|
||||
ScoreDoc[] scoreDocs = topDocs.scoreDocs;
|
||||
if (scoreDocs.length > 0) {
|
||||
if (returnFields.wantsScore() && sort != null) {
|
||||
TopFieldCollector.populateScores(scoreDocs, searcher, query);
|
||||
if (cursor.value instanceof TopDocsCollector) {
|
||||
TopDocsCollector<?> topDocsCollector = TopDocsCollector.class.cast(cursor.value);
|
||||
TopDocs topDocs = topDocsCollector.topDocs();
|
||||
ScoreDoc[] scoreDocs = topDocs.scoreDocs;
|
||||
if (scoreDocs.length > 0) {
|
||||
if (returnFields.wantsScore() && sort != null) {
|
||||
TopFieldCollector.populateScores(scoreDocs, searcher, query);
|
||||
}
|
||||
int[] docs = new int[scoreDocs.length];
|
||||
float[] scores = new float[scoreDocs.length];
|
||||
for (int i = 0; i < docs.length; i++) {
|
||||
ScoreDoc scoreDoc = scoreDocs[i];
|
||||
docs[i] = scoreDoc.doc;
|
||||
scores[i] = scoreDoc.score;
|
||||
}
|
||||
assert topDocs.totalHits.relation == TotalHits.Relation.EQUAL_TO;
|
||||
DocSlice slice = new DocSlice(0, docs.length, docs, scores, topDocs.totalHits.value, Float.NaN);
|
||||
addGroupSliceToOutputMap(fieldType, ordBytes, outMap, charsRef, groupValue, slice);
|
||||
}
|
||||
int[] docs = new int[scoreDocs.length];
|
||||
float[] scores = new float[scoreDocs.length];
|
||||
for (int i = 0; i < docs.length; i++) {
|
||||
ScoreDoc scoreDoc = scoreDocs[i];
|
||||
docs[i] = scoreDoc.doc;
|
||||
scores[i] = scoreDoc.score;
|
||||
}
|
||||
assert topDocs.totalHits.relation == TotalHits.Relation.EQUAL_TO;
|
||||
DocSlice slice = new DocSlice(0, docs.length, docs, scores, topDocs.totalHits.value, Float.NaN);
|
||||
|
||||
if(fieldType instanceof StrField) {
|
||||
final BytesRef bytesRef = ordBytes.get((int)groupValue);
|
||||
fieldType.indexedToReadable(bytesRef, charsRef);
|
||||
String group = charsRef.toString();
|
||||
outMap.add(group, slice);
|
||||
} else {
|
||||
outMap.add(numericToString(fieldType, groupValue), slice);
|
||||
} else {
|
||||
int totalHits = ((TotalHitCountCollector) cursor.value).getTotalHits();
|
||||
if (totalHits > 0) {
|
||||
DocSlice slice = new DocSlice(0, 0, null, null, totalHits, 0);
|
||||
addGroupSliceToOutputMap(fieldType, ordBytes, outMap, charsRef, groupValue, slice);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -451,6 +452,17 @@ public class ExpandComponent extends SearchComponent implements PluginInfoInitia
|
|||
rb.rsp.add("expanded", outMap);
|
||||
}
|
||||
|
||||
private void addGroupSliceToOutputMap(FieldType fieldType, IntObjectHashMap<BytesRef> ordBytes, NamedList outMap, CharsRefBuilder charsRef, long groupValue, DocSlice slice) {
|
||||
if(fieldType instanceof StrField) {
|
||||
final BytesRef bytesRef = ordBytes.get((int)groupValue);
|
||||
fieldType.indexedToReadable(bytesRef, charsRef);
|
||||
String group = charsRef.toString();
|
||||
outMap.add(group, slice);
|
||||
} else {
|
||||
outMap.add(numericToString(fieldType, groupValue), slice);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public int distributedProcess(ResponseBuilder rb) throws IOException {
|
||||
if (rb.doExpand && rb.stage < finishingStage) {
|
||||
|
@ -533,8 +545,7 @@ public class ExpandComponent extends SearchComponent implements PluginInfoInitia
|
|||
DocIdSetIterator iterator = new BitSetIterator(groupBits, 0); // cost is not useful here
|
||||
int group;
|
||||
while ((group = iterator.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
|
||||
Collector collector = (sort == null) ? TopScoreDocCollector.create(limit, Integer.MAX_VALUE) : TopFieldCollector.create(sort, limit, Integer.MAX_VALUE);
|
||||
groups.put(group, collector);
|
||||
groups.put(group, getCollector(limit, sort));
|
||||
}
|
||||
|
||||
this.collapsedSet = collapsedSet;
|
||||
|
@ -614,11 +625,8 @@ public class ExpandComponent extends SearchComponent implements PluginInfoInitia
|
|||
int numGroups = collapsedSet.size();
|
||||
this.nullValue = nullValue;
|
||||
groups = new LongObjectHashMap<>(numGroups);
|
||||
Iterator<LongCursor> iterator = groupSet.iterator();
|
||||
while (iterator.hasNext()) {
|
||||
LongCursor cursor = iterator.next();
|
||||
Collector collector = (sort == null) ? TopScoreDocCollector.create(limit, Integer.MAX_VALUE) : TopFieldCollector.create(sort, limit, Integer.MAX_VALUE);
|
||||
groups.put(cursor.value, collector);
|
||||
for (LongCursor cursor : groupSet) {
|
||||
groups.put(cursor.value, getCollector(limit, sort));
|
||||
}
|
||||
|
||||
this.field = field;
|
||||
|
@ -681,6 +689,18 @@ public class ExpandComponent extends SearchComponent implements PluginInfoInitia
|
|||
return groups.iterator().next().value.scoreMode(); // we assume all the collectors should have the same nature
|
||||
}
|
||||
}
|
||||
|
||||
default Collector getCollector(int limit, Sort sort) throws IOException {
|
||||
Collector collector;
|
||||
if (limit == 0) {
|
||||
collector = new TotalHitCountCollector();
|
||||
} else if (sort == null) {
|
||||
collector = TopScoreDocCollector.create(limit, Integer.MAX_VALUE);
|
||||
} else {
|
||||
collector = TopFieldCollector.create(sort, limit, Integer.MAX_VALUE);
|
||||
}
|
||||
return collector;
|
||||
}
|
||||
}
|
||||
|
||||
private Query getGroupQuery(String fname,
|
||||
|
|
|
@ -56,7 +56,7 @@ public class DocSlice implements DocList, Accountable {
|
|||
this.scores=scores;
|
||||
this.matches=matches;
|
||||
this.maxScore=maxScore;
|
||||
this.ramBytesUsed = BASE_RAM_BYTES_USED + ((long)docs.length << 2) + (scores == null ? 0 : ((long)scores.length<<2)+RamUsageEstimator.NUM_BYTES_ARRAY_HEADER);
|
||||
this.ramBytesUsed = BASE_RAM_BYTES_USED + (docs == null ? 0 : ((long)docs.length << 2)) + (scores == null ? 0 : ((long)scores.length<<2)+RamUsageEstimator.NUM_BYTES_ARRAY_HEADER);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -153,6 +153,42 @@ public class DistributedExpandComponentTest extends BaseDistributedSearchTestCas
|
|||
assertExpandGroupCountAndOrder("group3", 1, results, "9");
|
||||
assertExpandGroupCountAndOrder("group4", 1, results, "14");
|
||||
|
||||
//Test expand.rows = 0 - no docs only expand count
|
||||
|
||||
params = new ModifiableSolrParams();
|
||||
params.add("q", "*:*");
|
||||
params.add("fq", "{!collapse field="+group+"}");
|
||||
params.add("defType", "edismax");
|
||||
params.add("bf", "field(test_i)");
|
||||
params.add("expand", "true");
|
||||
params.add("expand.rows", "0");
|
||||
params.add("fl", "id");
|
||||
setDistributedParams(params);
|
||||
rsp = queryServer(params);
|
||||
results = rsp.getExpandedResults();
|
||||
assertExpandGroups(results, "group1","group2", "group3", "group4");
|
||||
assertExpandGroupCountAndOrder("group1", 0, results);
|
||||
assertExpandGroupCountAndOrder("group2", 0, results);
|
||||
assertExpandGroupCountAndOrder("group3", 0, results);
|
||||
assertExpandGroupCountAndOrder("group4", 0, results);
|
||||
|
||||
//Test expand.rows = 0 with expand.field
|
||||
|
||||
params = new ModifiableSolrParams();
|
||||
params.add("q", "*:*");
|
||||
params.add("fq", "test_l:10");
|
||||
params.add("defType", "edismax");
|
||||
params.add("expand", "true");
|
||||
params.add("expand.fq", "test_f:2000");
|
||||
params.add("expand.field", group);
|
||||
params.add("expand.rows", "0");
|
||||
params.add("fl", "id,score");
|
||||
setDistributedParams(params);
|
||||
rsp = queryServer(params);
|
||||
results = rsp.getExpandedResults();
|
||||
assertExpandGroups(results, "group1", "group4");
|
||||
assertExpandGroupCountAndOrder("group1", 0, results);
|
||||
assertExpandGroupCountAndOrder("group4", 0, results);
|
||||
|
||||
//Test key-only fl
|
||||
|
||||
|
|
|
@ -234,6 +234,60 @@ public class TestExpandComponent extends SolrTestCaseJ4 {
|
|||
"/response/lst[@name='expanded']/result[@name='2"+floatAppend+"']/doc[1]/str[@name='id'][.='8']"
|
||||
);
|
||||
|
||||
//Test expand.rows = 0 - no docs only expand count
|
||||
params = new ModifiableSolrParams();
|
||||
params.add("q", "*:*");
|
||||
params.add("fq", "{!collapse field="+group+hint+"}");
|
||||
params.add("defType", "edismax");
|
||||
params.add("bf", "field(test_i)");
|
||||
params.add("expand", "true");
|
||||
params.add("expand.rows", "0");
|
||||
assertQ(req(params), "*[count(/response/result/doc)=2]",
|
||||
"*[count(/response/lst[@name='expanded']/result)=2]",
|
||||
"*[count(/response/lst[@name='expanded']/result[@name='1"+floatAppend+"']/doc)=0]",
|
||||
"*[count(/response/lst[@name='expanded']/result[@name='2"+floatAppend+"']/doc)=0]",
|
||||
"/response/result/doc[1]/str[@name='id'][.='2']",
|
||||
"/response/result/doc[2]/str[@name='id'][.='6']"
|
||||
);
|
||||
|
||||
//Test expand.rows = 0 with expand.field
|
||||
params = new ModifiableSolrParams();
|
||||
params.add("q", "*:*");
|
||||
params.add("fq", "type_s:parent");
|
||||
params.add("defType", "edismax");
|
||||
params.add("bf", "field(test_i)");
|
||||
params.add("expand", "true");
|
||||
params.add("expand.fq", "type_s:child");
|
||||
params.add("expand.field", group);
|
||||
params.add("expand.rows", "0");
|
||||
assertQ(req(params, "fl", "id"), "*[count(/response/result/doc)=2]",
|
||||
"*[count(/response/lst[@name='expanded']/result)=2]",
|
||||
"*[count(/response/lst[@name='expanded']/result[@name='1"+floatAppend+"']/doc)=0]",
|
||||
"*[count(/response/lst[@name='expanded']/result[@name='2"+floatAppend+"']/doc)=0]",
|
||||
"/response/result/doc[1]/str[@name='id'][.='1']",
|
||||
"/response/result/doc[2]/str[@name='id'][.='5']"
|
||||
);
|
||||
|
||||
//Test score with expand.rows = 0
|
||||
params = new ModifiableSolrParams();
|
||||
params.add("q", "*:*");
|
||||
params.add("fq", "type_s:parent");
|
||||
params.add("defType", "edismax");
|
||||
params.add("bf", "field(test_i)");
|
||||
params.add("expand", "true");
|
||||
params.add("expand.fq", "*:*");
|
||||
params.add("expand.field", group);
|
||||
params.add("expand.rows", "0");
|
||||
assertQ(req(params, "fl", "id,score"), "*[count(/response/result/doc)=2]",
|
||||
"*[count(/response/lst[@name='expanded']/result)=2]",
|
||||
"*[count(/response/lst[@name='expanded']/result[@name='1"+floatAppend+"']/doc)=0]",
|
||||
"*[count(/response/lst[@name='expanded']/result[@name='2"+floatAppend+"']/doc)=0]",
|
||||
"*[count(/response/lst[@name='expanded']/result[@maxScore])=0]", //maxScore should not be available
|
||||
"/response/result/doc[1]/str[@name='id'][.='1']",
|
||||
"/response/result/doc[2]/str[@name='id'][.='5']",
|
||||
"count(//*[@name='score' and .='NaN'])=0"
|
||||
|
||||
);
|
||||
|
||||
//Test no group results
|
||||
params = new ModifiableSolrParams();
|
||||
|
|
|
@ -150,6 +150,11 @@ Orders the documents within the expanded groups. The default is `score desc`.
|
|||
`expand.rows`::
|
||||
The number of rows to display in each group. The default is 5 rows.
|
||||
|
||||
[IMPORTANT]
|
||||
====
|
||||
When `expand.rows=0`, only the number of documents found for each expanded value is returned. Hence, scores won't be computed even if requested, and `maxScore` is set to 0.
|
||||
====
|
||||
|
||||
`expand.q`::
|
||||
Overrides the main query (`q`), determines which documents to include in the main group. The default is to use the main query.
|
||||
|
||||
|
|
Loading…
Reference in New Issue