SOLR-8306: Optimize expand.rows=0 to compute only total hits (#1334)

* When expand.rows=0, expand documents are not returned. So, computing them could be avoided and only total hits need to be computed
This commit is contained in:
ameliahenderson 2020-03-17 23:53:04 -04:00 committed by GitHub
parent bf25e6566d
commit fda935482b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 147 additions and 32 deletions

View File

@ -57,7 +57,7 @@ Improvements
Optimizations
---------------------
(No changes)
* SOLR-8306: Do not collect expand documents when expand.rows=0 (Marshall Sanders, Amelia Henderson)
Bug Fixes
---------------------

View File

@ -53,6 +53,7 @@ import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TopDocsCollector;
import org.apache.lucene.search.TopFieldCollector;
import org.apache.lucene.search.TopScoreDocCollector;
import org.apache.lucene.search.TotalHitCountCollector;
import org.apache.lucene.search.TotalHits;
import org.apache.lucene.util.BitSetIterator;
import org.apache.lucene.util.BytesRef;
@ -420,30 +421,30 @@ public class ExpandComponent extends SearchComponent implements PluginInfoInitia
CharsRefBuilder charsRef = new CharsRefBuilder();
for (LongObjectCursor<Collector> cursor : groups) {
long groupValue = cursor.key;
TopDocsCollector<?> topDocsCollector = TopDocsCollector.class.cast(cursor.value);
TopDocs topDocs = topDocsCollector.topDocs();
ScoreDoc[] scoreDocs = topDocs.scoreDocs;
if (scoreDocs.length > 0) {
if (returnFields.wantsScore() && sort != null) {
TopFieldCollector.populateScores(scoreDocs, searcher, query);
if (cursor.value instanceof TopDocsCollector) {
TopDocsCollector<?> topDocsCollector = TopDocsCollector.class.cast(cursor.value);
TopDocs topDocs = topDocsCollector.topDocs();
ScoreDoc[] scoreDocs = topDocs.scoreDocs;
if (scoreDocs.length > 0) {
if (returnFields.wantsScore() && sort != null) {
TopFieldCollector.populateScores(scoreDocs, searcher, query);
}
int[] docs = new int[scoreDocs.length];
float[] scores = new float[scoreDocs.length];
for (int i = 0; i < docs.length; i++) {
ScoreDoc scoreDoc = scoreDocs[i];
docs[i] = scoreDoc.doc;
scores[i] = scoreDoc.score;
}
assert topDocs.totalHits.relation == TotalHits.Relation.EQUAL_TO;
DocSlice slice = new DocSlice(0, docs.length, docs, scores, topDocs.totalHits.value, Float.NaN);
addGroupSliceToOutputMap(fieldType, ordBytes, outMap, charsRef, groupValue, slice);
}
int[] docs = new int[scoreDocs.length];
float[] scores = new float[scoreDocs.length];
for (int i = 0; i < docs.length; i++) {
ScoreDoc scoreDoc = scoreDocs[i];
docs[i] = scoreDoc.doc;
scores[i] = scoreDoc.score;
}
assert topDocs.totalHits.relation == TotalHits.Relation.EQUAL_TO;
DocSlice slice = new DocSlice(0, docs.length, docs, scores, topDocs.totalHits.value, Float.NaN);
if(fieldType instanceof StrField) {
final BytesRef bytesRef = ordBytes.get((int)groupValue);
fieldType.indexedToReadable(bytesRef, charsRef);
String group = charsRef.toString();
outMap.add(group, slice);
} else {
outMap.add(numericToString(fieldType, groupValue), slice);
} else {
int totalHits = ((TotalHitCountCollector) cursor.value).getTotalHits();
if (totalHits > 0) {
DocSlice slice = new DocSlice(0, 0, null, null, totalHits, 0);
addGroupSliceToOutputMap(fieldType, ordBytes, outMap, charsRef, groupValue, slice);
}
}
}
@ -451,6 +452,17 @@ public class ExpandComponent extends SearchComponent implements PluginInfoInitia
rb.rsp.add("expanded", outMap);
}
private void addGroupSliceToOutputMap(FieldType fieldType, IntObjectHashMap<BytesRef> ordBytes, NamedList outMap, CharsRefBuilder charsRef, long groupValue, DocSlice slice) {
if(fieldType instanceof StrField) {
final BytesRef bytesRef = ordBytes.get((int)groupValue);
fieldType.indexedToReadable(bytesRef, charsRef);
String group = charsRef.toString();
outMap.add(group, slice);
} else {
outMap.add(numericToString(fieldType, groupValue), slice);
}
}
@Override
public int distributedProcess(ResponseBuilder rb) throws IOException {
if (rb.doExpand && rb.stage < finishingStage) {
@ -533,8 +545,7 @@ public class ExpandComponent extends SearchComponent implements PluginInfoInitia
DocIdSetIterator iterator = new BitSetIterator(groupBits, 0); // cost is not useful here
int group;
while ((group = iterator.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
Collector collector = (sort == null) ? TopScoreDocCollector.create(limit, Integer.MAX_VALUE) : TopFieldCollector.create(sort, limit, Integer.MAX_VALUE);
groups.put(group, collector);
groups.put(group, getCollector(limit, sort));
}
this.collapsedSet = collapsedSet;
@ -614,11 +625,8 @@ public class ExpandComponent extends SearchComponent implements PluginInfoInitia
int numGroups = collapsedSet.size();
this.nullValue = nullValue;
groups = new LongObjectHashMap<>(numGroups);
Iterator<LongCursor> iterator = groupSet.iterator();
while (iterator.hasNext()) {
LongCursor cursor = iterator.next();
Collector collector = (sort == null) ? TopScoreDocCollector.create(limit, Integer.MAX_VALUE) : TopFieldCollector.create(sort, limit, Integer.MAX_VALUE);
groups.put(cursor.value, collector);
for (LongCursor cursor : groupSet) {
groups.put(cursor.value, getCollector(limit, sort));
}
this.field = field;
@ -681,6 +689,18 @@ public class ExpandComponent extends SearchComponent implements PluginInfoInitia
return groups.iterator().next().value.scoreMode(); // we assume all the collectors should have the same nature
}
}
default Collector getCollector(int limit, Sort sort) throws IOException {
Collector collector;
if (limit == 0) {
collector = new TotalHitCountCollector();
} else if (sort == null) {
collector = TopScoreDocCollector.create(limit, Integer.MAX_VALUE);
} else {
collector = TopFieldCollector.create(sort, limit, Integer.MAX_VALUE);
}
return collector;
}
}
private Query getGroupQuery(String fname,

View File

@ -56,7 +56,7 @@ public class DocSlice implements DocList, Accountable {
this.scores=scores;
this.matches=matches;
this.maxScore=maxScore;
this.ramBytesUsed = BASE_RAM_BYTES_USED + ((long)docs.length << 2) + (scores == null ? 0 : ((long)scores.length<<2)+RamUsageEstimator.NUM_BYTES_ARRAY_HEADER);
this.ramBytesUsed = BASE_RAM_BYTES_USED + (docs == null ? 0 : ((long)docs.length << 2)) + (scores == null ? 0 : ((long)scores.length<<2)+RamUsageEstimator.NUM_BYTES_ARRAY_HEADER);
}
@Override

View File

@ -153,6 +153,42 @@ public class DistributedExpandComponentTest extends BaseDistributedSearchTestCas
assertExpandGroupCountAndOrder("group3", 1, results, "9");
assertExpandGroupCountAndOrder("group4", 1, results, "14");
//Test expand.rows = 0 - no docs only expand count
params = new ModifiableSolrParams();
params.add("q", "*:*");
params.add("fq", "{!collapse field="+group+"}");
params.add("defType", "edismax");
params.add("bf", "field(test_i)");
params.add("expand", "true");
params.add("expand.rows", "0");
params.add("fl", "id");
setDistributedParams(params);
rsp = queryServer(params);
results = rsp.getExpandedResults();
assertExpandGroups(results, "group1","group2", "group3", "group4");
assertExpandGroupCountAndOrder("group1", 0, results);
assertExpandGroupCountAndOrder("group2", 0, results);
assertExpandGroupCountAndOrder("group3", 0, results);
assertExpandGroupCountAndOrder("group4", 0, results);
//Test expand.rows = 0 with expand.field
params = new ModifiableSolrParams();
params.add("q", "*:*");
params.add("fq", "test_l:10");
params.add("defType", "edismax");
params.add("expand", "true");
params.add("expand.fq", "test_f:2000");
params.add("expand.field", group);
params.add("expand.rows", "0");
params.add("fl", "id,score");
setDistributedParams(params);
rsp = queryServer(params);
results = rsp.getExpandedResults();
assertExpandGroups(results, "group1", "group4");
assertExpandGroupCountAndOrder("group1", 0, results);
assertExpandGroupCountAndOrder("group4", 0, results);
//Test key-only fl

View File

@ -234,6 +234,60 @@ public class TestExpandComponent extends SolrTestCaseJ4 {
"/response/lst[@name='expanded']/result[@name='2"+floatAppend+"']/doc[1]/str[@name='id'][.='8']"
);
//Test expand.rows = 0 - no docs only expand count
params = new ModifiableSolrParams();
params.add("q", "*:*");
params.add("fq", "{!collapse field="+group+hint+"}");
params.add("defType", "edismax");
params.add("bf", "field(test_i)");
params.add("expand", "true");
params.add("expand.rows", "0");
assertQ(req(params), "*[count(/response/result/doc)=2]",
"*[count(/response/lst[@name='expanded']/result)=2]",
"*[count(/response/lst[@name='expanded']/result[@name='1"+floatAppend+"']/doc)=0]",
"*[count(/response/lst[@name='expanded']/result[@name='2"+floatAppend+"']/doc)=0]",
"/response/result/doc[1]/str[@name='id'][.='2']",
"/response/result/doc[2]/str[@name='id'][.='6']"
);
//Test expand.rows = 0 with expand.field
params = new ModifiableSolrParams();
params.add("q", "*:*");
params.add("fq", "type_s:parent");
params.add("defType", "edismax");
params.add("bf", "field(test_i)");
params.add("expand", "true");
params.add("expand.fq", "type_s:child");
params.add("expand.field", group);
params.add("expand.rows", "0");
assertQ(req(params, "fl", "id"), "*[count(/response/result/doc)=2]",
"*[count(/response/lst[@name='expanded']/result)=2]",
"*[count(/response/lst[@name='expanded']/result[@name='1"+floatAppend+"']/doc)=0]",
"*[count(/response/lst[@name='expanded']/result[@name='2"+floatAppend+"']/doc)=0]",
"/response/result/doc[1]/str[@name='id'][.='1']",
"/response/result/doc[2]/str[@name='id'][.='5']"
);
//Test score with expand.rows = 0
params = new ModifiableSolrParams();
params.add("q", "*:*");
params.add("fq", "type_s:parent");
params.add("defType", "edismax");
params.add("bf", "field(test_i)");
params.add("expand", "true");
params.add("expand.fq", "*:*");
params.add("expand.field", group);
params.add("expand.rows", "0");
assertQ(req(params, "fl", "id,score"), "*[count(/response/result/doc)=2]",
"*[count(/response/lst[@name='expanded']/result)=2]",
"*[count(/response/lst[@name='expanded']/result[@name='1"+floatAppend+"']/doc)=0]",
"*[count(/response/lst[@name='expanded']/result[@name='2"+floatAppend+"']/doc)=0]",
"*[count(/response/lst[@name='expanded']/result[@maxScore])=0]", //maxScore should not be available
"/response/result/doc[1]/str[@name='id'][.='1']",
"/response/result/doc[2]/str[@name='id'][.='5']",
"count(//*[@name='score' and .='NaN'])=0"
);
//Test no group results
params = new ModifiableSolrParams();

View File

@ -150,6 +150,11 @@ Orders the documents within the expanded groups. The default is `score desc`.
`expand.rows`::
The number of rows to display in each group. The default is 5 rows.
[IMPORTANT]
====
When `expand.rows=0`, then only the number of documents found for each expanded value is returned. Hence, scores won't be computed even if requested. `maxScore` is set to 0
====
`expand.q`::
Overrides the main query (`q`), determines which documents to include in the main group. The default is to use the main query.