From fda935482b76b3640ce4b02f9af026f7696f2ff7 Mon Sep 17 00:00:00 2001 From: ameliahenderson <61994179+ameliahenderson@users.noreply.github.com> Date: Tue, 17 Mar 2020 23:53:04 -0400 Subject: [PATCH] SOLR-8306: Optimize expand.rows=0 to compute only total hits (#1334) * When expand.rows=0, expand documents are not returned. So, computing them could be avoided and only total hits need to be computed --- solr/CHANGES.txt | 2 +- .../handler/component/ExpandComponent.java | 80 ++++++++++++------- .../java/org/apache/solr/search/DocSlice.java | 2 +- .../DistributedExpandComponentTest.java | 36 +++++++++ .../component/TestExpandComponent.java | 54 +++++++++++++ .../src/collapse-and-expand-results.adoc | 5 ++ 6 files changed, 147 insertions(+), 32 deletions(-) diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index a82710d363d..0acc3e03206 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -57,7 +57,7 @@ Improvements Optimizations --------------------- -(No changes) +* SOLR-8306: Do not collect expand documents when expand.rows=0 (Marshall Sanders, Amelia Henderson) Bug Fixes --------------------- diff --git a/solr/core/src/java/org/apache/solr/handler/component/ExpandComponent.java b/solr/core/src/java/org/apache/solr/handler/component/ExpandComponent.java index 8cee08c032d..47fa0c05f57 100644 --- a/solr/core/src/java/org/apache/solr/handler/component/ExpandComponent.java +++ b/solr/core/src/java/org/apache/solr/handler/component/ExpandComponent.java @@ -53,6 +53,7 @@ import org.apache.lucene.search.TopDocs; import org.apache.lucene.search.TopDocsCollector; import org.apache.lucene.search.TopFieldCollector; import org.apache.lucene.search.TopScoreDocCollector; +import org.apache.lucene.search.TotalHitCountCollector; import org.apache.lucene.search.TotalHits; import org.apache.lucene.util.BitSetIterator; import org.apache.lucene.util.BytesRef; @@ -420,30 +421,30 @@ public class ExpandComponent extends SearchComponent implements PluginInfoInitia CharsRefBuilder charsRef = new CharsRefBuilder(); for (LongObjectCursor cursor : groups) { long groupValue = cursor.key; - TopDocsCollector topDocsCollector = TopDocsCollector.class.cast(cursor.value); - TopDocs topDocs = topDocsCollector.topDocs(); - ScoreDoc[] scoreDocs = topDocs.scoreDocs; - if (scoreDocs.length > 0) { - if (returnFields.wantsScore() && sort != null) { - TopFieldCollector.populateScores(scoreDocs, searcher, query); + if (cursor.value instanceof TopDocsCollector) { + TopDocsCollector topDocsCollector = TopDocsCollector.class.cast(cursor.value); + TopDocs topDocs = topDocsCollector.topDocs(); + ScoreDoc[] scoreDocs = topDocs.scoreDocs; + if (scoreDocs.length > 0) { + if (returnFields.wantsScore() && sort != null) { + TopFieldCollector.populateScores(scoreDocs, searcher, query); + } + int[] docs = new int[scoreDocs.length]; + float[] scores = new float[scoreDocs.length]; + for (int i = 0; i < docs.length; i++) { + ScoreDoc scoreDoc = scoreDocs[i]; + docs[i] = scoreDoc.doc; + scores[i] = scoreDoc.score; + } + assert topDocs.totalHits.relation == TotalHits.Relation.EQUAL_TO; + DocSlice slice = new DocSlice(0, docs.length, docs, scores, topDocs.totalHits.value, Float.NaN); + addGroupSliceToOutputMap(fieldType, ordBytes, outMap, charsRef, groupValue, slice); } - int[] docs = new int[scoreDocs.length]; - float[] scores = new float[scoreDocs.length]; - for (int i = 0; i < docs.length; i++) { - ScoreDoc scoreDoc = scoreDocs[i]; - docs[i] = scoreDoc.doc; - scores[i] = scoreDoc.score; - } - assert topDocs.totalHits.relation == TotalHits.Relation.EQUAL_TO; - DocSlice slice = new DocSlice(0, docs.length, docs, scores, topDocs.totalHits.value, Float.NaN); - - if(fieldType instanceof StrField) { - final BytesRef bytesRef = ordBytes.get((int)groupValue); - fieldType.indexedToReadable(bytesRef, charsRef); - String group = charsRef.toString(); - outMap.add(group, slice); - } else { - outMap.add(numericToString(fieldType, groupValue), slice); + } else { + int totalHits = ((TotalHitCountCollector) cursor.value).getTotalHits(); + if (totalHits > 0) { + DocSlice slice = new DocSlice(0, 0, null, null, totalHits, 0); + addGroupSliceToOutputMap(fieldType, ordBytes, outMap, charsRef, groupValue, slice); } } } @@ -451,6 +452,17 @@ public class ExpandComponent extends SearchComponent implements PluginInfoInitia rb.rsp.add("expanded", outMap); } + private void addGroupSliceToOutputMap(FieldType fieldType, IntObjectHashMap ordBytes, NamedList outMap, CharsRefBuilder charsRef, long groupValue, DocSlice slice) { + if(fieldType instanceof StrField) { + final BytesRef bytesRef = ordBytes.get((int)groupValue); + fieldType.indexedToReadable(bytesRef, charsRef); + String group = charsRef.toString(); + outMap.add(group, slice); + } else { + outMap.add(numericToString(fieldType, groupValue), slice); + } + } + @Override public int distributedProcess(ResponseBuilder rb) throws IOException { if (rb.doExpand && rb.stage < finishingStage) { @@ -533,8 +545,7 @@ public class ExpandComponent extends SearchComponent implements PluginInfoInitia DocIdSetIterator iterator = new BitSetIterator(groupBits, 0); // cost is not useful here int group; while ((group = iterator.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { - Collector collector = (sort == null) ? TopScoreDocCollector.create(limit, Integer.MAX_VALUE) : TopFieldCollector.create(sort, limit, Integer.MAX_VALUE); - groups.put(group, collector); + groups.put(group, getCollector(limit, sort)); } this.collapsedSet = collapsedSet; @@ -614,11 +625,8 @@ public class ExpandComponent extends SearchComponent implements PluginInfoInitia int numGroups = collapsedSet.size(); this.nullValue = nullValue; groups = new LongObjectHashMap<>(numGroups); - Iterator iterator = groupSet.iterator(); - while (iterator.hasNext()) { - LongCursor cursor = iterator.next(); - Collector collector = (sort == null) ? TopScoreDocCollector.create(limit, Integer.MAX_VALUE) : TopFieldCollector.create(sort, limit, Integer.MAX_VALUE); - groups.put(cursor.value, collector); + for (LongCursor cursor : groupSet) { + groups.put(cursor.value, getCollector(limit, sort)); } this.field = field; @@ -681,6 +689,18 @@ public class ExpandComponent extends SearchComponent implements PluginInfoInitia return groups.iterator().next().value.scoreMode(); // we assume all the collectors should have the same nature } } + + default Collector getCollector(int limit, Sort sort) throws IOException { + Collector collector; + if (limit == 0) { + collector = new TotalHitCountCollector(); + } else if (sort == null) { + collector = TopScoreDocCollector.create(limit, Integer.MAX_VALUE); + } else { + collector = TopFieldCollector.create(sort, limit, Integer.MAX_VALUE); + } + return collector; + } } private Query getGroupQuery(String fname, diff --git a/solr/core/src/java/org/apache/solr/search/DocSlice.java b/solr/core/src/java/org/apache/solr/search/DocSlice.java index b12ef1a916d..ba8fb833167 100644 --- a/solr/core/src/java/org/apache/solr/search/DocSlice.java +++ b/solr/core/src/java/org/apache/solr/search/DocSlice.java @@ -56,7 +56,7 @@ public class DocSlice implements DocList, Accountable { this.scores=scores; this.matches=matches; this.maxScore=maxScore; - this.ramBytesUsed = BASE_RAM_BYTES_USED + ((long)docs.length << 2) + (scores == null ? 0 : ((long)scores.length<<2)+RamUsageEstimator.NUM_BYTES_ARRAY_HEADER); + this.ramBytesUsed = BASE_RAM_BYTES_USED + (docs == null ? 0 : ((long)docs.length << 2)) + (scores == null ? 0 : ((long)scores.length<<2)+RamUsageEstimator.NUM_BYTES_ARRAY_HEADER); } @Override diff --git a/solr/core/src/test/org/apache/solr/handler/component/DistributedExpandComponentTest.java b/solr/core/src/test/org/apache/solr/handler/component/DistributedExpandComponentTest.java index 4662a81980b..01e28e2c657 100644 --- a/solr/core/src/test/org/apache/solr/handler/component/DistributedExpandComponentTest.java +++ b/solr/core/src/test/org/apache/solr/handler/component/DistributedExpandComponentTest.java @@ -153,6 +153,42 @@ public class DistributedExpandComponentTest extends BaseDistributedSearchTestCas assertExpandGroupCountAndOrder("group3", 1, results, "9"); assertExpandGroupCountAndOrder("group4", 1, results, "14"); + //Test expand.rows = 0 - no docs only expand count + + params = new ModifiableSolrParams(); + params.add("q", "*:*"); + params.add("fq", "{!collapse field="+group+"}"); + params.add("defType", "edismax"); + params.add("bf", "field(test_i)"); + params.add("expand", "true"); + params.add("expand.rows", "0"); + params.add("fl", "id"); + setDistributedParams(params); + rsp = queryServer(params); + results = rsp.getExpandedResults(); + assertExpandGroups(results, "group1","group2", "group3", "group4"); + assertExpandGroupCountAndOrder("group1", 0, results); + assertExpandGroupCountAndOrder("group2", 0, results); + assertExpandGroupCountAndOrder("group3", 0, results); + assertExpandGroupCountAndOrder("group4", 0, results); + + //Test expand.rows = 0 with expand.field + + params = new ModifiableSolrParams(); + params.add("q", "*:*"); + params.add("fq", "test_l:10"); + params.add("defType", "edismax"); + params.add("expand", "true"); + params.add("expand.fq", "test_f:2000"); + params.add("expand.field", group); + params.add("expand.rows", "0"); + params.add("fl", "id,score"); + setDistributedParams(params); + rsp = queryServer(params); + results = rsp.getExpandedResults(); + assertExpandGroups(results, "group1", "group4"); + assertExpandGroupCountAndOrder("group1", 0, results); + assertExpandGroupCountAndOrder("group4", 0, results); //Test key-only fl diff --git a/solr/core/src/test/org/apache/solr/handler/component/TestExpandComponent.java b/solr/core/src/test/org/apache/solr/handler/component/TestExpandComponent.java index 7adc942b8e9..edc2de205ad 100644 --- a/solr/core/src/test/org/apache/solr/handler/component/TestExpandComponent.java +++ b/solr/core/src/test/org/apache/solr/handler/component/TestExpandComponent.java @@ -234,6 +234,60 @@ public class TestExpandComponent extends SolrTestCaseJ4 { "/response/lst[@name='expanded']/result[@name='2"+floatAppend+"']/doc[1]/str[@name='id'][.='8']" ); + //Test expand.rows = 0 - no docs only expand count + params = new ModifiableSolrParams(); + params.add("q", "*:*"); + params.add("fq", "{!collapse field="+group+hint+"}"); + params.add("defType", "edismax"); + params.add("bf", "field(test_i)"); + params.add("expand", "true"); + params.add("expand.rows", "0"); + assertQ(req(params), "*[count(/response/result/doc)=2]", + "*[count(/response/lst[@name='expanded']/result)=2]", + "*[count(/response/lst[@name='expanded']/result[@name='1"+floatAppend+"']/doc)=0]", + "*[count(/response/lst[@name='expanded']/result[@name='2"+floatAppend+"']/doc)=0]", + "/response/result/doc[1]/str[@name='id'][.='2']", + "/response/result/doc[2]/str[@name='id'][.='6']" + ); + + //Test expand.rows = 0 with expand.field + params = new ModifiableSolrParams(); + params.add("q", "*:*"); + params.add("fq", "type_s:parent"); + params.add("defType", "edismax"); + params.add("bf", "field(test_i)"); + params.add("expand", "true"); + params.add("expand.fq", "type_s:child"); + params.add("expand.field", group); + params.add("expand.rows", "0"); + assertQ(req(params, "fl", "id"), "*[count(/response/result/doc)=2]", + "*[count(/response/lst[@name='expanded']/result)=2]", + "*[count(/response/lst[@name='expanded']/result[@name='1"+floatAppend+"']/doc)=0]", + "*[count(/response/lst[@name='expanded']/result[@name='2"+floatAppend+"']/doc)=0]", + "/response/result/doc[1]/str[@name='id'][.='1']", + "/response/result/doc[2]/str[@name='id'][.='5']" + ); + + //Test score with expand.rows = 0 + params = new ModifiableSolrParams(); + params.add("q", "*:*"); + params.add("fq", "type_s:parent"); + params.add("defType", "edismax"); + params.add("bf", "field(test_i)"); + params.add("expand", "true"); + params.add("expand.fq", "*:*"); + params.add("expand.field", group); + params.add("expand.rows", "0"); + assertQ(req(params, "fl", "id,score"), "*[count(/response/result/doc)=2]", + "*[count(/response/lst[@name='expanded']/result)=2]", + "*[count(/response/lst[@name='expanded']/result[@name='1"+floatAppend+"']/doc)=0]", + "*[count(/response/lst[@name='expanded']/result[@name='2"+floatAppend+"']/doc)=0]", + "*[count(/response/lst[@name='expanded']/result[@maxScore])=0]", //maxScore should not be available + "/response/result/doc[1]/str[@name='id'][.='1']", + "/response/result/doc[2]/str[@name='id'][.='5']", + "count(//*[@name='score' and .='NaN'])=0" + + ); //Test no group results params = new ModifiableSolrParams(); diff --git a/solr/solr-ref-guide/src/collapse-and-expand-results.adoc b/solr/solr-ref-guide/src/collapse-and-expand-results.adoc index 598bf5b84b4..fec769aa22f 100644 --- a/solr/solr-ref-guide/src/collapse-and-expand-results.adoc +++ b/solr/solr-ref-guide/src/collapse-and-expand-results.adoc @@ -150,6 +150,11 @@ Orders the documents within the expanded groups. The default is `score desc`. `expand.rows`:: The number of rows to display in each group. The default is 5 rows. +[IMPORTANT] +==== +When `expand.rows=0`, then only the number of documents found for each expanded value is returned. Hence, scores won't be computed even if requested. `maxScore` is set to 0 +==== + `expand.q`:: Overrides the main query (`q`), determines which documents to include in the main group. The default is to use the main query.