SOLR-14329: support choosing expand field from multiple collapse group

* The collapse group with the lowest cost is given the highest
  priority. If multiple groups have the same cost, the first
  such group is chosen.
This commit is contained in:
Munendra S N 2020-03-28 11:23:45 +05:30
parent 7a83f09fbc
commit 15330a8541
5 changed files with 86 additions and 4 deletions

View File

@ -69,6 +69,8 @@ Improvements
* SOLR-14342: Load cores in an order that makes collections available sooner and reduces leaderVoteWait timeouts in
large SolrCloud clusters. (David Smiley)
* SOLR-14329: Add support to choose collapse group to expand in ExpandComponent based on cost (Munendra S N)
Optimizations
---------------------
* SOLR-8306: Do not collect expand documents when expand.rows=0 (Marshall Sanders, Amelia Henderson)

View File

@ -31,6 +31,7 @@ import com.carrotsearch.hppc.cursors.IntObjectCursor;
import com.carrotsearch.hppc.cursors.LongCursor;
import com.carrotsearch.hppc.cursors.LongObjectCursor;
import com.carrotsearch.hppc.cursors.ObjectCursor;
import org.apache.commons.lang3.StringUtils;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
@ -92,6 +93,8 @@ import org.apache.solr.util.plugin.SolrCoreAware;
* The CollapsingPostFilter collapses a result set on a field.
* <p>
* The ExpandComponent expands the collapsed groups for a single page.
* When multiple collapse groups are specified, the field is chosen from the collapse group with the lowest cost.
* If the costs are equal, the field is chosen from the first collapse group.
* <p>
* http parameters:
* <p>
@ -100,7 +103,7 @@ import org.apache.solr.util.plugin.SolrCoreAware;
* expand.sort=field asc|desc<br>
* expand.q=*:* (optional, overrides the main query)<br>
* expand.fq=type:child (optional, overrides the main filter queries)<br>
* expand.field=field (mandatory if the not used with the CollapsingQParserPlugin)<br>
* expand.field=field (mandatory if not used with the CollapsingQParserPlugin; takes priority over the collapse field when both are present)<br>
*/
public class ExpandComponent extends SearchComponent implements PluginInfoInitialized, SolrCoreAware {
public static final String COMPONENT_NAME = "expand";
@ -143,15 +146,21 @@ public class ExpandComponent extends SearchComponent implements PluginInfoInitia
if (field == null) {
List<Query> filters = rb.getFilters();
if (filters != null) {
int cost = Integer.MAX_VALUE;
for (Query q : filters) {
if (q instanceof CollapsingQParserPlugin.CollapsingPostFilter) {
CollapsingQParserPlugin.CollapsingPostFilter cp = (CollapsingQParserPlugin.CollapsingPostFilter) q;
// if there are multiple collapse pick the low cost one
// if cost are equal then first one is picked
if (cp.getCost() < cost) {
cost = cp.getCost();
field = cp.getField();
hint = cp.hint;
}
}
}
}
}
if (field == null) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "missing expand field");
@ -189,7 +198,7 @@ public class ExpandComponent extends SearchComponent implements PluginInfoInitia
}
} else {
for (String fq : fqs) {
if (fq != null && fq.trim().length() != 0 && !fq.equals("*:*")) {
if (StringUtils.isNotBlank(fq) && !fq.equals("*:*")) {
QParser fqp = QParser.getParser(fq, req);
newFilters.add(fqp.getQuery());
}

View File

@ -90,6 +90,19 @@ public class DistributedExpandComponentTest extends BaseDistributedSearchTestCas
//Test page 2
query("q", "*:*", "start","1", "rows", "1", "fq", "{!collapse field="+group+"}", "defType", "edismax", "bf", "field(test_i)", "expand", "true", "fl","*,score");
// multiple collapse and equal cost
ModifiableSolrParams baseParams = params("q", "*:*", "defType", "edismax", "expand", "true", "fl", "*,score",
"bf", "field(test_i)", "expand.sort", "id asc");
baseParams.set("fq", "{!collapse field="+group+"}", "{!collapse field=test_i}");
query(baseParams);
// multiple collapse and unequal cost case1
baseParams.set("fq", "{!collapse cost=1000 field="+group+"}", "{!collapse cost=2000 field=test_i}");
query(baseParams);
// multiple collapse and unequal cost case2
baseParams.set("fq", "{!collapse cost=1000 field="+group+"}", "{!collapse cost=200 field=test_i}");
query(baseParams);
ignoreException("missing expand field");
SolrException e = expectThrows(SolrException.class, () -> query("q", "*:*", "expand", "true"));

View File

@ -419,6 +419,48 @@ public class TestExpandComponent extends SolrTestCaseJ4 {
"/response/lst[@name='expanded']/result[@name='1"+floatAppend+"']/doc[1]/str[@name='id'][.='1']",
"/response/lst[@name='expanded']/result[@name='2"+floatAppend+"']/doc[1]/str[@name='id'][.='5']"
);
// With multiple collapse
// with different cost
params = params("q", "*:*", "defType", "edismax", "expand", "true", "bf", "field(test_i)", "expand.sort", "id asc");
params.set("fq", "{!collapse cost=1000 field="+group+"}", "{!collapse cost=2000 field=test_f}");
assertQ(req(params),
"*[count(/response/result/doc)=1]",
"/response/result/doc[1]/str[@name='id'][.='2']",
"/response/lst[@name='expanded']/result[@name='1"+floatAppend+"']/doc[1]/str[@name='id'][.='1']"
);
// with same cost (default cost)
params.set("fq", "{!collapse field="+group+"}", "{!collapse field=test_f}");
assertQ(req(params),
"*[count(/response/result/doc)=1]",
"/response/result/doc[1]/str[@name='id'][.='2']",
"/response/lst[@name='expanded']/result[@name='1"+floatAppend+"']/doc[1]/str[@name='id'][.='1']"
);
// with different cost but choose the test_f
params.set("fq", "{!collapse cost=3000 field="+group+"}", "{!collapse cost=2000 field=test_f}");
assertQ(req(params),
"*[count(/response/result/doc)=1]",
"/response/result/doc[1]/str[@name='id'][.='2']",
"/response/lst[@name='expanded']/result[@name='200.0']/doc[1]/str[@name='id'][.='3']",
"/response/lst[@name='expanded']/result[@name='200.0']/doc[2]/str[@name='id'][.='6']",
"/response/lst[@name='expanded']/result[@name='200.0']/doc[3]/str[@name='id'][.='8']"
);
// with different cost and nullPolicy
params.set("bf", "ord(id)");
params.set("fq", "{!collapse cost=1000 field="+group+" nullPolicy=collapse}", "{!collapse cost=2000 field=test_f}");
assertQ(req(params),
"*[count(/response/result/doc)=2]",
"/response/result/doc[1]/str[@name='id'][.='8']",
"/response/result/doc[2]/str[@name='id'][.='7']",
"/response/lst[@name='expanded']/result[@name='2"+floatAppend+"']/doc[1]/str[@name='id'][.='5']",
"/response/lst[@name='expanded']/result[@name='2"+floatAppend+"']/doc[2]/str[@name='id'][.='6']",
"/response/lst[@name='expanded']/result[@name='1"+floatAppend+"']/doc[1]/str[@name='id'][.='1']",
"/response/lst[@name='expanded']/result[@name='1"+floatAppend+"']/doc[2]/str[@name='id'][.='2']"
);
}
@Test

View File

@ -116,6 +116,12 @@ Collapse on `group_field` with a hint to use the top level field cache:
fq={!collapse field=group_field hint=top_fc}
----
Collapse with a custom `cost` (the default is `100`):
[source,text]
----
fq={!collapse cost=1000 field=group_field}
----
The CollapsingQParserPlugin fully supports the QueryElevationComponent.
== Expand Component
@ -138,12 +144,22 @@ The ExpandComponent can now be used to expand the results so you can see the doc
q=foo&fq={!collapse field=ISBN}&expand=true
----
[IMPORTANT]
====
When the ExpandComponent is used with the CollapsingQParserPlugin and there are multiple collapse groups, the expand field is taken from the group with the lowest cost.
If multiple collapse groups have the same cost, the first one specified is chosen.
====
The “expand=true” parameter turns on the ExpandComponent. The ExpandComponent adds a new section to the search output labeled “expanded”.
Inside the expanded section there is a _map_ with each group head pointing to the expanded documents that are within the group. As applications iterate the main collapsed result set, they can access the _expanded_ map to retrieve the expanded groups.
The ExpandComponent has the following parameters:
`expand.field`::
The field on which the expanded documents are populated. When `expand=true`, either this parameter must be specified or the request must also use the CollapsingQParserPlugin.
When both are specified, this parameter takes priority.
`expand.sort`::
Orders the documents within the expanded groups. The default is `score desc`.