SOLR-4164: fix group.limit=-1 in distributed mode

This commit is contained in:
yonik 2016-10-26 11:56:45 -04:00
parent 24446f5085
commit 7794fbd13f
5 changed files with 47 additions and 33 deletions

View File

@ -227,6 +227,9 @@ Bug Fixes
* SOLR-2039: Multivalued fields with dynamic names does not work properly with DIH.
(K A, ruslan.shv, Cao Manh Dat via shalin)
* SOLR-4164: group.limit=-1 was not supported for grouping in distributed mode.
(Cao Manh Dat, Lance Norskog, Webster Homer, hossman, yonik)
Optimizations
----------------------

View File

@ -414,6 +414,9 @@ public class QueryComponent extends SearchComponent
.setTruncateGroups(groupingSpec.isTruncateGroups() && groupingSpec.getFields().length > 0)
.setSearcher(searcher);
int docsToCollect = Grouping.getMax(groupingSpec.getGroupOffset(), groupingSpec.getGroupLimit(), searcher.maxDoc());
docsToCollect = Math.max(docsToCollect, 1);
for (String field : groupingSpec.getFields()) {
SchemaField schemaField = schema.getField(field);
String[] topGroupsParam = params.getParams(GroupParams.GROUP_DISTRIBUTED_TOPGROUPS_PREFIX + field);
@ -436,7 +439,7 @@ public class QueryComponent extends SearchComponent
.setGroupSort(groupingSpec.getGroupSort())
.setSortWithinGroup(groupingSpec.getSortWithinGroup())
.setFirstPhaseGroups(topGroups)
.setMaxDocPerGroup(groupingSpec.getGroupOffset() + groupingSpec.getGroupLimit())
.setMaxDocPerGroup(docsToCollect)
.setNeedScores(needScores)
.setNeedMaxScore(needScores)
.build()
@ -445,7 +448,7 @@ public class QueryComponent extends SearchComponent
for (String query : groupingSpec.getQueries()) {
secondPhaseBuilder.addCommandField(new Builder()
.setDocsToCollect(groupingSpec.getOffset() + groupingSpec.getLimit())
.setDocsToCollect(docsToCollect)
.setSort(groupingSpec.getGroupSort())
.setQuery(query, rb.req)
.setDocSet(searcher)

View File

@ -459,10 +459,10 @@ public class Grouping {
*
* @param offset The offset
* @param len The number of documents to return
* @param max The number of document to return if len < 0 or if offset + len < 0
* @param max The number of document to return if len &lt; 0 or if offset + len &gt; 0
* @return offset + len if len equals zero or higher. Otherwise returns max
*/
int getMax(int offset, int len, int max) {
public static int getMax(int offset, int len, int max) {
int v = len < 0 ? max : offset + len;
if (v < 0 || v > max) v = max;
return v;

View File

@ -162,7 +162,14 @@ public class TopGroupsShardResponseProcessor implements ShardResponseProcessor {
}
TopGroups<BytesRef>[] topGroupsArr = new TopGroups[topGroups.size()];
rb.mergedTopGroups.put(groupField, TopGroups.merge(topGroups.toArray(topGroupsArr), groupSort, sortWithinGroup, groupOffsetDefault, docsPerGroupDefault, TopGroups.ScoreMergeMode.None));
int docsPerGroup = docsPerGroupDefault;
if (docsPerGroup < 0) {
docsPerGroup = 0;
for (TopGroups subTopGroups : topGroups) {
docsPerGroup += subTopGroups.totalGroupedHitCount;
}
}
rb.mergedTopGroups.put(groupField, TopGroups.merge(topGroups.toArray(topGroupsArr), groupSort, sortWithinGroup, groupOffsetDefault, docsPerGroup, TopGroups.ScoreMergeMode.None));
}
for (String query : commandTopDocs.keySet()) {

View File

@ -58,12 +58,12 @@ public class TestDistributedGrouping extends BaseDistributedSearchTestCase {
handle.put("grouped", UNORDERED); // distrib grouping doesn't guarantee order of top level group commands
// Test distributed grouping with empty indices
query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", 10, "sort", i1 + " asc, id asc");
query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", 10, "sort", i1 + " asc, id asc", "hl","true","hl.fl",t1);
query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", 10, "sort", i1 + " asc, id asc", "facet", "true", "facet.field", t1);
query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", 10, "sort", i1 + " asc, id asc", "stats", "true", "stats.field", i1);
query("q", "kings", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", 10, "sort", i1 + " asc, id asc", "spellcheck", "true", "spellcheck.build", "true", "qt", "spellCheckCompRH");
query("q", "*:*", "fq", s1 + ":a", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", 10, "sort", i1 + " asc, id asc", "group.truncate", "true", "facet", "true", "facet.field", t1);
query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", -1, "sort", i1 + " asc, id asc");
query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", -1, "sort", i1 + " asc, id asc", "hl","true","hl.fl",t1);
query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", -1, "sort", i1 + " asc, id asc", "facet", "true", "facet.field", t1);
query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", -1, "sort", i1 + " asc, id asc", "stats", "true", "stats.field", i1);
query("q", "kings", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", -1, "sort", i1 + " asc, id asc", "spellcheck", "true", "spellcheck.build", "true", "qt", "spellCheckCompRH");
query("q", "*:*", "fq", s1 + ":a", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", -1, "sort", i1 + " asc, id asc", "group.truncate", "true", "facet", "true", "facet.field", t1);
indexr(id,1, i1, 100, tlong, 100, i1dv, 100, t1,"now is the time for all good men",
tdate_a, "2010-04-20T11:00:00Z",
@ -154,23 +154,24 @@ public class TestDistributedGrouping extends BaseDistributedSearchTestCase {
// test grouping
// The second sort = id asc . The sorting behaviour is different in dist mode. See TopDocs#merge
// The shard the result came from matters in the order if both document sortvalues are equal
query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", 10, "sort", i1 + " asc, id asc");
query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", 10, "sort", "id asc, _docid_ asc");
query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", 10, "sort", "{!func}add(" + i1 + ",5) asc, id asc");
query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", 10, "sort", i1 + " asc, id asc", "facet", "true", "facet.field", t1);
query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", 10, "sort", i1 + " asc, id asc", "stats", "true", "stats.field", tlong);
query("q", "kings", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", 10, "sort", i1 + " asc, id asc", "spellcheck", "true", "spellcheck.build", "true", "qt", "spellCheckCompRH");
query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", 10, "sort", i1 + " asc, id asc", "facet", "true", "hl","true","hl.fl",t1);
query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", 10, "sort", i1 + " asc, id asc", "group.sort", "id desc");
query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", -1, "sort", i1 + " asc, id asc");
query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", 0, "sort", i1 + " asc, id asc");
query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", -1, "sort", "id asc, _docid_ asc");
query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", -1, "sort", "{!func}add(" + i1 + ",5) asc, id asc");
query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", -1, "sort", i1 + " asc, id asc", "facet", "true", "facet.field", t1);
query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", -1, "sort", i1 + " asc, id asc", "stats", "true", "stats.field", tlong);
query("q", "kings", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", -1, "sort", i1 + " asc, id asc", "spellcheck", "true", "spellcheck.build", "true", "qt", "spellCheckCompRH");
query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", -1, "sort", i1 + " asc, id asc", "facet", "true", "hl","true","hl.fl",t1);
query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", -1, "sort", i1 + " asc, id asc", "group.sort", "id desc");
query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.offset", 5, "group.limit", 5, "sort", i1 + " asc, id asc");
query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "offset", 5, "rows", 5, "group.offset", 5, "group.limit", 5, "sort", i1 + " asc, id asc");
query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.offset", 5, "group.limit", -1, "sort", i1 + " asc, id asc");
query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "offset", 5, "rows", 5, "group.offset", 5, "group.limit", -1, "sort", i1 + " asc, id asc");
query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "offset", 5, "rows", 5, "sort", i1 + " asc, id asc", "group.format", "simple");
query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "offset", 5, "rows", 5, "sort", i1 + " asc, id asc", "group.main", "true");
query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.offset", 5, "group.limit", 5, "sort", i1 + " asc, id asc", "group.format", "simple", "offset", 5, "rows", 5);
query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.offset", 5, "group.limit", 5, "sort", i1 + " asc, id asc", "group.main", "true", "offset", 5, "rows", 5);
query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.query", t1 + ":kings OR " + t1 + ":eggs", "group.limit", 10, "sort", i1 + " asc, id asc");
query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.query", t1 + ":kings OR " + t1 + ":eggs", "group.limit", -1, "sort", i1 + " asc, id asc");
query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.query", t1 + ":kings OR " + t1 + ":eggs", "group.limit", 10, "sort", i1 + " asc, id asc");
query("q", "*:*", "fl", "id," + i1dv, "group", "true", "group.field", i1dv, "group.limit", 10, "sort", i1 + " asc, id asc");
@ -180,7 +181,7 @@ public class TestDistributedGrouping extends BaseDistributedSearchTestCase {
query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true",
"group.query", t1 + ":kings OR " + t1 + ":eggs",
"group.query", "id:5", // single doc, so only one shard will have it
"group.limit", 10, "sort", i1 + " asc, id asc");
"group.limit", -1, "sort", i1 + " asc, id asc");
handle.put(t1 + ":this_will_never_match", SKIP); // :TODO: SOLR-4181
query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true",
"group.query", t1 + ":kings OR " + t1 + ":eggs",
@ -220,8 +221,8 @@ public class TestDistributedGrouping extends BaseDistributedSearchTestCase {
}
// SOLR-3316
query("q", "*:*", "fq", s1 + ":a", "rows", 0, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", 10, "sort", i1 + " asc, id asc", "facet", "true", "facet.field", t1);
query("q", "*:*", "fq", s1 + ":a", "rows", 0, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", 10, "sort", i1 + " asc, id asc", "group.truncate", "true", "facet", "true", "facet.field", t1);
query("q", "*:*", "fq", s1 + ":a", "rows", 0, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", -1, "sort", i1 + " asc, id asc", "facet", "true", "facet.field", t1);
query("q", "*:*", "fq", s1 + ":a", "rows", 0, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", -1, "sort", i1 + " asc, id asc", "group.truncate", "true", "facet", "true", "facet.field", t1);
// SOLR-3436
query("q", "*:*", "fq", s1 + ":a", "fl", "id," + i1, "group", "true", "group.field", i1, "sort", i1 + " asc, id asc", "group.ngroups", "true");
@ -241,7 +242,7 @@ public class TestDistributedGrouping extends BaseDistributedSearchTestCase {
}
ModifiableSolrParams params = new ModifiableSolrParams();
Object[] q = {"q", "*:*", "fq", s1 + ":a", "rows", 1, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", 10, "group.ngroups", "true"};
Object[] q = {"q", "*:*", "fq", s1 + ":a", "rows", 1, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", -1, "group.ngroups", "true"};
for (int i = 0; i < q.length; i += 2) {
params.add(q[i].toString(), q[i + 1].toString());
@ -263,25 +264,25 @@ public class TestDistributedGrouping extends BaseDistributedSearchTestCase {
// We validate distributed grouping with scoring as first sort.
// note: this 'q' matches all docs and returns the 'id' as the score, which is unique and so our results should be deterministic.
handle.put("maxScore", SKIP);// TODO see SOLR-6612
query("q", "{!func}id", "rows", 100, "fl", "score,id," + i1, "group", "true", "group.field", i1, "group.limit", 10, "sort", i1 + " desc", "group.sort", "score desc"); // SOLR-2955
query("q", "{!func}id", "rows", 100, "fl", "score,id," + i1, "group", "true", "group.field", i1, "group.limit", 10, "sort", "score desc, _docid_ asc, id asc");
query("q", "{!func}id", "rows", 100, "fl", "score,id," + i1, "group", "true", "group.field", i1, "group.limit", 10);
query("q", "{!func}id", "rows", 100, "fl", "score,id," + i1, "group", "true", "group.field", i1, "group.limit", -1, "sort", i1 + " desc", "group.sort", "score desc"); // SOLR-2955
query("q", "{!func}id", "rows", 100, "fl", "score,id," + i1, "group", "true", "group.field", i1, "group.limit", -1, "sort", "score desc, _docid_ asc, id asc");
query("q", "{!func}id", "rows", 100, "fl", "score,id," + i1, "group", "true", "group.field", i1, "group.limit", -1);
// some explicit checks of non default sorting, and sort/group.sort with diff clauses
query("q", "{!func}id", "rows", 100, "fl", tlong + ",id," + i1, "group", "true",
"group.field", i1, "group.limit", 10,
"group.field", i1, "group.limit", -1,
"sort", tlong+" asc, id desc");
query("q", "{!func}id", "rows", 100, "fl", tlong + ",id," + i1, "group", "true",
"group.field", i1, "group.limit", 10,
"group.field", i1, "group.limit", -1,
"sort", "id asc",
"group.sort", tlong+" asc, id desc");
query("q", "{!func}id", "rows", 100, "fl", tlong + ",id," + i1, "group", "true",
"group.field", i1, "group.limit", 10,
"group.field", i1, "group.limit", -1,
"sort", tlong+" asc, id desc",
"group.sort", "id asc");
rsp = query("q", "{!func}id", "fq", oddField+":[* TO *]",
"rows", 100, "fl", tlong + ",id," + i1, "group", "true",
"group.field", i1, "group.limit", 10,
"group.field", i1, "group.limit", -1,
"sort", tlong+" asc",
"group.sort", oddField+" asc");
nl = (NamedList<?>) rsp.getResponse().get("grouped");