diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index efacec49bfc..5d0af5c9720 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -227,6 +227,9 @@ Bug Fixes * SOLR-2039: Multivalued fields with dynamic names does not work properly with DIH. (K A, ruslan.shv, Cao Manh Dat via shalin) + +* SOLR-4164: group.limit=-1 was not supported for grouping in distributed mode. + (Cao Manh Dat, Lance Norskog, Webster Homer, hossman, yonik) Optimizations ---------------------- diff --git a/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java b/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java index bc806293b37..09fc74b8026 100644 --- a/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java +++ b/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java @@ -414,6 +414,9 @@ public class QueryComponent extends SearchComponent .setTruncateGroups(groupingSpec.isTruncateGroups() && groupingSpec.getFields().length > 0) .setSearcher(searcher); + int docsToCollect = Grouping.getMax(groupingSpec.getGroupOffset(), groupingSpec.getGroupLimit(), searcher.maxDoc()); + docsToCollect = Math.max(docsToCollect, 1); + for (String field : groupingSpec.getFields()) { SchemaField schemaField = schema.getField(field); String[] topGroupsParam = params.getParams(GroupParams.GROUP_DISTRIBUTED_TOPGROUPS_PREFIX + field); @@ -436,7 +439,7 @@ public class QueryComponent extends SearchComponent .setGroupSort(groupingSpec.getGroupSort()) .setSortWithinGroup(groupingSpec.getSortWithinGroup()) .setFirstPhaseGroups(topGroups) - .setMaxDocPerGroup(groupingSpec.getGroupOffset() + groupingSpec.getGroupLimit()) + .setMaxDocPerGroup(docsToCollect) .setNeedScores(needScores) .setNeedMaxScore(needScores) .build() @@ -445,7 +448,7 @@ public class QueryComponent extends SearchComponent for (String query : groupingSpec.getQueries()) { secondPhaseBuilder.addCommandField(new Builder() - .setDocsToCollect(groupingSpec.getOffset() + groupingSpec.getLimit()) + .setDocsToCollect(docsToCollect) .setSort(groupingSpec.getGroupSort()) .setQuery(query, rb.req) .setDocSet(searcher) diff --git a/solr/core/src/java/org/apache/solr/search/Grouping.java b/solr/core/src/java/org/apache/solr/search/Grouping.java index 80a6aebda85..8d6f3ca16e5 100644 --- a/solr/core/src/java/org/apache/solr/search/Grouping.java +++ b/solr/core/src/java/org/apache/solr/search/Grouping.java @@ -459,10 +459,10 @@ public class Grouping { * * @param offset The offset * @param len The number of documents to return - * @param max The number of document to return if len < 0 or if offset + len < 0 + * @param max The number of document to return if len < 0 or if offset + len > 0 * @return offset + len if len equals zero or higher. Otherwise returns max */ - int getMax(int offset, int len, int max) { + public static int getMax(int offset, int len, int max) { int v = len < 0 ? max : offset + len; if (v < 0 || v > max) v = max; return v; diff --git a/solr/core/src/java/org/apache/solr/search/grouping/distributed/responseprocessor/TopGroupsShardResponseProcessor.java b/solr/core/src/java/org/apache/solr/search/grouping/distributed/responseprocessor/TopGroupsShardResponseProcessor.java index d0a06c59d11..688a6c37011 100644 --- a/solr/core/src/java/org/apache/solr/search/grouping/distributed/responseprocessor/TopGroupsShardResponseProcessor.java +++ b/solr/core/src/java/org/apache/solr/search/grouping/distributed/responseprocessor/TopGroupsShardResponseProcessor.java @@ -162,7 +162,14 @@ public class TopGroupsShardResponseProcessor implements ShardResponseProcessor { } TopGroups[] topGroupsArr = new TopGroups[topGroups.size()]; - rb.mergedTopGroups.put(groupField, TopGroups.merge(topGroups.toArray(topGroupsArr), groupSort, sortWithinGroup, groupOffsetDefault, docsPerGroupDefault, TopGroups.ScoreMergeMode.None)); + int docsPerGroup = docsPerGroupDefault; + if (docsPerGroup < 0) { + docsPerGroup = 0; + for (TopGroups subTopGroups : topGroups) { + docsPerGroup += subTopGroups.totalGroupedHitCount; + } + } + rb.mergedTopGroups.put(groupField, TopGroups.merge(topGroups.toArray(topGroupsArr), groupSort, sortWithinGroup, groupOffsetDefault, docsPerGroup, TopGroups.ScoreMergeMode.None)); } for (String query : commandTopDocs.keySet()) { diff --git a/solr/core/src/test/org/apache/solr/TestDistributedGrouping.java b/solr/core/src/test/org/apache/solr/TestDistributedGrouping.java index af42ff419c8..ad62fccc15b 100644 --- a/solr/core/src/test/org/apache/solr/TestDistributedGrouping.java +++ b/solr/core/src/test/org/apache/solr/TestDistributedGrouping.java @@ -58,12 +58,12 @@ public class TestDistributedGrouping extends BaseDistributedSearchTestCase { handle.put("grouped", UNORDERED); // distrib grouping doesn't guarantee order of top level group commands // Test distributed grouping with empty indices - query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", 10, "sort", i1 + " asc, id asc"); - query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", 10, "sort", i1 + " asc, id asc", "hl","true","hl.fl",t1); - query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", 10, "sort", i1 + " asc, id asc", "facet", "true", "facet.field", t1); - query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", 10, "sort", i1 + " asc, id asc", "stats", "true", "stats.field", i1); - query("q", "kings", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", 10, "sort", i1 + " asc, id asc", "spellcheck", "true", "spellcheck.build", "true", "qt", "spellCheckCompRH"); - query("q", "*:*", "fq", s1 + ":a", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", 10, "sort", i1 + " asc, id asc", "group.truncate", "true", "facet", "true", "facet.field", t1); + query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", -1, "sort", i1 + " asc, id asc"); + query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", -1, "sort", i1 + " asc, id asc", "hl","true","hl.fl",t1); + query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", -1, "sort", i1 + " asc, id asc", "facet", "true", "facet.field", t1); + query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", -1, "sort", i1 + " asc, id asc", "stats", "true", "stats.field", i1); + query("q", "kings", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", -1, "sort", i1 + " asc, id asc", "spellcheck", "true", "spellcheck.build", "true", "qt", "spellCheckCompRH"); + query("q", "*:*", "fq", s1 + ":a", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", -1, "sort", i1 + " asc, id asc", "group.truncate", "true", "facet", "true", "facet.field", t1); indexr(id,1, i1, 100, tlong, 100, i1dv, 100, t1,"now is the time for all good men", tdate_a, "2010-04-20T11:00:00Z", @@ -154,23 +154,24 @@ public class TestDistributedGrouping extends BaseDistributedSearchTestCase { // test grouping // The second sort = id asc . The sorting behaviour is different in dist mode. See TopDocs#merge // The shard the result came from matters in the order if both document sortvalues are equal - query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", 10, "sort", i1 + " asc, id asc"); - query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", 10, "sort", "id asc, _docid_ asc"); - query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", 10, "sort", "{!func}add(" + i1 + ",5) asc, id asc"); - query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", 10, "sort", i1 + " asc, id asc", "facet", "true", "facet.field", t1); - query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", 10, "sort", i1 + " asc, id asc", "stats", "true", "stats.field", tlong); - query("q", "kings", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", 10, "sort", i1 + " asc, id asc", "spellcheck", "true", "spellcheck.build", "true", "qt", "spellCheckCompRH"); - query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", 10, "sort", i1 + " asc, id asc", "facet", "true", "hl","true","hl.fl",t1); - query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", 10, "sort", i1 + " asc, id asc", "group.sort", "id desc"); + query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", -1, "sort", i1 + " asc, id asc"); + query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", 0, "sort", i1 + " asc, id asc"); + query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", -1, "sort", "id asc, _docid_ asc"); + query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", -1, "sort", "{!func}add(" + i1 + ",5) asc, id asc"); + query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", -1, "sort", i1 + " asc, id asc", "facet", "true", "facet.field", t1); + query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", -1, "sort", i1 + " asc, id asc", "stats", "true", "stats.field", tlong); + query("q", "kings", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", -1, "sort", i1 + " asc, id asc", "spellcheck", "true", "spellcheck.build", "true", "qt", "spellCheckCompRH"); + query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", -1, "sort", i1 + " asc, id asc", "facet", "true", "hl","true","hl.fl",t1); + query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", -1, "sort", i1 + " asc, id asc", "group.sort", "id desc"); - query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.offset", 5, "group.limit", 5, "sort", i1 + " asc, id asc"); - query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "offset", 5, "rows", 5, "group.offset", 5, "group.limit", 5, "sort", i1 + " asc, id asc"); + query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.offset", 5, "group.limit", -1, "sort", i1 + " asc, id asc"); + query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "offset", 5, "rows", 5, "group.offset", 5, "group.limit", -1, "sort", i1 + " asc, id asc"); query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "offset", 5, "rows", 5, "sort", i1 + " asc, id asc", "group.format", "simple"); query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "offset", 5, "rows", 5, "sort", i1 + " asc, id asc", "group.main", "true"); query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.offset", 5, "group.limit", 5, "sort", i1 + " asc, id asc", "group.format", "simple", "offset", 5, "rows", 5); query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.offset", 5, "group.limit", 5, "sort", i1 + " asc, id asc", "group.main", "true", "offset", 5, "rows", 5); - query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.query", t1 + ":kings OR " + t1 + ":eggs", "group.limit", 10, "sort", i1 + " asc, id asc"); + query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.query", t1 + ":kings OR " + t1 + ":eggs", "group.limit", -1, "sort", i1 + " asc, id asc"); query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.query", t1 + ":kings OR " + t1 + ":eggs", "group.limit", 10, "sort", i1 + " asc, id asc"); query("q", "*:*", "fl", "id," + i1dv, "group", "true", "group.field", i1dv, "group.limit", 10, "sort", i1 + " asc, id asc"); @@ -180,7 +181,7 @@ public class TestDistributedGrouping extends BaseDistributedSearchTestCase { query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.query", t1 + ":kings OR " + t1 + ":eggs", "group.query", "id:5", // single doc, so only one shard will have it - "group.limit", 10, "sort", i1 + " asc, id asc"); + "group.limit", -1, "sort", i1 + " asc, id asc"); handle.put(t1 + ":this_will_never_match", SKIP); // :TODO: SOLR-4181 query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.query", t1 + ":kings OR " + t1 + ":eggs", @@ -220,8 +221,8 @@ public class TestDistributedGrouping extends BaseDistributedSearchTestCase { } // SOLR-3316 - query("q", "*:*", "fq", s1 + ":a", "rows", 0, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", 10, "sort", i1 + " asc, id asc", "facet", "true", "facet.field", t1); - query("q", "*:*", "fq", s1 + ":a", "rows", 0, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", 10, "sort", i1 + " asc, id asc", "group.truncate", "true", "facet", "true", "facet.field", t1); + query("q", "*:*", "fq", s1 + ":a", "rows", 0, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", -1, "sort", i1 + " asc, id asc", "facet", "true", "facet.field", t1); + query("q", "*:*", "fq", s1 + ":a", "rows", 0, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", -1, "sort", i1 + " asc, id asc", "group.truncate", "true", "facet", "true", "facet.field", t1); // SOLR-3436 query("q", "*:*", "fq", s1 + ":a", "fl", "id," + i1, "group", "true", "group.field", i1, "sort", i1 + " asc, id asc", "group.ngroups", "true"); @@ -241,7 +242,7 @@ public class TestDistributedGrouping extends BaseDistributedSearchTestCase { } ModifiableSolrParams params = new ModifiableSolrParams(); - Object[] q = {"q", "*:*", "fq", s1 + ":a", "rows", 1, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", 10, "group.ngroups", "true"}; + Object[] q = {"q", "*:*", "fq", s1 + ":a", "rows", 1, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", -1, "group.ngroups", "true"}; for (int i = 0; i < q.length; i += 2) { params.add(q[i].toString(), q[i + 1].toString()); @@ -263,25 +264,25 @@ public class TestDistributedGrouping extends BaseDistributedSearchTestCase { // We validate distributed grouping with scoring as first sort. // note: this 'q' matches all docs and returns the 'id' as the score, which is unique and so our results should be deterministic. handle.put("maxScore", SKIP);// TODO see SOLR-6612 - query("q", "{!func}id", "rows", 100, "fl", "score,id," + i1, "group", "true", "group.field", i1, "group.limit", 10, "sort", i1 + " desc", "group.sort", "score desc"); // SOLR-2955 - query("q", "{!func}id", "rows", 100, "fl", "score,id," + i1, "group", "true", "group.field", i1, "group.limit", 10, "sort", "score desc, _docid_ asc, id asc"); - query("q", "{!func}id", "rows", 100, "fl", "score,id," + i1, "group", "true", "group.field", i1, "group.limit", 10); + query("q", "{!func}id", "rows", 100, "fl", "score,id," + i1, "group", "true", "group.field", i1, "group.limit", -1, "sort", i1 + " desc", "group.sort", "score desc"); // SOLR-2955 + query("q", "{!func}id", "rows", 100, "fl", "score,id," + i1, "group", "true", "group.field", i1, "group.limit", -1, "sort", "score desc, _docid_ asc, id asc"); + query("q", "{!func}id", "rows", 100, "fl", "score,id," + i1, "group", "true", "group.field", i1, "group.limit", -1); // some explicit checks of non default sorting, and sort/group.sort with diff clauses query("q", "{!func}id", "rows", 100, "fl", tlong + ",id," + i1, "group", "true", - "group.field", i1, "group.limit", 10, + "group.field", i1, "group.limit", -1, "sort", tlong+" asc, id desc"); query("q", "{!func}id", "rows", 100, "fl", tlong + ",id," + i1, "group", "true", - "group.field", i1, "group.limit", 10, + "group.field", i1, "group.limit", -1, "sort", "id asc", "group.sort", tlong+" asc, id desc"); query("q", "{!func}id", "rows", 100, "fl", tlong + ",id," + i1, "group", "true", - "group.field", i1, "group.limit", 10, + "group.field", i1, "group.limit", -1, "sort", tlong+" asc, id desc", "group.sort", "id asc"); rsp = query("q", "{!func}id", "fq", oddField+":[* TO *]", "rows", 100, "fl", tlong + ",id," + i1, "group", "true", - "group.field", i1, "group.limit", 10, + "group.field", i1, "group.limit", -1, "sort", tlong+" asc", "group.sort", oddField+" asc"); nl = (NamedList) rsp.getResponse().get("grouped");