diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 33c0e8d9b99..6277d826e70 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -339,7 +339,7 @@ New Features
can be specified with a name in solrconfig.xml, and use hl.boundaryScanner=name
parameter to specify the named . (koji)
-* SOLR-2066: Added support for distributed grouping.
+* SOLR-2066,SOLR-2776: Added support for distributed grouping.
(Martijn van Groningen, Jasper van Veghel, Matt Beaumont)
Bug Fixes
diff --git a/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java b/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java
index b175ed1e8cf..9861860553c 100644
--- a/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java
+++ b/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java
@@ -190,6 +190,7 @@ public class QueryComponent extends SearchComponent
groupingSpec.setIncludeGroupCount(params.getBool(GroupParams.GROUP_TOTAL_COUNT, false));
groupingSpec.setMain(params.getBool(GroupParams.GROUP_MAIN, false));
groupingSpec.setNeedScore((cmd.getFlags() & SolrIndexSearcher.GET_SCORES) != 0);
+ groupingSpec.setTruncateGroups(params.getBool(GroupParams.GROUP_TRUNCATE, false));
}
@@ -402,6 +403,7 @@ public class QueryComponent extends SearchComponent
} else if (params.getBool("group.distibuted.second", false)) {
CommandHandler.Builder secondPhaseBuilder = new CommandHandler.Builder()
.setQueryCommand(cmd)
+ .setTruncateGroups(groupingSpec.isTruncateGroups() && groupingSpec.getFields().length > 0)
.setSearcher(searcher);
for (String field : groupingSpec.getFields()) {
@@ -453,7 +455,6 @@ public class QueryComponent extends SearchComponent
int maxDocsPercentageToCache = params.getInt(GroupParams.GROUP_CACHE_PERCENTAGE, 0);
boolean cacheSecondPassSearch = maxDocsPercentageToCache >= 1 && maxDocsPercentageToCache <= 100;
- boolean truncateGroups = params.getBool(GroupParams.GROUP_TRUNCATE, false);
Grouping.TotalCount defaultTotalCount = groupingSpec.isIncludeGroupCount() ?
Grouping.TotalCount.grouped : Grouping.TotalCount.ungrouped;
int limitDefault = cmd.getLen(); // this is normally from "rows"
@@ -466,7 +467,7 @@ public class QueryComponent extends SearchComponent
.setDefaultTotalCount(defaultTotalCount)
.setDocsPerGroupDefault(groupingSpec.getGroupLimit())
.setGroupOffsetDefault(groupingSpec.getGroupOffset())
- .setGetGroupedDocSet(truncateGroups);
+ .setGetGroupedDocSet(groupingSpec.isTruncateGroups());
if (groupingSpec.getFields() != null) {
for (String field : groupingSpec.getFields()) {
diff --git a/solr/core/src/java/org/apache/solr/search/grouping/Command.java b/solr/core/src/java/org/apache/solr/search/grouping/Command.java
index bafb2ebe2fb..9b361fa5301 100644
--- a/solr/core/src/java/org/apache/solr/search/grouping/Command.java
+++ b/solr/core/src/java/org/apache/solr/search/grouping/Command.java
@@ -25,18 +25,44 @@ import java.io.IOException;
import java.util.List;
/**
+ * Defines a grouping command.
+ * This is an abstraction on how the {@link Collector} instances are created
+ * and how the results are retrieved from the {@link Collector} instances.
*
+ * @lucene.experimental
*/
public interface Command {
+ /**
+ * Returns a list of {@link Collector} instances to be
+ * included in the search based on the .
+ *
+ * @return a list of {@link Collector} instances
+ * @throws IOException If I/O related errors occur
+ */
List create() throws IOException;
+ /**
+ * Returns the results that the collectors created
+ * by {@link #create()} contain after a search has been executed.
+ *
+ * @return The results of the collectors
+ */
T result();
+ /**
+ * @return The key of this command to uniquely identify itself
+ */
String getKey();
+ /**
+ * @return The group sort (overall sort)
+ */
Sort getGroupSort();
+ /**
+ * @return The sort inside a group
+ */
Sort getSortWithinGroup();
}
diff --git a/solr/core/src/java/org/apache/solr/search/grouping/CommandHandler.java b/solr/core/src/java/org/apache/solr/search/grouping/CommandHandler.java
index 98e53e56fe4..337e252c1ea 100644
--- a/solr/core/src/java/org/apache/solr/search/grouping/CommandHandler.java
+++ b/solr/core/src/java/org/apache/solr/search/grouping/CommandHandler.java
@@ -21,6 +21,10 @@ import org.apache.lucene.search.Collector;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.MultiCollector;
import org.apache.lucene.search.Query;
+import org.apache.lucene.search.grouping.AbstractAllGroupHeadsCollector;
+import org.apache.lucene.search.grouping.TermAllGroupHeadsCollector;
+import org.apache.lucene.util.FixedBitSet;
+import org.apache.lucene.util.OpenBitSet;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.search.*;
import org.apache.solr.search.grouping.distributed.shardresultserializer.ShardResultTransformer;
@@ -30,7 +34,10 @@ import java.util.ArrayList;
import java.util.List;
/**
+ * Responsible for executing a search with a number of {@link Command} instances.
+ * A typical search can have more then one {@link Command} instances.
*
+ * @lucene.experimental
*/
public class CommandHandler {
@@ -40,6 +47,7 @@ public class CommandHandler {
private List commands = new ArrayList();
private SolrIndexSearcher searcher;
private boolean needDocSet = false;
+ private boolean truncateGroups = false;
public Builder setQueryCommand(SolrIndexSearcher.QueryCommand queryCommand) {
this.queryCommand = queryCommand;
@@ -69,12 +77,17 @@ public class CommandHandler {
return this;
}
+ public Builder setTruncateGroups(boolean truncateGroups) {
+ this.truncateGroups = truncateGroups;
+ return this;
+ }
+
public CommandHandler build() {
if (queryCommand == null || searcher == null) {
throw new IllegalStateException("All fields must be set");
}
- return new CommandHandler(queryCommand, commands, searcher, needDocSet);
+ return new CommandHandler(queryCommand, commands, searcher, needDocSet, truncateGroups);
}
}
@@ -83,17 +96,19 @@ public class CommandHandler {
private final List commands;
private final SolrIndexSearcher searcher;
private final boolean needDocset;
+ private final boolean truncateGroups;
private DocSet docSet;
private CommandHandler(SolrIndexSearcher.QueryCommand queryCommand,
List commands,
SolrIndexSearcher searcher,
- boolean needDocset) {
+ boolean needDocset, boolean truncateGroups) {
this.queryCommand = queryCommand;
this.commands = commands;
this.searcher = searcher;
this.needDocset = needDocset;
+ this.truncateGroups = truncateGroups;
}
@SuppressWarnings("unchecked")
@@ -109,31 +124,48 @@ public class CommandHandler {
);
Filter luceneFilter = pf.filter;
Query query = QueryUtils.makeQueryable(queryCommand.getQuery());
- Collector wrappedCollectors;
- if (collectors.isEmpty()) {
- wrappedCollectors = null;
+
+ if (truncateGroups && nrOfCommands > 0) {
+ docSet = computeGroupedDocSet(query, luceneFilter, collectors);
+ } else if (needDocset) {
+ docSet = computeDocSet(query, luceneFilter, collectors);
} else {
- wrappedCollectors = MultiCollector.wrap(collectors.toArray(new Collector[nrOfCommands]));
+ searcher.search(query, luceneFilter, MultiCollector.wrap(collectors.toArray(new Collector[nrOfCommands])));
+ }
+ }
+
+ private DocSet computeGroupedDocSet(Query query, Filter luceneFilter, List collectors) throws IOException {
+ Command firstCommand = commands.get(0);
+ AbstractAllGroupHeadsCollector termAllGroupHeadsCollector =
+ TermAllGroupHeadsCollector.create(firstCommand.getKey(), firstCommand.getSortWithinGroup());
+ if (collectors.isEmpty()) {
+ searcher.search(query, luceneFilter, termAllGroupHeadsCollector);
+ } else {
+ collectors.add(termAllGroupHeadsCollector);
+ searcher.search(query, luceneFilter, MultiCollector.wrap(collectors.toArray(new Collector[collectors.size()])));
}
- if (wrappedCollectors == null && needDocset) {
- int maxDoc = searcher.maxDoc();
- DocSetCollector docSetCollector = new DocSetCollector(maxDoc >> 6, maxDoc);
- searcher.search(query, luceneFilter, docSetCollector);
- docSet = docSetCollector.getDocSet();
- } else if (needDocset) {
- int maxDoc = searcher.maxDoc();
- DocSetCollector docSetCollector = new DocSetDelegateCollector(maxDoc >> 6, maxDoc, wrappedCollectors);
- searcher.search(query, luceneFilter, docSetCollector);
- docSet = docSetCollector.getDocSet();
+ int maxDoc = searcher.maxDoc();
+ long[] bits = termAllGroupHeadsCollector.retrieveGroupHeads(maxDoc).getBits();
+ return new BitDocSet(new OpenBitSet(bits, bits.length));
+ }
+
+ private DocSet computeDocSet(Query query, Filter luceneFilter, List collectors) throws IOException {
+ int maxDoc = searcher.maxDoc();
+ DocSetCollector docSetCollector;
+ if (collectors.isEmpty()) {
+ docSetCollector = new DocSetCollector(maxDoc >> 6, maxDoc);
} else {
- searcher.search(query, luceneFilter, wrappedCollectors);
+ Collector wrappedCollectors = MultiCollector.wrap(collectors.toArray(new Collector[collectors.size()]));
+ docSetCollector = new DocSetDelegateCollector(maxDoc >> 6, maxDoc, wrappedCollectors);
}
+ searcher.search(query, luceneFilter, docSetCollector);
+ return docSetCollector.getDocSet();
}
@SuppressWarnings("unchecked")
public NamedList processResult(SolrIndexSearcher.QueryResult queryResult, ShardResultTransformer transformer) throws IOException {
- if (needDocset) {
+ if (docSet != null) {
queryResult.setDocSet(docSet);
}
return transformer.transform(commands);
diff --git a/solr/core/src/java/org/apache/solr/search/grouping/GroupingSpecification.java b/solr/core/src/java/org/apache/solr/search/grouping/GroupingSpecification.java
index cf4ae0274e7..21b6713c040 100644
--- a/solr/core/src/java/org/apache/solr/search/grouping/GroupingSpecification.java
+++ b/solr/core/src/java/org/apache/solr/search/grouping/GroupingSpecification.java
@@ -22,6 +22,8 @@ import org.apache.solr.search.Grouping;
/**
* Encapsulates the grouping options like fields group sort and more specified by clients.
+ *
+ * @lucene.experimental
*/
public class GroupingSpecification {
@@ -38,6 +40,7 @@ public class GroupingSpecification {
private boolean main;
private Grouping.Format responseFormat;
private boolean needScore;
+ private boolean truncateGroups;
public String[] getFields() {
return fields;
@@ -155,4 +158,11 @@ public class GroupingSpecification {
this.needScore = needScore;
}
+ public boolean isTruncateGroups() {
+ return truncateGroups;
+ }
+
+ public void setTruncateGroups(boolean truncateGroups) {
+ this.truncateGroups = truncateGroups;
+ }
}
diff --git a/solr/core/src/java/org/apache/solr/search/grouping/collector/FilterCollector.java b/solr/core/src/java/org/apache/solr/search/grouping/collector/FilterCollector.java
index 4ebcf201ed4..9e157cefe53 100644
--- a/solr/core/src/java/org/apache/solr/search/grouping/collector/FilterCollector.java
+++ b/solr/core/src/java/org/apache/solr/search/grouping/collector/FilterCollector.java
@@ -25,7 +25,9 @@ import org.apache.solr.search.DocSet;
import java.io.IOException;
/**
- * A collector that filters incoming doc ids that are not in the filter
+ * A collector that filters incoming doc ids that are not in the filter.
+ *
+ * @lucene.experimental
*/
public class FilterCollector extends Collector {
diff --git a/solr/core/src/java/org/apache/solr/search/grouping/distributed/ShardRequestFactory.java b/solr/core/src/java/org/apache/solr/search/grouping/distributed/ShardRequestFactory.java
index 92726f55120..88cdd8ee931 100644
--- a/solr/core/src/java/org/apache/solr/search/grouping/distributed/ShardRequestFactory.java
+++ b/solr/core/src/java/org/apache/solr/search/grouping/distributed/ShardRequestFactory.java
@@ -22,6 +22,8 @@ import org.apache.solr.handler.component.ShardRequest;
/**
* Responsible for creating shard requests to the shards in the cluster to perform distributed grouping.
+ *
+ * @lucene.experimental
*/
public interface ShardRequestFactory {
diff --git a/solr/core/src/java/org/apache/solr/search/grouping/distributed/ShardResponseProcessor.java b/solr/core/src/java/org/apache/solr/search/grouping/distributed/ShardResponseProcessor.java
index fe6fda3f487..729685a7770 100644
--- a/solr/core/src/java/org/apache/solr/search/grouping/distributed/ShardResponseProcessor.java
+++ b/solr/core/src/java/org/apache/solr/search/grouping/distributed/ShardResponseProcessor.java
@@ -22,6 +22,8 @@ import org.apache.solr.handler.component.ShardRequest;
/**
* Responsible for processing shard responses.
+ *
+ * @lucene.experimental
*/
public interface ShardResponseProcessor {
diff --git a/solr/core/src/java/org/apache/solr/search/grouping/distributed/command/QueryCommand.java b/solr/core/src/java/org/apache/solr/search/grouping/distributed/command/QueryCommand.java
index d6e1517b835..363b8b87ed4 100644
--- a/solr/core/src/java/org/apache/solr/search/grouping/distributed/command/QueryCommand.java
+++ b/solr/core/src/java/org/apache/solr/search/grouping/distributed/command/QueryCommand.java
@@ -79,7 +79,7 @@ public class QueryCommand implements Command {
*
* @param searcher The searcher executing the
* @return this
- * @throws IOException
+ * @throws IOException If I/O related errors occur.
*/
public Builder setDocSet(SolrIndexSearcher searcher) throws IOException {
return setDocSet(searcher.getDocSet(query));
diff --git a/solr/core/src/java/org/apache/solr/search/grouping/distributed/shardresultserializer/ShardResultTransformer.java b/solr/core/src/java/org/apache/solr/search/grouping/distributed/shardresultserializer/ShardResultTransformer.java
index c0902f78a8e..eb4d5c4c9b5 100644
--- a/solr/core/src/java/org/apache/solr/search/grouping/distributed/shardresultserializer/ShardResultTransformer.java
+++ b/solr/core/src/java/org/apache/solr/search/grouping/distributed/shardresultserializer/ShardResultTransformer.java
@@ -26,6 +26,8 @@ import java.io.IOException;
* A ShardResultTransformer
is responsible for transforming a grouped shard result into group related
* structures (such as {@link org.apache.lucene.search.grouping.TopGroups} and {@link org.apache.lucene.search.grouping.SearchGroup})
* and visa versa.
+ *
+ * @lucene.experimental
*/
public interface ShardResultTransformer {
diff --git a/solr/core/src/java/org/apache/solr/search/grouping/endresulttransformer/EndResultTransformer.java b/solr/core/src/java/org/apache/solr/search/grouping/endresulttransformer/EndResultTransformer.java
index b58b4b7b354..efb58598b93 100644
--- a/solr/core/src/java/org/apache/solr/search/grouping/endresulttransformer/EndResultTransformer.java
+++ b/solr/core/src/java/org/apache/solr/search/grouping/endresulttransformer/EndResultTransformer.java
@@ -26,6 +26,8 @@ import java.util.Map;
/**
* Responsible for transforming the grouped result into the final format for displaying purposes.
+ *
+ * @lucene.experimental
*/
public interface EndResultTransformer {
diff --git a/solr/core/src/test/org/apache/solr/TestDistributedGrouping.java b/solr/core/src/test/org/apache/solr/TestDistributedGrouping.java
index 7235942b1de..e4b9e9c8a92 100755
--- a/solr/core/src/test/org/apache/solr/TestDistributedGrouping.java
+++ b/solr/core/src/test/org/apache/solr/TestDistributedGrouping.java
@@ -56,6 +56,7 @@ public class TestDistributedGrouping extends BaseDistributedSearchTestCase {
query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", 10, "sort", i1 + " asc, id asc", "facet", "true", "facet.field", t1);
query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", 10, "sort", i1 + " asc, id asc", "stats", "true", "stats.field", i1);
query("q", "kings", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", 10, "sort", i1 + " asc, id asc", "spellcheck", "true", "spellcheck.build", "true", "qt", "spellCheckCompRH");
+ query("q", "*:*", "fq", s1 + ":a", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", 10, "sort", i1 + " asc, id asc", "group.truncate", "true", "facet", "true", "facet.field", t1);
indexr(id,1, i1, 100, tlong, 100,t1,"now is the time for all good men",
tdate_a, "2010-04-20T11:00:00Z",
@@ -105,7 +106,7 @@ public class TestDistributedGrouping extends BaseDistributedSearchTestCase {
for (int shard = 0; shard < clients.size(); shard++) {
int groupValue = values[shard];
for (int i = 500; i < 600; i++) {
- index_specific(shard, i1, groupValue, s1, "a", id, i * (shard + 1));
+ index_specific(shard, i1, groupValue, s1, "a", id, i * (shard + 1), t1, shard);
}
}
@@ -135,6 +136,8 @@ public class TestDistributedGrouping extends BaseDistributedSearchTestCase {
// In order to validate this we need to make sure that during indexing that all documents of one group only occur on the same shard
query("q", "*:*", "fq", s1 + ":a", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", 10, "sort", i1 + " asc, id asc", "group.ngroups", "true");
+ query("q", "*:*", "fq", s1 + ":a", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", 10, "sort", i1 + " asc, id asc", "group.truncate", "true");
+ query("q", "*:*", "fq", s1 + ":a", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", 10, "sort", i1 + " asc, id asc", "group.truncate", "true", "facet", "true", "facet.field", t1);
// We cannot validate distributed grouping with scoring as first sort. since there is no global idf. We can check if no errors occur
simpleQuery("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", 10, "sort", "score desc, _docid_ asc, id asc");