SOLR-2776: Added support for group.truncate for distributed search. Also added some more jdocs.

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1173286 13f79535-47bb-0310-9956-ffa450edef68
2011-09-20 18:15:23 +00:00 · 2011-09-20 18:15:23 +00:00 · d58350aead
parent 3013098230
commit d58350aead
12 changed files with 106 additions and 24 deletions
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@ -339,7 +339,7 @@ New Features
  can be specified with a name in solrconfig.xml, and use hl.boundaryScanner=name
  parameter to specify the named <boundaryScanner/>. (koji)

-* SOLR-2066: Added support for distributed grouping.
+* SOLR-2066,SOLR-2776: Added support for distributed grouping.
  (Martijn van Groningen, Jasper van Veghel, Matt Beaumont)

 Bug Fixes
--- a/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java
+++ b/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java
@ -190,6 +190,7 @@ public class QueryComponent extends SearchComponent
    groupingSpec.setIncludeGroupCount(params.getBool(GroupParams.GROUP_TOTAL_COUNT, false));
    groupingSpec.setMain(params.getBool(GroupParams.GROUP_MAIN, false));
    groupingSpec.setNeedScore((cmd.getFlags() & SolrIndexSearcher.GET_SCORES) != 0);
+    groupingSpec.setTruncateGroups(params.getBool(GroupParams.GROUP_TRUNCATE, false));
  }


@ -402,6 +403,7 @@ public class QueryComponent extends SearchComponent
        } else if (params.getBool("group.distibuted.second", false)) {
          CommandHandler.Builder secondPhaseBuilder = new CommandHandler.Builder()
              .setQueryCommand(cmd)
+              .setTruncateGroups(groupingSpec.isTruncateGroups() && groupingSpec.getFields().length > 0)
              .setSearcher(searcher);

          for (String field : groupingSpec.getFields()) {
@ -453,7 +455,6 @@ public class QueryComponent extends SearchComponent

        int maxDocsPercentageToCache = params.getInt(GroupParams.GROUP_CACHE_PERCENTAGE, 0);
        boolean cacheSecondPassSearch = maxDocsPercentageToCache >= 1 && maxDocsPercentageToCache <= 100;
-        boolean truncateGroups = params.getBool(GroupParams.GROUP_TRUNCATE, false);
        Grouping.TotalCount defaultTotalCount = groupingSpec.isIncludeGroupCount() ?
            Grouping.TotalCount.grouped : Grouping.TotalCount.ungrouped;
        int limitDefault = cmd.getLen(); // this is normally from "rows"
@ -466,7 +467,7 @@ public class QueryComponent extends SearchComponent
            .setDefaultTotalCount(defaultTotalCount)
            .setDocsPerGroupDefault(groupingSpec.getGroupLimit())
            .setGroupOffsetDefault(groupingSpec.getGroupOffset())
-            .setGetGroupedDocSet(truncateGroups);
+            .setGetGroupedDocSet(groupingSpec.isTruncateGroups());

        if (groupingSpec.getFields() != null) {
          for (String field : groupingSpec.getFields()) {
--- a/solr/core/src/java/org/apache/solr/search/grouping/Command.java
+++ b/solr/core/src/java/org/apache/solr/search/grouping/Command.java
@ -25,18 +25,44 @@ import java.io.IOException;
 import java.util.List;

 /**
+ * Defines a grouping command.
+ * This is an abstraction on how the {@link Collector} instances are created
+ * and how the results are retrieved from the {@link Collector} instances.
 *
+ * @lucene.experimental
 */
 public interface Command<T> {

+  /**
+   * Returns a list of {@link Collector} instances to be
+   * included in the search based on the .
+   *
+   * @return a list of {@link Collector} instances
+   * @throws IOException If I/O related errors occur
+   */
  List<Collector> create() throws IOException;

+  /**
+   * Returns the results that the collectors created
+   * by {@link #create()} contain after a search has been executed.
+   *
+   * @return The results of the collectors
+   */
  T result();

+  /**
+   * @return The key of this command to uniquely identify itself
+   */
  String getKey();

+  /**
+   * @return The group sort (overall sort)
+   */
  Sort getGroupSort();

+  /**
+   * @return The sort inside a group
+   */
  Sort getSortWithinGroup();

 }
--- a/solr/core/src/java/org/apache/solr/search/grouping/CommandHandler.java
+++ b/solr/core/src/java/org/apache/solr/search/grouping/CommandHandler.java
@ -21,6 +21,10 @@ import org.apache.lucene.search.Collector;
 import org.apache.lucene.search.Filter;
 import org.apache.lucene.search.MultiCollector;
 import org.apache.lucene.search.Query;
+import org.apache.lucene.search.grouping.AbstractAllGroupHeadsCollector;
+import org.apache.lucene.search.grouping.TermAllGroupHeadsCollector;
+import org.apache.lucene.util.FixedBitSet;
+import org.apache.lucene.util.OpenBitSet;
 import org.apache.solr.common.util.NamedList;
 import org.apache.solr.search.*;
 import org.apache.solr.search.grouping.distributed.shardresultserializer.ShardResultTransformer;
@ -30,7 +34,10 @@ import java.util.ArrayList;
 import java.util.List;

 /**
+ * Responsible for executing a search with a number of {@link Command} instances.
+ * A typical search can have more then one {@link Command} instances.
 *
+ * @lucene.experimental
 */
 public class CommandHandler {

@ -40,6 +47,7 @@ public class CommandHandler {
    private List<Command> commands = new ArrayList<Command>();
    private SolrIndexSearcher searcher;
    private boolean needDocSet = false;
+    private boolean truncateGroups = false;

    public Builder setQueryCommand(SolrIndexSearcher.QueryCommand queryCommand) {
      this.queryCommand = queryCommand;
@ -69,12 +77,17 @@ public class CommandHandler {
      return this;
    }

+    public Builder setTruncateGroups(boolean truncateGroups) {
+      this.truncateGroups = truncateGroups;
+      return this;
+    }
+
    public CommandHandler build() {
      if (queryCommand == null || searcher == null) {
        throw new IllegalStateException("All fields must be set");
      }

-      return new CommandHandler(queryCommand, commands, searcher, needDocSet);
+      return new CommandHandler(queryCommand, commands, searcher, needDocSet, truncateGroups);
    }

  }
@ -83,17 +96,19 @@ public class CommandHandler {
  private final List<Command> commands;
  private final SolrIndexSearcher searcher;
  private final boolean needDocset;
+  private final boolean truncateGroups;

  private DocSet docSet;

  private CommandHandler(SolrIndexSearcher.QueryCommand queryCommand,
                         List<Command> commands,
                         SolrIndexSearcher searcher,
-                         boolean needDocset) {
+                         boolean needDocset, boolean truncateGroups) {
    this.queryCommand = queryCommand;
    this.commands = commands;
    this.searcher = searcher;
    this.needDocset = needDocset;
+    this.truncateGroups = truncateGroups;
  }

  @SuppressWarnings("unchecked")
@ -109,31 +124,48 @@ public class CommandHandler {
    );
    Filter luceneFilter = pf.filter;
    Query query = QueryUtils.makeQueryable(queryCommand.getQuery());
-    Collector wrappedCollectors;
-    if (collectors.isEmpty()) {
-      wrappedCollectors = null;
+
+    if (truncateGroups && nrOfCommands > 0) {
+      docSet = computeGroupedDocSet(query, luceneFilter, collectors);
+    } else if (needDocset) {
+      docSet = computeDocSet(query, luceneFilter, collectors);
    } else {
-      wrappedCollectors = MultiCollector.wrap(collectors.toArray(new Collector[nrOfCommands]));
+      searcher.search(query, luceneFilter, MultiCollector.wrap(collectors.toArray(new Collector[nrOfCommands])));
+    }
+  }
+
+  private DocSet computeGroupedDocSet(Query query, Filter luceneFilter, List<Collector> collectors) throws IOException {
+    Command firstCommand = commands.get(0);
+    AbstractAllGroupHeadsCollector termAllGroupHeadsCollector =
+        TermAllGroupHeadsCollector.create(firstCommand.getKey(), firstCommand.getSortWithinGroup());
+    if (collectors.isEmpty()) {
+      searcher.search(query, luceneFilter, termAllGroupHeadsCollector);
+    } else {
+      collectors.add(termAllGroupHeadsCollector);
+      searcher.search(query, luceneFilter, MultiCollector.wrap(collectors.toArray(new Collector[collectors.size()])));
    }

-    if (wrappedCollectors == null && needDocset) {
-      int maxDoc = searcher.maxDoc();
-      DocSetCollector docSetCollector = new DocSetCollector(maxDoc >> 6, maxDoc);
-      searcher.search(query, luceneFilter, docSetCollector);
-      docSet = docSetCollector.getDocSet();
-    } else if (needDocset) {
-      int maxDoc = searcher.maxDoc();
-      DocSetCollector docSetCollector = new DocSetDelegateCollector(maxDoc >> 6, maxDoc, wrappedCollectors);
-      searcher.search(query, luceneFilter, docSetCollector);
-      docSet = docSetCollector.getDocSet();
+    int maxDoc = searcher.maxDoc();
+    long[] bits = termAllGroupHeadsCollector.retrieveGroupHeads(maxDoc).getBits();
+    return new BitDocSet(new OpenBitSet(bits, bits.length));
+  }
+
+  private DocSet computeDocSet(Query query, Filter luceneFilter, List<Collector> collectors) throws IOException {
+    int maxDoc = searcher.maxDoc();
+    DocSetCollector docSetCollector;
+    if (collectors.isEmpty()) {
+      docSetCollector = new DocSetCollector(maxDoc >> 6, maxDoc);
    } else {
-      searcher.search(query, luceneFilter, wrappedCollectors);
+      Collector wrappedCollectors = MultiCollector.wrap(collectors.toArray(new Collector[collectors.size()]));
+      docSetCollector = new DocSetDelegateCollector(maxDoc >> 6, maxDoc, wrappedCollectors);
    }
+    searcher.search(query, luceneFilter, docSetCollector);
+    return docSetCollector.getDocSet();
  }

  @SuppressWarnings("unchecked")
  public NamedList processResult(SolrIndexSearcher.QueryResult queryResult, ShardResultTransformer transformer) throws IOException {
-    if (needDocset) {
+    if (docSet != null) {
      queryResult.setDocSet(docSet);
    }
    return transformer.transform(commands);
--- a/solr/core/src/java/org/apache/solr/search/grouping/GroupingSpecification.java
+++ b/solr/core/src/java/org/apache/solr/search/grouping/GroupingSpecification.java
@ -22,6 +22,8 @@ import org.apache.solr.search.Grouping;

 /**
 * Encapsulates the grouping options like fields group sort and more specified by clients.
+ *
+ * @lucene.experimental
 */
 public class GroupingSpecification {

@ -38,6 +40,7 @@ public class GroupingSpecification {
  private boolean main;
  private Grouping.Format responseFormat;
  private boolean needScore;
+  private boolean truncateGroups;

  public String[] getFields() {
    return fields;
@ -155,4 +158,11 @@ public class GroupingSpecification {
    this.needScore = needScore;
  }

+  public boolean isTruncateGroups() {
+    return truncateGroups;
+  }
+
+  public void setTruncateGroups(boolean truncateGroups) {
+    this.truncateGroups = truncateGroups;
+  }
 }
--- a/solr/core/src/java/org/apache/solr/search/grouping/collector/FilterCollector.java
+++ b/solr/core/src/java/org/apache/solr/search/grouping/collector/FilterCollector.java
@ -25,7 +25,9 @@ import org.apache.solr.search.DocSet;
 import java.io.IOException;

 /**
- * A collector that filters incoming doc ids that are not in the filter
+ * A collector that filters incoming doc ids that are not in the filter.
+ *
+ * @lucene.experimental
 */
 public class FilterCollector extends Collector {

--- a/solr/core/src/java/org/apache/solr/search/grouping/distributed/ShardRequestFactory.java
+++ b/solr/core/src/java/org/apache/solr/search/grouping/distributed/ShardRequestFactory.java
@ -22,6 +22,8 @@ import org.apache.solr.handler.component.ShardRequest;

 /**
 * Responsible for creating shard requests to the shards in the cluster to perform distributed grouping.
+ *
+ * @lucene.experimental
 */
 public interface ShardRequestFactory {

--- a/solr/core/src/java/org/apache/solr/search/grouping/distributed/ShardResponseProcessor.java
+++ b/solr/core/src/java/org/apache/solr/search/grouping/distributed/ShardResponseProcessor.java
@ -22,6 +22,8 @@ import org.apache.solr.handler.component.ShardRequest;

 /**
 * Responsible for processing shard responses.
+ *
+ * @lucene.experimental
 */
 public interface ShardResponseProcessor {

--- a/solr/core/src/java/org/apache/solr/search/grouping/distributed/command/QueryCommand.java
+++ b/solr/core/src/java/org/apache/solr/search/grouping/distributed/command/QueryCommand.java
@ -79,7 +79,7 @@ public class QueryCommand implements Command<QueryCommandResult> {
     *
     * @param searcher The searcher executing the
     * @return this
-     * @throws IOException
+     * @throws IOException If I/O related errors occur.
     */
    public Builder setDocSet(SolrIndexSearcher searcher) throws IOException {
      return setDocSet(searcher.getDocSet(query));
--- a/solr/core/src/java/org/apache/solr/search/grouping/distributed/shardresultserializer/ShardResultTransformer.java
+++ b/solr/core/src/java/org/apache/solr/search/grouping/distributed/shardresultserializer/ShardResultTransformer.java
@ -26,6 +26,8 @@ import java.io.IOException;
 * A <code>ShardResultTransformer</code> is responsible for transforming a grouped shard result into group related
 * structures (such as {@link org.apache.lucene.search.grouping.TopGroups} and {@link org.apache.lucene.search.grouping.SearchGroup})
 * and visa versa.
+ *
+ * @lucene.experimental
 */
 public interface ShardResultTransformer<T, R> {

--- a/solr/core/src/java/org/apache/solr/search/grouping/endresulttransformer/EndResultTransformer.java
+++ b/solr/core/src/java/org/apache/solr/search/grouping/endresulttransformer/EndResultTransformer.java
@ -26,6 +26,8 @@ import java.util.Map;

 /**
 * Responsible for transforming the grouped result into the final format for displaying purposes.
+ *
+ * @lucene.experimental
 */
 public interface EndResultTransformer {

--- a/solr/core/src/test/org/apache/solr/TestDistributedGrouping.java
+++ b/solr/core/src/test/org/apache/solr/TestDistributedGrouping.java
@ -56,6 +56,7 @@ public class TestDistributedGrouping extends BaseDistributedSearchTestCase {
    query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", 10, "sort", i1 + " asc, id asc", "facet", "true", "facet.field", t1);
    query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", 10, "sort", i1 + " asc, id asc", "stats", "true", "stats.field", i1);
    query("q", "kings", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", 10, "sort", i1 + " asc, id asc", "spellcheck", "true", "spellcheck.build", "true", "qt", "spellCheckCompRH");
+    query("q", "*:*", "fq", s1 + ":a", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", 10, "sort", i1 + " asc, id asc", "group.truncate", "true", "facet", "true", "facet.field", t1);

    indexr(id,1, i1, 100, tlong, 100,t1,"now is the time for all good men",
           tdate_a, "2010-04-20T11:00:00Z",
@ -105,7 +106,7 @@ public class TestDistributedGrouping extends BaseDistributedSearchTestCase {
    for (int shard = 0; shard < clients.size(); shard++) {
      int groupValue = values[shard];
      for (int i = 500; i < 600; i++) {
-        index_specific(shard, i1, groupValue, s1, "a", id, i * (shard + 1));
+        index_specific(shard, i1, groupValue, s1, "a", id, i * (shard + 1), t1, shard);
      }
    }

@ -135,6 +136,8 @@ public class TestDistributedGrouping extends BaseDistributedSearchTestCase {

    // In order to validate this we need to make sure that during indexing that all documents of one group only occur on the same shard
    query("q", "*:*", "fq", s1 + ":a", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", 10, "sort", i1 + " asc, id asc", "group.ngroups", "true");
+    query("q", "*:*", "fq", s1 + ":a", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", 10, "sort", i1 + " asc, id asc", "group.truncate", "true");
+    query("q", "*:*", "fq", s1 + ":a", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", 10, "sort", i1 + " asc, id asc", "group.truncate", "true", "facet", "true", "facet.field", t1);

    // We cannot validate distributed grouping with scoring as first sort. since there is no global idf. We can check if no errors occur
    simpleQuery("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", 10, "sort", "score desc, _docid_ asc, id asc");