From d58350aead436dcd6fb13ac071f55271bf043df0 Mon Sep 17 00:00:00 2001 From: Martijn van Groningen Date: Tue, 20 Sep 2011 18:15:23 +0000 Subject: [PATCH] SOLR-2776: Added support for group.truncate for distributed search. Also added some more jdocs. git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1173286 13f79535-47bb-0310-9956-ffa450edef68 --- solr/CHANGES.txt | 2 +- .../handler/component/QueryComponent.java | 5 +- .../apache/solr/search/grouping/Command.java | 26 +++++++ .../solr/search/grouping/CommandHandler.java | 68 ++++++++++++++----- .../grouping/GroupingSpecification.java | 10 +++ .../grouping/collector/FilterCollector.java | 4 +- .../distributed/ShardRequestFactory.java | 2 + .../distributed/ShardResponseProcessor.java | 2 + .../distributed/command/QueryCommand.java | 2 +- .../ShardResultTransformer.java | 2 + .../EndResultTransformer.java | 2 + .../apache/solr/TestDistributedGrouping.java | 5 +- 12 files changed, 106 insertions(+), 24 deletions(-) diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index 33c0e8d9b99..6277d826e70 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -339,7 +339,7 @@ New Features can be specified with a name in solrconfig.xml, and use hl.boundaryScanner=name parameter to specify the named . (koji) -* SOLR-2066: Added support for distributed grouping. +* SOLR-2066,SOLR-2776: Added support for distributed grouping. (Martijn van Groningen, Jasper van Veghel, Matt Beaumont) Bug Fixes diff --git a/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java b/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java index b175ed1e8cf..9861860553c 100644 --- a/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java +++ b/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java @@ -190,6 +190,7 @@ public class QueryComponent extends SearchComponent groupingSpec.setIncludeGroupCount(params.getBool(GroupParams.GROUP_TOTAL_COUNT, false)); groupingSpec.setMain(params.getBool(GroupParams.GROUP_MAIN, false)); groupingSpec.setNeedScore((cmd.getFlags() & SolrIndexSearcher.GET_SCORES) != 0); + groupingSpec.setTruncateGroups(params.getBool(GroupParams.GROUP_TRUNCATE, false)); } @@ -402,6 +403,7 @@ public class QueryComponent extends SearchComponent } else if (params.getBool("group.distibuted.second", false)) { CommandHandler.Builder secondPhaseBuilder = new CommandHandler.Builder() .setQueryCommand(cmd) + .setTruncateGroups(groupingSpec.isTruncateGroups() && groupingSpec.getFields().length > 0) .setSearcher(searcher); for (String field : groupingSpec.getFields()) { @@ -453,7 +455,6 @@ public class QueryComponent extends SearchComponent int maxDocsPercentageToCache = params.getInt(GroupParams.GROUP_CACHE_PERCENTAGE, 0); boolean cacheSecondPassSearch = maxDocsPercentageToCache >= 1 && maxDocsPercentageToCache <= 100; - boolean truncateGroups = params.getBool(GroupParams.GROUP_TRUNCATE, false); Grouping.TotalCount defaultTotalCount = groupingSpec.isIncludeGroupCount() ? Grouping.TotalCount.grouped : Grouping.TotalCount.ungrouped; int limitDefault = cmd.getLen(); // this is normally from "rows" @@ -466,7 +467,7 @@ public class QueryComponent extends SearchComponent .setDefaultTotalCount(defaultTotalCount) .setDocsPerGroupDefault(groupingSpec.getGroupLimit()) .setGroupOffsetDefault(groupingSpec.getGroupOffset()) - .setGetGroupedDocSet(truncateGroups); + .setGetGroupedDocSet(groupingSpec.isTruncateGroups()); if (groupingSpec.getFields() != null) { for (String field : groupingSpec.getFields()) { diff --git a/solr/core/src/java/org/apache/solr/search/grouping/Command.java b/solr/core/src/java/org/apache/solr/search/grouping/Command.java index bafb2ebe2fb..9b361fa5301 100644 --- a/solr/core/src/java/org/apache/solr/search/grouping/Command.java +++ b/solr/core/src/java/org/apache/solr/search/grouping/Command.java @@ -25,18 +25,44 @@ import java.io.IOException; import java.util.List; /** + * Defines a grouping command. + * This is an abstraction on how the {@link Collector} instances are created + * and how the results are retrieved from the {@link Collector} instances. * + * @lucene.experimental */ public interface Command { + /** + * Returns a list of {@link Collector} instances to be + * included in the search based on the . + * + * @return a list of {@link Collector} instances + * @throws IOException If I/O related errors occur + */ List create() throws IOException; + /** + * Returns the results that the collectors created + * by {@link #create()} contain after a search has been executed. + * + * @return The results of the collectors + */ T result(); + /** + * @return The key of this command to uniquely identify itself + */ String getKey(); + /** + * @return The group sort (overall sort) + */ Sort getGroupSort(); + /** + * @return The sort inside a group + */ Sort getSortWithinGroup(); } diff --git a/solr/core/src/java/org/apache/solr/search/grouping/CommandHandler.java b/solr/core/src/java/org/apache/solr/search/grouping/CommandHandler.java index 98e53e56fe4..337e252c1ea 100644 --- a/solr/core/src/java/org/apache/solr/search/grouping/CommandHandler.java +++ b/solr/core/src/java/org/apache/solr/search/grouping/CommandHandler.java @@ -21,6 +21,10 @@ import org.apache.lucene.search.Collector; import org.apache.lucene.search.Filter; import org.apache.lucene.search.MultiCollector; import org.apache.lucene.search.Query; +import org.apache.lucene.search.grouping.AbstractAllGroupHeadsCollector; +import org.apache.lucene.search.grouping.TermAllGroupHeadsCollector; +import org.apache.lucene.util.FixedBitSet; +import org.apache.lucene.util.OpenBitSet; import org.apache.solr.common.util.NamedList; import org.apache.solr.search.*; import org.apache.solr.search.grouping.distributed.shardresultserializer.ShardResultTransformer; @@ -30,7 +34,10 @@ import java.util.ArrayList; import java.util.List; /** + * Responsible for executing a search with a number of {@link Command} instances. + * A typical search can have more then one {@link Command} instances. * + * @lucene.experimental */ public class CommandHandler { @@ -40,6 +47,7 @@ public class CommandHandler { private List commands = new ArrayList(); private SolrIndexSearcher searcher; private boolean needDocSet = false; + private boolean truncateGroups = false; public Builder setQueryCommand(SolrIndexSearcher.QueryCommand queryCommand) { this.queryCommand = queryCommand; @@ -69,12 +77,17 @@ public class CommandHandler { return this; } + public Builder setTruncateGroups(boolean truncateGroups) { + this.truncateGroups = truncateGroups; + return this; + } + public CommandHandler build() { if (queryCommand == null || searcher == null) { throw new IllegalStateException("All fields must be set"); } - return new CommandHandler(queryCommand, commands, searcher, needDocSet); + return new CommandHandler(queryCommand, commands, searcher, needDocSet, truncateGroups); } } @@ -83,17 +96,19 @@ public class CommandHandler { private final List commands; private final SolrIndexSearcher searcher; private final boolean needDocset; + private final boolean truncateGroups; private DocSet docSet; private CommandHandler(SolrIndexSearcher.QueryCommand queryCommand, List commands, SolrIndexSearcher searcher, - boolean needDocset) { + boolean needDocset, boolean truncateGroups) { this.queryCommand = queryCommand; this.commands = commands; this.searcher = searcher; this.needDocset = needDocset; + this.truncateGroups = truncateGroups; } @SuppressWarnings("unchecked") @@ -109,31 +124,48 @@ public class CommandHandler { ); Filter luceneFilter = pf.filter; Query query = QueryUtils.makeQueryable(queryCommand.getQuery()); - Collector wrappedCollectors; - if (collectors.isEmpty()) { - wrappedCollectors = null; + + if (truncateGroups && nrOfCommands > 0) { + docSet = computeGroupedDocSet(query, luceneFilter, collectors); + } else if (needDocset) { + docSet = computeDocSet(query, luceneFilter, collectors); } else { - wrappedCollectors = MultiCollector.wrap(collectors.toArray(new Collector[nrOfCommands])); + searcher.search(query, luceneFilter, MultiCollector.wrap(collectors.toArray(new Collector[nrOfCommands]))); + } + } + + private DocSet computeGroupedDocSet(Query query, Filter luceneFilter, List collectors) throws IOException { + Command firstCommand = commands.get(0); + AbstractAllGroupHeadsCollector termAllGroupHeadsCollector = + TermAllGroupHeadsCollector.create(firstCommand.getKey(), firstCommand.getSortWithinGroup()); + if (collectors.isEmpty()) { + searcher.search(query, luceneFilter, termAllGroupHeadsCollector); + } else { + collectors.add(termAllGroupHeadsCollector); + searcher.search(query, luceneFilter, MultiCollector.wrap(collectors.toArray(new Collector[collectors.size()]))); } - if (wrappedCollectors == null && needDocset) { - int maxDoc = searcher.maxDoc(); - DocSetCollector docSetCollector = new DocSetCollector(maxDoc >> 6, maxDoc); - searcher.search(query, luceneFilter, docSetCollector); - docSet = docSetCollector.getDocSet(); - } else if (needDocset) { - int maxDoc = searcher.maxDoc(); - DocSetCollector docSetCollector = new DocSetDelegateCollector(maxDoc >> 6, maxDoc, wrappedCollectors); - searcher.search(query, luceneFilter, docSetCollector); - docSet = docSetCollector.getDocSet(); + int maxDoc = searcher.maxDoc(); + long[] bits = termAllGroupHeadsCollector.retrieveGroupHeads(maxDoc).getBits(); + return new BitDocSet(new OpenBitSet(bits, bits.length)); + } + + private DocSet computeDocSet(Query query, Filter luceneFilter, List collectors) throws IOException { + int maxDoc = searcher.maxDoc(); + DocSetCollector docSetCollector; + if (collectors.isEmpty()) { + docSetCollector = new DocSetCollector(maxDoc >> 6, maxDoc); } else { - searcher.search(query, luceneFilter, wrappedCollectors); + Collector wrappedCollectors = MultiCollector.wrap(collectors.toArray(new Collector[collectors.size()])); + docSetCollector = new DocSetDelegateCollector(maxDoc >> 6, maxDoc, wrappedCollectors); } + searcher.search(query, luceneFilter, docSetCollector); + return docSetCollector.getDocSet(); } @SuppressWarnings("unchecked") public NamedList processResult(SolrIndexSearcher.QueryResult queryResult, ShardResultTransformer transformer) throws IOException { - if (needDocset) { + if (docSet != null) { queryResult.setDocSet(docSet); } return transformer.transform(commands); diff --git a/solr/core/src/java/org/apache/solr/search/grouping/GroupingSpecification.java b/solr/core/src/java/org/apache/solr/search/grouping/GroupingSpecification.java index cf4ae0274e7..21b6713c040 100644 --- a/solr/core/src/java/org/apache/solr/search/grouping/GroupingSpecification.java +++ b/solr/core/src/java/org/apache/solr/search/grouping/GroupingSpecification.java @@ -22,6 +22,8 @@ import org.apache.solr.search.Grouping; /** * Encapsulates the grouping options like fields group sort and more specified by clients. + * + * @lucene.experimental */ public class GroupingSpecification { @@ -38,6 +40,7 @@ public class GroupingSpecification { private boolean main; private Grouping.Format responseFormat; private boolean needScore; + private boolean truncateGroups; public String[] getFields() { return fields; @@ -155,4 +158,11 @@ public class GroupingSpecification { this.needScore = needScore; } + public boolean isTruncateGroups() { + return truncateGroups; + } + + public void setTruncateGroups(boolean truncateGroups) { + this.truncateGroups = truncateGroups; + } } diff --git a/solr/core/src/java/org/apache/solr/search/grouping/collector/FilterCollector.java b/solr/core/src/java/org/apache/solr/search/grouping/collector/FilterCollector.java index 4ebcf201ed4..9e157cefe53 100644 --- a/solr/core/src/java/org/apache/solr/search/grouping/collector/FilterCollector.java +++ b/solr/core/src/java/org/apache/solr/search/grouping/collector/FilterCollector.java @@ -25,7 +25,9 @@ import org.apache.solr.search.DocSet; import java.io.IOException; /** - * A collector that filters incoming doc ids that are not in the filter + * A collector that filters incoming doc ids that are not in the filter. + * + * @lucene.experimental */ public class FilterCollector extends Collector { diff --git a/solr/core/src/java/org/apache/solr/search/grouping/distributed/ShardRequestFactory.java b/solr/core/src/java/org/apache/solr/search/grouping/distributed/ShardRequestFactory.java index 92726f55120..88cdd8ee931 100644 --- a/solr/core/src/java/org/apache/solr/search/grouping/distributed/ShardRequestFactory.java +++ b/solr/core/src/java/org/apache/solr/search/grouping/distributed/ShardRequestFactory.java @@ -22,6 +22,8 @@ import org.apache.solr.handler.component.ShardRequest; /** * Responsible for creating shard requests to the shards in the cluster to perform distributed grouping. + * + * @lucene.experimental */ public interface ShardRequestFactory { diff --git a/solr/core/src/java/org/apache/solr/search/grouping/distributed/ShardResponseProcessor.java b/solr/core/src/java/org/apache/solr/search/grouping/distributed/ShardResponseProcessor.java index fe6fda3f487..729685a7770 100644 --- a/solr/core/src/java/org/apache/solr/search/grouping/distributed/ShardResponseProcessor.java +++ b/solr/core/src/java/org/apache/solr/search/grouping/distributed/ShardResponseProcessor.java @@ -22,6 +22,8 @@ import org.apache.solr.handler.component.ShardRequest; /** * Responsible for processing shard responses. + * + * @lucene.experimental */ public interface ShardResponseProcessor { diff --git a/solr/core/src/java/org/apache/solr/search/grouping/distributed/command/QueryCommand.java b/solr/core/src/java/org/apache/solr/search/grouping/distributed/command/QueryCommand.java index d6e1517b835..363b8b87ed4 100644 --- a/solr/core/src/java/org/apache/solr/search/grouping/distributed/command/QueryCommand.java +++ b/solr/core/src/java/org/apache/solr/search/grouping/distributed/command/QueryCommand.java @@ -79,7 +79,7 @@ public class QueryCommand implements Command { * * @param searcher The searcher executing the * @return this - * @throws IOException + * @throws IOException If I/O related errors occur. */ public Builder setDocSet(SolrIndexSearcher searcher) throws IOException { return setDocSet(searcher.getDocSet(query)); diff --git a/solr/core/src/java/org/apache/solr/search/grouping/distributed/shardresultserializer/ShardResultTransformer.java b/solr/core/src/java/org/apache/solr/search/grouping/distributed/shardresultserializer/ShardResultTransformer.java index c0902f78a8e..eb4d5c4c9b5 100644 --- a/solr/core/src/java/org/apache/solr/search/grouping/distributed/shardresultserializer/ShardResultTransformer.java +++ b/solr/core/src/java/org/apache/solr/search/grouping/distributed/shardresultserializer/ShardResultTransformer.java @@ -26,6 +26,8 @@ import java.io.IOException; * A ShardResultTransformer is responsible for transforming a grouped shard result into group related * structures (such as {@link org.apache.lucene.search.grouping.TopGroups} and {@link org.apache.lucene.search.grouping.SearchGroup}) * and visa versa. + * + * @lucene.experimental */ public interface ShardResultTransformer { diff --git a/solr/core/src/java/org/apache/solr/search/grouping/endresulttransformer/EndResultTransformer.java b/solr/core/src/java/org/apache/solr/search/grouping/endresulttransformer/EndResultTransformer.java index b58b4b7b354..efb58598b93 100644 --- a/solr/core/src/java/org/apache/solr/search/grouping/endresulttransformer/EndResultTransformer.java +++ b/solr/core/src/java/org/apache/solr/search/grouping/endresulttransformer/EndResultTransformer.java @@ -26,6 +26,8 @@ import java.util.Map; /** * Responsible for transforming the grouped result into the final format for displaying purposes. + * + * @lucene.experimental */ public interface EndResultTransformer { diff --git a/solr/core/src/test/org/apache/solr/TestDistributedGrouping.java b/solr/core/src/test/org/apache/solr/TestDistributedGrouping.java index 7235942b1de..e4b9e9c8a92 100755 --- a/solr/core/src/test/org/apache/solr/TestDistributedGrouping.java +++ b/solr/core/src/test/org/apache/solr/TestDistributedGrouping.java @@ -56,6 +56,7 @@ public class TestDistributedGrouping extends BaseDistributedSearchTestCase { query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", 10, "sort", i1 + " asc, id asc", "facet", "true", "facet.field", t1); query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", 10, "sort", i1 + " asc, id asc", "stats", "true", "stats.field", i1); query("q", "kings", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", 10, "sort", i1 + " asc, id asc", "spellcheck", "true", "spellcheck.build", "true", "qt", "spellCheckCompRH"); + query("q", "*:*", "fq", s1 + ":a", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", 10, "sort", i1 + " asc, id asc", "group.truncate", "true", "facet", "true", "facet.field", t1); indexr(id,1, i1, 100, tlong, 100,t1,"now is the time for all good men", tdate_a, "2010-04-20T11:00:00Z", @@ -105,7 +106,7 @@ public class TestDistributedGrouping extends BaseDistributedSearchTestCase { for (int shard = 0; shard < clients.size(); shard++) { int groupValue = values[shard]; for (int i = 500; i < 600; i++) { - index_specific(shard, i1, groupValue, s1, "a", id, i * (shard + 1)); + index_specific(shard, i1, groupValue, s1, "a", id, i * (shard + 1), t1, shard); } } @@ -135,6 +136,8 @@ public class TestDistributedGrouping extends BaseDistributedSearchTestCase { // In order to validate this we need to make sure that during indexing that all documents of one group only occur on the same shard query("q", "*:*", "fq", s1 + ":a", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", 10, "sort", i1 + " asc, id asc", "group.ngroups", "true"); + query("q", "*:*", "fq", s1 + ":a", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", 10, "sort", i1 + " asc, id asc", "group.truncate", "true"); + query("q", "*:*", "fq", s1 + ":a", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", 10, "sort", i1 + " asc, id asc", "group.truncate", "true", "facet", "true", "facet.field", t1); // We cannot validate distributed grouping with scoring as first sort. since there is no global idf. We can check if no errors occur simpleQuery("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", 10, "sort", "score desc, _docid_ asc, id asc");