SOLR-2776: Added support for group.truncate for distributed search. Also added some more jdocs.

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1173286 13f79535-47bb-0310-9956-ffa450edef68
Martijn van Groningen 2011-09-20 18:15:23 +00:00
parent 3013098230
commit d58350aead
12 changed files with 106 additions and 24 deletions
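
For context: group.truncate makes components that work from the result DocSet (notably faceting and stats) count only the most relevant document of each group, and this commit extends that behaviour to sharded requests. A minimal SolrJ sketch of such a request, assuming only parameters that already exist (the group field, facet field and shard list below are illustrative, not taken from this commit):

import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.common.params.FacetParams;
import org.apache.solr.common.params.GroupParams;

public class GroupTruncateRequestSketch {
  public static SolrQuery build() {
    SolrQuery q = new SolrQuery("*:*");
    q.set(GroupParams.GROUP, true);                        // group=true
    q.set(GroupParams.GROUP_FIELD, "manu_exact");          // hypothetical group field
    q.set(GroupParams.GROUP_TRUNCATE, true);               // facet/stats counts based on group heads only
    q.set(FacetParams.FACET, true);
    q.addFacetField("cat");                                // hypothetical facet field
    q.set("shards", "host1:8983/solr,host2:8983/solr");    // hypothetical shard list -> distributed request
    return q;
  }
}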


@@ -339,7 +339,7 @@ New Features
   can be specified with a name in solrconfig.xml, and use hl.boundaryScanner=name
   parameter to specify the named <boundaryScanner/>. (koji)

-* SOLR-2066: Added support for distributed grouping.
+* SOLR-2066,SOLR-2776: Added support for distributed grouping.
   (Martijn van Groningen, Jasper van Veghel, Matt Beaumont)

 Bug Fixes


@@ -190,6 +190,7 @@ public class QueryComponent extends SearchComponent
       groupingSpec.setIncludeGroupCount(params.getBool(GroupParams.GROUP_TOTAL_COUNT, false));
       groupingSpec.setMain(params.getBool(GroupParams.GROUP_MAIN, false));
       groupingSpec.setNeedScore((cmd.getFlags() & SolrIndexSearcher.GET_SCORES) != 0);
+      groupingSpec.setTruncateGroups(params.getBool(GroupParams.GROUP_TRUNCATE, false));
     }
@@ -402,6 +403,7 @@ public class QueryComponent extends SearchComponent
       } else if (params.getBool("group.distibuted.second", false)) {
         CommandHandler.Builder secondPhaseBuilder = new CommandHandler.Builder()
             .setQueryCommand(cmd)
+            .setTruncateGroups(groupingSpec.isTruncateGroups() && groupingSpec.getFields().length > 0)
             .setSearcher(searcher);

         for (String field : groupingSpec.getFields()) {
@@ -453,7 +455,6 @@ public class QueryComponent extends SearchComponent
       int maxDocsPercentageToCache = params.getInt(GroupParams.GROUP_CACHE_PERCENTAGE, 0);
       boolean cacheSecondPassSearch = maxDocsPercentageToCache >= 1 && maxDocsPercentageToCache <= 100;
-      boolean truncateGroups = params.getBool(GroupParams.GROUP_TRUNCATE, false);
       Grouping.TotalCount defaultTotalCount = groupingSpec.isIncludeGroupCount() ?
           Grouping.TotalCount.grouped : Grouping.TotalCount.ungrouped;
       int limitDefault = cmd.getLen(); // this is normally from "rows"
@@ -466,7 +467,7 @@ public class QueryComponent extends SearchComponent
           .setDefaultTotalCount(defaultTotalCount)
           .setDocsPerGroupDefault(groupingSpec.getGroupLimit())
           .setGroupOffsetDefault(groupingSpec.getGroupOffset())
-          .setGetGroupedDocSet(truncateGroups);
+          .setGetGroupedDocSet(groupingSpec.isTruncateGroups());

       if (groupingSpec.getFields() != null) {
         for (String field : groupingSpec.getFields()) {


@@ -25,18 +25,44 @@ import java.io.IOException;
 import java.util.List;

 /**
+ * Defines a grouping command.
+ * This is an abstraction on how the {@link Collector} instances are created
+ * and how the results are retrieved from the {@link Collector} instances.
  *
+ * @lucene.experimental
  */
 public interface Command<T> {

+  /**
+   * Returns a list of {@link Collector} instances to be
+   * included in the search based on the .
+   *
+   * @return a list of {@link Collector} instances
+   * @throws IOException If I/O related errors occur
+   */
   List<Collector> create() throws IOException;

+  /**
+   * Returns the results that the collectors created
+   * by {@link #create()} contain after a search has been executed.
+   *
+   * @return The results of the collectors
+   */
   T result();

+  /**
+   * @return The key of this command to uniquely identify itself
+   */
   String getKey();

+  /**
+   * @return The group sort (overall sort)
+   */
   Sort getGroupSort();

+  /**
+   * @return The sort inside a group
+   */
   Sort getSortWithinGroup();

 }


@@ -21,6 +21,10 @@ import org.apache.lucene.search.Collector;
 import org.apache.lucene.search.Filter;
 import org.apache.lucene.search.MultiCollector;
 import org.apache.lucene.search.Query;
+import org.apache.lucene.search.grouping.AbstractAllGroupHeadsCollector;
+import org.apache.lucene.search.grouping.TermAllGroupHeadsCollector;
+import org.apache.lucene.util.FixedBitSet;
+import org.apache.lucene.util.OpenBitSet;
 import org.apache.solr.common.util.NamedList;
 import org.apache.solr.search.*;
 import org.apache.solr.search.grouping.distributed.shardresultserializer.ShardResultTransformer;
@@ -30,7 +34,10 @@ import java.util.ArrayList;
 import java.util.List;

 /**
+ * Responsible for executing a search with a number of {@link Command} instances.
+ * A typical search can have more then one {@link Command} instances.
  *
+ * @lucene.experimental
  */
 public class CommandHandler {
@@ -40,6 +47,7 @@ public class CommandHandler {
     private List<Command> commands = new ArrayList<Command>();
     private SolrIndexSearcher searcher;
     private boolean needDocSet = false;
+    private boolean truncateGroups = false;

     public Builder setQueryCommand(SolrIndexSearcher.QueryCommand queryCommand) {
       this.queryCommand = queryCommand;
@@ -69,12 +77,17 @@ public class CommandHandler {
       return this;
     }

+    public Builder setTruncateGroups(boolean truncateGroups) {
+      this.truncateGroups = truncateGroups;
+      return this;
+    }
+
     public CommandHandler build() {
       if (queryCommand == null || searcher == null) {
         throw new IllegalStateException("All fields must be set");
       }

-      return new CommandHandler(queryCommand, commands, searcher, needDocSet);
+      return new CommandHandler(queryCommand, commands, searcher, needDocSet, truncateGroups);
     }
   }
@@ -83,17 +96,19 @@ public class CommandHandler {
   private final List<Command> commands;
   private final SolrIndexSearcher searcher;
   private final boolean needDocset;
+  private final boolean truncateGroups;

   private DocSet docSet;

   private CommandHandler(SolrIndexSearcher.QueryCommand queryCommand,
                          List<Command> commands,
                          SolrIndexSearcher searcher,
-                         boolean needDocset) {
+                         boolean needDocset, boolean truncateGroups) {
     this.queryCommand = queryCommand;
     this.commands = commands;
     this.searcher = searcher;
     this.needDocset = needDocset;
+    this.truncateGroups = truncateGroups;
   }
@SuppressWarnings("unchecked") @SuppressWarnings("unchecked")
@ -109,31 +124,48 @@ public class CommandHandler {
); );
Filter luceneFilter = pf.filter; Filter luceneFilter = pf.filter;
Query query = QueryUtils.makeQueryable(queryCommand.getQuery()); Query query = QueryUtils.makeQueryable(queryCommand.getQuery());
Collector wrappedCollectors;
if (collectors.isEmpty()) { if (truncateGroups && nrOfCommands > 0) {
wrappedCollectors = null; docSet = computeGroupedDocSet(query, luceneFilter, collectors);
} else if (needDocset) {
docSet = computeDocSet(query, luceneFilter, collectors);
} else { } else {
wrappedCollectors = MultiCollector.wrap(collectors.toArray(new Collector[nrOfCommands])); searcher.search(query, luceneFilter, MultiCollector.wrap(collectors.toArray(new Collector[nrOfCommands])));
}
}
private DocSet computeGroupedDocSet(Query query, Filter luceneFilter, List<Collector> collectors) throws IOException {
Command firstCommand = commands.get(0);
AbstractAllGroupHeadsCollector termAllGroupHeadsCollector =
TermAllGroupHeadsCollector.create(firstCommand.getKey(), firstCommand.getSortWithinGroup());
if (collectors.isEmpty()) {
searcher.search(query, luceneFilter, termAllGroupHeadsCollector);
} else {
collectors.add(termAllGroupHeadsCollector);
searcher.search(query, luceneFilter, MultiCollector.wrap(collectors.toArray(new Collector[collectors.size()])));
} }
if (wrappedCollectors == null && needDocset) { int maxDoc = searcher.maxDoc();
int maxDoc = searcher.maxDoc(); long[] bits = termAllGroupHeadsCollector.retrieveGroupHeads(maxDoc).getBits();
DocSetCollector docSetCollector = new DocSetCollector(maxDoc >> 6, maxDoc); return new BitDocSet(new OpenBitSet(bits, bits.length));
searcher.search(query, luceneFilter, docSetCollector); }
docSet = docSetCollector.getDocSet();
} else if (needDocset) { private DocSet computeDocSet(Query query, Filter luceneFilter, List<Collector> collectors) throws IOException {
int maxDoc = searcher.maxDoc(); int maxDoc = searcher.maxDoc();
DocSetCollector docSetCollector = new DocSetDelegateCollector(maxDoc >> 6, maxDoc, wrappedCollectors); DocSetCollector docSetCollector;
searcher.search(query, luceneFilter, docSetCollector); if (collectors.isEmpty()) {
docSet = docSetCollector.getDocSet(); docSetCollector = new DocSetCollector(maxDoc >> 6, maxDoc);
} else { } else {
searcher.search(query, luceneFilter, wrappedCollectors); Collector wrappedCollectors = MultiCollector.wrap(collectors.toArray(new Collector[collectors.size()]));
docSetCollector = new DocSetDelegateCollector(maxDoc >> 6, maxDoc, wrappedCollectors);
} }
searcher.search(query, luceneFilter, docSetCollector);
return docSetCollector.getDocSet();
} }
@SuppressWarnings("unchecked") @SuppressWarnings("unchecked")
public NamedList processResult(SolrIndexSearcher.QueryResult queryResult, ShardResultTransformer transformer) throws IOException { public NamedList processResult(SolrIndexSearcher.QueryResult queryResult, ShardResultTransformer transformer) throws IOException {
if (needDocset) { if (docSet != null) {
queryResult.setDocSet(docSet); queryResult.setDocSet(docSet);
} }
return transformer.transform(commands); return transformer.transform(commands);

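The new computeGroupedDocSet ties truncation to Lucene's grouping module: an AbstractAllGroupHeadsCollector keeps, per group value, only the document that sorts first within its group, and the resulting bit set becomes the DocSet handed to downstream components such as faceting. A standalone sketch of the same collector usage, assuming the grouping-module API as it existed around this commit (the group field name and the relevance sort are illustrative):

import java.io.IOException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.grouping.AbstractAllGroupHeadsCollector;
import org.apache.lucene.search.grouping.TermAllGroupHeadsCollector;
import org.apache.lucene.util.FixedBitSet;

public class GroupHeadsSketch {
  // Returns a bit set with one bit per "group head": the doc that sorts first inside its group.
  public static FixedBitSet groupHeads(IndexReader reader, String groupField) throws IOException {
    IndexSearcher searcher = new IndexSearcher(reader);
    AbstractAllGroupHeadsCollector heads =
        TermAllGroupHeadsCollector.create(groupField, Sort.RELEVANCE);
    searcher.search(new MatchAllDocsQuery(), heads);
    // CommandHandler wraps these same bits in an OpenBitSet/BitDocSet for Solr's DocSet plumbing.
    return heads.retrieveGroupHeads(reader.maxDoc());
  }
}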

@@ -22,6 +22,8 @@ import org.apache.solr.search.Grouping;

 /**
  * Encapsulates the grouping options like fields group sort and more specified by clients.
+ *
+ * @lucene.experimental
  */
 public class GroupingSpecification {
@@ -38,6 +40,7 @@ public class GroupingSpecification {
   private boolean main;
   private Grouping.Format responseFormat;
   private boolean needScore;
+  private boolean truncateGroups;

   public String[] getFields() {
     return fields;
@@ -155,4 +158,11 @@ public class GroupingSpecification {
     this.needScore = needScore;
   }

+  public boolean isTruncateGroups() {
+    return truncateGroups;
+  }
+
+  public void setTruncateGroups(boolean truncateGroups) {
+    this.truncateGroups = truncateGroups;
+  }
 }


@@ -25,7 +25,9 @@ import org.apache.solr.search.DocSet;
 import java.io.IOException;

 /**
- * A collector that filters incoming doc ids that are not in the filter
+ * A collector that filters incoming doc ids that are not in the filter.
+ *
+ * @lucene.experimental
  */
 public class FilterCollector extends Collector {


@@ -22,6 +22,8 @@ import org.apache.solr.handler.component.ShardRequest;

 /**
  * Responsible for creating shard requests to the shards in the cluster to perform distributed grouping.
+ *
+ * @lucene.experimental
  */
 public interface ShardRequestFactory {


@@ -22,6 +22,8 @@ import org.apache.solr.handler.component.ShardRequest;

 /**
  * Responsible for processing shard responses.
+ *
+ * @lucene.experimental
  */
 public interface ShardResponseProcessor {


@@ -79,7 +79,7 @@ public class QueryCommand implements Command<QueryCommandResult> {
      *
      * @param searcher The searcher executing the
      * @return this
-     * @throws IOException
+     * @throws IOException If I/O related errors occur.
      */
     public Builder setDocSet(SolrIndexSearcher searcher) throws IOException {
       return setDocSet(searcher.getDocSet(query));


@@ -26,6 +26,8 @@ import java.io.IOException;
 * A <code>ShardResultTransformer</code> is responsible for transforming a grouped shard result into group related
 * structures (such as {@link org.apache.lucene.search.grouping.TopGroups} and {@link org.apache.lucene.search.grouping.SearchGroup})
 * and visa versa.
+ *
+ * @lucene.experimental
 */
public interface ShardResultTransformer<T, R> {


@@ -26,6 +26,8 @@ import java.util.Map;

 /**
  * Responsible for transforming the grouped result into the final format for displaying purposes.
+ *
+ * @lucene.experimental
  */
 public interface EndResultTransformer {


@@ -56,6 +56,7 @@ public class TestDistributedGrouping extends BaseDistributedSearchTestCase {
     query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", 10, "sort", i1 + " asc, id asc", "facet", "true", "facet.field", t1);
     query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", 10, "sort", i1 + " asc, id asc", "stats", "true", "stats.field", i1);
     query("q", "kings", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", 10, "sort", i1 + " asc, id asc", "spellcheck", "true", "spellcheck.build", "true", "qt", "spellCheckCompRH");
+    query("q", "*:*", "fq", s1 + ":a", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", 10, "sort", i1 + " asc, id asc", "group.truncate", "true", "facet", "true", "facet.field", t1);

     indexr(id,1, i1, 100, tlong, 100,t1,"now is the time for all good men",
            tdate_a, "2010-04-20T11:00:00Z",
@@ -105,7 +106,7 @@ public class TestDistributedGrouping extends BaseDistributedSearchTestCase {
     for (int shard = 0; shard < clients.size(); shard++) {
       int groupValue = values[shard];
       for (int i = 500; i < 600; i++) {
-        index_specific(shard, i1, groupValue, s1, "a", id, i * (shard + 1));
+        index_specific(shard, i1, groupValue, s1, "a", id, i * (shard + 1), t1, shard);
       }
     }
@@ -135,6 +136,8 @@ public class TestDistributedGrouping extends BaseDistributedSearchTestCase {
     // In order to validate this we need to make sure that during indexing that all documents of one group only occur on the same shard
     query("q", "*:*", "fq", s1 + ":a", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", 10, "sort", i1 + " asc, id asc", "group.ngroups", "true");
+    query("q", "*:*", "fq", s1 + ":a", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", 10, "sort", i1 + " asc, id asc", "group.truncate", "true");
+    query("q", "*:*", "fq", s1 + ":a", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", 10, "sort", i1 + " asc, id asc", "group.truncate", "true", "facet", "true", "facet.field", t1);

     // We cannot validate distributed grouping with scoring as first sort. since there is no global idf. We can check if no errors occur
     simpleQuery("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", 10, "sort", "score desc, _docid_ asc, id asc");