SOLR-2776: Added support for group.truncate for distributed search. Also added some more jdocs.

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1173286 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Martijn van Groningen 2011-09-20 18:15:23 +00:00
parent 3013098230
commit d58350aead
12 changed files with 106 additions and 24 deletions

View File

@ -339,7 +339,7 @@ New Features
can be specified with a name in solrconfig.xml, and use hl.boundaryScanner=name
parameter to specify the named <boundaryScanner/>. (koji)
* SOLR-2066: Added support for distributed grouping.
* SOLR-2066,SOLR-2776: Added support for distributed grouping.
(Martijn van Groningen, Jasper van Veghel, Matt Beaumont)
Bug Fixes

View File

@ -190,6 +190,7 @@ public class QueryComponent extends SearchComponent
groupingSpec.setIncludeGroupCount(params.getBool(GroupParams.GROUP_TOTAL_COUNT, false));
groupingSpec.setMain(params.getBool(GroupParams.GROUP_MAIN, false));
groupingSpec.setNeedScore((cmd.getFlags() & SolrIndexSearcher.GET_SCORES) != 0);
groupingSpec.setTruncateGroups(params.getBool(GroupParams.GROUP_TRUNCATE, false));
}
@ -402,6 +403,7 @@ public class QueryComponent extends SearchComponent
} else if (params.getBool("group.distibuted.second", false)) {
CommandHandler.Builder secondPhaseBuilder = new CommandHandler.Builder()
.setQueryCommand(cmd)
.setTruncateGroups(groupingSpec.isTruncateGroups() && groupingSpec.getFields().length > 0)
.setSearcher(searcher);
for (String field : groupingSpec.getFields()) {
@ -453,7 +455,6 @@ public class QueryComponent extends SearchComponent
int maxDocsPercentageToCache = params.getInt(GroupParams.GROUP_CACHE_PERCENTAGE, 0);
boolean cacheSecondPassSearch = maxDocsPercentageToCache >= 1 && maxDocsPercentageToCache <= 100;
boolean truncateGroups = params.getBool(GroupParams.GROUP_TRUNCATE, false);
Grouping.TotalCount defaultTotalCount = groupingSpec.isIncludeGroupCount() ?
Grouping.TotalCount.grouped : Grouping.TotalCount.ungrouped;
int limitDefault = cmd.getLen(); // this is normally from "rows"
@ -466,7 +467,7 @@ public class QueryComponent extends SearchComponent
.setDefaultTotalCount(defaultTotalCount)
.setDocsPerGroupDefault(groupingSpec.getGroupLimit())
.setGroupOffsetDefault(groupingSpec.getGroupOffset())
.setGetGroupedDocSet(truncateGroups);
.setGetGroupedDocSet(groupingSpec.isTruncateGroups());
if (groupingSpec.getFields() != null) {
for (String field : groupingSpec.getFields()) {

View File

@ -25,18 +25,44 @@ import java.io.IOException;
import java.util.List;
/**
* Defines a grouping command.
* This is an abstraction on how the {@link Collector} instances are created
* and how the results are retrieved from the {@link Collector} instances.
*
* @lucene.experimental
*/
public interface Command<T> {
/**
* Returns a list of {@link Collector} instances to be
* included in the search based on the .
*
* @return a list of {@link Collector} instances
* @throws IOException If I/O related errors occur
*/
List<Collector> create() throws IOException;
/**
* Returns the results that the collectors created
* by {@link #create()} contain after a search has been executed.
*
* @return The results of the collectors
*/
T result();
/**
* @return The key of this command to uniquely identify itself
*/
String getKey();
/**
* @return The group sort (overall sort)
*/
Sort getGroupSort();
/**
* @return The sort inside a group
*/
Sort getSortWithinGroup();
}

View File

@ -21,6 +21,10 @@ import org.apache.lucene.search.Collector;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.MultiCollector;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.grouping.AbstractAllGroupHeadsCollector;
import org.apache.lucene.search.grouping.TermAllGroupHeadsCollector;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.OpenBitSet;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.search.*;
import org.apache.solr.search.grouping.distributed.shardresultserializer.ShardResultTransformer;
@ -30,7 +34,10 @@ import java.util.ArrayList;
import java.util.List;
/**
* Responsible for executing a search with a number of {@link Command} instances.
* A typical search can have more then one {@link Command} instances.
*
* @lucene.experimental
*/
public class CommandHandler {
@ -40,6 +47,7 @@ public class CommandHandler {
private List<Command> commands = new ArrayList<Command>();
private SolrIndexSearcher searcher;
private boolean needDocSet = false;
private boolean truncateGroups = false;
public Builder setQueryCommand(SolrIndexSearcher.QueryCommand queryCommand) {
this.queryCommand = queryCommand;
@ -69,12 +77,17 @@ public class CommandHandler {
return this;
}
public Builder setTruncateGroups(boolean truncateGroups) {
this.truncateGroups = truncateGroups;
return this;
}
public CommandHandler build() {
if (queryCommand == null || searcher == null) {
throw new IllegalStateException("All fields must be set");
}
return new CommandHandler(queryCommand, commands, searcher, needDocSet);
return new CommandHandler(queryCommand, commands, searcher, needDocSet, truncateGroups);
}
}
@ -83,17 +96,19 @@ public class CommandHandler {
private final List<Command> commands;
private final SolrIndexSearcher searcher;
private final boolean needDocset;
private final boolean truncateGroups;
private DocSet docSet;
private CommandHandler(SolrIndexSearcher.QueryCommand queryCommand,
List<Command> commands,
SolrIndexSearcher searcher,
boolean needDocset) {
boolean needDocset, boolean truncateGroups) {
this.queryCommand = queryCommand;
this.commands = commands;
this.searcher = searcher;
this.needDocset = needDocset;
this.truncateGroups = truncateGroups;
}
@SuppressWarnings("unchecked")
@ -109,31 +124,48 @@ public class CommandHandler {
);
Filter luceneFilter = pf.filter;
Query query = QueryUtils.makeQueryable(queryCommand.getQuery());
Collector wrappedCollectors;
if (collectors.isEmpty()) {
wrappedCollectors = null;
if (truncateGroups && nrOfCommands > 0) {
docSet = computeGroupedDocSet(query, luceneFilter, collectors);
} else if (needDocset) {
docSet = computeDocSet(query, luceneFilter, collectors);
} else {
wrappedCollectors = MultiCollector.wrap(collectors.toArray(new Collector[nrOfCommands]));
searcher.search(query, luceneFilter, MultiCollector.wrap(collectors.toArray(new Collector[nrOfCommands])));
}
}
if (wrappedCollectors == null && needDocset) {
int maxDoc = searcher.maxDoc();
DocSetCollector docSetCollector = new DocSetCollector(maxDoc >> 6, maxDoc);
searcher.search(query, luceneFilter, docSetCollector);
docSet = docSetCollector.getDocSet();
} else if (needDocset) {
int maxDoc = searcher.maxDoc();
DocSetCollector docSetCollector = new DocSetDelegateCollector(maxDoc >> 6, maxDoc, wrappedCollectors);
searcher.search(query, luceneFilter, docSetCollector);
docSet = docSetCollector.getDocSet();
private DocSet computeGroupedDocSet(Query query, Filter luceneFilter, List<Collector> collectors) throws IOException {
Command firstCommand = commands.get(0);
AbstractAllGroupHeadsCollector termAllGroupHeadsCollector =
TermAllGroupHeadsCollector.create(firstCommand.getKey(), firstCommand.getSortWithinGroup());
if (collectors.isEmpty()) {
searcher.search(query, luceneFilter, termAllGroupHeadsCollector);
} else {
searcher.search(query, luceneFilter, wrappedCollectors);
collectors.add(termAllGroupHeadsCollector);
searcher.search(query, luceneFilter, MultiCollector.wrap(collectors.toArray(new Collector[collectors.size()])));
}
int maxDoc = searcher.maxDoc();
long[] bits = termAllGroupHeadsCollector.retrieveGroupHeads(maxDoc).getBits();
return new BitDocSet(new OpenBitSet(bits, bits.length));
}
private DocSet computeDocSet(Query query, Filter luceneFilter, List<Collector> collectors) throws IOException {
int maxDoc = searcher.maxDoc();
DocSetCollector docSetCollector;
if (collectors.isEmpty()) {
docSetCollector = new DocSetCollector(maxDoc >> 6, maxDoc);
} else {
Collector wrappedCollectors = MultiCollector.wrap(collectors.toArray(new Collector[collectors.size()]));
docSetCollector = new DocSetDelegateCollector(maxDoc >> 6, maxDoc, wrappedCollectors);
}
searcher.search(query, luceneFilter, docSetCollector);
return docSetCollector.getDocSet();
}
@SuppressWarnings("unchecked")
public NamedList processResult(SolrIndexSearcher.QueryResult queryResult, ShardResultTransformer transformer) throws IOException {
if (needDocset) {
if (docSet != null) {
queryResult.setDocSet(docSet);
}
return transformer.transform(commands);

View File

@ -22,6 +22,8 @@ import org.apache.solr.search.Grouping;
/**
* Encapsulates the grouping options like fields group sort and more specified by clients.
*
* @lucene.experimental
*/
public class GroupingSpecification {
@ -38,6 +40,7 @@ public class GroupingSpecification {
private boolean main;
private Grouping.Format responseFormat;
private boolean needScore;
private boolean truncateGroups;
public String[] getFields() {
return fields;
@ -155,4 +158,11 @@ public class GroupingSpecification {
this.needScore = needScore;
}
public boolean isTruncateGroups() {
return truncateGroups;
}
public void setTruncateGroups(boolean truncateGroups) {
this.truncateGroups = truncateGroups;
}
}

View File

@ -25,7 +25,9 @@ import org.apache.solr.search.DocSet;
import java.io.IOException;
/**
* A collector that filters incoming doc ids that are not in the filter
* A collector that filters incoming doc ids that are not in the filter.
*
* @lucene.experimental
*/
public class FilterCollector extends Collector {

View File

@ -22,6 +22,8 @@ import org.apache.solr.handler.component.ShardRequest;
/**
* Responsible for creating shard requests to the shards in the cluster to perform distributed grouping.
*
* @lucene.experimental
*/
public interface ShardRequestFactory {

View File

@ -22,6 +22,8 @@ import org.apache.solr.handler.component.ShardRequest;
/**
* Responsible for processing shard responses.
*
* @lucene.experimental
*/
public interface ShardResponseProcessor {

View File

@ -79,7 +79,7 @@ public class QueryCommand implements Command<QueryCommandResult> {
*
* @param searcher The searcher executing the
* @return this
* @throws IOException
* @throws IOException If I/O related errors occur.
*/
public Builder setDocSet(SolrIndexSearcher searcher) throws IOException {
return setDocSet(searcher.getDocSet(query));

View File

@ -26,6 +26,8 @@ import java.io.IOException;
* A <code>ShardResultTransformer</code> is responsible for transforming a grouped shard result into group related
* structures (such as {@link org.apache.lucene.search.grouping.TopGroups} and {@link org.apache.lucene.search.grouping.SearchGroup})
* and visa versa.
*
* @lucene.experimental
*/
public interface ShardResultTransformer<T, R> {

View File

@ -26,6 +26,8 @@ import java.util.Map;
/**
* Responsible for transforming the grouped result into the final format for displaying purposes.
*
* @lucene.experimental
*/
public interface EndResultTransformer {

View File

@ -56,6 +56,7 @@ public class TestDistributedGrouping extends BaseDistributedSearchTestCase {
query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", 10, "sort", i1 + " asc, id asc", "facet", "true", "facet.field", t1);
query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", 10, "sort", i1 + " asc, id asc", "stats", "true", "stats.field", i1);
query("q", "kings", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", 10, "sort", i1 + " asc, id asc", "spellcheck", "true", "spellcheck.build", "true", "qt", "spellCheckCompRH");
query("q", "*:*", "fq", s1 + ":a", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", 10, "sort", i1 + " asc, id asc", "group.truncate", "true", "facet", "true", "facet.field", t1);
indexr(id,1, i1, 100, tlong, 100,t1,"now is the time for all good men",
tdate_a, "2010-04-20T11:00:00Z",
@ -105,7 +106,7 @@ public class TestDistributedGrouping extends BaseDistributedSearchTestCase {
for (int shard = 0; shard < clients.size(); shard++) {
int groupValue = values[shard];
for (int i = 500; i < 600; i++) {
index_specific(shard, i1, groupValue, s1, "a", id, i * (shard + 1));
index_specific(shard, i1, groupValue, s1, "a", id, i * (shard + 1), t1, shard);
}
}
@ -135,6 +136,8 @@ public class TestDistributedGrouping extends BaseDistributedSearchTestCase {
// In order to validate this we need to make sure that during indexing that all documents of one group only occur on the same shard
query("q", "*:*", "fq", s1 + ":a", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", 10, "sort", i1 + " asc, id asc", "group.ngroups", "true");
query("q", "*:*", "fq", s1 + ":a", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", 10, "sort", i1 + " asc, id asc", "group.truncate", "true");
query("q", "*:*", "fq", s1 + ":a", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", 10, "sort", i1 + " asc, id asc", "group.truncate", "true", "facet", "true", "facet.field", t1);
// We cannot validate distributed grouping with scoring as first sort. since there is no global idf. We can check if no errors occur
simpleQuery("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", 10, "sort", "score desc, _docid_ asc, id asc");