mirror of https://github.com/apache/lucene.git
SOLR-2776: Added support for group.truncate for distributed search. Also added some more jdocs.
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1173286 13f79535-47bb-0310-9956-ffa450edef68
parent 3013098230
commit d58350aead
@@ -339,7 +339,7 @@ New Features
   can be specified with a name in solrconfig.xml, and use hl.boundaryScanner=name
   parameter to specify the named <boundaryScanner/>. (koji)
 
-* SOLR-2066: Added support for distributed grouping.
+* SOLR-2066,SOLR-2776: Added support for distributed grouping.
   (Martijn van Groningen, Jasper van Veghel, Matt Beaumont)
 
 Bug Fixes

@@ -190,6 +190,7 @@ public class QueryComponent extends SearchComponent
       groupingSpec.setIncludeGroupCount(params.getBool(GroupParams.GROUP_TOTAL_COUNT, false));
       groupingSpec.setMain(params.getBool(GroupParams.GROUP_MAIN, false));
       groupingSpec.setNeedScore((cmd.getFlags() & SolrIndexSearcher.GET_SCORES) != 0);
+      groupingSpec.setTruncateGroups(params.getBool(GroupParams.GROUP_TRUNCATE, false));
     }
 
 
@@ -402,6 +403,7 @@ public class QueryComponent extends SearchComponent
       } else if (params.getBool("group.distibuted.second", false)) {
         CommandHandler.Builder secondPhaseBuilder = new CommandHandler.Builder()
             .setQueryCommand(cmd)
+            .setTruncateGroups(groupingSpec.isTruncateGroups() && groupingSpec.getFields().length > 0)
             .setSearcher(searcher);
 
         for (String field : groupingSpec.getFields()) {

@@ -453,7 +455,6 @@ public class QueryComponent extends SearchComponent
 
       int maxDocsPercentageToCache = params.getInt(GroupParams.GROUP_CACHE_PERCENTAGE, 0);
       boolean cacheSecondPassSearch = maxDocsPercentageToCache >= 1 && maxDocsPercentageToCache <= 100;
-      boolean truncateGroups = params.getBool(GroupParams.GROUP_TRUNCATE, false);
       Grouping.TotalCount defaultTotalCount = groupingSpec.isIncludeGroupCount() ?
           Grouping.TotalCount.grouped : Grouping.TotalCount.ungrouped;
       int limitDefault = cmd.getLen(); // this is normally from "rows"

@@ -466,7 +467,7 @@ public class QueryComponent extends SearchComponent
           .setDefaultTotalCount(defaultTotalCount)
           .setDocsPerGroupDefault(groupingSpec.getGroupLimit())
           .setGroupOffsetDefault(groupingSpec.getGroupOffset())
-          .setGetGroupedDocSet(truncateGroups);
+          .setGetGroupedDocSet(groupingSpec.isTruncateGroups());
 
       if (groupingSpec.getFields() != null) {
         for (String field : groupingSpec.getFields()) {

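The hunks above cover both grouping code paths in QueryComponent: the single-node path passes the flag to the grouping collaborator via setGetGroupedDocSet(...), and the distributed second phase passes it to CommandHandler.Builder.setTruncateGroups(...). For illustration only (not part of this commit), a request that drives these paths could be assembled as below; the parameter names are the standard Solr grouping and faceting parameters, everything else is a hypothetical helper.

    import org.apache.solr.common.params.ModifiableSolrParams;

    public class TruncatedGroupingRequest {

      // Assembles parameters for a grouped request whose facet counts should be
      // computed over the group heads only (group.truncate=true).
      public static ModifiableSolrParams build(String groupField, String facetField) {
        ModifiableSolrParams params = new ModifiableSolrParams();
        params.set("q", "*:*");
        params.set("rows", "10");
        params.set("group", "true");
        params.set("group.field", groupField);
        params.set("group.truncate", "true"); // the parameter read as GroupParams.GROUP_TRUNCATE above
        params.set("facet", "true");
        params.set("facet.field", facetField);
        return params;
      }
    }
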
@@ -25,18 +25,44 @@ import java.io.IOException;
 import java.util.List;
 
 /**
+ * Defines a grouping command.
+ * This is an abstraction on how the {@link Collector} instances are created
+ * and how the results are retrieved from the {@link Collector} instances.
  *
+ * @lucene.experimental
  */
 public interface Command<T> {
 
+  /**
+   * Returns a list of {@link Collector} instances to be
+   * included in the search based on the .
+   *
+   * @return a list of {@link Collector} instances
+   * @throws IOException If I/O related errors occur
+   */
   List<Collector> create() throws IOException;
 
+  /**
+   * Returns the results that the collectors created
+   * by {@link #create()} contain after a search has been executed.
+   *
+   * @return The results of the collectors
+   */
   T result();
 
+  /**
+   * @return The key of this command to uniquely identify itself
+   */
   String getKey();
 
+  /**
+   * @return The group sort (overall sort)
+   */
   Sort getGroupSort();
 
+  /**
+   * @return The sort inside a group
+   */
   Sort getSortWithinGroup();
 
 }

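To make the documented contract concrete, here is a minimal sketch of what an implementation of Command<T> could look like. HitCountCommand is hypothetical and not part of this commit; it assumes the same package as Command and uses only Lucene's stock TotalHitCountCollector.

    import java.io.IOException;
    import java.util.Collections;
    import java.util.List;

    import org.apache.lucene.search.Collector;
    import org.apache.lucene.search.Sort;
    import org.apache.lucene.search.TotalHitCountCollector;

    public class HitCountCommand implements Command<Integer> {

      private final TotalHitCountCollector collector = new TotalHitCountCollector();

      // One collector per command; CommandHandler wraps all commands' collectors into a MultiCollector.
      public List<Collector> create() throws IOException {
        return Collections.<Collector>singletonList(collector);
      }

      // Read back after the search has been executed.
      public Integer result() {
        return collector.getTotalHits();
      }

      public String getKey() {
        return "hitCount";
      }

      public Sort getGroupSort() {
        return Sort.RELEVANCE;
      }

      public Sort getSortWithinGroup() {
        return Sort.RELEVANCE;
      }
    }
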
@@ -21,6 +21,10 @@ import org.apache.lucene.search.Collector;
 import org.apache.lucene.search.Filter;
 import org.apache.lucene.search.MultiCollector;
 import org.apache.lucene.search.Query;
+import org.apache.lucene.search.grouping.AbstractAllGroupHeadsCollector;
+import org.apache.lucene.search.grouping.TermAllGroupHeadsCollector;
+import org.apache.lucene.util.FixedBitSet;
+import org.apache.lucene.util.OpenBitSet;
 import org.apache.solr.common.util.NamedList;
 import org.apache.solr.search.*;
 import org.apache.solr.search.grouping.distributed.shardresultserializer.ShardResultTransformer;

@@ -30,7 +34,10 @@ import java.util.ArrayList;
 import java.util.List;
 
 /**
+ * Responsible for executing a search with a number of {@link Command} instances.
+ * A typical search can have more then one {@link Command} instances.
  *
+ * @lucene.experimental
  */
 public class CommandHandler {
 

@@ -40,6 +47,7 @@ public class CommandHandler {
     private List<Command> commands = new ArrayList<Command>();
     private SolrIndexSearcher searcher;
     private boolean needDocSet = false;
+    private boolean truncateGroups = false;
 
     public Builder setQueryCommand(SolrIndexSearcher.QueryCommand queryCommand) {
       this.queryCommand = queryCommand;

@@ -69,12 +77,17 @@ public class CommandHandler {
       return this;
     }
 
+    public Builder setTruncateGroups(boolean truncateGroups) {
+      this.truncateGroups = truncateGroups;
+      return this;
+    }
+
     public CommandHandler build() {
       if (queryCommand == null || searcher == null) {
         throw new IllegalStateException("All fields must be set");
       }
 
-      return new CommandHandler(queryCommand, commands, searcher, needDocSet);
+      return new CommandHandler(queryCommand, commands, searcher, needDocSet, truncateGroups);
     }
 
   }

@@ -83,17 +96,19 @@ public class CommandHandler {
   private final List<Command> commands;
   private final SolrIndexSearcher searcher;
   private final boolean needDocset;
+  private final boolean truncateGroups;
 
   private DocSet docSet;
 
   private CommandHandler(SolrIndexSearcher.QueryCommand queryCommand,
                          List<Command> commands,
                          SolrIndexSearcher searcher,
-                         boolean needDocset) {
+                         boolean needDocset, boolean truncateGroups) {
     this.queryCommand = queryCommand;
     this.commands = commands;
     this.searcher = searcher;
     this.needDocset = needDocset;
+    this.truncateGroups = truncateGroups;
   }
 
   @SuppressWarnings("unchecked")

@@ -109,31 +124,48 @@ public class CommandHandler {
     );
     Filter luceneFilter = pf.filter;
     Query query = QueryUtils.makeQueryable(queryCommand.getQuery());
-    Collector wrappedCollectors;
-    if (collectors.isEmpty()) {
-      wrappedCollectors = null;
+
+    if (truncateGroups && nrOfCommands > 0) {
+      docSet = computeGroupedDocSet(query, luceneFilter, collectors);
+    } else if (needDocset) {
+      docSet = computeDocSet(query, luceneFilter, collectors);
     } else {
-      wrappedCollectors = MultiCollector.wrap(collectors.toArray(new Collector[nrOfCommands]));
+      searcher.search(query, luceneFilter, MultiCollector.wrap(collectors.toArray(new Collector[nrOfCommands])));
     }
+  }
 
-    if (wrappedCollectors == null && needDocset) {
-      int maxDoc = searcher.maxDoc();
-      DocSetCollector docSetCollector = new DocSetCollector(maxDoc >> 6, maxDoc);
-      searcher.search(query, luceneFilter, docSetCollector);
-      docSet = docSetCollector.getDocSet();
-    } else if (needDocset) {
-      int maxDoc = searcher.maxDoc();
-      DocSetCollector docSetCollector = new DocSetDelegateCollector(maxDoc >> 6, maxDoc, wrappedCollectors);
-      searcher.search(query, luceneFilter, docSetCollector);
-      docSet = docSetCollector.getDocSet();
+  private DocSet computeGroupedDocSet(Query query, Filter luceneFilter, List<Collector> collectors) throws IOException {
+    Command firstCommand = commands.get(0);
+    AbstractAllGroupHeadsCollector termAllGroupHeadsCollector =
+        TermAllGroupHeadsCollector.create(firstCommand.getKey(), firstCommand.getSortWithinGroup());
+    if (collectors.isEmpty()) {
+      searcher.search(query, luceneFilter, termAllGroupHeadsCollector);
     } else {
-      searcher.search(query, luceneFilter, wrappedCollectors);
+      collectors.add(termAllGroupHeadsCollector);
+      searcher.search(query, luceneFilter, MultiCollector.wrap(collectors.toArray(new Collector[collectors.size()])));
     }
+
+    int maxDoc = searcher.maxDoc();
+    long[] bits = termAllGroupHeadsCollector.retrieveGroupHeads(maxDoc).getBits();
+    return new BitDocSet(new OpenBitSet(bits, bits.length));
+  }
+
+  private DocSet computeDocSet(Query query, Filter luceneFilter, List<Collector> collectors) throws IOException {
+    int maxDoc = searcher.maxDoc();
+    DocSetCollector docSetCollector;
+    if (collectors.isEmpty()) {
+      docSetCollector = new DocSetCollector(maxDoc >> 6, maxDoc);
+    } else {
+      Collector wrappedCollectors = MultiCollector.wrap(collectors.toArray(new Collector[collectors.size()]));
+      docSetCollector = new DocSetDelegateCollector(maxDoc >> 6, maxDoc, wrappedCollectors);
+    }
+    searcher.search(query, luceneFilter, docSetCollector);
+    return docSetCollector.getDocSet();
+  }
 
   @SuppressWarnings("unchecked")
   public NamedList processResult(SolrIndexSearcher.QueryResult queryResult, ShardResultTransformer transformer) throws IOException {
-    if (needDocset) {
+    if (docSet != null) {
       queryResult.setDocSet(docSet);
     }
     return transformer.transform(commands);

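The heart of the change is computeGroupedDocSet(): when group.truncate is active it collects only the group heads (the most relevant document per group) and exposes them as the DocSet that faceting and stats later operate on. Standing alone, the same idea looks roughly like the sketch below; it is illustrative only, mirrors the calls used in the hunk above, and assumes an IndexSearcher over an index whose groupField holds a single term per document.

    import java.io.IOException;

    import org.apache.lucene.search.IndexSearcher;
    import org.apache.lucene.search.MatchAllDocsQuery;
    import org.apache.lucene.search.Sort;
    import org.apache.lucene.search.grouping.AbstractAllGroupHeadsCollector;
    import org.apache.lucene.search.grouping.TermAllGroupHeadsCollector;
    import org.apache.lucene.util.OpenBitSet;
    import org.apache.solr.search.BitDocSet;
    import org.apache.solr.search.DocSet;

    public class GroupHeadsSketch {

      // Returns a DocSet containing one document (the group head) per distinct value of groupField.
      public static DocSet groupHeads(IndexSearcher searcher, String groupField) throws IOException {
        AbstractAllGroupHeadsCollector collector =
            TermAllGroupHeadsCollector.create(groupField, Sort.RELEVANCE);
        searcher.search(new MatchAllDocsQuery(), collector);
        int maxDoc = searcher.getIndexReader().maxDoc();
        long[] bits = collector.retrieveGroupHeads(maxDoc).getBits();
        return new BitDocSet(new OpenBitSet(bits, bits.length));
      }
    }
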
@@ -22,6 +22,8 @@ import org.apache.solr.search.Grouping;
 
 /**
  * Encapsulates the grouping options like fields group sort and more specified by clients.
+ *
+ * @lucene.experimental
  */
 public class GroupingSpecification {
 

@@ -38,6 +40,7 @@ public class GroupingSpecification {
   private boolean main;
   private Grouping.Format responseFormat;
   private boolean needScore;
+  private boolean truncateGroups;
 
   public String[] getFields() {
     return fields;

@@ -155,4 +158,11 @@ public class GroupingSpecification {
     this.needScore = needScore;
   }
 
+  public boolean isTruncateGroups() {
+    return truncateGroups;
+  }
+
+  public void setTruncateGroups(boolean truncateGroups) {
+    this.truncateGroups = truncateGroups;
+  }
 }

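GroupingSpecification is a plain holder that QueryComponent fills from the request and the grouping code reads back. A tiny illustrative snippet (not part of the commit; it assumes the class has the usual no-arg constructor and uses only setters visible in the hunks above; the import is omitted because the diff does not show the package):

    public class GroupingSpecExample {

      public static GroupingSpecification truncatingSpec() {
        GroupingSpecification spec = new GroupingSpecification();
        spec.setIncludeGroupCount(false);
        spec.setMain(false);
        spec.setNeedScore(true);
        spec.setTruncateGroups(true); // the property introduced by this commit
        return spec;
      }
    }
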
@@ -25,7 +25,9 @@ import org.apache.solr.search.DocSet;
 import java.io.IOException;
 
 /**
- * A collector that filters incoming doc ids that are not in the filter
+ * A collector that filters incoming doc ids that are not in the filter.
+ *
+ * @lucene.experimental
  */
 public class FilterCollector extends Collector {
 

@@ -22,6 +22,8 @@ import org.apache.solr.handler.component.ShardRequest;
 
 /**
  * Responsible for creating shard requests to the shards in the cluster to perform distributed grouping.
+ *
+ * @lucene.experimental
  */
 public interface ShardRequestFactory {
 

@@ -22,6 +22,8 @@ import org.apache.solr.handler.component.ShardRequest;
 
 /**
  * Responsible for processing shard responses.
+ *
+ * @lucene.experimental
  */
 public interface ShardResponseProcessor {
 

@@ -79,7 +79,7 @@ public class QueryCommand implements Command<QueryCommandResult> {
      *
      * @param searcher The searcher executing the
      * @return this
-     * @throws IOException
+     * @throws IOException If I/O related errors occur.
      */
     public Builder setDocSet(SolrIndexSearcher searcher) throws IOException {
       return setDocSet(searcher.getDocSet(query));

@@ -26,6 +26,8 @@ import java.io.IOException;
  * A <code>ShardResultTransformer</code> is responsible for transforming a grouped shard result into group related
  * structures (such as {@link org.apache.lucene.search.grouping.TopGroups} and {@link org.apache.lucene.search.grouping.SearchGroup})
  * and visa versa.
+ *
+ * @lucene.experimental
  */
 public interface ShardResultTransformer<T, R> {
 

@@ -26,6 +26,8 @@ import java.util.Map;
 
 /**
  * Responsible for transforming the grouped result into the final format for displaying purposes.
+ *
+ * @lucene.experimental
  */
 public interface EndResultTransformer {
 

@@ -56,6 +56,7 @@ public class TestDistributedGrouping extends BaseDistributedSearchTestCase {
     query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", 10, "sort", i1 + " asc, id asc", "facet", "true", "facet.field", t1);
     query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", 10, "sort", i1 + " asc, id asc", "stats", "true", "stats.field", i1);
     query("q", "kings", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", 10, "sort", i1 + " asc, id asc", "spellcheck", "true", "spellcheck.build", "true", "qt", "spellCheckCompRH");
+    query("q", "*:*", "fq", s1 + ":a", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", 10, "sort", i1 + " asc, id asc", "group.truncate", "true", "facet", "true", "facet.field", t1);
 
     indexr(id,1, i1, 100, tlong, 100,t1,"now is the time for all good men",
            tdate_a, "2010-04-20T11:00:00Z",

@@ -105,7 +106,7 @@ public class TestDistributedGrouping extends BaseDistributedSearchTestCase {
     for (int shard = 0; shard < clients.size(); shard++) {
       int groupValue = values[shard];
       for (int i = 500; i < 600; i++) {
-        index_specific(shard, i1, groupValue, s1, "a", id, i * (shard + 1));
+        index_specific(shard, i1, groupValue, s1, "a", id, i * (shard + 1), t1, shard);
       }
     }
 

@@ -135,6 +136,8 @@ public class TestDistributedGrouping extends BaseDistributedSearchTestCase {
 
     // In order to validate this we need to make sure that during indexing that all documents of one group only occur on the same shard
     query("q", "*:*", "fq", s1 + ":a", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", 10, "sort", i1 + " asc, id asc", "group.ngroups", "true");
+    query("q", "*:*", "fq", s1 + ":a", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", 10, "sort", i1 + " asc, id asc", "group.truncate", "true");
+    query("q", "*:*", "fq", s1 + ":a", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", 10, "sort", i1 + " asc, id asc", "group.truncate", "true", "facet", "true", "facet.field", t1);
 
     // We cannot validate distributed grouping with scoring as first sort. since there is no global idf. We can check if no errors occur
     simpleQuery("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", 10, "sort", "score desc, _docid_ asc, id asc");

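The added test lines compare the distributed response against a single-node control collection through query(...) from BaseDistributedSearchTestCase, so any mismatch in truncated facet counts fails the test. A further variation in the same style, shown only as an illustration and not part of the commit, would combine group.truncate with the stats component (i1 and s1 are the field names already used by the test):

    // Hypothetical extra assertion inside the same test method:
    query("q", "*:*", "fq", s1 + ":a",
          "rows", 100, "fl", "id," + i1,
          "group", "true", "group.field", i1, "group.limit", 10,
          "sort", i1 + " asc, id asc",
          "group.truncate", "true",
          "stats", "true", "stats.field", i1);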