SOLR-3436: Group count incorrect when not all shards are queried in the second pass.

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1338194 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Martijn van Groningen 2012-05-14 13:42:17 +00:00
parent 015d8601c8
commit 04dceabc18
11 changed files with 135 additions and 79 deletions

View File

@ -425,6 +425,9 @@ Bug Fixes
* SOLR-3370: fixed CSVResponseWriter to respect globs in the 'fl' param
(Keith Fligg via hossman)
* SOLR-3436: Group count incorrect when not all shards are queried in the second
pass. (Francois Perron, Martijn van Groningen)
Other Changes
----------------------

View File

@ -32,22 +32,13 @@ import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.ReaderUtil;
import org.apache.lucene.util.UnicodeUtil;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.util.ClientUtils;
import org.apache.solr.cloud.CloudDescriptor;
import org.apache.solr.cloud.ZkController;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.cloud.CloudState;
import org.apache.solr.common.cloud.Slice;
import org.apache.solr.common.cloud.ZkCoreNodeProps;
import org.apache.solr.common.cloud.ZkNodeProps;
import org.apache.solr.common.cloud.ZkStateReader;
import org.apache.solr.common.params.*;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.common.util.StrUtils;
import org.apache.solr.core.CoreDescriptor;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.response.ResultContext;
import org.apache.solr.response.SolrQueryResponse;
@ -56,8 +47,6 @@ import org.apache.solr.schema.SchemaField;
import org.apache.solr.search.*;
import org.apache.solr.search.grouping.CommandHandler;
import org.apache.solr.search.grouping.GroupingSpecification;
import org.apache.solr.search.grouping.distributed.shardresultserializer.TopGroupsResultTransformer;
import org.apache.solr.search.grouping.endresulttransformer.EndResultTransformer;
import org.apache.solr.search.grouping.distributed.ShardRequestFactory;
import org.apache.solr.search.grouping.distributed.ShardResponseProcessor;
import org.apache.solr.search.grouping.distributed.command.QueryCommand;
@ -70,6 +59,8 @@ import org.apache.solr.search.grouping.distributed.responseprocessor.SearchGroup
import org.apache.solr.search.grouping.distributed.responseprocessor.StoredFieldsShardResponseProcessor;
import org.apache.solr.search.grouping.distributed.responseprocessor.TopGroupsShardResponseProcessor;
import org.apache.solr.search.grouping.distributed.shardresultserializer.SearchGroupsResultTransformer;
import org.apache.solr.search.grouping.distributed.shardresultserializer.TopGroupsResultTransformer;
import org.apache.solr.search.grouping.endresulttransformer.EndResultTransformer;
import org.apache.solr.search.grouping.endresulttransformer.GroupedEndResultTransformer;
import org.apache.solr.search.grouping.endresulttransformer.MainEndResultTransformer;
import org.apache.solr.search.grouping.endresulttransformer.SimpleEndResultTransformer;
@ -288,6 +279,7 @@ public class QueryComponent extends SearchComponent
.setField(searcher.getSchema().getField(field))
.setGroupSort(groupingSpec.getGroupSort())
.setTopNGroups(cmd.getOffset() + cmd.getLen())
.setIncludeGroupCount(groupingSpec.isIncludeGroupCount())
.build()
);
}
@ -329,7 +321,6 @@ public class QueryComponent extends SearchComponent
.setMaxDocPerGroup(groupingSpec.getGroupOffset() + groupingSpec.getGroupLimit())
.setNeedScores(needScores)
.setNeedMaxScore(needScores)
.setNeedGroupCount(groupingSpec.isIncludeGroupCount())
.build()
);
}

View File

@ -168,6 +168,7 @@ public class ResponseBuilder
// Context fields for grouping
public final Map<String, Collection<SearchGroup<BytesRef>>> mergedSearchGroups = new HashMap<String, Collection<SearchGroup<BytesRef>>>();
public final Map<String, Integer> mergedGroupCounts = new HashMap<String, Integer>();
public final Map<String, Map<SearchGroup<BytesRef>, Set<String>>> searchGroupToShards = new HashMap<String, Map<SearchGroup<BytesRef>, Set<String>>>();
public final Map<String, TopGroups<BytesRef>> mergedTopGroups = new HashMap<String, TopGroups<BytesRef>>();
public final Map<String, QueryCommandResult> mergedQueryCommandResults = new HashMap<String, QueryCommandResult>();

View File

@ -0,0 +1,42 @@
package org.apache.solr.search.grouping.distributed.command;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* A simple data structure to hold a pair of typed objects.
*
* @lucene.experimental
*/
public class Pair<A, B> {
private final A a;
private final B b;
public Pair(A a, B b) {
this.a = a;
this.b = b;
}
public A getA() {
return a;
}
public B getB() {
return b;
}
}

View File

@ -20,27 +20,26 @@ package org.apache.solr.search.grouping.distributed.command;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.grouping.SearchGroup;
import org.apache.lucene.search.grouping.term.TermAllGroupsCollector;
import org.apache.lucene.search.grouping.term.TermFirstPassGroupingCollector;
import org.apache.lucene.util.BytesRef;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.search.grouping.Command;
import java.io.IOException;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.*;
/**
*
* Creates all the collectors needed for the first phase and how to handle the results.
*/
public class SearchGroupsFieldCommand implements Command<Collection<SearchGroup<BytesRef>>> {
public class SearchGroupsFieldCommand implements Command<Pair<Integer, Collection<SearchGroup<BytesRef>>>> {
public static class Builder {
private SchemaField field;
private Sort groupSort;
private Integer topNGroups;
private boolean includeGroupCount = false;
public Builder setField(SchemaField field) {
this.field = field;
@ -57,12 +56,17 @@ public class SearchGroupsFieldCommand implements Command<Collection<SearchGroup<
return this;
}
public Builder setIncludeGroupCount(boolean includeGroupCount) {
this.includeGroupCount = includeGroupCount;
return this;
}
public SearchGroupsFieldCommand build() {
if (field == null || groupSort == null || topNGroups == null) {
throw new IllegalStateException("All fields must be set");
}
return new SearchGroupsFieldCommand(field, groupSort, topNGroups);
return new SearchGroupsFieldCommand(field, groupSort, topNGroups, includeGroupCount);
}
}
@ -70,30 +74,45 @@ public class SearchGroupsFieldCommand implements Command<Collection<SearchGroup<
private final SchemaField field;
private final Sort groupSort;
private final int topNGroups;
private final boolean includeGroupCount;
private TermFirstPassGroupingCollector firstPassGroupingCollector;
private TermAllGroupsCollector allGroupsCollector;
private SearchGroupsFieldCommand(SchemaField field, Sort groupSort, int topNGroups) {
private SearchGroupsFieldCommand(SchemaField field, Sort groupSort, int topNGroups, boolean includeGroupCount) {
this.field = field;
this.groupSort = groupSort;
this.topNGroups = topNGroups;
this.includeGroupCount = includeGroupCount;
}
public List<Collector> create() throws IOException {
List<Collector> collectors = new ArrayList<Collector>();
if (topNGroups > 0) {
firstPassGroupingCollector = new TermFirstPassGroupingCollector(field.getName(), groupSort, topNGroups);
return Arrays.asList((Collector) firstPassGroupingCollector);
} else {
return Collections.emptyList();
collectors.add(firstPassGroupingCollector);
}
if (includeGroupCount) {
allGroupsCollector = new TermAllGroupsCollector(field.getName());
collectors.add(allGroupsCollector);
}
return collectors;
}
public Collection<SearchGroup<BytesRef>> result() {
public Pair<Integer, Collection<SearchGroup<BytesRef>>> result() {
final Collection<SearchGroup<BytesRef>> topGroups;
if (topNGroups > 0) {
return firstPassGroupingCollector.getTopGroups(0, true);
topGroups = firstPassGroupingCollector.getTopGroups(0, true);
} else {
return Collections.emptyList();
topGroups = Collections.emptyList();
}
final Integer groupCount;
if (includeGroupCount) {
groupCount = allGroupsCollector.getGroupCount();
} else {
groupCount = null;
}
return new Pair<Integer, Collection<SearchGroup<BytesRef>>>(groupCount, topGroups);
}
public Sort getSortWithinGroup() {

View File

@ -21,9 +21,8 @@ import org.apache.lucene.search.Collector;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.grouping.GroupDocs;
import org.apache.lucene.search.grouping.SearchGroup;
import org.apache.lucene.search.grouping.term.TermAllGroupsCollector;
import org.apache.lucene.search.grouping.term.TermSecondPassGroupingCollector;
import org.apache.lucene.search.grouping.TopGroups;
import org.apache.lucene.search.grouping.term.TermSecondPassGroupingCollector;
import org.apache.lucene.util.BytesRef;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.search.grouping.Command;
@ -35,7 +34,7 @@ import java.util.Collections;
import java.util.List;
/**
*
* Defines all collectors for retrieving the second phase and how to handle the collector result.
*/
public class TopGroupsFieldCommand implements Command<TopGroups<BytesRef>> {
@ -48,7 +47,6 @@ public class TopGroupsFieldCommand implements Command<TopGroups<BytesRef>> {
private Integer maxDocPerGroup;
private boolean needScores = false;
private boolean needMaxScore = false;
private boolean needGroupCount = false;
public Builder setField(SchemaField field) {
this.field = field;
@ -85,18 +83,13 @@ public class TopGroupsFieldCommand implements Command<TopGroups<BytesRef>> {
return this;
}
public Builder setNeedGroupCount(Boolean needGroupCount) {
this.needGroupCount = needGroupCount;
return this;
}
public TopGroupsFieldCommand build() {
if (field == null || groupSort == null || sortWithinGroup == null || firstPhaseGroups == null ||
maxDocPerGroup == null) {
throw new IllegalStateException("All required fields must be set");
}
return new TopGroupsFieldCommand(field, groupSort, sortWithinGroup, firstPhaseGroups, maxDocPerGroup, needScores, needMaxScore, needGroupCount);
return new TopGroupsFieldCommand(field, groupSort, sortWithinGroup, firstPhaseGroups, maxDocPerGroup, needScores, needMaxScore);
}
}
@ -108,10 +101,7 @@ public class TopGroupsFieldCommand implements Command<TopGroups<BytesRef>> {
private final int maxDocPerGroup;
private final boolean needScores;
private final boolean needMaxScore;
private final boolean needGroupCount;
private TermSecondPassGroupingCollector secondPassCollector;
private TermAllGroupsCollector allGroupsCollector;
private TopGroupsFieldCommand(SchemaField field,
Sort groupSort,
@ -119,8 +109,7 @@ public class TopGroupsFieldCommand implements Command<TopGroups<BytesRef>> {
Collection<SearchGroup<BytesRef>> firstPhaseGroups,
int maxDocPerGroup,
boolean needScores,
boolean needMaxScore,
boolean needGroupCount) {
boolean needMaxScore) {
this.field = field;
this.groupSort = groupSort;
this.sortWithinGroup = sortWithinGroup;
@ -128,7 +117,6 @@ public class TopGroupsFieldCommand implements Command<TopGroups<BytesRef>> {
this.maxDocPerGroup = maxDocPerGroup;
this.needScores = needScores;
this.needMaxScore = needMaxScore;
this.needGroupCount = needGroupCount;
}
public List<Collector> create() throws IOException {
@ -141,11 +129,6 @@ public class TopGroupsFieldCommand implements Command<TopGroups<BytesRef>> {
field.getName(), firstPhaseGroups, groupSort, sortWithinGroup, maxDocPerGroup, needScores, needMaxScore, true
);
collectors.add(secondPassCollector);
if (!needGroupCount) {
return collectors;
}
allGroupsCollector = new TermAllGroupsCollector(field.getName());
collectors.add(allGroupsCollector);
return collectors;
}
@ -155,11 +138,7 @@ public class TopGroupsFieldCommand implements Command<TopGroups<BytesRef>> {
return new TopGroups<BytesRef>(groupSort.getSort(), sortWithinGroup.getSort(), 0, 0, new GroupDocs[0]);
}
TopGroups<BytesRef> result = secondPassCollector.getTopGroups(0);
if (allGroupsCollector != null) {
result = new TopGroups<BytesRef>(result, allGroupsCollector.getGroupCount());
}
return result;
return secondPassCollector.getTopGroups(0);
}
public String getKey() {

View File

@ -27,6 +27,7 @@ import org.apache.solr.handler.component.ShardRequest;
import org.apache.solr.handler.component.ShardResponse;
import org.apache.solr.search.SortSpec;
import org.apache.solr.search.grouping.distributed.ShardResponseProcessor;
import org.apache.solr.search.grouping.distributed.command.Pair;
import org.apache.solr.search.grouping.distributed.shardresultserializer.SearchGroupsResultTransformer;
import java.io.IOException;
@ -63,9 +64,17 @@ public class SearchGroupShardResponseProcessor implements ShardResponseProcessor
maxElapsedTime = (int) Math.max(maxElapsedTime, srsp.getSolrResponse().getElapsedTime());
@SuppressWarnings("unchecked")
NamedList<NamedList> firstPhaseResult = (NamedList<NamedList>) srsp.getSolrResponse().getResponse().get("firstPhase");
Map<String, Collection<SearchGroup<BytesRef>>> result = serializer.transformToNative(firstPhaseResult, groupSort, null, srsp.getShard());
Map<String, Pair<Integer, Collection<SearchGroup<BytesRef>>>> result = serializer.transformToNative(firstPhaseResult, groupSort, null, srsp.getShard());
for (String field : commandSearchGroups.keySet()) {
Collection<SearchGroup<BytesRef>> searchGroups = result.get(field);
Pair<Integer, Collection<SearchGroup<BytesRef>>> firstPhaseCommandResult = result.get(field);
Integer groupCount = firstPhaseCommandResult.getA();
if (groupCount != null) {
Integer existingGroupCount = rb.mergedGroupCounts.get(field);
// Assuming groups don't cross shard boundary...
rb.mergedGroupCounts.put(field, existingGroupCount != null ? existingGroupCount + groupCount : groupCount);
}
Collection<SearchGroup<BytesRef>> searchGroups = firstPhaseCommandResult.getB();
if (searchGroups == null) {
continue;
}

View File

@ -27,6 +27,7 @@ import org.apache.solr.schema.FieldType;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.search.grouping.Command;
import org.apache.solr.search.grouping.distributed.command.Pair;
import org.apache.solr.search.grouping.distributed.command.SearchGroupsFieldCommand;
import java.io.IOException;
@ -35,7 +36,7 @@ import java.util.*;
/**
* Implementation for transforming {@link SearchGroup} into a {@link NamedList} structure and visa versa.
*/
public class SearchGroupsResultTransformer implements ShardResultTransformer<List<Command>, Map<String, Collection<SearchGroup<BytesRef>>>> {
public class SearchGroupsResultTransformer implements ShardResultTransformer<List<Command>, Map<String, Pair<Integer, Collection<SearchGroup<BytesRef>>>>> {
private final SolrIndexSearcher searcher;
@ -49,17 +50,20 @@ public class SearchGroupsResultTransformer implements ShardResultTransformer<Lis
public NamedList transform(List<Command> data) throws IOException {
NamedList<NamedList> result = new NamedList<NamedList>();
for (Command command : data) {
NamedList commandResult;
final NamedList<Object> commandResult = new NamedList<Object>();
if (SearchGroupsFieldCommand.class.isInstance(command)) {
SearchGroupsFieldCommand fieldCommand = (SearchGroupsFieldCommand) command;
Collection<SearchGroup<BytesRef>> searchGroups = fieldCommand.result();
if (searchGroups == null) {
continue;
Pair<Integer, Collection<SearchGroup<BytesRef>>> pair = fieldCommand.result();
Integer groupedCount = pair.getA();
Collection<SearchGroup<BytesRef>> searchGroups = pair.getB();
if (searchGroups != null) {
commandResult.add("topGroups", serializeSearchGroup(searchGroups, fieldCommand.getGroupSort()));
}
if (groupedCount != null) {
commandResult.add("groupCount", groupedCount);
}
commandResult = serializeSearchGroup(searchGroups, fieldCommand.getGroupSort());
} else {
commandResult = null;
continue;
}
result.add(command.getKey(), commandResult);
@ -70,20 +74,24 @@ public class SearchGroupsResultTransformer implements ShardResultTransformer<Lis
/**
* {@inheritDoc}
*/
public Map<String, Collection<SearchGroup<BytesRef>>> transformToNative(NamedList<NamedList> shardResponse, Sort groupSort, Sort sortWithinGroup, String shard) throws IOException {
Map<String, Collection<SearchGroup<BytesRef>>> result = new HashMap<String, Collection<SearchGroup<BytesRef>>>();
public Map<String, Pair<Integer, Collection<SearchGroup<BytesRef>>>> transformToNative(NamedList<NamedList> shardResponse, Sort groupSort, Sort sortWithinGroup, String shard) throws IOException {
Map<String, Pair<Integer, Collection<SearchGroup<BytesRef>>>> result = new HashMap<String, Pair<Integer, Collection<SearchGroup<BytesRef>>>>();
for (Map.Entry<String, NamedList> command : shardResponse) {
List<SearchGroup<BytesRef>> searchGroups = new ArrayList<SearchGroup<BytesRef>>();
NamedList topGroupsAndGroupCount = command.getValue();
@SuppressWarnings("unchecked")
NamedList<List<Comparable>> rawSearchGroups = command.getValue();
NamedList<List<Comparable>> rawSearchGroups = (NamedList<List<Comparable>>) topGroupsAndGroupCount.get("topGroups");
if (rawSearchGroups != null) {
for (Map.Entry<String, List<Comparable>> rawSearchGroup : rawSearchGroups){
SearchGroup<BytesRef> searchGroup = new SearchGroup<BytesRef>();
searchGroup.groupValue = rawSearchGroup.getKey() != null ? new BytesRef(rawSearchGroup.getKey()) : null;
searchGroup.sortValues = rawSearchGroup.getValue().toArray(new Comparable[rawSearchGroup.getValue().size()]);
searchGroups.add(searchGroup);
}
}
result.put(command.getKey(), searchGroups);
Integer groupCount = (Integer) topGroupsAndGroupCount.get("groupCount");
result.put(command.getKey(), new Pair<Integer, Collection<SearchGroup<BytesRef>>>(groupCount, searchGroups));
}
return result;
}

View File

@ -115,10 +115,9 @@ public class TopGroupsResultTransformer implements ShardResultTransformer<List<C
}
Integer totalHitCount = (Integer) commandResult.get("totalHitCount");
Integer totalGroupCount = (Integer) commandResult.get("totalGroupCount");
List<GroupDocs<BytesRef>> groupDocs = new ArrayList<GroupDocs<BytesRef>>();
for (int i = totalGroupCount == null ? 2 : 3; i < commandResult.size(); i++) {
for (int i = 2; i < commandResult.size(); i++) {
String groupValue = commandResult.getName(i);
@SuppressWarnings("unchecked")
NamedList<Object> groupResult = (NamedList<Object>) commandResult.getVal(i);
@ -151,9 +150,6 @@ public class TopGroupsResultTransformer implements ShardResultTransformer<List<C
TopGroups<BytesRef> topGroups = new TopGroups<BytesRef>(
groupSort.getSort(), sortWithinGroup.getSort(), totalHitCount, totalGroupedHitCount, groupDocsArr
);
if (totalGroupCount != null) {
topGroups = new TopGroups<BytesRef>(topGroups, totalGroupCount);
}
result.put(key, topGroups);
}

View File

@ -57,8 +57,9 @@ public class GroupedEndResultTransformer implements EndResultTransformer {
TopGroups<BytesRef> topGroups = (TopGroups<BytesRef>) value;
NamedList<Object> command = new SimpleOrderedMap<Object>();
command.add("matches", rb.totalHitCount);
if (topGroups.totalGroupCount != null) {
command.add("ngroups", topGroups.totalGroupCount);
Integer totalGroupCount = rb.mergedGroupCounts.get(entry.getKey());
if (totalGroupCount != null) {
command.add("ngroups", totalGroupCount);
}
List<NamedList> groups = new ArrayList<NamedList>();

View File

@ -169,11 +169,16 @@ public class TestDistributedGrouping extends BaseDistributedSearchTestCase {
query("q", "*:*", "fq", s1 + ":a", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", 10, "sort", i1 + " asc, id asc", "group.truncate", "true");
query("q", "*:*", "fq", s1 + ":a", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", 10, "sort", i1 + " asc, id asc", "group.truncate", "true", "facet", "true", "facet.field", t1);
// SOLR-3316
query("q", "*:*", "fq", s1 + ":a", "rows", 0, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", 10, "sort", i1 + " asc, id asc", "facet", "true", "facet.field", t1);
query("q", "*:*", "fq", s1 + ":a", "rows", 0, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", 10, "sort", i1 + " asc, id asc", "group.truncate", "true", "facet", "true", "facet.field", t1);
// SOLR-3436
query("q", "*:*", "fq", s1 + ":a", "fl", "id," + i1, "group", "true", "group.field", i1, "sort", i1 + " asc, id asc", "group.ngroups", "true");
query("q", "*:*", "fq", s1 + ":a", "rows", 0, "fl", "id," + i1, "group", "true", "group.field", i1, "sort", i1 + " asc, id asc", "group.ngroups", "true");
ModifiableSolrParams params = new ModifiableSolrParams();
Object[] q = {"q", "*:*", "fq", s1 + ":a", "rows", 1, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", 10};
Object[] q = {"q", "*:*", "fq", s1 + ":a", "rows", 1, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", 10, "group.ngroups", "true"};
for (int i = 0; i < q.length; i += 2) {
params.add(q[i].toString(), q[i + 1].toString());
@ -187,7 +192,9 @@ public class TestDistributedGrouping extends BaseDistributedSearchTestCase {
NamedList nl = (NamedList<?>) rsp.getResponse().get("grouped");
nl = (NamedList<?>) nl.getVal(0);
int matches = (Integer) nl.getVal(0);
int groupCount = (Integer) nl.get("ngroups");
assertEquals(100 * shardsArr.length, matches);
assertEquals(shardsArr.length, groupCount);
// We cannot validate distributed grouping with scoring as first sort. since there is no global idf. We can check if no errors occur