LUCENE-3099: allow subclasses to determine the group value

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1130858 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael McCandless 2011-06-02 22:35:17 +00:00
parent 710e630e91
commit d1548ca30a
14 changed files with 533 additions and 359 deletions

View File

@ -75,6 +75,10 @@ API Changes
* LUCENE-3141: add getter method to access fragInfos in FieldFragList.
(Sujit Pal via Koji Sekiguchi)
* LUCENE-3099: Allow subclasses to determine the group value for
First/SecondPassGroupingCollector. (Martijn van Groningen, Mike
McCandless)
Build
* LUCENE-3149: Upgrade contrib/icu's ICU jar file to ICU 4.8.

View File

@ -0,0 +1,67 @@
package org.apache.lucene.search.grouping;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.util.BytesRef;
import java.io.IOException;
import java.util.Collection;
/**
* A collector that collects all groups that match the
* query. Only the group value is collected, and the order
* is undefined. This collector does not determine
* the most relevant document of a group.
*
* <p/>
* This is an abstract version. Concrete implementations define
* what a group actually is and how it is internally collected.
*
* @lucene.experimental
*/
public abstract class AbstractAllGroupsCollector<GROUP_VALUE_TYPE> extends Collector {
/**
* Returns the total number of groups for the executed search.
* This is a convenience method. The following code snippet has the same effect: <pre>getGroups().size()</pre>
*
* @return The total number of groups for the executed search
*/
public int getGroupCount() {
return getGroups().size();
}
/**
* Returns the group values
* <p/>
* This is an unordered collections of group values. For each group that matched the query there is a {@link BytesRef}
* representing a group value.
*
* @return the group values
*/
public abstract Collection<GROUP_VALUE_TYPE> getGroups();
// Empty not necessary
public void setScorer(Scorer scorer) throws IOException {}
public boolean acceptsDocsOutOfOrder() {
return true;
}
}

View File

@ -17,56 +17,39 @@ package org.apache.lucene.search.grouping;
* limitations under the License.
*/
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Comparator;
import java.util.HashMap;
import java.util.TreeSet;
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.FieldCache;
import org.apache.lucene.search.FieldComparator;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.search.*;
import java.io.IOException;
import java.util.*;
/** FirstPassGroupingCollector is the first of two passes necessary
* to collect grouped hits. This pass gathers the top N sorted
* groups.
* groups. Concrete subclasses define what a group is and how it
* is internally collected.
*
* <p>See {@link org.apache.lucene.search.grouping} for more
* details including a full code example.</p>
*
* @lucene.experimental
*/
abstract public class AbstractFirstPassGroupingCollector<GROUP_VALUE_TYPE> extends Collector {
public class FirstPassGroupingCollector extends Collector {
private final String groupField;
private final Sort groupSort;
private final FieldComparator[] comparators;
private final int[] reversed;
private final int topNGroups;
private final HashMap<BytesRef, CollectedSearchGroup> groupMap;
private final BytesRef scratchBytesRef = new BytesRef();
private final HashMap<GROUP_VALUE_TYPE, CollectedSearchGroup<GROUP_VALUE_TYPE>> groupMap;
private final int compIDXEnd;
// Set once we reach topNGroups unique groups:
private TreeSet<CollectedSearchGroup> orderedGroups;
private TreeSet<CollectedSearchGroup<GROUP_VALUE_TYPE>> orderedGroups;
private int docBase;
private int spareSlot;
private FieldCache.DocTermsIndex index;
/**
* Create the first pass collector.
*
* @param groupField The field used to group
* documents. This field must be single-valued and
* indexed (FieldCache is used to access its value
* per-document).
* @param groupSort The {@link Sort} used to sort the
* groups. The top sorted document within each group
* according to groupSort, determines how that group
@ -74,13 +57,13 @@ public class FirstPassGroupingCollector extends Collector {
* ie, if you want to groupSort by relevance use
* Sort.RELEVANCE.
* @param topNGroups How many top groups to keep.
* @throws IOException If I/O related errors occur
*/
public FirstPassGroupingCollector(String groupField, Sort groupSort, int topNGroups) throws IOException {
public AbstractFirstPassGroupingCollector(Sort groupSort, int topNGroups) throws IOException {
if (topNGroups < 1) {
throw new IllegalArgumentException("topNGroups must be >= 1 (got " + topNGroups + ")");
}
this.groupField = groupField;
// TODO: allow null groupSort to mean "by relevance",
// and specialize it?
this.groupSort = groupSort;
@ -100,13 +83,19 @@ public class FirstPassGroupingCollector extends Collector {
}
spareSlot = topNGroups;
groupMap = new HashMap<BytesRef, CollectedSearchGroup>(topNGroups);
groupMap = new HashMap<GROUP_VALUE_TYPE, CollectedSearchGroup<GROUP_VALUE_TYPE>>(topNGroups);
}
/** Returns top groups, starting from offset. This may
* return null, if no groups were collected, or if the
* number of unique groups collected is <= offset. */
public Collection<SearchGroup> getTopGroups(int groupOffset, boolean fillFields) {
/**
* Returns top groups, starting from offset. This may
* return null, if no groups were collected, or if the
* number of unique groups collected is <= offset.
*
* @param groupOffset The offset in the collected groups
* @param fillFields Whether to fill to {@link SearchGroup#sortValues}
* @return top groups, starting from offset
*/
public Collection<SearchGroup<GROUP_VALUE_TYPE>> getTopGroups(int groupOffset, boolean fillFields) {
//System.out.println("FP.getTopGroups groupOffset=" + groupOffset + " fillFields=" + fillFields + " groupMap.size()=" + groupMap.size());
@ -122,15 +111,15 @@ public class FirstPassGroupingCollector extends Collector {
buildSortedSet();
}
final Collection<SearchGroup> result = new ArrayList<SearchGroup>();
final Collection<SearchGroup<GROUP_VALUE_TYPE>> result = new ArrayList<SearchGroup<GROUP_VALUE_TYPE>>();
int upto = 0;
final int sortFieldCount = groupSort.getSort().length;
for(CollectedSearchGroup group : orderedGroups) {
for(CollectedSearchGroup<GROUP_VALUE_TYPE> group : orderedGroups) {
if (upto++ < groupOffset) {
continue;
}
//System.out.println(" group=" + (group.groupValue == null ? "null" : group.groupValue.utf8ToString()));
SearchGroup searchGroup = new SearchGroup();
SearchGroup<GROUP_VALUE_TYPE> searchGroup = new SearchGroup<GROUP_VALUE_TYPE>();
searchGroup.groupValue = group.groupValue;
if (fillFields) {
searchGroup.sortValues = new Comparable[sortFieldCount];
@ -144,10 +133,6 @@ public class FirstPassGroupingCollector extends Collector {
return result;
}
public String getGroupField() {
return groupField;
}
@Override
public void setScorer(Scorer scorer) throws IOException {
for (FieldComparator comparator : comparators) {
@ -189,13 +174,9 @@ public class FirstPassGroupingCollector extends Collector {
// TODO: should we add option to mean "ignore docs that
// don't have the group field" (instead of stuffing them
// under null group)?
final int ord = index.getOrd(doc);
//System.out.println(" ord=" + ord);
final GROUP_VALUE_TYPE groupValue = getDocGroupValue(doc);
final BytesRef br = ord == 0 ? null : index.lookup(ord, scratchBytesRef);
//System.out.println(" group=" + (br == null ? "null" : br.utf8ToString()));
final CollectedSearchGroup group = groupMap.get(br);
final CollectedSearchGroup<GROUP_VALUE_TYPE> group = groupMap.get(groupValue);
if (group == null) {
@ -210,8 +191,8 @@ public class FirstPassGroupingCollector extends Collector {
// just keep collecting them
// Add a new CollectedSearchGroup:
CollectedSearchGroup sg = new CollectedSearchGroup();
sg.groupValue = ord == 0 ? null : new BytesRef(scratchBytesRef);
CollectedSearchGroup<GROUP_VALUE_TYPE> sg = new CollectedSearchGroup<GROUP_VALUE_TYPE>();
sg.groupValue = copyDocGroupValue(groupValue, null);
sg.comparatorSlot = groupMap.size();
sg.topDoc = docBase + doc;
for (FieldComparator fc : comparators) {
@ -233,20 +214,14 @@ public class FirstPassGroupingCollector extends Collector {
// the bottom group with this new group.
// java 6-only: final CollectedSearchGroup bottomGroup = orderedGroups.pollLast();
final CollectedSearchGroup bottomGroup = orderedGroups.last();
final CollectedSearchGroup<GROUP_VALUE_TYPE> bottomGroup = orderedGroups.last();
orderedGroups.remove(bottomGroup);
assert orderedGroups.size() == topNGroups -1;
groupMap.remove(bottomGroup.groupValue);
// reuse the removed CollectedSearchGroup
if (br == null) {
bottomGroup.groupValue = null;
} else if (bottomGroup.groupValue != null) {
bottomGroup.groupValue.copy(br);
} else {
bottomGroup.groupValue = new BytesRef(br);
}
bottomGroup.groupValue = copyDocGroupValue(groupValue, bottomGroup.groupValue);
bottomGroup.topDoc = docBase + doc;
for (FieldComparator fc : comparators) {
@ -291,7 +266,7 @@ public class FirstPassGroupingCollector extends Collector {
// Remove before updating the group since lookup is done via comparators
// TODO: optimize this
final CollectedSearchGroup prevLast;
final CollectedSearchGroup<GROUP_VALUE_TYPE> prevLast;
if (orderedGroups != null) {
prevLast = orderedGroups.last();
orderedGroups.remove(group);
@ -336,7 +311,7 @@ public class FirstPassGroupingCollector extends Collector {
}
};
orderedGroups = new TreeSet<CollectedSearchGroup>(comparator);
orderedGroups = new TreeSet<CollectedSearchGroup<GROUP_VALUE_TYPE>>(comparator);
orderedGroups.addAll(groupMap.values());
assert orderedGroups.size() > 0;
@ -353,15 +328,31 @@ public class FirstPassGroupingCollector extends Collector {
@Override
public void setNextReader(AtomicReaderContext readerContext) throws IOException {
docBase = readerContext.docBase;
index = FieldCache.DEFAULT.getTermsIndex(readerContext.reader, groupField);
for (int i=0; i<comparators.length; i++) {
comparators[i] = comparators[i].setNextReader(readerContext);
}
}
/**
* Returns the group value for the specified doc.
*
* @param doc The specified doc
* @return the group value for the specified doc
*/
protected abstract GROUP_VALUE_TYPE getDocGroupValue(int doc);
/**
* Returns a copy of the specified group value by creating a new instance and copying the value from the specified
* groupValue in the new instance. Or optionally the reuse argument can be used to copy the group value in.
*
* @param groupValue The group value to copy
* @param reuse Optionally a reuse instance to prevent a new instance creation
* @return a copy of the specified group value
*/
protected abstract GROUP_VALUE_TYPE copyDocGroupValue(GROUP_VALUE_TYPE groupValue, GROUP_VALUE_TYPE reuse);
}
class CollectedSearchGroup extends SearchGroup {
class CollectedSearchGroup<T> extends SearchGroup<T> {
int topDoc;
int comparatorSlot;
}

View File

@ -0,0 +1,156 @@
package org.apache.lucene.search.grouping;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
import org.apache.lucene.search.*;
import java.io.IOException;
import java.util.Collection;
import java.util.HashMap;
import java.util.Map;
/**
* SecondPassGroupingCollector is the second of two passes
* necessary to collect grouped docs. This pass gathers the
* top N documents per top group computed from the
* first pass. Concrete subclasses define what a group is and how it
* is internally collected.
*
* <p>See {@link org.apache.lucene.search.grouping} for more
* details including a full code example.</p>
*
* @lucene.experimental
*/
public abstract class AbstractSecondPassGroupingCollector<GROUP_VALUE_TYPE> extends Collector {
protected final Map<GROUP_VALUE_TYPE, SearchGroupDocs<GROUP_VALUE_TYPE>> groupMap;
private final int maxDocsPerGroup;
protected SearchGroupDocs<GROUP_VALUE_TYPE>[] groupDocs;
private final Collection<SearchGroup<GROUP_VALUE_TYPE>> groups;
private final Sort withinGroupSort;
private final Sort groupSort;
private int totalHitCount;
private int totalGroupedHitCount;
public AbstractSecondPassGroupingCollector(Collection<SearchGroup<GROUP_VALUE_TYPE>> groups, Sort groupSort, Sort withinGroupSort,
int maxDocsPerGroup, boolean getScores, boolean getMaxScores, boolean fillSortFields)
throws IOException {
//System.out.println("SP init");
if (groups.size() == 0) {
throw new IllegalArgumentException("no groups to collect (groups.size() is 0)");
}
this.groupSort = groupSort;
this.withinGroupSort = withinGroupSort;
this.groups = groups;
this.maxDocsPerGroup = maxDocsPerGroup;
groupMap = new HashMap<GROUP_VALUE_TYPE, SearchGroupDocs<GROUP_VALUE_TYPE>>(groups.size());
for (SearchGroup<GROUP_VALUE_TYPE> group : groups) {
//System.out.println(" prep group=" + (group.groupValue == null ? "null" : group.groupValue.utf8ToString()));
final TopDocsCollector collector;
if (withinGroupSort == null) {
// Sort by score
collector = TopScoreDocCollector.create(maxDocsPerGroup, true);
} else {
// Sort by fields
collector = TopFieldCollector.create(withinGroupSort, maxDocsPerGroup, fillSortFields, getScores, getMaxScores, true);
}
groupMap.put(group.groupValue,
new SearchGroupDocs<GROUP_VALUE_TYPE>(group.groupValue,
collector));
}
}
@Override
public void setScorer(Scorer scorer) throws IOException {
for (SearchGroupDocs<GROUP_VALUE_TYPE> group : groupMap.values()) {
group.collector.setScorer(scorer);
}
}
@Override
public void collect(int doc) throws IOException {
totalHitCount++;
SearchGroupDocs<GROUP_VALUE_TYPE> group = retrieveGroup(doc);
if (group != null) {
totalGroupedHitCount++;
group.collector.collect(doc);
}
}
/**
* Returns the group the specified doc belongs to or <code>null</code> if no group could be retrieved.
*
* @param doc The specified doc
* @return the group the specified doc belongs to or <code>null</code> if no group could be retrieved
* @throws IOException If an I/O related error occurred
*/
protected abstract SearchGroupDocs<GROUP_VALUE_TYPE> retrieveGroup(int doc) throws IOException;
@Override
public void setNextReader(AtomicReaderContext readerContext) throws IOException {
//System.out.println("SP.setNextReader");
for (SearchGroupDocs<GROUP_VALUE_TYPE> group : groupMap.values()) {
group.collector.setNextReader(readerContext);
}
}
@Override
public boolean acceptsDocsOutOfOrder() {
return false;
}
public TopGroups<GROUP_VALUE_TYPE> getTopGroups(int withinGroupOffset) {
@SuppressWarnings("unchecked")
final GroupDocs<GROUP_VALUE_TYPE>[] groupDocsResult = (GroupDocs<GROUP_VALUE_TYPE>[]) new GroupDocs[groups.size()];
int groupIDX = 0;
for(SearchGroup group : groups) {
final SearchGroupDocs<GROUP_VALUE_TYPE> groupDocs = groupMap.get(group.groupValue);
final TopDocs topDocs = groupDocs.collector.topDocs(withinGroupOffset, maxDocsPerGroup);
groupDocsResult[groupIDX++] = new GroupDocs<GROUP_VALUE_TYPE>(topDocs.getMaxScore(),
topDocs.totalHits,
topDocs.scoreDocs,
groupDocs.groupValue,
group.sortValues);
}
return new TopGroups<GROUP_VALUE_TYPE>(groupSort.getSort(),
withinGroupSort == null ? null : withinGroupSort.getSort(),
totalHitCount, totalGroupedHitCount, groupDocsResult);
}
// TODO: merge with SearchGroup or not?
// ad: don't need to build a new hashmap
// disad: blows up the size of SearchGroup if we need many of them, and couples implementations
public class SearchGroupDocs<GROUP_VALUE_TYPE> {
public final GROUP_VALUE_TYPE groupValue;
public final TopDocsCollector collector;
public SearchGroupDocs(GROUP_VALUE_TYPE groupValue, TopDocsCollector collector) {
this.groupValue = groupValue;
this.collector = collector;
}
}
}

View File

@ -49,7 +49,7 @@ import org.apache.lucene.util.PriorityQueue;
* being that the documents in each group must always be
* indexed as a block. This collector also fills in
* TopGroups.totalGroupCount without requiring the separate
* {@link AllGroupsCollector}. However, this collector does
* {@link TermAllGroupsCollector}. However, this collector does
* not fill in the groupValue of each group; this field
* will always be null.
*
@ -317,7 +317,8 @@ public class BlockGroupingCollector extends Collector {
final FakeScorer fakeScorer = new FakeScorer();
final GroupDocs[] groups = new GroupDocs[groupQueue.size() - groupOffset];
@SuppressWarnings("unchecked")
final GroupDocs<Object>[] groups = new GroupDocs[groupQueue.size() - groupOffset];
for(int downTo=groupQueue.size()-groupOffset-1;downTo>=0;downTo--) {
final OneGroup og = groupQueue.pop();
@ -360,7 +361,7 @@ public class BlockGroupingCollector extends Collector {
final TopDocs topDocs = collector.topDocs(withinGroupOffset, maxDocsPerGroup);
groups[downTo] = new GroupDocs(topDocs.getMaxScore(),
groups[downTo] = new GroupDocs<Object>(topDocs.getMaxScore(),
og.count,
topDocs.scoreDocs,
null,
@ -375,7 +376,7 @@ public class BlockGroupingCollector extends Collector {
}
*/
return new TopGroups(new TopGroups(groupSort.getSort(),
return new TopGroups<Object>(new TopGroups<Object>(groupSort.getSort(),
withinGroupSort == null ? null : withinGroupSort.getSort(),
totalHitCount, totalGroupedHitCount, groups),
totalGroupCount);

View File

@ -18,15 +18,14 @@ package org.apache.lucene.search.grouping;
*/
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.util.BytesRef;
/** Represents one group in the results.
*
* @lucene.experimental */
public class GroupDocs {
public class GroupDocs<GROUP_VALUE_TYPE> {
/** The groupField value for all docs in this group; this
* may be null if hits did not have the groupField. */
public final BytesRef groupValue;
public final GROUP_VALUE_TYPE groupValue;
/** Max score in this group */
public final float maxScore;
@ -40,13 +39,13 @@ public class GroupDocs {
public final int totalHits;
/** Matches the groupSort passed to {@link
* FirstPassGroupingCollector}. */
* AbstractFirstPassGroupingCollector}. */
public final Comparable[] groupSortValues;
public GroupDocs(float maxScore,
int totalHits,
ScoreDoc[] scoreDocs,
BytesRef groupValue,
GROUP_VALUE_TYPE groupValue,
Comparable[] groupSortValues) {
this.maxScore = maxScore;
this.totalHits = totalHits;

View File

@ -17,10 +17,16 @@ package org.apache.lucene.search.grouping;
* limitations under the License.
*/
import org.apache.lucene.util.BytesRef;
/**
* Represents a group that is found during the first pass search.
*
* @lucene.experimental
*/
public class SearchGroup<GROUP_VALUE_TYPE> {
/** @lucene.experimental */
public class SearchGroup {
public BytesRef groupValue;
/** The value that defines this group */
public GROUP_VALUE_TYPE groupValue;
/** The sort values used during sorting. Can be <code>null</code>. */
public Comparable[] sortValues;
}

View File

@ -1,172 +0,0 @@
package org.apache.lucene.search.grouping;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.Collection;
import java.util.HashMap;
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.FieldCache;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TopDocsCollector;
import org.apache.lucene.search.TopFieldCollector;
import org.apache.lucene.search.TopScoreDocCollector;
import org.apache.lucene.util.BytesRef;
/**
* SecondPassGroupingCollector is the second of two passes
* necessary to collect grouped docs. This pass gathers the
* top N documents per top group computed from the
* first pass.
*
* <p>See {@link org.apache.lucene.search.grouping} for more
* details including a full code example.</p>
*
* @lucene.experimental
*/
public class SecondPassGroupingCollector extends Collector {
private final HashMap<BytesRef, SearchGroupDocs> groupMap;
private FieldCache.DocTermsIndex index;
private final String groupField;
private final int maxDocsPerGroup;
private final SentinelIntSet ordSet;
private final SearchGroupDocs[] groupDocs;
private final BytesRef spareBytesRef = new BytesRef();
private final Collection<SearchGroup> groups;
private final Sort withinGroupSort;
private final Sort groupSort;
private int totalHitCount;
private int totalGroupedHitCount;
public SecondPassGroupingCollector(String groupField, Collection<SearchGroup> groups, Sort groupSort, Sort withinGroupSort,
int maxDocsPerGroup, boolean getScores, boolean getMaxScores, boolean fillSortFields)
throws IOException {
//System.out.println("SP init");
if (groups.size() == 0) {
throw new IllegalArgumentException("no groups to collect (groups.size() is 0)");
}
this.groupSort = groupSort;
this.withinGroupSort = withinGroupSort;
this.groups = groups;
this.groupField = groupField;
this.maxDocsPerGroup = maxDocsPerGroup;
groupMap = new HashMap<BytesRef, SearchGroupDocs>(groups.size());
for (SearchGroup group : groups) {
//System.out.println(" prep group=" + (group.groupValue == null ? "null" : group.groupValue.utf8ToString()));
final TopDocsCollector collector;
if (withinGroupSort == null) {
// Sort by score
collector = TopScoreDocCollector.create(maxDocsPerGroup, true);
} else {
// Sort by fields
collector = TopFieldCollector.create(withinGroupSort, maxDocsPerGroup, fillSortFields, getScores, getMaxScores, true);
}
groupMap.put(group.groupValue,
new SearchGroupDocs(group.groupValue,
collector));
}
ordSet = new SentinelIntSet(groupMap.size(), -1);
groupDocs = new SearchGroupDocs[ordSet.keys.length];
}
@Override
public void setScorer(Scorer scorer) throws IOException {
for (SearchGroupDocs group : groupMap.values()) {
group.collector.setScorer(scorer);
}
}
@Override
public void collect(int doc) throws IOException {
final int slot = ordSet.find(index.getOrd(doc));
//System.out.println("SP.collect doc=" + doc + " slot=" + slot);
totalHitCount++;
if (slot >= 0) {
totalGroupedHitCount++;
groupDocs[slot].collector.collect(doc);
}
}
@Override
public void setNextReader(AtomicReaderContext readerContext) throws IOException {
//System.out.println("SP.setNextReader");
for (SearchGroupDocs group : groupMap.values()) {
group.collector.setNextReader(readerContext);
}
index = FieldCache.DEFAULT.getTermsIndex(readerContext.reader, groupField);
// Rebuild ordSet
ordSet.clear();
for (SearchGroupDocs group : groupMap.values()) {
//System.out.println(" group=" + (group.groupValue == null ? "null" : group.groupValue.utf8ToString()));
int ord = group.groupValue == null ? 0 : index.binarySearchLookup(group.groupValue, spareBytesRef);
if (ord >= 0) {
groupDocs[ordSet.put(ord)] = group;
}
}
}
@Override
public boolean acceptsDocsOutOfOrder() {
return false;
}
public TopGroups getTopGroups(int withinGroupOffset) {
final GroupDocs[] groupDocsResult = new GroupDocs[groups.size()];
int groupIDX = 0;
for(SearchGroup group : groups) {
final SearchGroupDocs groupDocs = groupMap.get(group.groupValue);
final TopDocs topDocs = groupDocs.collector.topDocs(withinGroupOffset, maxDocsPerGroup);
groupDocsResult[groupIDX++] = new GroupDocs(topDocs.getMaxScore(),
topDocs.totalHits,
topDocs.scoreDocs,
groupDocs.groupValue,
group.sortValues);
}
return new TopGroups(groupSort.getSort(),
withinGroupSort == null ? null : withinGroupSort.getSort(),
totalHitCount, totalGroupedHitCount, groupDocsResult);
}
}
// TODO: merge with SearchGroup or not?
// ad: don't need to build a new hashmap
// disad: blows up the size of SearchGroup if we need many of them, and couples implementations
class SearchGroupDocs {
public final BytesRef groupValue;
public final TopDocsCollector collector;
public SearchGroupDocs(BytesRef groupValue, TopDocsCollector collector) {
this.groupValue = groupValue;
this.collector = collector;
}
}

View File

@ -18,9 +18,7 @@ package org.apache.lucene.search.grouping;
*/
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.FieldCache;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.util.BytesRef;
import java.io.IOException;
@ -43,47 +41,44 @@ import java.util.List;
*
* @lucene.experimental
*/
public class AllGroupsCollector extends Collector {
public class TermAllGroupsCollector extends AbstractAllGroupsCollector<BytesRef> {
private static final int DEFAULT_INITIAL_SIZE = 128;
private final String groupField;
private final SentinelIntSet ordSet;
private final List<BytesRef> groups;
private final BytesRef spareBytesRef = new BytesRef();
private FieldCache.DocTermsIndex index;
private final BytesRef spareBytesRef = new BytesRef();
/**
* Expert: Constructs a {@link AllGroupsCollector}
* Expert: Constructs a {@link AbstractAllGroupsCollector}
*
* @param groupField The field to group by
* @param initialSize The initial allocation size of the
* internal int set and group list
* which should roughly match the total
* number of expected unique groups. Be aware that the
* heap usage is 4 bytes * initialSize.
* internal int set and group list
* which should roughly match the total
* number of expected unique groups. Be aware that the
* heap usage is 4 bytes * initialSize.
*/
public AllGroupsCollector(String groupField, int initialSize) {
this.groupField = groupField;
public TermAllGroupsCollector(String groupField, int initialSize) {
ordSet = new SentinelIntSet(initialSize, -1);
groups = new ArrayList<BytesRef>(initialSize);
this.groupField = groupField;
}
/**
* Constructs a {@link AllGroupsCollector}. This sets the
* Constructs a {@link AbstractAllGroupsCollector}. This sets the
* initial allocation size for the internal int set and group
* list to 128.
*
* @param groupField The field to group by
*/
public AllGroupsCollector(String groupField) {
public TermAllGroupsCollector(String groupField) {
this(groupField, DEFAULT_INITIAL_SIZE);
}
public void setScorer(Scorer scorer) throws IOException {
}
public void collect(int doc) throws IOException {
int key = index.getOrd(doc);
if (!ordSet.exists(key)) {
@ -94,22 +89,7 @@ public class AllGroupsCollector extends Collector {
}
/**
* Returns the total number of groups for the executed search.
* This is a convenience method. The following code snippet has the same effect: <pre>getGroups().size()</pre>
*
* @return The total number of groups for the executed search
*/
public int getGroupCount() {
return groups.size();
}
/**
* Returns the group values
* <p/>
* This is an unordered collections of group values. For each group that matched the query there is a {@link BytesRef}
* representing a group value.
*
* @return the group values
* {@inheritDoc}
*/
public Collection<BytesRef> getGroups() {
return groups;
@ -128,7 +108,4 @@ public class AllGroupsCollector extends Collector {
}
}
public boolean acceptsDocsOutOfOrder() {
return true;
}
}
}

View File

@ -0,0 +1,85 @@
package org.apache.lucene.search.grouping;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
import org.apache.lucene.search.FieldCache;
import org.apache.lucene.search.Sort;
import org.apache.lucene.util.BytesRef;
import java.io.IOException;
/**
 * Concrete implementation of {@link AbstractFirstPassGroupingCollector} that groups based on
 * field values and more specifically uses {@link org.apache.lucene.search.FieldCache.DocTerms}
 * to collect groups.
 *
 * @lucene.experimental
 */
public class TermFirstPassGroupingCollector extends AbstractFirstPassGroupingCollector<BytesRef> {

  // Reused for every lookup so we don't allocate a BytesRef per collected doc.
  private final BytesRef scratchBytesRef = new BytesRef();
  private FieldCache.DocTermsIndex index;

  private final String groupField;

  /**
   * Create the first pass collector.
   *
   *  @param groupField The field used to group
   *    documents. This field must be single-valued and
   *    indexed (FieldCache is used to access its value
   *    per-document).
   *  @param groupSort The {@link Sort} used to sort the
   *    groups.  The top sorted document within each group
   *    according to groupSort, determines how that group
   *    sorts against other groups.  This must be non-null,
   *    ie, if you want to groupSort by relevance use
   *    Sort.RELEVANCE.
   *  @param topNGroups How many top groups to keep.
   *  @throws IOException When I/O related errors occur
   */
  public TermFirstPassGroupingCollector(String groupField, Sort groupSort, int topNGroups) throws IOException {
    super(groupSort, topNGroups);
    this.groupField = groupField;
  }

  /**
   * Returns the group value for the given doc, or null when the doc has no
   * value for the group field (ord 0 is the sentinel for "no value" in the
   * DocTermsIndex). The returned BytesRef is a shared scratch instance; it is
   * only valid until the next call.
   */
  @Override
  protected BytesRef getDocGroupValue(int doc) {
    final int ord = index.getOrd(doc);
    return ord == 0 ? null : index.lookup(ord, scratchBytesRef);
  }

  /**
   * Makes a private copy of the (scratch) group value so it can be safely
   * retained across segments; reuses {@code reuse} when provided to avoid
   * allocation.
   */
  @Override
  protected BytesRef copyDocGroupValue(BytesRef groupValue, BytesRef reuse) {
    if (groupValue == null) {
      return null;
    } else if (reuse != null) {
      reuse.copy(groupValue);
      return reuse;
    } else {
      return new BytesRef(groupValue);
    }
  }

  /**
   * Loads the per-segment terms index for the group field; ords are
   * segment-local, so this must be refreshed on every segment transition.
   */
  @Override
  public void setNextReader(AtomicReaderContext readerContext) throws IOException {
    super.setNextReader(readerContext);
    index = FieldCache.DEFAULT.getTermsIndex(readerContext.reader, groupField);
  }
}

View File

@ -0,0 +1,76 @@
package org.apache.lucene.search.grouping;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
import org.apache.lucene.search.FieldCache;
import org.apache.lucene.search.Sort;
import org.apache.lucene.util.BytesRef;
import java.io.IOException;
import java.util.Collection;
/**
 * Concrete implementation of {@link AbstractSecondPassGroupingCollector} that groups based on
 * field values and more specifically uses {@link org.apache.lucene.search.FieldCache.DocTerms}
 * to collect grouped docs.
 *
 * @lucene.experimental
 */
public class TermSecondPassGroupingCollector extends AbstractSecondPassGroupingCollector<BytesRef> {

  private final String groupField;

  // Maps segment-local ords of the requested groups to slots in groupDocs.
  private final SentinelIntSet ordSet;

  // Scratch instance reused by binarySearchLookup so no per-group allocation happens.
  private final BytesRef lookupSpare = new BytesRef();

  private FieldCache.DocTermsIndex index;

  @SuppressWarnings("unchecked")
  public TermSecondPassGroupingCollector(String groupField, Collection<SearchGroup<BytesRef>> groups, Sort groupSort, Sort withinGroupSort,
                                         int maxDocsPerGroup, boolean getScores, boolean getMaxScores, boolean fillSortFields)
      throws IOException {
    super(groups, groupSort, withinGroupSort, maxDocsPerGroup, getScores, getMaxScores, fillSortFields);
    this.groupField = groupField;
    ordSet = new SentinelIntSet(groupMap.size(), -1);
    groupDocs = (SearchGroupDocs<BytesRef>[]) new SearchGroupDocs[ordSet.keys.length];
  }

  /**
   * Reloads the terms index for the new segment and rebuilds the ord-to-group
   * mapping, since term ords are only meaningful within a single segment.
   */
  @Override
  public void setNextReader(AtomicReaderContext readerContext) throws IOException {
    super.setNextReader(readerContext);
    index = FieldCache.DEFAULT.getTermsIndex(readerContext.reader, groupField);

    ordSet.clear();
    for (SearchGroupDocs<BytesRef> group : groupMap.values()) {
      final int ord;
      if (group.groupValue == null) {
        // Ord 0 is the sentinel for docs with no value in the group field.
        ord = 0;
      } else {
        ord = index.binarySearchLookup(group.groupValue, lookupSpare);
      }
      // A negative ord means this group's term does not occur in the segment.
      if (ord >= 0) {
        groupDocs[ordSet.put(ord)] = group;
      }
    }
  }

  /**
   * Returns the group the doc belongs to, or null when the doc's group is not
   * one of the requested top groups.
   */
  @Override
  protected SearchGroupDocs<BytesRef> retrieveGroup(int doc) throws IOException {
    final int slot = ordSet.find(index.getOrd(doc));
    return slot < 0 ? null : groupDocs[slot];
  }
}

View File

@ -22,7 +22,7 @@ import org.apache.lucene.search.SortField;
/** Represents result returned by a grouping search.
*
* @lucene.experimental */
public class TopGroups {
public class TopGroups<GROUP_VALUE_TYPE> {
/** Number of documents matching the search */
public final int totalHitCount;
@ -33,7 +33,7 @@ public class TopGroups {
public final Integer totalGroupCount;
/** Group results in groupSort order */
public final GroupDocs[] groups;
public final GroupDocs<GROUP_VALUE_TYPE>[] groups;
/** How groups are sorted against each other */
public final SortField[] groupSort;
@ -41,7 +41,7 @@ public class TopGroups {
/** How docs are sorted within each group */
public final SortField[] withinGroupSort;
public TopGroups(SortField[] groupSort, SortField[] withinGroupSort, int totalHitCount, int totalGroupedHitCount, GroupDocs[] groups) {
public TopGroups(SortField[] groupSort, SortField[] withinGroupSort, int totalHitCount, int totalGroupedHitCount, GroupDocs<GROUP_VALUE_TYPE>[] groups) {
this.groupSort = groupSort;
this.withinGroupSort = withinGroupSort;
this.totalHitCount = totalHitCount;
@ -50,7 +50,7 @@ public class TopGroups {
this.totalGroupCount = null;
}
public TopGroups(TopGroups oldTopGroups, Integer totalGroupCount) {
public TopGroups(TopGroups<GROUP_VALUE_TYPE> oldTopGroups, Integer totalGroupCount) {
this.groupSort = oldTopGroups.groupSort;
this.withinGroupSort = oldTopGroups.withinGroupSort;
this.totalHitCount = oldTopGroups.totalHitCount;

View File

@ -43,55 +43,37 @@ field fall into a single group.</p>
</ul>
<p>The implementation is two-pass: the first pass ({@link
org.apache.lucene.search.grouping.TermFirstPassGroupingCollector})
gathers the top groups, and the second pass ({@link
org.apache.lucene.search.grouping.TermSecondPassGroupingCollector})
gathers documents within those groups. If the search is costly to
run you may want to use the {@link
org.apache.lucene.search.CachingCollector} class, which
caches hits and can (quickly) replay them for the second pass. This
way you only run the query once, but you pay a RAM cost to (briefly)
hold all hits. Results are returned as a {@link
org.apache.lucene.search.grouping.TopGroups} instance.</p>
<p>
There are two grouping implementations here:
<ul>
<li>
Arbitrary grouping that can group by any single-valued indexed
field, implemented as a two-pass collector: the first pass ({@link
org.apache.lucene.search.grouping.FirstPassGroupingCollector})
gathers the top groups, and the second pass ({@link
org.apache.lucene.search.grouping.SecondPassGroupingCollector})
gathers documents within those groups. If the search is costly to
run you may want to use the {@link
org.apache.lucene.search.CachingCollector} class, which caches
hits and can (quickly) replay them for the second pass. This way
you only run the query once, but you pay a RAM cost to (briefly)
hold all hits. Results are returned as a {@link
org.apache.lucene.search.grouping.TopGroups} instance.</p>
</li>
<li>
Indexed groups, using a single pass collector (<code>BlockGroupingCollectorDoc</code>) that
is able to group according to the doc blocks created during
indexing using <code>IndexWriter</code>'s <code>add/updateDocuments</code> API.
This is faster (~25% faster QPS) than the generic two-pass
collector, but it only works for doc blocks so you must statically
commit (during indexing) to which grouping you'll need at search
time.
This module abstracts away what defines group and how it is collected. All grouping collectors
are abstract and have currently term based implementations. One can implement
collectors that for example group on multiple fields.
</p>
<p>This implementation does not rely on a single valued grouping
field; rather, the blocks in the index define the groups, so your
application is free to determine what the grouping criteria is.
At search time, you must provide a <code>Filter</code> that marks
the last document in each group. This is a substantial memory
savings because this collector does not load
a <code>DocTermsIndex</code> from the
<code>FieldCache</code>.
</li>
</ul>
<p>The benefit of the arbitrary grouping implementation is you don't have
to commit at indexing time to a static grouping of your documents.
But the downside is it's somewhat slower to run, and requires more RAM
(a <code>FieldCache.DocTermsIndex</code> entry is created).
<p>
This module abstracts away what defines group and how it is collected. All grouping collectors
are abstract and have currently term based implementations. One can implement
collectors that for example group on multiple fields.
</p>
<p>Known limitations:</p>
<ul>
<li> For the two-pass grouping collector, the group field must be a
single-valued indexed field.
{@link org.apache.lucene.search.FieldCache} is used to load the {@link org.apache.lucene.search.FieldCache.DocTermsIndex} for this field.
<li> Unlike Solr's implementation, this module cannot group by
function query values nor by arbitrary queries.
<li> Although Solr support grouping by function and this module has abstraction of what a group is, there are currently only
implementations for grouping based on terms.
<li> Sharding is not directly supported, though is not too
difficult, if you can merge the top groups and top documents per
group yourself.
@ -101,14 +83,14 @@ But the downside is it's somewhat slower to run, and requires more RAM
(using the {@link org.apache.lucene.search.CachingCollector}):</p>
<pre class="prettyprint">
FirstPassGroupingCollector c1 = new FirstPassGroupingCollector("author", groupSort, groupOffset+topNGroups);
TermFirstPassGroupingCollector c1 = new TermFirstPassGroupingCollector("author", groupSort, groupOffset+topNGroups);
boolean cacheScores = true;
double maxCacheRAMMB = 4.0;
CachingCollector cachedCollector = CachingCollector.create(c1, cacheScores, maxCacheRAMMB);
s.search(new TermQuery(new Term("content", searchTerm)), cachedCollector);
Collection<SearchGroup> topGroups = c1.getTopGroups(groupOffset, fillFields);
Collection<SearchGroup<BytesRef>> topGroups = c1.getTopGroups(groupOffset, fillFields);
if (topGroups == null) {
// No groups matched
@ -118,12 +100,12 @@ But the downside is it's somewhat slower to run, and requires more RAM
boolean getScores = true;
boolean getMaxScores = true;
boolean fillFields = true;
SecondPassGroupingCollector c2 = new SecondPassGroupingCollector("author", topGroups, groupSort, docSort, docOffset+docsPerGroup, getScores, getMaxScores, fillFields);
TermSecondPassGroupingCollector c2 = new TermSecondPassGroupingCollector("author", topGroups, groupSort, docSort, docOffset+docsPerGroup, getScores, getMaxScores, fillFields);
//Optionally compute total group count
AllGroupsCollector allGroupsCollector = null;
TermAllGroupsCollector allGroupsCollector = null;
if (requiredTotalGroupCount) {
allGroupsCollector = new AllGroupsCollector("author");
allGroupsCollector = new TermAllGroupsCollector("author");
c2 = MultiCollector.wrap(c2, allGroupsCollector);
}
@ -135,9 +117,9 @@ But the downside is it's somewhat slower to run, and requires more RAM
s.search(new TermQuery(new Term("content", searchTerm)), c2);
}
TopGroups groupsResult = c2.getTopGroups(docOffset);
TopGroups<BytesRef> groupsResult = c2.getTopGroups(docOffset);
if (requiredTotalGroupCount) {
groupResult = new TopGroups(groupsResult, allGroupsCollector.getGroupCount());
groupResult = new TopGroups<BytesRef>(groupsResult, allGroupsCollector.getGroupCount());
}
// Render groupsResult...

View File

@ -17,9 +17,6 @@
package org.apache.lucene.search.grouping;
import java.util.*;
import java.io.IOException;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
@ -33,6 +30,9 @@ import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util._TestUtil;
import java.io.IOException;
import java.util.*;
// TODO
// - should test relevance sort too
// - test null
@ -103,10 +103,10 @@ public class TestGrouping extends LuceneTestCase {
w.close();
final Sort groupSort = Sort.RELEVANCE;
final FirstPassGroupingCollector c1 = new FirstPassGroupingCollector(groupField, groupSort, 10);
final TermFirstPassGroupingCollector c1 = new TermFirstPassGroupingCollector(groupField, groupSort, 10);
indexSearcher.search(new TermQuery(new Term("content", "random")), c1);
final SecondPassGroupingCollector c2 = new SecondPassGroupingCollector(groupField, c1.getTopGroups(0, true), groupSort, null, 5, true, false, true);
final TermSecondPassGroupingCollector c2 = new TermSecondPassGroupingCollector(groupField, c1.getTopGroups(0, true), groupSort, null, 5, true, false, true);
indexSearcher.search(new TermQuery(new Term("content", "random")), c2);
final TopGroups groups = c2.getTopGroups(0);
@ -236,7 +236,7 @@ public class TestGrouping extends LuceneTestCase {
}
*/
private TopGroups slowGrouping(GroupDoc[] groupDocs,
private TopGroups<BytesRef> slowGrouping(GroupDoc[] groupDocs,
String searchTerm,
boolean fillFields,
boolean getScores,
@ -296,7 +296,8 @@ public class TestGrouping extends LuceneTestCase {
final int limit = Math.min(groupOffset + topNGroups, groups.size());
final Comparator<GroupDoc> docSortComp = getComparator(docSort);
final GroupDocs[] result = new GroupDocs[limit-groupOffset];
@SuppressWarnings("unchecked")
final GroupDocs<BytesRef>[] result = new GroupDocs[limit-groupOffset];
int totalGroupedHitCount = 0;
for(int idx=groupOffset;idx < limit;idx++) {
final BytesRef group = sortedGroups.get(idx);
@ -321,7 +322,7 @@ public class TestGrouping extends LuceneTestCase {
hits = new ScoreDoc[0];
}
result[idx-groupOffset] = new GroupDocs(0.0f,
result[idx-groupOffset] = new GroupDocs<BytesRef>(0.0f,
docs.size(),
hits,
group,
@ -329,12 +330,12 @@ public class TestGrouping extends LuceneTestCase {
}
if (doAllGroups) {
return new TopGroups(
new TopGroups(groupSort.getSort(), docSort.getSort(), totalHitCount, totalGroupedHitCount, result),
return new TopGroups<BytesRef>(
new TopGroups<BytesRef>(groupSort.getSort(), docSort.getSort(), totalHitCount, totalGroupedHitCount, result),
knownGroups.size()
);
} else {
return new TopGroups(groupSort.getSort(), docSort.getSort(), totalHitCount, totalGroupedHitCount, result);
return new TopGroups<BytesRef>(groupSort.getSort(), docSort.getSort(), totalHitCount, totalGroupedHitCount, result);
}
}
@ -525,14 +526,14 @@ public class TestGrouping extends LuceneTestCase {
System.out.println("TEST: groupSort=" + groupSort + " docSort=" + docSort + " searchTerm=" + searchTerm + " topNGroups=" + topNGroups + " groupOffset=" + groupOffset + " docOffset=" + docOffset + " doCache=" + doCache + " docsPerGroup=" + docsPerGroup + " doAllGroups=" + doAllGroups);
}
final AllGroupsCollector allGroupsCollector;
final TermAllGroupsCollector allGroupsCollector;
if (doAllGroups) {
allGroupsCollector = new AllGroupsCollector("group");
allGroupsCollector = new TermAllGroupsCollector("group");
} else {
allGroupsCollector = null;
}
final FirstPassGroupingCollector c1 = new FirstPassGroupingCollector("group", groupSort, groupOffset+topNGroups);
final TermFirstPassGroupingCollector c1 = new TermFirstPassGroupingCollector("group", groupSort, groupOffset+topNGroups);
final CachingCollector cCache;
final Collector c;
@ -583,19 +584,19 @@ public class TestGrouping extends LuceneTestCase {
}
}
final Collection<SearchGroup> topGroups = c1.getTopGroups(groupOffset, fillFields);
final Collection<SearchGroup<BytesRef>> topGroups = c1.getTopGroups(groupOffset, fillFields);
final TopGroups groupsResult;
if (topGroups != null) {
if (VERBOSE) {
System.out.println("TEST: topGroups");
for (SearchGroup searchGroup : topGroups) {
for (SearchGroup<BytesRef> searchGroup : topGroups) {
System.out.println(" " + (searchGroup.groupValue == null ? "null" : searchGroup.groupValue.utf8ToString()) + ": " + Arrays.deepToString(searchGroup.sortValues));
}
}
final SecondPassGroupingCollector c2 = new SecondPassGroupingCollector("group", topGroups, groupSort, docSort, docOffset+docsPerGroup, getScores, getMaxScores, fillFields);
final TermSecondPassGroupingCollector c2 = new TermSecondPassGroupingCollector("group", topGroups, groupSort, docSort, docOffset+docsPerGroup, getScores, getMaxScores, fillFields);
if (doCache) {
if (cCache.isCached()) {
if (VERBOSE) {
@ -613,8 +614,8 @@ public class TestGrouping extends LuceneTestCase {
}
if (doAllGroups) {
TopGroups tempTopGroups = c2.getTopGroups(docOffset);
groupsResult = new TopGroups(tempTopGroups, allGroupsCollector.getGroupCount());
TopGroups<BytesRef> tempTopGroups = c2.getTopGroups(docOffset);
groupsResult = new TopGroups<BytesRef>(tempTopGroups, allGroupsCollector.getGroupCount());
} else {
groupsResult = c2.getTopGroups(docOffset);
}
@ -625,14 +626,14 @@ public class TestGrouping extends LuceneTestCase {
}
}
final TopGroups expectedGroups = slowGrouping(groupDocs, searchTerm, fillFields, getScores, getMaxScores, doAllGroups, groupSort, docSort, topNGroups, docsPerGroup, groupOffset, docOffset);
final TopGroups<BytesRef> expectedGroups = slowGrouping(groupDocs, searchTerm, fillFields, getScores, getMaxScores, doAllGroups, groupSort, docSort, topNGroups, docsPerGroup, groupOffset, docOffset);
if (VERBOSE) {
if (expectedGroups == null) {
System.out.println("TEST: no expected groups");
} else {
System.out.println("TEST: expected groups");
for(GroupDocs gd : expectedGroups.groups) {
for(GroupDocs<BytesRef> gd : expectedGroups.groups) {
System.out.println(" group=" + (gd.groupValue == null ? "null" : gd.groupValue.utf8ToString()));
for(ScoreDoc sd : gd.scoreDocs) {
System.out.println(" id=" + sd.doc);
@ -645,21 +646,22 @@ public class TestGrouping extends LuceneTestCase {
final boolean needsScores = getScores || getMaxScores || docSort == null;
final BlockGroupingCollector c3 = new BlockGroupingCollector(groupSort, groupOffset+topNGroups, needsScores, lastDocInBlock);
final AllGroupsCollector allGroupsCollector2;
final TermAllGroupsCollector allGroupsCollector2;
final Collector c4;
if (doAllGroups) {
allGroupsCollector2 = new AllGroupsCollector("group");
allGroupsCollector2 = new TermAllGroupsCollector("group");
c4 = MultiCollector.wrap(c3, allGroupsCollector2);
} else {
allGroupsCollector2 = null;
c4 = c3;
}
s2.search(new TermQuery(new Term("content", searchTerm)), c4);
final TopGroups tempTopGroups2 = c3.getTopGroups(docSort, groupOffset, docOffset, docOffset+docsPerGroup, fillFields);
@SuppressWarnings("unchecked")
final TopGroups<BytesRef> tempTopGroups2 = c3.getTopGroups(docSort, groupOffset, docOffset, docOffset+docsPerGroup, fillFields);
final TopGroups groupsResult2;
if (doAllGroups && tempTopGroups2 != null) {
assertEquals((int) tempTopGroups2.totalGroupCount, allGroupsCollector2.getGroupCount());
groupsResult2 = new TopGroups(tempTopGroups2, allGroupsCollector2.getGroupCount());
groupsResult2 = new TopGroups<BytesRef>(tempTopGroups2, allGroupsCollector2.getGroupCount());
} else {
groupsResult2 = tempTopGroups2;
}