From d1548ca30a740f6a586ee20aa4980fab02ffe49b Mon Sep 17 00:00:00 2001 From: Michael McCandless Date: Thu, 2 Jun 2011 22:35:17 +0000 Subject: [PATCH] LUCENE-3099: allow subclasses to determine the group value git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1130858 13f79535-47bb-0310-9956-ffa450edef68 --- lucene/contrib/CHANGES.txt | 4 + .../grouping/AbstractAllGroupsCollector.java | 67 +++++++ ...> AbstractFirstPassGroupingCollector.java} | 113 ++++++------ .../AbstractSecondPassGroupingCollector.java | 156 ++++++++++++++++ .../grouping/BlockGroupingCollector.java | 9 +- .../lucene/search/grouping/GroupDocs.java | 9 +- .../lucene/search/grouping/SearchGroup.java | 14 +- .../grouping/SecondPassGroupingCollector.java | 172 ------------------ ...ector.java => TermAllGroupsCollector.java} | 49 ++--- .../TermFirstPassGroupingCollector.java | 85 +++++++++ .../TermSecondPassGroupingCollector.java | 76 ++++++++ .../lucene/search/grouping/TopGroups.java | 8 +- .../lucene/search/grouping/package.html | 78 +++----- .../lucene/search/grouping/TestGrouping.java | 52 +++--- 14 files changed, 533 insertions(+), 359 deletions(-) create mode 100644 modules/grouping/src/java/org/apache/lucene/search/grouping/AbstractAllGroupsCollector.java rename modules/grouping/src/java/org/apache/lucene/search/grouping/{FirstPassGroupingCollector.java => AbstractFirstPassGroupingCollector.java} (78%) create mode 100644 modules/grouping/src/java/org/apache/lucene/search/grouping/AbstractSecondPassGroupingCollector.java delete mode 100644 modules/grouping/src/java/org/apache/lucene/search/grouping/SecondPassGroupingCollector.java rename modules/grouping/src/java/org/apache/lucene/search/grouping/{AllGroupsCollector.java => TermAllGroupsCollector.java} (71%) create mode 100644 modules/grouping/src/java/org/apache/lucene/search/grouping/TermFirstPassGroupingCollector.java create mode 100644 modules/grouping/src/java/org/apache/lucene/search/grouping/TermSecondPassGroupingCollector.java 
diff --git a/lucene/contrib/CHANGES.txt b/lucene/contrib/CHANGES.txt index 1aeba0c32cb..e45af1f3640 100644 --- a/lucene/contrib/CHANGES.txt +++ b/lucene/contrib/CHANGES.txt @@ -75,6 +75,10 @@ API Changes * LUCENE-3141: add getter method to access fragInfos in FieldFragList. (Sujit Pal via Koji Sekiguchi) + * LUCENE-3099: Allow subclasses to determine the group value for + First/SecondPassGroupingCollector. (Martijn van Groningen, Mike + McCandless) + Build * LUCENE-3149: Upgrade contrib/icu's ICU jar file to ICU 4.8. diff --git a/modules/grouping/src/java/org/apache/lucene/search/grouping/AbstractAllGroupsCollector.java b/modules/grouping/src/java/org/apache/lucene/search/grouping/AbstractAllGroupsCollector.java new file mode 100644 index 00000000000..b8ac5f84411 --- /dev/null +++ b/modules/grouping/src/java/org/apache/lucene/search/grouping/AbstractAllGroupsCollector.java @@ -0,0 +1,67 @@ +package org.apache.lucene.search.grouping; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import org.apache.lucene.search.Collector; +import org.apache.lucene.search.Scorer; +import org.apache.lucene.util.BytesRef; + +import java.io.IOException; +import java.util.Collection; + +/** + * A collector that collects all groups that match the + * query. Only the group value is collected, and the order + * is undefined. This collector does not determine + * the most relevant document of a group. + * + *

+ * This is an abstract version. Concrete implementations define + * what a group actually is and how it is internally collected. + * + * @lucene.experimental + */ +public abstract class AbstractAllGroupsCollector extends Collector { + + /** + * Returns the total number of groups for the executed search. + * This is a convenience method. The following code snippet has the same effect:

<pre>getGroups().size()</pre>
+ * + * @return The total number of groups for the executed search + */ + public int getGroupCount() { + return getGroups().size(); + } + + /** + * Returns the group values + *

+ * This is an unordered collections of group values. For each group that matched the query there is a {@link BytesRef} + * representing a group value. + * + * @return the group values + */ + public abstract Collection getGroups(); + + // Empty not necessary + public void setScorer(Scorer scorer) throws IOException {} + + public boolean acceptsDocsOutOfOrder() { + return true; + } +} \ No newline at end of file diff --git a/modules/grouping/src/java/org/apache/lucene/search/grouping/FirstPassGroupingCollector.java b/modules/grouping/src/java/org/apache/lucene/search/grouping/AbstractFirstPassGroupingCollector.java similarity index 78% rename from modules/grouping/src/java/org/apache/lucene/search/grouping/FirstPassGroupingCollector.java rename to modules/grouping/src/java/org/apache/lucene/search/grouping/AbstractFirstPassGroupingCollector.java index 64dd0429f41..95f56911e7c 100644 --- a/modules/grouping/src/java/org/apache/lucene/search/grouping/FirstPassGroupingCollector.java +++ b/modules/grouping/src/java/org/apache/lucene/search/grouping/AbstractFirstPassGroupingCollector.java @@ -17,56 +17,39 @@ package org.apache.lucene.search.grouping; * limitations under the License. */ -import java.io.IOException; -import java.util.ArrayList; -import java.util.Collection; -import java.util.Comparator; -import java.util.HashMap; -import java.util.TreeSet; - import org.apache.lucene.index.IndexReader.AtomicReaderContext; -import org.apache.lucene.search.Collector; -import org.apache.lucene.search.FieldCache; -import org.apache.lucene.search.FieldComparator; -import org.apache.lucene.search.Scorer; -import org.apache.lucene.search.Sort; -import org.apache.lucene.search.SortField; -import org.apache.lucene.util.BytesRef; +import org.apache.lucene.search.*; + +import java.io.IOException; +import java.util.*; /** FirstPassGroupingCollector is the first of two passes necessary * to collect grouped hits. This pass gathers the top N sorted - * groups. + * groups. 
Concrete subclasses define what a group is and how it + * is internally collected. * *

 <p>See {@link org.apache.lucene.search.grouping} for more * details including a full code example.</p>

* * @lucene.experimental */ +abstract public class AbstractFirstPassGroupingCollector extends Collector { -public class FirstPassGroupingCollector extends Collector { - - private final String groupField; private final Sort groupSort; private final FieldComparator[] comparators; private final int[] reversed; private final int topNGroups; - private final HashMap groupMap; - private final BytesRef scratchBytesRef = new BytesRef(); + private final HashMap> groupMap; private final int compIDXEnd; // Set once we reach topNGroups unique groups: - private TreeSet orderedGroups; + private TreeSet> orderedGroups; private int docBase; private int spareSlot; - private FieldCache.DocTermsIndex index; /** * Create the first pass collector. * - * @param groupField The field used to group - * documents. This field must be single-valued and - * indexed (FieldCache is used to access its value - * per-document). * @param groupSort The {@link Sort} used to sort the * groups. The top sorted document within each group * according to groupSort, determines how that group @@ -74,13 +57,13 @@ public class FirstPassGroupingCollector extends Collector { * ie, if you want to groupSort by relevance use * Sort.RELEVANCE. * @param topNGroups How many top groups to keep. + * @throws IOException If I/O related errors occur */ - public FirstPassGroupingCollector(String groupField, Sort groupSort, int topNGroups) throws IOException { + public AbstractFirstPassGroupingCollector(Sort groupSort, int topNGroups) throws IOException { if (topNGroups < 1) { throw new IllegalArgumentException("topNGroups must be >= 1 (got " + topNGroups + ")"); } - this.groupField = groupField; // TODO: allow null groupSort to mean "by relevance", // and specialize it? 
this.groupSort = groupSort; @@ -100,13 +83,19 @@ public class FirstPassGroupingCollector extends Collector { } spareSlot = topNGroups; - groupMap = new HashMap(topNGroups); + groupMap = new HashMap>(topNGroups); } - /** Returns top groups, starting from offset. This may - * return null, if no groups were collected, or if the - * number of unique groups collected is <= offset. */ - public Collection getTopGroups(int groupOffset, boolean fillFields) { + /** + * Returns top groups, starting from offset. This may + * return null, if no groups were collected, or if the + * number of unique groups collected is <= offset. + * + * @param groupOffset The offset in the collected groups + * @param fillFields Whether to fill to {@link SearchGroup#sortValues} + * @return top groups, starting from offset + */ + public Collection> getTopGroups(int groupOffset, boolean fillFields) { //System.out.println("FP.getTopGroups groupOffset=" + groupOffset + " fillFields=" + fillFields + " groupMap.size()=" + groupMap.size()); @@ -122,15 +111,15 @@ public class FirstPassGroupingCollector extends Collector { buildSortedSet(); } - final Collection result = new ArrayList(); + final Collection> result = new ArrayList>(); int upto = 0; final int sortFieldCount = groupSort.getSort().length; - for(CollectedSearchGroup group : orderedGroups) { + for(CollectedSearchGroup group : orderedGroups) { if (upto++ < groupOffset) { continue; } //System.out.println(" group=" + (group.groupValue == null ? 
"null" : group.groupValue.utf8ToString())); - SearchGroup searchGroup = new SearchGroup(); + SearchGroup searchGroup = new SearchGroup(); searchGroup.groupValue = group.groupValue; if (fillFields) { searchGroup.sortValues = new Comparable[sortFieldCount]; @@ -144,10 +133,6 @@ public class FirstPassGroupingCollector extends Collector { return result; } - public String getGroupField() { - return groupField; - } - @Override public void setScorer(Scorer scorer) throws IOException { for (FieldComparator comparator : comparators) { @@ -189,13 +174,9 @@ public class FirstPassGroupingCollector extends Collector { // TODO: should we add option to mean "ignore docs that // don't have the group field" (instead of stuffing them // under null group)? - final int ord = index.getOrd(doc); - //System.out.println(" ord=" + ord); + final GROUP_VALUE_TYPE groupValue = getDocGroupValue(doc); - final BytesRef br = ord == 0 ? null : index.lookup(ord, scratchBytesRef); - //System.out.println(" group=" + (br == null ? "null" : br.utf8ToString())); - - final CollectedSearchGroup group = groupMap.get(br); + final CollectedSearchGroup group = groupMap.get(groupValue); if (group == null) { @@ -210,8 +191,8 @@ public class FirstPassGroupingCollector extends Collector { // just keep collecting them // Add a new CollectedSearchGroup: - CollectedSearchGroup sg = new CollectedSearchGroup(); - sg.groupValue = ord == 0 ? null : new BytesRef(scratchBytesRef); + CollectedSearchGroup sg = new CollectedSearchGroup(); + sg.groupValue = copyDocGroupValue(groupValue, null); sg.comparatorSlot = groupMap.size(); sg.topDoc = docBase + doc; for (FieldComparator fc : comparators) { @@ -233,20 +214,14 @@ public class FirstPassGroupingCollector extends Collector { // the bottom group with this new group. 
// java 6-only: final CollectedSearchGroup bottomGroup = orderedGroups.pollLast(); - final CollectedSearchGroup bottomGroup = orderedGroups.last(); + final CollectedSearchGroup bottomGroup = orderedGroups.last(); orderedGroups.remove(bottomGroup); assert orderedGroups.size() == topNGroups -1; groupMap.remove(bottomGroup.groupValue); // reuse the removed CollectedSearchGroup - if (br == null) { - bottomGroup.groupValue = null; - } else if (bottomGroup.groupValue != null) { - bottomGroup.groupValue.copy(br); - } else { - bottomGroup.groupValue = new BytesRef(br); - } + bottomGroup.groupValue = copyDocGroupValue(groupValue, bottomGroup.groupValue); bottomGroup.topDoc = docBase + doc; for (FieldComparator fc : comparators) { @@ -291,7 +266,7 @@ public class FirstPassGroupingCollector extends Collector { // Remove before updating the group since lookup is done via comparators // TODO: optimize this - final CollectedSearchGroup prevLast; + final CollectedSearchGroup prevLast; if (orderedGroups != null) { prevLast = orderedGroups.last(); orderedGroups.remove(group); @@ -336,7 +311,7 @@ public class FirstPassGroupingCollector extends Collector { } }; - orderedGroups = new TreeSet(comparator); + orderedGroups = new TreeSet>(comparator); orderedGroups.addAll(groupMap.values()); assert orderedGroups.size() > 0; @@ -353,15 +328,31 @@ public class FirstPassGroupingCollector extends Collector { @Override public void setNextReader(AtomicReaderContext readerContext) throws IOException { docBase = readerContext.docBase; - index = FieldCache.DEFAULT.getTermsIndex(readerContext.reader, groupField); - for (int i=0; i extends SearchGroup { int topDoc; int comparatorSlot; } diff --git a/modules/grouping/src/java/org/apache/lucene/search/grouping/AbstractSecondPassGroupingCollector.java b/modules/grouping/src/java/org/apache/lucene/search/grouping/AbstractSecondPassGroupingCollector.java new file mode 100644 index 00000000000..4d91d218a7c --- /dev/null +++ 
b/modules/grouping/src/java/org/apache/lucene/search/grouping/AbstractSecondPassGroupingCollector.java @@ -0,0 +1,156 @@ +package org.apache.lucene.search.grouping; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.index.IndexReader.AtomicReaderContext; +import org.apache.lucene.search.*; + +import java.io.IOException; +import java.util.Collection; +import java.util.HashMap; +import java.util.Map; + +/** + * SecondPassGroupingCollector is the second of two passes + * necessary to collect grouped docs. This pass gathers the + * top N documents per top group computed from the + * first pass. Concrete subclasses define what a group is and how it + * is internally collected. + * + *

See {@link org.apache.lucene.search.grouping} for more + * details including a full code example.

+ * + * @lucene.experimental + */ +public abstract class AbstractSecondPassGroupingCollector extends Collector { + + protected final Map> groupMap; + private final int maxDocsPerGroup; + protected SearchGroupDocs[] groupDocs; + private final Collection> groups; + private final Sort withinGroupSort; + private final Sort groupSort; + + private int totalHitCount; + private int totalGroupedHitCount; + + public AbstractSecondPassGroupingCollector(Collection> groups, Sort groupSort, Sort withinGroupSort, + int maxDocsPerGroup, boolean getScores, boolean getMaxScores, boolean fillSortFields) + throws IOException { + + //System.out.println("SP init"); + if (groups.size() == 0) { + throw new IllegalArgumentException("no groups to collect (groups.size() is 0)"); + } + + this.groupSort = groupSort; + this.withinGroupSort = withinGroupSort; + this.groups = groups; + this.maxDocsPerGroup = maxDocsPerGroup; + groupMap = new HashMap>(groups.size()); + + for (SearchGroup group : groups) { + //System.out.println(" prep group=" + (group.groupValue == null ? "null" : group.groupValue.utf8ToString())); + final TopDocsCollector collector; + if (withinGroupSort == null) { + // Sort by score + collector = TopScoreDocCollector.create(maxDocsPerGroup, true); + } else { + // Sort by fields + collector = TopFieldCollector.create(withinGroupSort, maxDocsPerGroup, fillSortFields, getScores, getMaxScores, true); + } + groupMap.put(group.groupValue, + new SearchGroupDocs(group.groupValue, + collector)); + } + } + + @Override + public void setScorer(Scorer scorer) throws IOException { + for (SearchGroupDocs group : groupMap.values()) { + group.collector.setScorer(scorer); + } + } + + @Override + public void collect(int doc) throws IOException { + totalHitCount++; + SearchGroupDocs group = retrieveGroup(doc); + if (group != null) { + totalGroupedHitCount++; + group.collector.collect(doc); + } + } + + /** + * Returns the group the specified doc belongs to or null if no group could be retrieved. 
+ * + * @param doc The specified doc + * @return the group the specified doc belongs to or null if no group could be retrieved + * @throws IOException If an I/O related error occurred + */ + protected abstract SearchGroupDocs retrieveGroup(int doc) throws IOException; + + @Override + public void setNextReader(AtomicReaderContext readerContext) throws IOException { + //System.out.println("SP.setNextReader"); + for (SearchGroupDocs group : groupMap.values()) { + group.collector.setNextReader(readerContext); + } + } + + @Override + public boolean acceptsDocsOutOfOrder() { + return false; + } + + public TopGroups getTopGroups(int withinGroupOffset) { + @SuppressWarnings("unchecked") + final GroupDocs[] groupDocsResult = (GroupDocs[]) new GroupDocs[groups.size()]; + + int groupIDX = 0; + for(SearchGroup group : groups) { + final SearchGroupDocs groupDocs = groupMap.get(group.groupValue); + final TopDocs topDocs = groupDocs.collector.topDocs(withinGroupOffset, maxDocsPerGroup); + groupDocsResult[groupIDX++] = new GroupDocs(topDocs.getMaxScore(), + topDocs.totalHits, + topDocs.scoreDocs, + groupDocs.groupValue, + group.sortValues); + } + + return new TopGroups(groupSort.getSort(), + withinGroupSort == null ? null : withinGroupSort.getSort(), + totalHitCount, totalGroupedHitCount, groupDocsResult); + } + + + // TODO: merge with SearchGroup or not? 
+ // ad: don't need to build a new hashmap + // disad: blows up the size of SearchGroup if we need many of them, and couples implementations + public class SearchGroupDocs { + + public final GROUP_VALUE_TYPE groupValue; + public final TopDocsCollector collector; + + public SearchGroupDocs(GROUP_VALUE_TYPE groupValue, TopDocsCollector collector) { + this.groupValue = groupValue; + this.collector = collector; + } + } +} diff --git a/modules/grouping/src/java/org/apache/lucene/search/grouping/BlockGroupingCollector.java b/modules/grouping/src/java/org/apache/lucene/search/grouping/BlockGroupingCollector.java index d2c6eb18a13..06a7c988452 100644 --- a/modules/grouping/src/java/org/apache/lucene/search/grouping/BlockGroupingCollector.java +++ b/modules/grouping/src/java/org/apache/lucene/search/grouping/BlockGroupingCollector.java @@ -49,7 +49,7 @@ import org.apache.lucene.util.PriorityQueue; * being that the documents in each group must always be * indexed as a block. This collector also fills in * TopGroups.totalGroupCount without requiring the separate - * {@link AllGroupsCollector}. However, this collector does + * {@link TermAllGroupsCollector}. However, this collector does * not fill in the groupValue of each group; this field * will always be null. 
* @@ -317,7 +317,8 @@ public class BlockGroupingCollector extends Collector { final FakeScorer fakeScorer = new FakeScorer(); - final GroupDocs[] groups = new GroupDocs[groupQueue.size() - groupOffset]; + @SuppressWarnings("unchecked") + final GroupDocs[] groups = new GroupDocs[groupQueue.size() - groupOffset]; for(int downTo=groupQueue.size()-groupOffset-1;downTo>=0;downTo--) { final OneGroup og = groupQueue.pop(); @@ -360,7 +361,7 @@ public class BlockGroupingCollector extends Collector { final TopDocs topDocs = collector.topDocs(withinGroupOffset, maxDocsPerGroup); - groups[downTo] = new GroupDocs(topDocs.getMaxScore(), + groups[downTo] = new GroupDocs(topDocs.getMaxScore(), og.count, topDocs.scoreDocs, null, @@ -375,7 +376,7 @@ public class BlockGroupingCollector extends Collector { } */ - return new TopGroups(new TopGroups(groupSort.getSort(), + return new TopGroups(new TopGroups(groupSort.getSort(), withinGroupSort == null ? null : withinGroupSort.getSort(), totalHitCount, totalGroupedHitCount, groups), totalGroupCount); diff --git a/modules/grouping/src/java/org/apache/lucene/search/grouping/GroupDocs.java b/modules/grouping/src/java/org/apache/lucene/search/grouping/GroupDocs.java index 164ba050ce9..9de84254874 100644 --- a/modules/grouping/src/java/org/apache/lucene/search/grouping/GroupDocs.java +++ b/modules/grouping/src/java/org/apache/lucene/search/grouping/GroupDocs.java @@ -18,15 +18,14 @@ package org.apache.lucene.search.grouping; */ import org.apache.lucene.search.ScoreDoc; -import org.apache.lucene.util.BytesRef; /** Represents one group in the results. * * @lucene.experimental */ -public class GroupDocs { +public class GroupDocs { /** The groupField value for all docs in this group; this * may be null if hits did not have the groupField. 
*/ - public final BytesRef groupValue; + public final GROUP_VALUE_TYPE groupValue; /** Max score in this group */ public final float maxScore; @@ -40,13 +39,13 @@ public class GroupDocs { public final int totalHits; /** Matches the groupSort passed to {@link - * FirstPassGroupingCollector}. */ + * AbstractFirstPassGroupingCollector}. */ public final Comparable[] groupSortValues; public GroupDocs(float maxScore, int totalHits, ScoreDoc[] scoreDocs, - BytesRef groupValue, + GROUP_VALUE_TYPE groupValue, Comparable[] groupSortValues) { this.maxScore = maxScore; this.totalHits = totalHits; diff --git a/modules/grouping/src/java/org/apache/lucene/search/grouping/SearchGroup.java b/modules/grouping/src/java/org/apache/lucene/search/grouping/SearchGroup.java index ebee113818f..11820da35f6 100644 --- a/modules/grouping/src/java/org/apache/lucene/search/grouping/SearchGroup.java +++ b/modules/grouping/src/java/org/apache/lucene/search/grouping/SearchGroup.java @@ -17,10 +17,16 @@ package org.apache.lucene.search.grouping; * limitations under the License. */ -import org.apache.lucene.util.BytesRef; +/** + * Represents a group that is found during the first pass search. + * + * @lucene.experimental + */ +public class SearchGroup { -/** @lucene.experimental */ -public class SearchGroup { - public BytesRef groupValue; + /** The value that defines this group */ + public GROUP_VALUE_TYPE groupValue; + + /** The sort values used during sorting. Can be null. 
*/ public Comparable[] sortValues; } diff --git a/modules/grouping/src/java/org/apache/lucene/search/grouping/SecondPassGroupingCollector.java b/modules/grouping/src/java/org/apache/lucene/search/grouping/SecondPassGroupingCollector.java deleted file mode 100644 index 1d486f7cd6e..00000000000 --- a/modules/grouping/src/java/org/apache/lucene/search/grouping/SecondPassGroupingCollector.java +++ /dev/null @@ -1,172 +0,0 @@ -package org.apache.lucene.search.grouping; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import java.io.IOException; -import java.util.Collection; -import java.util.HashMap; - -import org.apache.lucene.index.IndexReader.AtomicReaderContext; -import org.apache.lucene.search.Collector; -import org.apache.lucene.search.FieldCache; -import org.apache.lucene.search.Scorer; -import org.apache.lucene.search.Sort; -import org.apache.lucene.search.TopDocs; -import org.apache.lucene.search.TopDocsCollector; -import org.apache.lucene.search.TopFieldCollector; -import org.apache.lucene.search.TopScoreDocCollector; -import org.apache.lucene.util.BytesRef; - -/** - * SecondPassGroupingCollector is the second of two passes - * necessary to collect grouped docs. 
This pass gathers the - * top N documents per top group computed from the - * first pass. - * - *

See {@link org.apache.lucene.search.grouping} for more - * details including a full code example.

- * - * @lucene.experimental - */ -public class SecondPassGroupingCollector extends Collector { - private final HashMap groupMap; - - private FieldCache.DocTermsIndex index; - private final String groupField; - private final int maxDocsPerGroup; - private final SentinelIntSet ordSet; - private final SearchGroupDocs[] groupDocs; - private final BytesRef spareBytesRef = new BytesRef(); - private final Collection groups; - private final Sort withinGroupSort; - private final Sort groupSort; - - private int totalHitCount; - private int totalGroupedHitCount; - - public SecondPassGroupingCollector(String groupField, Collection groups, Sort groupSort, Sort withinGroupSort, - int maxDocsPerGroup, boolean getScores, boolean getMaxScores, boolean fillSortFields) - throws IOException { - - //System.out.println("SP init"); - if (groups.size() == 0) { - throw new IllegalArgumentException("no groups to collect (groups.size() is 0)"); - } - - this.groupSort = groupSort; - this.withinGroupSort = withinGroupSort; - this.groups = groups; - this.groupField = groupField; - this.maxDocsPerGroup = maxDocsPerGroup; - - groupMap = new HashMap(groups.size()); - - for (SearchGroup group : groups) { - //System.out.println(" prep group=" + (group.groupValue == null ? 
"null" : group.groupValue.utf8ToString())); - final TopDocsCollector collector; - if (withinGroupSort == null) { - // Sort by score - collector = TopScoreDocCollector.create(maxDocsPerGroup, true); - } else { - // Sort by fields - collector = TopFieldCollector.create(withinGroupSort, maxDocsPerGroup, fillSortFields, getScores, getMaxScores, true); - } - groupMap.put(group.groupValue, - new SearchGroupDocs(group.groupValue, - collector)); - } - - ordSet = new SentinelIntSet(groupMap.size(), -1); - groupDocs = new SearchGroupDocs[ordSet.keys.length]; - } - - @Override - public void setScorer(Scorer scorer) throws IOException { - for (SearchGroupDocs group : groupMap.values()) { - group.collector.setScorer(scorer); - } - } - - @Override - public void collect(int doc) throws IOException { - final int slot = ordSet.find(index.getOrd(doc)); - //System.out.println("SP.collect doc=" + doc + " slot=" + slot); - totalHitCount++; - if (slot >= 0) { - totalGroupedHitCount++; - groupDocs[slot].collector.collect(doc); - } - } - - @Override - public void setNextReader(AtomicReaderContext readerContext) throws IOException { - //System.out.println("SP.setNextReader"); - for (SearchGroupDocs group : groupMap.values()) { - group.collector.setNextReader(readerContext); - } - index = FieldCache.DEFAULT.getTermsIndex(readerContext.reader, groupField); - - // Rebuild ordSet - ordSet.clear(); - for (SearchGroupDocs group : groupMap.values()) { - //System.out.println(" group=" + (group.groupValue == null ? "null" : group.groupValue.utf8ToString())); - int ord = group.groupValue == null ? 
0 : index.binarySearchLookup(group.groupValue, spareBytesRef); - if (ord >= 0) { - groupDocs[ordSet.put(ord)] = group; - } - } - } - - @Override - public boolean acceptsDocsOutOfOrder() { - return false; - } - - public TopGroups getTopGroups(int withinGroupOffset) { - final GroupDocs[] groupDocsResult = new GroupDocs[groups.size()]; - - int groupIDX = 0; - for(SearchGroup group : groups) { - final SearchGroupDocs groupDocs = groupMap.get(group.groupValue); - final TopDocs topDocs = groupDocs.collector.topDocs(withinGroupOffset, maxDocsPerGroup); - groupDocsResult[groupIDX++] = new GroupDocs(topDocs.getMaxScore(), - topDocs.totalHits, - topDocs.scoreDocs, - groupDocs.groupValue, - group.sortValues); - } - - return new TopGroups(groupSort.getSort(), - withinGroupSort == null ? null : withinGroupSort.getSort(), - totalHitCount, totalGroupedHitCount, groupDocsResult); - } -} - - -// TODO: merge with SearchGroup or not? -// ad: don't need to build a new hashmap -// disad: blows up the size of SearchGroup if we need many of them, and couples implementations -class SearchGroupDocs { - public final BytesRef groupValue; - public final TopDocsCollector collector; - - public SearchGroupDocs(BytesRef groupValue, TopDocsCollector collector) { - this.groupValue = groupValue; - this.collector = collector; - } -} diff --git a/modules/grouping/src/java/org/apache/lucene/search/grouping/AllGroupsCollector.java b/modules/grouping/src/java/org/apache/lucene/search/grouping/TermAllGroupsCollector.java similarity index 71% rename from modules/grouping/src/java/org/apache/lucene/search/grouping/AllGroupsCollector.java rename to modules/grouping/src/java/org/apache/lucene/search/grouping/TermAllGroupsCollector.java index 496ced1f232..6d0ac38b305 100644 --- a/modules/grouping/src/java/org/apache/lucene/search/grouping/AllGroupsCollector.java +++ b/modules/grouping/src/java/org/apache/lucene/search/grouping/TermAllGroupsCollector.java @@ -18,9 +18,7 @@ package 
org.apache.lucene.search.grouping; */ import org.apache.lucene.index.IndexReader; -import org.apache.lucene.search.Collector; import org.apache.lucene.search.FieldCache; -import org.apache.lucene.search.Scorer; import org.apache.lucene.util.BytesRef; import java.io.IOException; @@ -43,47 +41,44 @@ import java.util.List; * * @lucene.experimental */ -public class AllGroupsCollector extends Collector { +public class TermAllGroupsCollector extends AbstractAllGroupsCollector { private static final int DEFAULT_INITIAL_SIZE = 128; private final String groupField; private final SentinelIntSet ordSet; private final List groups; - private final BytesRef spareBytesRef = new BytesRef(); private FieldCache.DocTermsIndex index; + private final BytesRef spareBytesRef = new BytesRef(); /** - * Expert: Constructs a {@link AllGroupsCollector} + * Expert: Constructs a {@link AbstractAllGroupsCollector} * * @param groupField The field to group by * @param initialSize The initial allocation size of the - * internal int set and group list - * which should roughly match the total - * number of expected unique groups. Be aware that the - * heap usage is 4 bytes * initialSize. + * internal int set and group list + * which should roughly match the total + * number of expected unique groups. Be aware that the + * heap usage is 4 bytes * initialSize. */ - public AllGroupsCollector(String groupField, int initialSize) { - this.groupField = groupField; + public TermAllGroupsCollector(String groupField, int initialSize) { ordSet = new SentinelIntSet(initialSize, -1); groups = new ArrayList(initialSize); + this.groupField = groupField; } /** - * Constructs a {@link AllGroupsCollector}. This sets the + * Constructs a {@link AbstractAllGroupsCollector}. This sets the * initial allocation size for the internal int set and group * list to 128. 
* * @param groupField The field to group by */ - public AllGroupsCollector(String groupField) { + public TermAllGroupsCollector(String groupField) { this(groupField, DEFAULT_INITIAL_SIZE); } - public void setScorer(Scorer scorer) throws IOException { - } - public void collect(int doc) throws IOException { int key = index.getOrd(doc); if (!ordSet.exists(key)) { @@ -94,22 +89,7 @@ public class AllGroupsCollector extends Collector { } /** - * Returns the total number of groups for the executed search. - * This is a convenience method. The following code snippet has the same effect:
getGroups().size()
- * - * @return The total number of groups for the executed search - */ - public int getGroupCount() { - return groups.size(); - } - - /** - * Returns the group values - *

- * This is an unordered collections of group values. For each group that matched the query there is a {@link BytesRef} - * representing a group value. - * - * @return the group values + * {@inheritDoc} */ public Collection getGroups() { return groups; @@ -128,7 +108,4 @@ public class AllGroupsCollector extends Collector { } } - public boolean acceptsDocsOutOfOrder() { - return true; - } -} \ No newline at end of file +} diff --git a/modules/grouping/src/java/org/apache/lucene/search/grouping/TermFirstPassGroupingCollector.java b/modules/grouping/src/java/org/apache/lucene/search/grouping/TermFirstPassGroupingCollector.java new file mode 100644 index 00000000000..d194d0ed1d0 --- /dev/null +++ b/modules/grouping/src/java/org/apache/lucene/search/grouping/TermFirstPassGroupingCollector.java @@ -0,0 +1,85 @@ +package org.apache.lucene.search.grouping; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import org.apache.lucene.index.IndexReader.AtomicReaderContext; +import org.apache.lucene.search.FieldCache; +import org.apache.lucene.search.Sort; +import org.apache.lucene.util.BytesRef; + +import java.io.IOException; + +/** + * Concrete implementation of {@link AbstractFirstPassGroupingCollector} that groups based on + * field values and more specifically uses {@link org.apache.lucene.search.FieldCache.DocTerms} + * to collect groups. + * + * @lucene.experimental + */ +public class TermFirstPassGroupingCollector extends AbstractFirstPassGroupingCollector { + + private final BytesRef scratchBytesRef = new BytesRef(); + private FieldCache.DocTermsIndex index; + + private String groupField; + + /** + * Create the first pass collector. + * + * @param groupField The field used to group + * documents. This field must be single-valued and + * indexed (FieldCache is used to access its value + * per-document). + * @param groupSort The {@link Sort} used to sort the + * groups. The top sorted document within each group + * according to groupSort, determines how that group + * sorts against other groups. This must be non-null, + * ie, if you want to groupSort by relevance use + * Sort.RELEVANCE. + * @param topNGroups How many top groups to keep. + * @throws IOException When I/O related errors occur + */ + public TermFirstPassGroupingCollector(String groupField, Sort groupSort, int topNGroups) throws IOException { + super(groupSort, topNGroups); + this.groupField = groupField; + } + + @Override + protected BytesRef getDocGroupValue(int doc) { + final int ord = index.getOrd(doc); + return ord == 0 ? 
null : index.lookup(ord, scratchBytesRef); + } + + @Override + protected BytesRef copyDocGroupValue(BytesRef groupValue, BytesRef reuse) { + if (groupValue == null) { + return null; + } else if (reuse != null) { + reuse.copy(groupValue); + return reuse; + } else { + return new BytesRef(groupValue); + } + } + + @Override + public void setNextReader(AtomicReaderContext readerContext) throws IOException { + super.setNextReader(readerContext); + index = FieldCache.DEFAULT.getTermsIndex(readerContext.reader, groupField); + } +} diff --git a/modules/grouping/src/java/org/apache/lucene/search/grouping/TermSecondPassGroupingCollector.java b/modules/grouping/src/java/org/apache/lucene/search/grouping/TermSecondPassGroupingCollector.java new file mode 100644 index 00000000000..40d91b811a7 --- /dev/null +++ b/modules/grouping/src/java/org/apache/lucene/search/grouping/TermSecondPassGroupingCollector.java @@ -0,0 +1,76 @@ +package org.apache.lucene.search.grouping; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import org.apache.lucene.index.IndexReader.AtomicReaderContext; +import org.apache.lucene.search.FieldCache; +import org.apache.lucene.search.Sort; +import org.apache.lucene.util.BytesRef; + +import java.io.IOException; +import java.util.Collection; + +/** + * Concrete implementation of {@link AbstractSecondPassGroupingCollector} that groups based on + * field values and more specifically uses {@link org.apache.lucene.search.FieldCache.DocTerms} + * to collect grouped docs. + * + * @lucene.experimental + */ +public class TermSecondPassGroupingCollector extends AbstractSecondPassGroupingCollector { + + private final SentinelIntSet ordSet; + private FieldCache.DocTermsIndex index; + private final BytesRef spareBytesRef = new BytesRef(); + private final String groupField; + + @SuppressWarnings("unchecked") + public TermSecondPassGroupingCollector(String groupField, Collection> groups, Sort groupSort, Sort withinGroupSort, + int maxDocsPerGroup, boolean getScores, boolean getMaxScores, boolean fillSortFields) + throws IOException { + super(groups, groupSort, withinGroupSort, maxDocsPerGroup, getScores, getMaxScores, fillSortFields); + ordSet = new SentinelIntSet(groupMap.size(), -1); + this.groupField = groupField; + groupDocs = (SearchGroupDocs[]) new SearchGroupDocs[ordSet.keys.length]; + } + + @Override + public void setNextReader(AtomicReaderContext readerContext) throws IOException { + super.setNextReader(readerContext); + index = FieldCache.DEFAULT.getTermsIndex(readerContext.reader, groupField); + + // Rebuild ordSet + ordSet.clear(); + for (SearchGroupDocs group : groupMap.values()) { +// System.out.println(" group=" + (group.groupValue == null ? "null" : group.groupValue.utf8ToString())); + int ord = group.groupValue == null ? 
0 : index.binarySearchLookup(group.groupValue, spareBytesRef); + if (ord >= 0) { + groupDocs[ordSet.put(ord)] = group; + } + } + } + + @Override + protected SearchGroupDocs retrieveGroup(int doc) throws IOException { + int slot = ordSet.find(index.getOrd(doc)); + if (slot >= 0) { + return groupDocs[slot]; + } + return null; + } +} \ No newline at end of file diff --git a/modules/grouping/src/java/org/apache/lucene/search/grouping/TopGroups.java b/modules/grouping/src/java/org/apache/lucene/search/grouping/TopGroups.java index 2dbb38fa186..a46aa410c20 100644 --- a/modules/grouping/src/java/org/apache/lucene/search/grouping/TopGroups.java +++ b/modules/grouping/src/java/org/apache/lucene/search/grouping/TopGroups.java @@ -22,7 +22,7 @@ import org.apache.lucene.search.SortField; /** Represents result returned by a grouping search. * * @lucene.experimental */ -public class TopGroups { +public class TopGroups { /** Number of documents matching the search */ public final int totalHitCount; @@ -33,7 +33,7 @@ public class TopGroups { public final Integer totalGroupCount; /** Group results in groupSort order */ - public final GroupDocs[] groups; + public final GroupDocs[] groups; /** How groups are sorted against each other */ public final SortField[] groupSort; @@ -41,7 +41,7 @@ public class TopGroups { /** How docs are sorted within each group */ public final SortField[] withinGroupSort; - public TopGroups(SortField[] groupSort, SortField[] withinGroupSort, int totalHitCount, int totalGroupedHitCount, GroupDocs[] groups) { + public TopGroups(SortField[] groupSort, SortField[] withinGroupSort, int totalHitCount, int totalGroupedHitCount, GroupDocs[] groups) { this.groupSort = groupSort; this.withinGroupSort = withinGroupSort; this.totalHitCount = totalHitCount; @@ -50,7 +50,7 @@ public class TopGroups { this.totalGroupCount = null; } - public TopGroups(TopGroups oldTopGroups, Integer totalGroupCount) { + public TopGroups(TopGroups oldTopGroups, Integer totalGroupCount) { 
this.groupSort = oldTopGroups.groupSort; this.withinGroupSort = oldTopGroups.withinGroupSort; this.totalHitCount = oldTopGroups.totalHitCount; diff --git a/modules/grouping/src/java/org/apache/lucene/search/grouping/package.html b/modules/grouping/src/java/org/apache/lucene/search/grouping/package.html index 4b2f02a7caa..cd4717caf43 100644 --- a/modules/grouping/src/java/org/apache/lucene/search/grouping/package.html +++ b/modules/grouping/src/java/org/apache/lucene/search/grouping/package.html @@ -43,55 +43,37 @@ field fall into a single group.

+

The implementation is two-pass: the first pass ({@link + org.apache.lucene.search.grouping.TermFirstPassGroupingCollector}) + gathers the top groups, and the second pass ({@link + org.apache.lucene.search.grouping.TermSecondPassGroupingCollector}) + gathers documents within those groups. If the search is costly to + run you may want to use the {@link + org.apache.lucene.search.CachingCollector} class, which + caches hits and can (quickly) replay them for the second pass. This + way you only run the query once, but you pay a RAM cost to (briefly) + hold all hits. Results are returned as a {@link + org.apache.lucene.search.grouping.TopGroups} instance.

+

-There are two grouping implementations here: -

    -
  • - Arbitrary grouping that can group by any single-valued indexed - field, implemented as a two-pass collector: the first pass ({@link - org.apache.lucene.search.grouping.FirstPassGroupingCollector}) - gathers the top groups, and the second pass ({@link - org.apache.lucene.search.grouping.SecondPassGroupingCollector}) - gathers documents within those groups. If the search is costly to - run you may want to use the {@link - org.apache.lucene.search.CachingCollector} class, which caches - hits and can (quickly) replay them for the second pass. This way - you only run the query once, but you pay a RAM cost to (briefly) - hold all hits. Results are returned as a {@link - org.apache.lucene.search.grouping.TopGroups} instance.

    -
  • -
  • - Indexed groups, using a single pass collector (BlockGroupingCollectorDoc) that - is able to group according to the doc blocks created during - indexing using IndexWriter's add/updateDocuments API. - This is faster (~25% faster QPS) than the generic two-pass - collector, but it only works for doc blocks so you must statically - commit (during indexing) to which grouping you'll need at search - time. + This module abstracts away what defines a group and how it is collected. All grouping collectors + are abstract and currently have term-based implementations. One can implement + collectors that, for example, group on multiple fields. +

    -

    This implementation does not rely on a single valued grouping - field; rather, the blocks in the index define the groups, so your - application is free to determine what the grouping criteria is. - At search time, you must provide a Filter that marks - the last document in each group. This is a substantial memory - savings because this collector does not load - a DocTermsIndex from the - FieldCache. -

  • -
- -

The benefit of the arbitrary grouping implementation is you don't have -to commit at indexing time to a static grouping of your documents. -But the downside is it's somewhat slower to run, and requires more RAM -(a FieldCache.DocTermsIndex entry is created). +

+ This module abstracts away what defines a group and how it is collected. All grouping collectors + are abstract and currently have term-based implementations. One can implement + collectors that, for example, group on multiple fields. +

Known limitations:

  • For the two-pass grouping collector, the group field must be a single-valued indexed field. {@link org.apache.lucene.search.FieldCache} is used to load the {@link org.apache.lucene.search.FieldCache.DocTermsIndex} for this field. -
  • Unlike Solr's implementation, this module cannot group by - function query values nor by arbitrary queries. +
  • Although Solr supports grouping by function and this module abstracts what a group is, there are currently only + implementations for grouping based on terms.
  • Sharding is not directly supported, though is not too difficult, if you can merge the top groups and top documents per group yourself. @@ -101,14 +83,14 @@ But the downside is it's somewhat slower to run, and requires more RAM (using the {@link org.apache.lucene.search.CachingCollector}):

    -  FirstPassGroupingCollector c1 = new FirstPassGroupingCollector("author", groupSort, groupOffset+topNGroups);
    +  TermFirstPassGroupingCollector c1 = new TermFirstPassGroupingCollector("author", groupSort, groupOffset+topNGroups);
     
       boolean cacheScores = true;
       double maxCacheRAMMB = 4.0;
       CachingCollector cachedCollector = CachingCollector.create(c1, cacheScores, maxCacheRAMMB);
       s.search(new TermQuery(new Term("content", searchTerm)), cachedCollector);
     
    -  Collection topGroups = c1.getTopGroups(groupOffset, fillFields);
    +  Collection<SearchGroup<BytesRef>> topGroups = c1.getTopGroups(groupOffset, fillFields);
     
       if (topGroups == null) {
         // No groups matched
    @@ -118,12 +100,12 @@ But the downside is it's somewhat slower to run, and requires more RAM
       boolean getScores = true;
       boolean getMaxScores = true;
       boolean fillFields = true;
    -  SecondPassGroupingCollector c2 = new SecondPassGroupingCollector("author", topGroups, groupSort, docSort, docOffset+docsPerGroup, getScores, getMaxScores, fillFields);
    +  TermSecondPassGroupingCollector c2 = new TermSecondPassGroupingCollector("author", topGroups, groupSort, docSort, docOffset+docsPerGroup, getScores, getMaxScores, fillFields);
     
       //Optionally compute total group count
    -  AllGroupsCollector allGroupsCollector = null;
    +  TermAllGroupsCollector allGroupsCollector = null;
       if (requiredTotalGroupCount) {
    -    allGroupsCollector = new AllGroupsCollector("author");
    +    allGroupsCollector = new TermAllGroupsCollector("author");
         c2 = MultiCollector.wrap(c2, allGroupsCollector);
       }
     
    @@ -135,9 +117,9 @@ But the downside is it's somewhat slower to run, and requires more RAM
         s.search(new TermQuery(new Term("content", searchTerm)), c2);
       }
             
    -  TopGroups groupsResult = c2.getTopGroups(docOffset);
    +  TopGroups<BytesRef> groupsResult = c2.getTopGroups(docOffset);
       if (requiredTotalGroupCount) {
    -    groupResult = new TopGroups(groupsResult, allGroupsCollector.getGroupCount());
    +    groupsResult = new TopGroups<BytesRef>(groupsResult, allGroupsCollector.getGroupCount());
       }
     
       // Render groupsResult...
    diff --git a/modules/grouping/src/test/org/apache/lucene/search/grouping/TestGrouping.java b/modules/grouping/src/test/org/apache/lucene/search/grouping/TestGrouping.java
    index 9dc49faa71b..87745f64a18 100644
    --- a/modules/grouping/src/test/org/apache/lucene/search/grouping/TestGrouping.java
    +++ b/modules/grouping/src/test/org/apache/lucene/search/grouping/TestGrouping.java
    @@ -17,9 +17,6 @@
     
     package org.apache.lucene.search.grouping;
     
    -import java.util.*;
    -import java.io.IOException;
    -
     import org.apache.lucene.analysis.MockAnalyzer;
     import org.apache.lucene.document.Document;
     import org.apache.lucene.document.Field;
    @@ -33,6 +30,9 @@ import org.apache.lucene.util.BytesRef;
     import org.apache.lucene.util.LuceneTestCase;
     import org.apache.lucene.util._TestUtil;
     
    +import java.io.IOException;
    +import java.util.*;
    +
     // TODO
     //   - should test relevance sort too
     //   - test null
    @@ -103,10 +103,10 @@ public class TestGrouping extends LuceneTestCase {
         w.close();
     
         final Sort groupSort = Sort.RELEVANCE;
    -    final FirstPassGroupingCollector c1 = new FirstPassGroupingCollector(groupField, groupSort, 10);
    +    final TermFirstPassGroupingCollector c1 = new TermFirstPassGroupingCollector(groupField, groupSort, 10);
         indexSearcher.search(new TermQuery(new Term("content", "random")), c1);
     
    -    final SecondPassGroupingCollector c2 = new SecondPassGroupingCollector(groupField, c1.getTopGroups(0, true), groupSort, null, 5, true, false, true);
    +    final TermSecondPassGroupingCollector c2 = new TermSecondPassGroupingCollector(groupField, c1.getTopGroups(0, true), groupSort, null, 5, true, false, true);
         indexSearcher.search(new TermQuery(new Term("content", "random")), c2);
     
         final TopGroups groups = c2.getTopGroups(0);
    @@ -236,7 +236,7 @@ public class TestGrouping extends LuceneTestCase {
       }
       */
     
    -  private TopGroups slowGrouping(GroupDoc[] groupDocs,
    +  private TopGroups slowGrouping(GroupDoc[] groupDocs,
                                      String searchTerm,
                                      boolean fillFields,
                                      boolean getScores,
    @@ -296,7 +296,8 @@ public class TestGrouping extends LuceneTestCase {
         final int limit = Math.min(groupOffset + topNGroups, groups.size());
     
         final Comparator docSortComp = getComparator(docSort);
    -    final GroupDocs[] result = new GroupDocs[limit-groupOffset];
    +    @SuppressWarnings("unchecked")
    +    final GroupDocs[] result = new GroupDocs[limit-groupOffset];
         int totalGroupedHitCount = 0;
         for(int idx=groupOffset;idx < limit;idx++) {
           final BytesRef group = sortedGroups.get(idx);
    @@ -321,7 +322,7 @@ public class TestGrouping extends LuceneTestCase {
             hits = new ScoreDoc[0];
           }
     
    -      result[idx-groupOffset] = new GroupDocs(0.0f,
    +      result[idx-groupOffset] = new GroupDocs(0.0f,
                                                   docs.size(),
                                                   hits,
                                                   group,
    @@ -329,12 +330,12 @@ public class TestGrouping extends LuceneTestCase {
         }
     
         if (doAllGroups) {
    -      return new TopGroups(
    -          new TopGroups(groupSort.getSort(), docSort.getSort(), totalHitCount, totalGroupedHitCount, result),
    +      return new TopGroups(
    +          new TopGroups(groupSort.getSort(), docSort.getSort(), totalHitCount, totalGroupedHitCount, result),
               knownGroups.size()
           );
         } else {
    -      return new TopGroups(groupSort.getSort(), docSort.getSort(), totalHitCount, totalGroupedHitCount, result);
    +      return new TopGroups(groupSort.getSort(), docSort.getSort(), totalHitCount, totalGroupedHitCount, result);
         }
       }
     
    @@ -525,14 +526,14 @@ public class TestGrouping extends LuceneTestCase {
                 System.out.println("TEST: groupSort=" + groupSort + " docSort=" + docSort + " searchTerm=" + searchTerm + " topNGroups=" + topNGroups + " groupOffset=" + groupOffset + " docOffset=" + docOffset + " doCache=" + doCache + " docsPerGroup=" + docsPerGroup + " doAllGroups=" + doAllGroups);
               }
     
    -          final AllGroupsCollector allGroupsCollector;
    +          final TermAllGroupsCollector allGroupsCollector;
               if (doAllGroups) {
    -            allGroupsCollector = new AllGroupsCollector("group");
    +            allGroupsCollector = new TermAllGroupsCollector("group");
               } else {
                 allGroupsCollector = null;
               }
     
    -          final FirstPassGroupingCollector c1 = new FirstPassGroupingCollector("group", groupSort, groupOffset+topNGroups);
    +          final TermFirstPassGroupingCollector c1 = new TermFirstPassGroupingCollector("group", groupSort, groupOffset+topNGroups);
               final CachingCollector cCache;
               final Collector c;
             
    @@ -583,19 +584,19 @@ public class TestGrouping extends LuceneTestCase {
                 }
               }
     
    -          final Collection topGroups = c1.getTopGroups(groupOffset, fillFields);
    +          final Collection<SearchGroup<BytesRef>> topGroups = c1.getTopGroups(groupOffset, fillFields);
               final TopGroups groupsResult;
     
               if (topGroups != null) {
     
                 if (VERBOSE) {
                   System.out.println("TEST: topGroups");
    -              for (SearchGroup searchGroup : topGroups) {
    +              for (SearchGroup searchGroup : topGroups) {
                     System.out.println("  " + (searchGroup.groupValue == null ? "null" : searchGroup.groupValue.utf8ToString()) + ": " + Arrays.deepToString(searchGroup.sortValues));
                   }
                 }
     
    -            final SecondPassGroupingCollector c2 = new SecondPassGroupingCollector("group", topGroups, groupSort, docSort, docOffset+docsPerGroup, getScores, getMaxScores, fillFields);
    +            final TermSecondPassGroupingCollector c2 = new TermSecondPassGroupingCollector("group", topGroups, groupSort, docSort, docOffset+docsPerGroup, getScores, getMaxScores, fillFields);
                 if (doCache) {
                   if (cCache.isCached()) {
                     if (VERBOSE) {
    @@ -613,8 +614,8 @@ public class TestGrouping extends LuceneTestCase {
                 }
     
                 if (doAllGroups) {
    -              TopGroups tempTopGroups = c2.getTopGroups(docOffset);
    -              groupsResult = new TopGroups(tempTopGroups, allGroupsCollector.getGroupCount());
    +              TopGroups tempTopGroups = c2.getTopGroups(docOffset);
    +              groupsResult = new TopGroups(tempTopGroups, allGroupsCollector.getGroupCount());
                 } else {
                   groupsResult = c2.getTopGroups(docOffset);
                 }
    @@ -625,14 +626,14 @@ public class TestGrouping extends LuceneTestCase {
                 }
               }
     
    -          final TopGroups expectedGroups = slowGrouping(groupDocs, searchTerm, fillFields, getScores, getMaxScores, doAllGroups, groupSort, docSort, topNGroups, docsPerGroup, groupOffset, docOffset);
    +          final TopGroups expectedGroups = slowGrouping(groupDocs, searchTerm, fillFields, getScores, getMaxScores, doAllGroups, groupSort, docSort, topNGroups, docsPerGroup, groupOffset, docOffset);
     
               if (VERBOSE) {
                 if (expectedGroups == null) {
                   System.out.println("TEST: no expected groups");
                 } else {
                   System.out.println("TEST: expected groups");
    -              for(GroupDocs gd : expectedGroups.groups) {
    +              for(GroupDocs gd : expectedGroups.groups) {
                     System.out.println("  group=" + (gd.groupValue == null ? "null" : gd.groupValue.utf8ToString()));
                     for(ScoreDoc sd : gd.scoreDocs) {
                       System.out.println("    id=" + sd.doc);
    @@ -645,21 +646,22 @@ public class TestGrouping extends LuceneTestCase {
     
               final boolean needsScores = getScores || getMaxScores || docSort == null;
               final BlockGroupingCollector c3 = new BlockGroupingCollector(groupSort, groupOffset+topNGroups, needsScores, lastDocInBlock);
    -          final AllGroupsCollector allGroupsCollector2;
    +          final TermAllGroupsCollector allGroupsCollector2;
               final Collector c4;
               if (doAllGroups) {
    -            allGroupsCollector2 = new AllGroupsCollector("group");
    +            allGroupsCollector2 = new TermAllGroupsCollector("group");
                 c4 = MultiCollector.wrap(c3, allGroupsCollector2);
               } else {
                 allGroupsCollector2 = null;
                 c4 = c3;
               }
               s2.search(new TermQuery(new Term("content", searchTerm)), c4);
    -          final TopGroups tempTopGroups2 = c3.getTopGroups(docSort, groupOffset, docOffset, docOffset+docsPerGroup, fillFields);
    +          @SuppressWarnings("unchecked")
    +          final TopGroups tempTopGroups2 = c3.getTopGroups(docSort, groupOffset, docOffset, docOffset+docsPerGroup, fillFields);
               final TopGroups groupsResult2;
               if (doAllGroups && tempTopGroups2 != null) {
                 assertEquals((int) tempTopGroups2.totalGroupCount, allGroupsCollector2.getGroupCount());
    -            groupsResult2 = new TopGroups(tempTopGroups2, allGroupsCollector2.getGroupCount());
    +            groupsResult2 = new TopGroups(tempTopGroups2, allGroupsCollector2.getGroupCount());
               } else {
                 groupsResult2 = tempTopGroups2;
               }