From d1548ca30a740f6a586ee20aa4980fab02ffe49b Mon Sep 17 00:00:00 2001
From: Michael McCandless
Date: Thu, 2 Jun 2011 22:35:17 +0000
Subject: [PATCH] LUCENE-3099: allow subclasses to determine the group value
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1130858 13f79535-47bb-0310-9956-ffa450edef68
---
lucene/contrib/CHANGES.txt | 4 +
.../grouping/AbstractAllGroupsCollector.java | 67 +++++++
...> AbstractFirstPassGroupingCollector.java} | 113 ++++++------
.../AbstractSecondPassGroupingCollector.java | 156 ++++++++++++++++
.../grouping/BlockGroupingCollector.java | 9 +-
.../lucene/search/grouping/GroupDocs.java | 9 +-
.../lucene/search/grouping/SearchGroup.java | 14 +-
.../grouping/SecondPassGroupingCollector.java | 172 ------------------
...ector.java => TermAllGroupsCollector.java} | 49 ++---
.../TermFirstPassGroupingCollector.java | 85 +++++++++
.../TermSecondPassGroupingCollector.java | 76 ++++++++
.../lucene/search/grouping/TopGroups.java | 8 +-
.../lucene/search/grouping/package.html | 78 +++-----
.../lucene/search/grouping/TestGrouping.java | 52 +++---
14 files changed, 533 insertions(+), 359 deletions(-)
create mode 100644 modules/grouping/src/java/org/apache/lucene/search/grouping/AbstractAllGroupsCollector.java
rename modules/grouping/src/java/org/apache/lucene/search/grouping/{FirstPassGroupingCollector.java => AbstractFirstPassGroupingCollector.java} (78%)
create mode 100644 modules/grouping/src/java/org/apache/lucene/search/grouping/AbstractSecondPassGroupingCollector.java
delete mode 100644 modules/grouping/src/java/org/apache/lucene/search/grouping/SecondPassGroupingCollector.java
rename modules/grouping/src/java/org/apache/lucene/search/grouping/{AllGroupsCollector.java => TermAllGroupsCollector.java} (71%)
create mode 100644 modules/grouping/src/java/org/apache/lucene/search/grouping/TermFirstPassGroupingCollector.java
create mode 100644 modules/grouping/src/java/org/apache/lucene/search/grouping/TermSecondPassGroupingCollector.java
diff --git a/lucene/contrib/CHANGES.txt b/lucene/contrib/CHANGES.txt
index 1aeba0c32cb..e45af1f3640 100644
--- a/lucene/contrib/CHANGES.txt
+++ b/lucene/contrib/CHANGES.txt
@@ -75,6 +75,10 @@ API Changes
* LUCENE-3141: add getter method to access fragInfos in FieldFragList.
(Sujit Pal via Koji Sekiguchi)
+ * LUCENE-3099: Allow subclasses to determine the group value for
+ First/SecondPassGroupingCollector. (Martijn van Groningen, Mike
+ McCandless)
+
Build
* LUCENE-3149: Upgrade contrib/icu's ICU jar file to ICU 4.8.
diff --git a/modules/grouping/src/java/org/apache/lucene/search/grouping/AbstractAllGroupsCollector.java b/modules/grouping/src/java/org/apache/lucene/search/grouping/AbstractAllGroupsCollector.java
new file mode 100644
index 00000000000..b8ac5f84411
--- /dev/null
+++ b/modules/grouping/src/java/org/apache/lucene/search/grouping/AbstractAllGroupsCollector.java
@@ -0,0 +1,67 @@
+package org.apache.lucene.search.grouping;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.search.Collector;
+import org.apache.lucene.search.Scorer;
+import org.apache.lucene.util.BytesRef;
+
+import java.io.IOException;
+import java.util.Collection;
+
+/**
+ * A collector that collects all groups that match the
+ * query. Only the group value is collected, and the order
+ * is undefined. This collector does not determine
+ * the most relevant document of a group.
+ *
+ *
+ * This is an abstract version. Concrete implementations define
+ * what a group actually is and how it is internally collected.
+ *
+ * @lucene.experimental
+ */
+public abstract class AbstractAllGroupsCollector extends Collector {
+
+ /**
+ * Returns the total number of groups for the executed search.
+ * This is a convenience method. The following code snippet has the same effect: {@code getGroups().size()}
+ *
+ * @return The total number of groups for the executed search
+ */
+ public int getGroupCount() {
+ return getGroups().size();
+ }
+
+ /**
+ * Returns the group values
+ *
+ * This is an unordered collection of group values. For each group that matched the query there is a {@link BytesRef}
+ * representing a group value.
+ *
+ * @return the group values
+ */
+ public abstract Collection getGroups();
+
+ // Intentionally empty: the scorer is not needed
+ public void setScorer(Scorer scorer) throws IOException {}
+
+ public boolean acceptsDocsOutOfOrder() {
+ return true;
+ }
+}
\ No newline at end of file
diff --git a/modules/grouping/src/java/org/apache/lucene/search/grouping/FirstPassGroupingCollector.java b/modules/grouping/src/java/org/apache/lucene/search/grouping/AbstractFirstPassGroupingCollector.java
similarity index 78%
rename from modules/grouping/src/java/org/apache/lucene/search/grouping/FirstPassGroupingCollector.java
rename to modules/grouping/src/java/org/apache/lucene/search/grouping/AbstractFirstPassGroupingCollector.java
index 64dd0429f41..95f56911e7c 100644
--- a/modules/grouping/src/java/org/apache/lucene/search/grouping/FirstPassGroupingCollector.java
+++ b/modules/grouping/src/java/org/apache/lucene/search/grouping/AbstractFirstPassGroupingCollector.java
@@ -17,56 +17,39 @@ package org.apache.lucene.search.grouping;
* limitations under the License.
*/
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.Comparator;
-import java.util.HashMap;
-import java.util.TreeSet;
-
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
-import org.apache.lucene.search.Collector;
-import org.apache.lucene.search.FieldCache;
-import org.apache.lucene.search.FieldComparator;
-import org.apache.lucene.search.Scorer;
-import org.apache.lucene.search.Sort;
-import org.apache.lucene.search.SortField;
-import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.search.*;
+
+import java.io.IOException;
+import java.util.*;
/** FirstPassGroupingCollector is the first of two passes necessary
* to collect grouped hits. This pass gathers the top N sorted
- * groups.
+ * groups. Concrete subclasses define what a group is and how it
+ * is internally collected.
*
* <p>See {@link org.apache.lucene.search.grouping} for more
* details including a full code example.
*
* @lucene.experimental
*/
+abstract public class AbstractFirstPassGroupingCollector extends Collector {
-public class FirstPassGroupingCollector extends Collector {
-
- private final String groupField;
private final Sort groupSort;
private final FieldComparator[] comparators;
private final int[] reversed;
private final int topNGroups;
- private final HashMap groupMap;
- private final BytesRef scratchBytesRef = new BytesRef();
+ private final HashMap> groupMap;
private final int compIDXEnd;
// Set once we reach topNGroups unique groups:
- private TreeSet orderedGroups;
+ private TreeSet> orderedGroups;
private int docBase;
private int spareSlot;
- private FieldCache.DocTermsIndex index;
/**
* Create the first pass collector.
*
- * @param groupField The field used to group
- * documents. This field must be single-valued and
- * indexed (FieldCache is used to access its value
- * per-document).
* @param groupSort The {@link Sort} used to sort the
* groups. The top sorted document within each group
* according to groupSort, determines how that group
@@ -74,13 +57,13 @@ public class FirstPassGroupingCollector extends Collector {
* ie, if you want to groupSort by relevance use
* Sort.RELEVANCE.
* @param topNGroups How many top groups to keep.
+ * @throws IOException If I/O related errors occur
*/
- public FirstPassGroupingCollector(String groupField, Sort groupSort, int topNGroups) throws IOException {
+ public AbstractFirstPassGroupingCollector(Sort groupSort, int topNGroups) throws IOException {
if (topNGroups < 1) {
throw new IllegalArgumentException("topNGroups must be >= 1 (got " + topNGroups + ")");
}
- this.groupField = groupField;
// TODO: allow null groupSort to mean "by relevance",
// and specialize it?
this.groupSort = groupSort;
@@ -100,13 +83,19 @@ public class FirstPassGroupingCollector extends Collector {
}
spareSlot = topNGroups;
- groupMap = new HashMap(topNGroups);
+ groupMap = new HashMap>(topNGroups);
}
- /** Returns top groups, starting from offset. This may
- * return null, if no groups were collected, or if the
- * number of unique groups collected is <= offset. */
- public Collection getTopGroups(int groupOffset, boolean fillFields) {
+ /**
+ * Returns top groups, starting from offset. This may
+ * return null, if no groups were collected, or if the
+ * number of unique groups collected is <= offset.
+ *
+ * @param groupOffset The offset in the collected groups
+ * @param fillFields Whether to fill the {@link SearchGroup#sortValues}
+ * @return top groups, starting from offset
+ */
+ public Collection> getTopGroups(int groupOffset, boolean fillFields) {
//System.out.println("FP.getTopGroups groupOffset=" + groupOffset + " fillFields=" + fillFields + " groupMap.size()=" + groupMap.size());
@@ -122,15 +111,15 @@ public class FirstPassGroupingCollector extends Collector {
buildSortedSet();
}
- final Collection result = new ArrayList();
+ final Collection> result = new ArrayList>();
int upto = 0;
final int sortFieldCount = groupSort.getSort().length;
- for(CollectedSearchGroup group : orderedGroups) {
+ for(CollectedSearchGroup group : orderedGroups) {
if (upto++ < groupOffset) {
continue;
}
//System.out.println(" group=" + (group.groupValue == null ? "null" : group.groupValue.utf8ToString()));
- SearchGroup searchGroup = new SearchGroup();
+ SearchGroup searchGroup = new SearchGroup();
searchGroup.groupValue = group.groupValue;
if (fillFields) {
searchGroup.sortValues = new Comparable[sortFieldCount];
@@ -144,10 +133,6 @@ public class FirstPassGroupingCollector extends Collector {
return result;
}
- public String getGroupField() {
- return groupField;
- }
-
@Override
public void setScorer(Scorer scorer) throws IOException {
for (FieldComparator comparator : comparators) {
@@ -189,13 +174,9 @@ public class FirstPassGroupingCollector extends Collector {
// TODO: should we add option to mean "ignore docs that
// don't have the group field" (instead of stuffing them
// under null group)?
- final int ord = index.getOrd(doc);
- //System.out.println(" ord=" + ord);
+ final GROUP_VALUE_TYPE groupValue = getDocGroupValue(doc);
- final BytesRef br = ord == 0 ? null : index.lookup(ord, scratchBytesRef);
- //System.out.println(" group=" + (br == null ? "null" : br.utf8ToString()));
-
- final CollectedSearchGroup group = groupMap.get(br);
+ final CollectedSearchGroup group = groupMap.get(groupValue);
if (group == null) {
@@ -210,8 +191,8 @@ public class FirstPassGroupingCollector extends Collector {
// just keep collecting them
// Add a new CollectedSearchGroup:
- CollectedSearchGroup sg = new CollectedSearchGroup();
- sg.groupValue = ord == 0 ? null : new BytesRef(scratchBytesRef);
+ CollectedSearchGroup sg = new CollectedSearchGroup();
+ sg.groupValue = copyDocGroupValue(groupValue, null);
sg.comparatorSlot = groupMap.size();
sg.topDoc = docBase + doc;
for (FieldComparator fc : comparators) {
@@ -233,20 +214,14 @@ public class FirstPassGroupingCollector extends Collector {
// the bottom group with this new group.
// java 6-only: final CollectedSearchGroup bottomGroup = orderedGroups.pollLast();
- final CollectedSearchGroup bottomGroup = orderedGroups.last();
+ final CollectedSearchGroup bottomGroup = orderedGroups.last();
orderedGroups.remove(bottomGroup);
assert orderedGroups.size() == topNGroups -1;
groupMap.remove(bottomGroup.groupValue);
// reuse the removed CollectedSearchGroup
- if (br == null) {
- bottomGroup.groupValue = null;
- } else if (bottomGroup.groupValue != null) {
- bottomGroup.groupValue.copy(br);
- } else {
- bottomGroup.groupValue = new BytesRef(br);
- }
+ bottomGroup.groupValue = copyDocGroupValue(groupValue, bottomGroup.groupValue);
bottomGroup.topDoc = docBase + doc;
for (FieldComparator fc : comparators) {
@@ -291,7 +266,7 @@ public class FirstPassGroupingCollector extends Collector {
// Remove before updating the group since lookup is done via comparators
// TODO: optimize this
- final CollectedSearchGroup prevLast;
+ final CollectedSearchGroup prevLast;
if (orderedGroups != null) {
prevLast = orderedGroups.last();
orderedGroups.remove(group);
@@ -336,7 +311,7 @@ public class FirstPassGroupingCollector extends Collector {
}
};
- orderedGroups = new TreeSet(comparator);
+ orderedGroups = new TreeSet>(comparator);
orderedGroups.addAll(groupMap.values());
assert orderedGroups.size() > 0;
@@ -353,15 +328,31 @@ public class FirstPassGroupingCollector extends Collector {
@Override
public void setNextReader(AtomicReaderContext readerContext) throws IOException {
docBase = readerContext.docBase;
- index = FieldCache.DEFAULT.getTermsIndex(readerContext.reader, groupField);
-
for (int i=0; i extends SearchGroup {
int topDoc;
int comparatorSlot;
}
diff --git a/modules/grouping/src/java/org/apache/lucene/search/grouping/AbstractSecondPassGroupingCollector.java b/modules/grouping/src/java/org/apache/lucene/search/grouping/AbstractSecondPassGroupingCollector.java
new file mode 100644
index 00000000000..4d91d218a7c
--- /dev/null
+++ b/modules/grouping/src/java/org/apache/lucene/search/grouping/AbstractSecondPassGroupingCollector.java
@@ -0,0 +1,156 @@
+package org.apache.lucene.search.grouping;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.index.IndexReader.AtomicReaderContext;
+import org.apache.lucene.search.*;
+
+import java.io.IOException;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * SecondPassGroupingCollector is the second of two passes
+ * necessary to collect grouped docs. This pass gathers the
+ * top N documents per top group computed from the
+ * first pass. Concrete subclasses define what a group is and how it
+ * is internally collected.
+ *
+ * <p>See {@link org.apache.lucene.search.grouping} for more
+ * details including a full code example.
+ *
+ * @lucene.experimental
+ */
+public abstract class AbstractSecondPassGroupingCollector extends Collector {
+
+ protected final Map> groupMap;
+ private final int maxDocsPerGroup;
+ protected SearchGroupDocs[] groupDocs;
+ private final Collection> groups;
+ private final Sort withinGroupSort;
+ private final Sort groupSort;
+
+ private int totalHitCount;
+ private int totalGroupedHitCount;
+
+ public AbstractSecondPassGroupingCollector(Collection> groups, Sort groupSort, Sort withinGroupSort,
+ int maxDocsPerGroup, boolean getScores, boolean getMaxScores, boolean fillSortFields)
+ throws IOException {
+
+ //System.out.println("SP init");
+ if (groups.size() == 0) {
+ throw new IllegalArgumentException("no groups to collect (groups.size() is 0)");
+ }
+
+ this.groupSort = groupSort;
+ this.withinGroupSort = withinGroupSort;
+ this.groups = groups;
+ this.maxDocsPerGroup = maxDocsPerGroup;
+ groupMap = new HashMap>(groups.size());
+
+ for (SearchGroup group : groups) {
+ //System.out.println(" prep group=" + (group.groupValue == null ? "null" : group.groupValue.utf8ToString()));
+ final TopDocsCollector collector;
+ if (withinGroupSort == null) {
+ // Sort by score
+ collector = TopScoreDocCollector.create(maxDocsPerGroup, true);
+ } else {
+ // Sort by fields
+ collector = TopFieldCollector.create(withinGroupSort, maxDocsPerGroup, fillSortFields, getScores, getMaxScores, true);
+ }
+ groupMap.put(group.groupValue,
+ new SearchGroupDocs(group.groupValue,
+ collector));
+ }
+ }
+
+ @Override
+ public void setScorer(Scorer scorer) throws IOException {
+ for (SearchGroupDocs group : groupMap.values()) {
+ group.collector.setScorer(scorer);
+ }
+ }
+
+ @Override
+ public void collect(int doc) throws IOException {
+ totalHitCount++;
+ SearchGroupDocs group = retrieveGroup(doc);
+ if (group != null) {
+ totalGroupedHitCount++;
+ group.collector.collect(doc);
+ }
+ }
+
+ /**
+ * Returns the group the specified doc belongs to or null if no group could be retrieved.
+ *
+ * @param doc The specified doc
+ * @return the group the specified doc belongs to or null if no group could be retrieved
+ * @throws IOException If an I/O related error occurred
+ */
+ protected abstract SearchGroupDocs retrieveGroup(int doc) throws IOException;
+
+ @Override
+ public void setNextReader(AtomicReaderContext readerContext) throws IOException {
+ //System.out.println("SP.setNextReader");
+ for (SearchGroupDocs group : groupMap.values()) {
+ group.collector.setNextReader(readerContext);
+ }
+ }
+
+ @Override
+ public boolean acceptsDocsOutOfOrder() {
+ return false;
+ }
+
+ public TopGroups getTopGroups(int withinGroupOffset) {
+ @SuppressWarnings("unchecked")
+ final GroupDocs[] groupDocsResult = (GroupDocs[]) new GroupDocs[groups.size()];
+
+ int groupIDX = 0;
+ for(SearchGroup group : groups) {
+ final SearchGroupDocs groupDocs = groupMap.get(group.groupValue);
+ final TopDocs topDocs = groupDocs.collector.topDocs(withinGroupOffset, maxDocsPerGroup);
+ groupDocsResult[groupIDX++] = new GroupDocs(topDocs.getMaxScore(),
+ topDocs.totalHits,
+ topDocs.scoreDocs,
+ groupDocs.groupValue,
+ group.sortValues);
+ }
+
+ return new TopGroups(groupSort.getSort(),
+ withinGroupSort == null ? null : withinGroupSort.getSort(),
+ totalHitCount, totalGroupedHitCount, groupDocsResult);
+ }
+
+
+ // TODO: merge with SearchGroup or not?
+ // advantage: no need to build a new HashMap
+ // disadvantage: blows up the size of SearchGroup if we need many of them, and couples implementations
+ public class SearchGroupDocs {
+
+ public final GROUP_VALUE_TYPE groupValue;
+ public final TopDocsCollector collector;
+
+ public SearchGroupDocs(GROUP_VALUE_TYPE groupValue, TopDocsCollector collector) {
+ this.groupValue = groupValue;
+ this.collector = collector;
+ }
+ }
+}
diff --git a/modules/grouping/src/java/org/apache/lucene/search/grouping/BlockGroupingCollector.java b/modules/grouping/src/java/org/apache/lucene/search/grouping/BlockGroupingCollector.java
index d2c6eb18a13..06a7c988452 100644
--- a/modules/grouping/src/java/org/apache/lucene/search/grouping/BlockGroupingCollector.java
+++ b/modules/grouping/src/java/org/apache/lucene/search/grouping/BlockGroupingCollector.java
@@ -49,7 +49,7 @@ import org.apache.lucene.util.PriorityQueue;
* being that the documents in each group must always be
* indexed as a block. This collector also fills in
* TopGroups.totalGroupCount without requiring the separate
- * {@link AllGroupsCollector}. However, this collector does
+ * {@link TermAllGroupsCollector}. However, this collector does
* not fill in the groupValue of each group; this field
* will always be null.
*
@@ -317,7 +317,8 @@ public class BlockGroupingCollector extends Collector {
final FakeScorer fakeScorer = new FakeScorer();
- final GroupDocs[] groups = new GroupDocs[groupQueue.size() - groupOffset];
+ @SuppressWarnings("unchecked")
+ final GroupDocs
+
The implementation is two-pass: the first pass ({@link
+ org.apache.lucene.search.grouping.TermFirstPassGroupingCollector})
+ gathers the top groups, and the second pass ({@link
+ org.apache.lucene.search.grouping.TermSecondPassGroupingCollector})
+ gathers documents within those groups. If the search is costly to
+ run you may want to use the {@link
+ org.apache.lucene.search.CachingCollector} class, which
+ caches hits and can (quickly) replay them for the second pass. This
+ way you only run the query once, but you pay a RAM cost to (briefly)
+ hold all hits. Results are returned as a {@link
+ org.apache.lucene.search.grouping.TopGroups} instance.
+
-There are two grouping implementations here:
-
-
- Arbitrary grouping that can group by any single-valued indexed
- field, implemented as a two-pass collector: the first pass ({@link
- org.apache.lucene.search.grouping.FirstPassGroupingCollector})
- gathers the top groups, and the second pass ({@link
- org.apache.lucene.search.grouping.SecondPassGroupingCollector})
- gathers documents within those groups. If the search is costly to
- run you may want to use the {@link
- org.apache.lucene.search.CachingCollector} class, which caches
- hits and can (quickly) replay them for the second pass. This way
- you only run the query once, but you pay a RAM cost to (briefly)
- hold all hits. Results are returned as a {@link
- org.apache.lucene.search.grouping.TopGroups} instance.
-
-
- Indexed groups, using a single pass collector (BlockGroupingCollectorDoc) that
- is able to group according to the doc blocks created during
- indexing using IndexWriter's add/updateDocuments API.
- This is faster (~25% faster QPS) than the generic two-pass
- collector, but it only works for doc blocks so you must statically
- commit (during indexing) to which grouping you'll need at search
- time.
+ This module abstracts away what defines a group and how it is collected. All grouping collectors
+ are abstract and currently have term-based implementations. One can implement
+ collectors that, for example, group on multiple fields.
+
-
This implementation does not rely on a single valued grouping
- field; rather, the blocks in the index define the groups, so your
- application is free to determine what the grouping criteria is.
- At search time, you must provide a Filter that marks
- the last document in each group. This is a substantial memory
- savings because this collector does not load
- a DocTermsIndex from the
- FieldCache.
-
-
-
-
The benefit of the arbitrary grouping implementation is you don't have
-to commit at indexing time to a static grouping of your documents.
-But the downside is it's somewhat slower to run, and requires more RAM
-(a FieldCache.DocTermsIndex entry is created).
+
+ This module abstracts away what defines a group and how it is collected. All grouping collectors
+ are abstract and currently have term-based implementations. One can implement
+ collectors that, for example, group on multiple fields.
+
Known limitations:
For the two-pass grouping collector, the group field must be a
single-valued indexed field.
{@link org.apache.lucene.search.FieldCache} is used to load the {@link org.apache.lucene.search.FieldCache.DocTermsIndex} for this field.
-
Unlike Solr's implementation, this module cannot group by
- function query values nor by arbitrary queries.
+
Although Solr supports grouping by function and this module has an abstraction of what a group is, there are currently only
+ implementations for grouping based on terms.
Sharding is not directly supported, though is not too
difficult, if you can merge the top groups and top documents per
group yourself.
@@ -101,14 +83,14 @@ But the downside is it's somewhat slower to run, and requires more RAM
(using the {@link org.apache.lucene.search.CachingCollector}):
- FirstPassGroupingCollector c1 = new FirstPassGroupingCollector("author", groupSort, groupOffset+topNGroups);
+ TermFirstPassGroupingCollector c1 = new TermFirstPassGroupingCollector("author", groupSort, groupOffset+topNGroups);
boolean cacheScores = true;
double maxCacheRAMMB = 4.0;
CachingCollector cachedCollector = CachingCollector.create(c1, cacheScores, maxCacheRAMMB);
s.search(new TermQuery(new Term("content", searchTerm)), cachedCollector);
- Collection topGroups = c1.getTopGroups(groupOffset, fillFields);
+ Collection> topGroups = c1.getTopGroups(groupOffset, fillFields);
if (topGroups == null) {
// No groups matched
@@ -118,12 +100,12 @@ But the downside is it's somewhat slower to run, and requires more RAM
boolean getScores = true;
boolean getMaxScores = true;
boolean fillFields = true;
- SecondPassGroupingCollector c2 = new SecondPassGroupingCollector("author", topGroups, groupSort, docSort, docOffset+docsPerGroup, getScores, getMaxScores, fillFields);
+ TermSecondPassGroupingCollector c2 = new TermSecondPassGroupingCollector("author", topGroups, groupSort, docSort, docOffset+docsPerGroup, getScores, getMaxScores, fillFields);
//Optionally compute total group count
- AllGroupsCollector allGroupsCollector = null;
+ TermAllGroupsCollector allGroupsCollector = null;
if (requiredTotalGroupCount) {
- allGroupsCollector = new AllGroupsCollector("author");
+ allGroupsCollector = new TermAllGroupsCollector("author");
c2 = MultiCollector.wrap(c2, allGroupsCollector);
}
@@ -135,9 +117,9 @@ But the downside is it's somewhat slower to run, and requires more RAM
s.search(new TermQuery(new Term("content", searchTerm)), c2);
}
- TopGroups groupsResult = c2.getTopGroups(docOffset);
+ TopGroups groupsResult = c2.getTopGroups(docOffset);
if (requiredTotalGroupCount) {
- groupResult = new TopGroups(groupsResult, allGroupsCollector.getGroupCount());
+ groupsResult = new TopGroups(groupsResult, allGroupsCollector.getGroupCount());
}
// Render groupsResult...
diff --git a/modules/grouping/src/test/org/apache/lucene/search/grouping/TestGrouping.java b/modules/grouping/src/test/org/apache/lucene/search/grouping/TestGrouping.java
index 9dc49faa71b..87745f64a18 100644
--- a/modules/grouping/src/test/org/apache/lucene/search/grouping/TestGrouping.java
+++ b/modules/grouping/src/test/org/apache/lucene/search/grouping/TestGrouping.java
@@ -17,9 +17,6 @@
package org.apache.lucene.search.grouping;
-import java.util.*;
-import java.io.IOException;
-
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
@@ -33,6 +30,9 @@ import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util._TestUtil;
+import java.io.IOException;
+import java.util.*;
+
// TODO
// - should test relevance sort too
// - test null
@@ -103,10 +103,10 @@ public class TestGrouping extends LuceneTestCase {
w.close();
final Sort groupSort = Sort.RELEVANCE;
- final FirstPassGroupingCollector c1 = new FirstPassGroupingCollector(groupField, groupSort, 10);
+ final TermFirstPassGroupingCollector c1 = new TermFirstPassGroupingCollector(groupField, groupSort, 10);
indexSearcher.search(new TermQuery(new Term("content", "random")), c1);
- final SecondPassGroupingCollector c2 = new SecondPassGroupingCollector(groupField, c1.getTopGroups(0, true), groupSort, null, 5, true, false, true);
+ final TermSecondPassGroupingCollector c2 = new TermSecondPassGroupingCollector(groupField, c1.getTopGroups(0, true), groupSort, null, 5, true, false, true);
indexSearcher.search(new TermQuery(new Term("content", "random")), c2);
final TopGroups groups = c2.getTopGroups(0);
@@ -236,7 +236,7 @@ public class TestGrouping extends LuceneTestCase {
}
*/
- private TopGroups slowGrouping(GroupDoc[] groupDocs,
+ private TopGroups slowGrouping(GroupDoc[] groupDocs,
String searchTerm,
boolean fillFields,
boolean getScores,
@@ -296,7 +296,8 @@ public class TestGrouping extends LuceneTestCase {
final int limit = Math.min(groupOffset + topNGroups, groups.size());
final Comparator docSortComp = getComparator(docSort);
- final GroupDocs[] result = new GroupDocs[limit-groupOffset];
+ @SuppressWarnings("unchecked")
+ final GroupDocs[] result = new GroupDocs[limit-groupOffset];
int totalGroupedHitCount = 0;
for(int idx=groupOffset;idx < limit;idx++) {
final BytesRef group = sortedGroups.get(idx);
@@ -321,7 +322,7 @@ public class TestGrouping extends LuceneTestCase {
hits = new ScoreDoc[0];
}
- result[idx-groupOffset] = new GroupDocs(0.0f,
+ result[idx-groupOffset] = new GroupDocs(0.0f,
docs.size(),
hits,
group,
@@ -329,12 +330,12 @@ public class TestGrouping extends LuceneTestCase {
}
if (doAllGroups) {
- return new TopGroups(
- new TopGroups(groupSort.getSort(), docSort.getSort(), totalHitCount, totalGroupedHitCount, result),
+ return new TopGroups(
+ new TopGroups(groupSort.getSort(), docSort.getSort(), totalHitCount, totalGroupedHitCount, result),
knownGroups.size()
);
} else {
- return new TopGroups(groupSort.getSort(), docSort.getSort(), totalHitCount, totalGroupedHitCount, result);
+ return new TopGroups(groupSort.getSort(), docSort.getSort(), totalHitCount, totalGroupedHitCount, result);
}
}
@@ -525,14 +526,14 @@ public class TestGrouping extends LuceneTestCase {
System.out.println("TEST: groupSort=" + groupSort + " docSort=" + docSort + " searchTerm=" + searchTerm + " topNGroups=" + topNGroups + " groupOffset=" + groupOffset + " docOffset=" + docOffset + " doCache=" + doCache + " docsPerGroup=" + docsPerGroup + " doAllGroups=" + doAllGroups);
}
- final AllGroupsCollector allGroupsCollector;
+ final TermAllGroupsCollector allGroupsCollector;
if (doAllGroups) {
- allGroupsCollector = new AllGroupsCollector("group");
+ allGroupsCollector = new TermAllGroupsCollector("group");
} else {
allGroupsCollector = null;
}
- final FirstPassGroupingCollector c1 = new FirstPassGroupingCollector("group", groupSort, groupOffset+topNGroups);
+ final TermFirstPassGroupingCollector c1 = new TermFirstPassGroupingCollector("group", groupSort, groupOffset+topNGroups);
final CachingCollector cCache;
final Collector c;
@@ -583,19 +584,19 @@ public class TestGrouping extends LuceneTestCase {
}
}
- final Collection topGroups = c1.getTopGroups(groupOffset, fillFields);
+ final Collection> topGroups = c1.getTopGroups(groupOffset, fillFields);
final TopGroups groupsResult;
if (topGroups != null) {
if (VERBOSE) {
System.out.println("TEST: topGroups");
- for (SearchGroup searchGroup : topGroups) {
+ for (SearchGroup searchGroup : topGroups) {
System.out.println(" " + (searchGroup.groupValue == null ? "null" : searchGroup.groupValue.utf8ToString()) + ": " + Arrays.deepToString(searchGroup.sortValues));
}
}
- final SecondPassGroupingCollector c2 = new SecondPassGroupingCollector("group", topGroups, groupSort, docSort, docOffset+docsPerGroup, getScores, getMaxScores, fillFields);
+ final TermSecondPassGroupingCollector c2 = new TermSecondPassGroupingCollector("group", topGroups, groupSort, docSort, docOffset+docsPerGroup, getScores, getMaxScores, fillFields);
if (doCache) {
if (cCache.isCached()) {
if (VERBOSE) {
@@ -613,8 +614,8 @@ public class TestGrouping extends LuceneTestCase {
}
if (doAllGroups) {
- TopGroups tempTopGroups = c2.getTopGroups(docOffset);
- groupsResult = new TopGroups(tempTopGroups, allGroupsCollector.getGroupCount());
+ TopGroups tempTopGroups = c2.getTopGroups(docOffset);
+ groupsResult = new TopGroups(tempTopGroups, allGroupsCollector.getGroupCount());
} else {
groupsResult = c2.getTopGroups(docOffset);
}
@@ -625,14 +626,14 @@ public class TestGrouping extends LuceneTestCase {
}
}
- final TopGroups expectedGroups = slowGrouping(groupDocs, searchTerm, fillFields, getScores, getMaxScores, doAllGroups, groupSort, docSort, topNGroups, docsPerGroup, groupOffset, docOffset);
+ final TopGroups expectedGroups = slowGrouping(groupDocs, searchTerm, fillFields, getScores, getMaxScores, doAllGroups, groupSort, docSort, topNGroups, docsPerGroup, groupOffset, docOffset);
if (VERBOSE) {
if (expectedGroups == null) {
System.out.println("TEST: no expected groups");
} else {
System.out.println("TEST: expected groups");
- for(GroupDocs gd : expectedGroups.groups) {
+ for(GroupDocs gd : expectedGroups.groups) {
System.out.println(" group=" + (gd.groupValue == null ? "null" : gd.groupValue.utf8ToString()));
for(ScoreDoc sd : gd.scoreDocs) {
System.out.println(" id=" + sd.doc);
@@ -645,21 +646,22 @@ public class TestGrouping extends LuceneTestCase {
final boolean needsScores = getScores || getMaxScores || docSort == null;
final BlockGroupingCollector c3 = new BlockGroupingCollector(groupSort, groupOffset+topNGroups, needsScores, lastDocInBlock);
- final AllGroupsCollector allGroupsCollector2;
+ final TermAllGroupsCollector allGroupsCollector2;
final Collector c4;
if (doAllGroups) {
- allGroupsCollector2 = new AllGroupsCollector("group");
+ allGroupsCollector2 = new TermAllGroupsCollector("group");
c4 = MultiCollector.wrap(c3, allGroupsCollector2);
} else {
allGroupsCollector2 = null;
c4 = c3;
}
s2.search(new TermQuery(new Term("content", searchTerm)), c4);
- final TopGroups tempTopGroups2 = c3.getTopGroups(docSort, groupOffset, docOffset, docOffset+docsPerGroup, fillFields);
+ @SuppressWarnings("unchecked")
+ final TopGroups tempTopGroups2 = c3.getTopGroups(docSort, groupOffset, docOffset, docOffset+docsPerGroup, fillFields);
final TopGroups groupsResult2;
if (doAllGroups && tempTopGroups2 != null) {
assertEquals((int) tempTopGroups2.totalGroupCount, allGroupsCollector2.getGroupCount());
- groupsResult2 = new TopGroups(tempTopGroups2, allGroupsCollector2.getGroupCount());
+ groupsResult2 = new TopGroups(tempTopGroups2, allGroupsCollector2.getGroupCount());
} else {
groupsResult2 = tempTopGroups2;
}