From 710e630e91d0077a34950421ff1d492d2119c57a Mon Sep 17 00:00:00 2001
From: Michael McCandless
Date: Thu, 2 Jun 2011 22:21:29 +0000
Subject: [PATCH 01/29] CachingCollector must setScorer per segment
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1130852 13f79535-47bb-0310-9956-ffa450edef68
---
lucene/src/java/org/apache/lucene/search/CachingCollector.java | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/lucene/src/java/org/apache/lucene/search/CachingCollector.java b/lucene/src/java/org/apache/lucene/search/CachingCollector.java
index 2b90a394748..c17602794fb 100644
--- a/lucene/src/java/org/apache/lucene/search/CachingCollector.java
+++ b/lucene/src/java/org/apache/lucene/search/CachingCollector.java
@@ -168,10 +168,10 @@ public abstract class CachingCollector extends Collector {
int curUpto = 0;
int curBase = 0;
int chunkUpto = 0;
- other.setScorer(cachedScorer);
curDocs = EMPTY_INT_ARRAY;
for (SegStart seg : cachedSegs) {
other.setNextReader(seg.readerContext);
+ other.setScorer(cachedScorer);
while (curBase + curUpto < seg.end) {
if (curUpto == curDocs.length) {
curBase += curDocs.length;
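For context, this fix moves the setScorer call inside the per-segment loop of CachingCollector.replay, so the collector being replayed into receives a scorer for every cached segment rather than only once up front. A minimal sketch of that replay pattern follows; the collector, searcher, and query names are illustrative placeholders, not part of the patch:

  // First pass: collect and cache hits (and, optionally, scores).
  CachingCollector cachedCollector = CachingCollector.create(firstPassCollector, true /* cacheScores */, 4.0 /* maxRAMMB */);
  searcher.search(query, cachedCollector);

  // Second pass: replay the cached hits into another collector. With this fix,
  // replay calls setNextReader and then setScorer for each cached segment.
  if (cachedCollector.isCached()) {
    cachedCollector.replay(secondPassCollector);
  }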
From d1548ca30a740f6a586ee20aa4980fab02ffe49b Mon Sep 17 00:00:00 2001
From: Michael McCandless
Date: Thu, 2 Jun 2011 22:35:17 +0000
Subject: [PATCH 02/29] LUCENE-3099: allow subclasses to determine the group
value
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1130858 13f79535-47bb-0310-9956-ffa450edef68
---
lucene/contrib/CHANGES.txt | 4 +
.../grouping/AbstractAllGroupsCollector.java | 67 +++++++
...> AbstractFirstPassGroupingCollector.java} | 113 ++++++------
.../AbstractSecondPassGroupingCollector.java | 156 ++++++++++++++++
.../grouping/BlockGroupingCollector.java | 9 +-
.../lucene/search/grouping/GroupDocs.java | 9 +-
.../lucene/search/grouping/SearchGroup.java | 14 +-
.../grouping/SecondPassGroupingCollector.java | 172 ------------------
...ector.java => TermAllGroupsCollector.java} | 49 ++---
.../TermFirstPassGroupingCollector.java | 85 +++++++++
.../TermSecondPassGroupingCollector.java | 76 ++++++++
.../lucene/search/grouping/TopGroups.java | 8 +-
.../lucene/search/grouping/package.html | 78 +++-----
.../lucene/search/grouping/TestGrouping.java | 52 +++---
14 files changed, 533 insertions(+), 359 deletions(-)
create mode 100644 modules/grouping/src/java/org/apache/lucene/search/grouping/AbstractAllGroupsCollector.java
rename modules/grouping/src/java/org/apache/lucene/search/grouping/{FirstPassGroupingCollector.java => AbstractFirstPassGroupingCollector.java} (78%)
create mode 100644 modules/grouping/src/java/org/apache/lucene/search/grouping/AbstractSecondPassGroupingCollector.java
delete mode 100644 modules/grouping/src/java/org/apache/lucene/search/grouping/SecondPassGroupingCollector.java
rename modules/grouping/src/java/org/apache/lucene/search/grouping/{AllGroupsCollector.java => TermAllGroupsCollector.java} (71%)
create mode 100644 modules/grouping/src/java/org/apache/lucene/search/grouping/TermFirstPassGroupingCollector.java
create mode 100644 modules/grouping/src/java/org/apache/lucene/search/grouping/TermSecondPassGroupingCollector.java
diff --git a/lucene/contrib/CHANGES.txt b/lucene/contrib/CHANGES.txt
index 1aeba0c32cb..e45af1f3640 100644
--- a/lucene/contrib/CHANGES.txt
+++ b/lucene/contrib/CHANGES.txt
@@ -75,6 +75,10 @@ API Changes
* LUCENE-3141: add getter method to access fragInfos in FieldFragList.
(Sujit Pal via Koji Sekiguchi)
+ * LUCENE-3099: Allow subclasses to determine the group value for
+ First/SecondPassGroupingCollector. (Martijn van Groningen, Mike
+ McCandless)
+
Build
* LUCENE-3149: Upgrade contrib/icu's ICU jar file to ICU 4.8.
diff --git a/modules/grouping/src/java/org/apache/lucene/search/grouping/AbstractAllGroupsCollector.java b/modules/grouping/src/java/org/apache/lucene/search/grouping/AbstractAllGroupsCollector.java
new file mode 100644
index 00000000000..b8ac5f84411
--- /dev/null
+++ b/modules/grouping/src/java/org/apache/lucene/search/grouping/AbstractAllGroupsCollector.java
@@ -0,0 +1,67 @@
+package org.apache.lucene.search.grouping;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.search.Collector;
+import org.apache.lucene.search.Scorer;
+import org.apache.lucene.util.BytesRef;
+
+import java.io.IOException;
+import java.util.Collection;
+
+/**
+ * A collector that collects all groups that match the
+ * query. Only the group value is collected, and the order
+ * is undefined. This collector does not determine
+ * the most relevant document of a group.
+ *
+ *
+ * This is an abstract version. Concrete implementations define
+ * what a group actually is and how it is internally collected.
+ *
+ * @lucene.experimental
+ */
+public abstract class AbstractAllGroupsCollector<GROUP_VALUE_TYPE> extends Collector {
+
+ /**
+ * Returns the total number of groups for the executed search.
+ * This is a convenience method. The following code snippet has the same effect: <pre>getGroups().size()</pre>
+ *
+ * @return The total number of groups for the executed search
+ */
+ public int getGroupCount() {
+ return getGroups().size();
+ }
+
+ /**
+ * Returns the group values
+ *
+ * This is an unordered collection of group values. For each group that matched the query there is a {@link BytesRef}
+ * representing a group value.
+ *
+ * @return the group values
+ */
+ public abstract Collection<GROUP_VALUE_TYPE> getGroups();
+
+ // Empty not necessary
+ public void setScorer(Scorer scorer) throws IOException {}
+
+ public boolean acceptsDocsOutOfOrder() {
+ return true;
+ }
+}
\ No newline at end of file
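To make the contract of this new base class concrete, here is a hypothetical subclass sketch, not part of the patch, that groups on a single-valued indexed field loaded via FieldCache, roughly what the TermAllGroupsCollector introduced elsewhere in this commit does. The class and field names are invented for illustration, and the sketch assumes the generified signature AbstractAllGroupsCollector<GROUP_VALUE_TYPE>:

  import java.io.IOException;
  import java.util.Collection;
  import java.util.HashSet;
  import java.util.Set;

  import org.apache.lucene.index.IndexReader.AtomicReaderContext;
  import org.apache.lucene.search.FieldCache;
  import org.apache.lucene.util.BytesRef;

  // Hypothetical concrete implementation: collects the distinct BytesRef values of groupField.
  public class ExampleAllGroupsCollector extends AbstractAllGroupsCollector<BytesRef> {

    private final String groupField;
    private final Set<BytesRef> groups = new HashSet<BytesRef>();
    private final BytesRef spare = new BytesRef();
    private FieldCache.DocTermsIndex index;

    public ExampleAllGroupsCollector(String groupField) {
      this.groupField = groupField;
    }

    @Override
    public void collect(int doc) throws IOException {
      final int ord = index.getOrd(doc);
      final BytesRef groupValue = ord == 0 ? null : index.lookup(ord, spare);
      if (!groups.contains(groupValue)) {
        // Copy the value; the looked-up BytesRef is a reused scratch instance.
        groups.add(groupValue == null ? null : new BytesRef(groupValue));
      }
    }

    @Override
    public void setNextReader(AtomicReaderContext context) throws IOException {
      index = FieldCache.DEFAULT.getTermsIndex(context.reader, groupField);
    }

    @Override
    public Collection<BytesRef> getGroups() {
      return groups;
    }
  }

setScorer and acceptsDocsOutOfOrder are inherited from the abstract base, so a subclass only supplies collect, setNextReader, and getGroups.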
diff --git a/modules/grouping/src/java/org/apache/lucene/search/grouping/FirstPassGroupingCollector.java b/modules/grouping/src/java/org/apache/lucene/search/grouping/AbstractFirstPassGroupingCollector.java
similarity index 78%
rename from modules/grouping/src/java/org/apache/lucene/search/grouping/FirstPassGroupingCollector.java
rename to modules/grouping/src/java/org/apache/lucene/search/grouping/AbstractFirstPassGroupingCollector.java
index 64dd0429f41..95f56911e7c 100644
--- a/modules/grouping/src/java/org/apache/lucene/search/grouping/FirstPassGroupingCollector.java
+++ b/modules/grouping/src/java/org/apache/lucene/search/grouping/AbstractFirstPassGroupingCollector.java
@@ -17,56 +17,39 @@ package org.apache.lucene.search.grouping;
* limitations under the License.
*/
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.Comparator;
-import java.util.HashMap;
-import java.util.TreeSet;
-
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
-import org.apache.lucene.search.Collector;
-import org.apache.lucene.search.FieldCache;
-import org.apache.lucene.search.FieldComparator;
-import org.apache.lucene.search.Scorer;
-import org.apache.lucene.search.Sort;
-import org.apache.lucene.search.SortField;
-import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.search.*;
+
+import java.io.IOException;
+import java.util.*;
/** FirstPassGroupingCollector is the first of two passes necessary
* to collect grouped hits. This pass gathers the top N sorted
- * groups.
+ * groups. Concrete subclasses define what a group is and how it
+ * is internally collected.
*
*
* <p>See {@link org.apache.lucene.search.grouping} for more
* details including a full code example.
*
* @lucene.experimental
*/
+abstract public class AbstractFirstPassGroupingCollector<GROUP_VALUE_TYPE> extends Collector {
-public class FirstPassGroupingCollector extends Collector {
-
- private final String groupField;
private final Sort groupSort;
private final FieldComparator[] comparators;
private final int[] reversed;
private final int topNGroups;
- private final HashMap groupMap;
- private final BytesRef scratchBytesRef = new BytesRef();
+ private final HashMap<GROUP_VALUE_TYPE, CollectedSearchGroup<GROUP_VALUE_TYPE>> groupMap;
private final int compIDXEnd;
// Set once we reach topNGroups unique groups:
- private TreeSet orderedGroups;
+ private TreeSet<CollectedSearchGroup<GROUP_VALUE_TYPE>> orderedGroups;
private int docBase;
private int spareSlot;
- private FieldCache.DocTermsIndex index;
/**
* Create the first pass collector.
*
- * @param groupField The field used to group
- * documents. This field must be single-valued and
- * indexed (FieldCache is used to access its value
- * per-document).
* @param groupSort The {@link Sort} used to sort the
* groups. The top sorted document within each group
* according to groupSort, determines how that group
@@ -74,13 +57,13 @@ public class FirstPassGroupingCollector extends Collector {
* ie, if you want to groupSort by relevance use
* Sort.RELEVANCE.
* @param topNGroups How many top groups to keep.
+ * @throws IOException If I/O related errors occur
*/
- public FirstPassGroupingCollector(String groupField, Sort groupSort, int topNGroups) throws IOException {
+ public AbstractFirstPassGroupingCollector(Sort groupSort, int topNGroups) throws IOException {
if (topNGroups < 1) {
throw new IllegalArgumentException("topNGroups must be >= 1 (got " + topNGroups + ")");
}
- this.groupField = groupField;
// TODO: allow null groupSort to mean "by relevance",
// and specialize it?
this.groupSort = groupSort;
@@ -100,13 +83,19 @@ public class FirstPassGroupingCollector extends Collector {
}
spareSlot = topNGroups;
- groupMap = new HashMap(topNGroups);
+ groupMap = new HashMap<GROUP_VALUE_TYPE, CollectedSearchGroup<GROUP_VALUE_TYPE>>(topNGroups);
}
- /** Returns top groups, starting from offset. This may
- * return null, if no groups were collected, or if the
- * number of unique groups collected is <= offset. */
- public Collection getTopGroups(int groupOffset, boolean fillFields) {
+ /**
+ * Returns top groups, starting from offset. This may
+ * return null, if no groups were collected, or if the
+ * number of unique groups collected is <= offset.
+ *
+ * @param groupOffset The offset in the collected groups
+ * @param fillFields Whether to fill the sort values in {@link SearchGroup#sortValues}
+ * @return top groups, starting from offset
+ */
+ public Collection<SearchGroup<GROUP_VALUE_TYPE>> getTopGroups(int groupOffset, boolean fillFields) {
//System.out.println("FP.getTopGroups groupOffset=" + groupOffset + " fillFields=" + fillFields + " groupMap.size()=" + groupMap.size());
@@ -122,15 +111,15 @@ public class FirstPassGroupingCollector extends Collector {
buildSortedSet();
}
- final Collection result = new ArrayList();
+ final Collection<SearchGroup<GROUP_VALUE_TYPE>> result = new ArrayList<SearchGroup<GROUP_VALUE_TYPE>>();
int upto = 0;
final int sortFieldCount = groupSort.getSort().length;
- for(CollectedSearchGroup group : orderedGroups) {
+ for(CollectedSearchGroup<GROUP_VALUE_TYPE> group : orderedGroups) {
if (upto++ < groupOffset) {
continue;
}
//System.out.println(" group=" + (group.groupValue == null ? "null" : group.groupValue.utf8ToString()));
- SearchGroup searchGroup = new SearchGroup();
+ SearchGroup<GROUP_VALUE_TYPE> searchGroup = new SearchGroup<GROUP_VALUE_TYPE>();
searchGroup.groupValue = group.groupValue;
if (fillFields) {
searchGroup.sortValues = new Comparable[sortFieldCount];
@@ -144,10 +133,6 @@ public class FirstPassGroupingCollector extends Collector {
return result;
}
- public String getGroupField() {
- return groupField;
- }
-
@Override
public void setScorer(Scorer scorer) throws IOException {
for (FieldComparator comparator : comparators) {
@@ -189,13 +174,9 @@ public class FirstPassGroupingCollector extends Collector {
// TODO: should we add option to mean "ignore docs that
// don't have the group field" (instead of stuffing them
// under null group)?
- final int ord = index.getOrd(doc);
- //System.out.println(" ord=" + ord);
+ final GROUP_VALUE_TYPE groupValue = getDocGroupValue(doc);
- final BytesRef br = ord == 0 ? null : index.lookup(ord, scratchBytesRef);
- //System.out.println(" group=" + (br == null ? "null" : br.utf8ToString()));
-
- final CollectedSearchGroup group = groupMap.get(br);
+ final CollectedSearchGroup<GROUP_VALUE_TYPE> group = groupMap.get(groupValue);
if (group == null) {
@@ -210,8 +191,8 @@ public class FirstPassGroupingCollector extends Collector {
// just keep collecting them
// Add a new CollectedSearchGroup:
- CollectedSearchGroup sg = new CollectedSearchGroup();
- sg.groupValue = ord == 0 ? null : new BytesRef(scratchBytesRef);
+ CollectedSearchGroup<GROUP_VALUE_TYPE> sg = new CollectedSearchGroup<GROUP_VALUE_TYPE>();
+ sg.groupValue = copyDocGroupValue(groupValue, null);
sg.comparatorSlot = groupMap.size();
sg.topDoc = docBase + doc;
for (FieldComparator fc : comparators) {
@@ -233,20 +214,14 @@ public class FirstPassGroupingCollector extends Collector {
// the bottom group with this new group.
// java 6-only: final CollectedSearchGroup bottomGroup = orderedGroups.pollLast();
- final CollectedSearchGroup bottomGroup = orderedGroups.last();
+ final CollectedSearchGroup<GROUP_VALUE_TYPE> bottomGroup = orderedGroups.last();
orderedGroups.remove(bottomGroup);
assert orderedGroups.size() == topNGroups -1;
groupMap.remove(bottomGroup.groupValue);
// reuse the removed CollectedSearchGroup
- if (br == null) {
- bottomGroup.groupValue = null;
- } else if (bottomGroup.groupValue != null) {
- bottomGroup.groupValue.copy(br);
- } else {
- bottomGroup.groupValue = new BytesRef(br);
- }
+ bottomGroup.groupValue = copyDocGroupValue(groupValue, bottomGroup.groupValue);
bottomGroup.topDoc = docBase + doc;
for (FieldComparator fc : comparators) {
@@ -291,7 +266,7 @@ public class FirstPassGroupingCollector extends Collector {
// Remove before updating the group since lookup is done via comparators
// TODO: optimize this
- final CollectedSearchGroup prevLast;
+ final CollectedSearchGroup<GROUP_VALUE_TYPE> prevLast;
if (orderedGroups != null) {
prevLast = orderedGroups.last();
orderedGroups.remove(group);
@@ -336,7 +311,7 @@ public class FirstPassGroupingCollector extends Collector {
}
};
- orderedGroups = new TreeSet(comparator);
+ orderedGroups = new TreeSet<CollectedSearchGroup<GROUP_VALUE_TYPE>>(comparator);
orderedGroups.addAll(groupMap.values());
assert orderedGroups.size() > 0;
@@ -353,15 +328,31 @@ public class FirstPassGroupingCollector extends Collector {
@Override
public void setNextReader(AtomicReaderContext readerContext) throws IOException {
docBase = readerContext.docBase;
- index = FieldCache.DEFAULT.getTermsIndex(readerContext.reader, groupField);
-
for (int i=0; i<comparators.length; i++) {
comparators[i] = comparators[i].setNextReader(readerContext);
}
}
+
+ /** Returns the group value for the specified doc. */
+ protected abstract GROUP_VALUE_TYPE getDocGroupValue(int doc);
+
+ /** Returns a copy of the specified group value, optionally reusing the given instance. */
+ protected abstract GROUP_VALUE_TYPE copyDocGroupValue(GROUP_VALUE_TYPE groupValue, GROUP_VALUE_TYPE reuse);
}

-class CollectedSearchGroup extends SearchGroup {
+class CollectedSearchGroup<T> extends SearchGroup<T> {
int topDoc;
int comparatorSlot;
}
diff --git a/modules/grouping/src/java/org/apache/lucene/search/grouping/AbstractSecondPassGroupingCollector.java b/modules/grouping/src/java/org/apache/lucene/search/grouping/AbstractSecondPassGroupingCollector.java
new file mode 100644
index 00000000000..4d91d218a7c
--- /dev/null
+++ b/modules/grouping/src/java/org/apache/lucene/search/grouping/AbstractSecondPassGroupingCollector.java
@@ -0,0 +1,156 @@
+package org.apache.lucene.search.grouping;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.index.IndexReader.AtomicReaderContext;
+import org.apache.lucene.search.*;
+
+import java.io.IOException;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * SecondPassGroupingCollector is the second of two passes
+ * necessary to collect grouped docs. This pass gathers the
+ * top N documents per top group computed from the
+ * first pass. Concrete subclasses define what a group is and how it
+ * is internally collected.
+ *
+ *
+ * <p>See {@link org.apache.lucene.search.grouping} for more
+ * details including a full code example.
+ *
+ * @lucene.experimental
+ */
+public abstract class AbstractSecondPassGroupingCollector<GROUP_VALUE_TYPE> extends Collector {
+
+ protected final Map<GROUP_VALUE_TYPE, SearchGroupDocs<GROUP_VALUE_TYPE>> groupMap;
+ private final int maxDocsPerGroup;
+ protected SearchGroupDocs<GROUP_VALUE_TYPE>[] groupDocs;
+ private final Collection<SearchGroup<GROUP_VALUE_TYPE>> groups;
+ private final Sort withinGroupSort;
+ private final Sort groupSort;
+
+ private int totalHitCount;
+ private int totalGroupedHitCount;
+
+ public AbstractSecondPassGroupingCollector(Collection<SearchGroup<GROUP_VALUE_TYPE>> groups, Sort groupSort, Sort withinGroupSort,
+ int maxDocsPerGroup, boolean getScores, boolean getMaxScores, boolean fillSortFields)
+ throws IOException {
+
+ //System.out.println("SP init");
+ if (groups.size() == 0) {
+ throw new IllegalArgumentException("no groups to collect (groups.size() is 0)");
+ }
+
+ this.groupSort = groupSort;
+ this.withinGroupSort = withinGroupSort;
+ this.groups = groups;
+ this.maxDocsPerGroup = maxDocsPerGroup;
+ groupMap = new HashMap<GROUP_VALUE_TYPE, SearchGroupDocs<GROUP_VALUE_TYPE>>(groups.size());
+
+ for (SearchGroup<GROUP_VALUE_TYPE> group : groups) {
+ //System.out.println(" prep group=" + (group.groupValue == null ? "null" : group.groupValue.utf8ToString()));
+ final TopDocsCollector<?> collector;
+ if (withinGroupSort == null) {
+ // Sort by score
+ collector = TopScoreDocCollector.create(maxDocsPerGroup, true);
+ } else {
+ // Sort by fields
+ collector = TopFieldCollector.create(withinGroupSort, maxDocsPerGroup, fillSortFields, getScores, getMaxScores, true);
+ }
+ groupMap.put(group.groupValue,
+ new SearchGroupDocs<GROUP_VALUE_TYPE>(group.groupValue,
+ collector));
+ }
+ }
+
+ @Override
+ public void setScorer(Scorer scorer) throws IOException {
+ for (SearchGroupDocs<GROUP_VALUE_TYPE> group : groupMap.values()) {
+ group.collector.setScorer(scorer);
+ }
+ }
+
+ @Override
+ public void collect(int doc) throws IOException {
+ totalHitCount++;
+ SearchGroupDocs<GROUP_VALUE_TYPE> group = retrieveGroup(doc);
+ if (group != null) {
+ totalGroupedHitCount++;
+ group.collector.collect(doc);
+ }
+ }
+
+ /**
+ * Returns the group the specified doc belongs to or null if no group could be retrieved.
+ *
+ * @param doc The specified doc
+ * @return the group the specified doc belongs to or null if no group could be retrieved
+ * @throws IOException If an I/O related error occurred
+ */
+ protected abstract SearchGroupDocs<GROUP_VALUE_TYPE> retrieveGroup(int doc) throws IOException;
+
+ @Override
+ public void setNextReader(AtomicReaderContext readerContext) throws IOException {
+ //System.out.println("SP.setNextReader");
+ for (SearchGroupDocs<GROUP_VALUE_TYPE> group : groupMap.values()) {
+ group.collector.setNextReader(readerContext);
+ }
+ }
+
+ @Override
+ public boolean acceptsDocsOutOfOrder() {
+ return false;
+ }
+
+ public TopGroups<GROUP_VALUE_TYPE> getTopGroups(int withinGroupOffset) {
+ @SuppressWarnings("unchecked")
+ final GroupDocs<GROUP_VALUE_TYPE>[] groupDocsResult = (GroupDocs<GROUP_VALUE_TYPE>[]) new GroupDocs[groups.size()];
+
+ int groupIDX = 0;
+ for(SearchGroup<GROUP_VALUE_TYPE> group : groups) {
+ final SearchGroupDocs<GROUP_VALUE_TYPE> groupDocs = groupMap.get(group.groupValue);
+ final TopDocs topDocs = groupDocs.collector.topDocs(withinGroupOffset, maxDocsPerGroup);
+ groupDocsResult[groupIDX++] = new GroupDocs<GROUP_VALUE_TYPE>(topDocs.getMaxScore(),
+ topDocs.totalHits,
+ topDocs.scoreDocs,
+ groupDocs.groupValue,
+ group.sortValues);
+ }
+
+ return new TopGroups<GROUP_VALUE_TYPE>(groupSort.getSort(),
+ withinGroupSort == null ? null : withinGroupSort.getSort(),
+ totalHitCount, totalGroupedHitCount, groupDocsResult);
+ }
+
+
+ // TODO: merge with SearchGroup or not?
+ // ad: don't need to build a new hashmap
+ // disad: blows up the size of SearchGroup if we need many of them, and couples implementations
+ public class SearchGroupDocs<GROUP_VALUE_TYPE> {
+
+ public final GROUP_VALUE_TYPE groupValue;
+ public final TopDocsCollector<?> collector;
+
+ public SearchGroupDocs(GROUP_VALUE_TYPE groupValue, TopDocsCollector<?> collector) {
+ this.groupValue = groupValue;
+ this.collector = collector;
+ }
+ }
+}
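The only member a subclass must supply is retrieveGroup. A hypothetical term (BytesRef) based sketch follows, not part of the patch; the index and spare fields are illustrative per-segment state, roughly what the TermSecondPassGroupingCollector added in this commit keeps:

  // Hypothetical retrieveGroup for a subclass extending AbstractSecondPassGroupingCollector<BytesRef>;
  // "index" would be a FieldCache.DocTermsIndex and "spare" a reusable BytesRef, both managed by the subclass.
  @Override
  protected SearchGroupDocs<BytesRef> retrieveGroup(int doc) throws IOException {
    final int ord = index.getOrd(doc);
    final BytesRef groupValue = ord == 0 ? null : index.lookup(ord, spare);
    return groupMap.get(groupValue);
  }

A concrete subclass would also override setNextReader to call the base implementation and reload its per-segment lookup state.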
diff --git a/modules/grouping/src/java/org/apache/lucene/search/grouping/BlockGroupingCollector.java b/modules/grouping/src/java/org/apache/lucene/search/grouping/BlockGroupingCollector.java
index d2c6eb18a13..06a7c988452 100644
--- a/modules/grouping/src/java/org/apache/lucene/search/grouping/BlockGroupingCollector.java
+++ b/modules/grouping/src/java/org/apache/lucene/search/grouping/BlockGroupingCollector.java
@@ -49,7 +49,7 @@ import org.apache.lucene.util.PriorityQueue;
* being that the documents in each group must always be
* indexed as a block. This collector also fills in
* TopGroups.totalGroupCount without requiring the separate
- * {@link AllGroupsCollector}. However, this collector does
+ * {@link TermAllGroupsCollector}. However, this collector does
* not fill in the groupValue of each group; this field
* will always be null.
*
@@ -317,7 +317,8 @@ public class BlockGroupingCollector extends Collector {
final FakeScorer fakeScorer = new FakeScorer();
- final GroupDocs[] groups = new GroupDocs[groupQueue.size() - groupOffset];
+ @SuppressWarnings("unchecked")
+ final GroupDocs
+
+ The implementation is two-pass: the first pass ({@link
+ org.apache.lucene.search.grouping.TermFirstPassGroupingCollector})
+ gathers the top groups, and the second pass ({@link
+ org.apache.lucene.search.grouping.TermSecondPassGroupingCollector})
+ gathers documents within those groups. If the search is costly to
+ run you may want to use the {@link
+ org.apache.lucene.search.CachingCollector} class, which
+ caches hits and can (quickly) replay them for the second pass. This
+ way you only run the query once, but you pay a RAM cost to (briefly)
+ hold all hits. Results are returned as a {@link
+ org.apache.lucene.search.grouping.TopGroups} instance.
+
-There are two grouping implementations here:
-
-
- Arbitrary grouping that can group by any single-valued indexed
- field, implemented as a two-pass collector: the first pass ({@link
- org.apache.lucene.search.grouping.FirstPassGroupingCollector})
- gathers the top groups, and the second pass ({@link
- org.apache.lucene.search.grouping.SecondPassGroupingCollector})
- gathers documents within those groups. If the search is costly to
- run you may want to use the {@link
- org.apache.lucene.search.CachingCollector} class, which caches
- hits and can (quickly) replay them for the second pass. This way
- you only run the query once, but you pay a RAM cost to (briefly)
- hold all hits. Results are returned as a {@link
- org.apache.lucene.search.grouping.TopGroups} instance.
-
-
- Indexed groups, using a single pass collector (BlockGroupingCollectorDoc) that
- is able to group according to the doc blocks created during
- indexing using IndexWriter's add/updateDocuments API.
- This is faster (~25% faster QPS) than the generic two-pass
- collector, but it only works for doc blocks so you must statically
- commit (during indexing) to which grouping you'll need at search
- time.
+ This module abstracts away what defines a group and how it is collected. All grouping collectors
+ are abstract and currently have term-based implementations. One can implement
+ collectors that, for example, group on multiple fields.
+
-
- This implementation does not rely on a single valued grouping
- field; rather, the blocks in the index define the groups, so your
- application is free to determine what the grouping criteria is.
- At search time, you must provide a Filter that marks
- the last document in each group. This is a substantial memory
- savings because this collector does not load
- a DocTermsIndex from the
- FieldCache.
-
-
-
-
-The benefit of the arbitrary grouping implementation is you don't have
-to commit at indexing time to a static grouping of your documents.
-But the downside is it's somewhat slower to run, and requires more RAM
-(a FieldCache.DocTermsIndex entry is created).
+
+ This module abstracts away what defines a group and how it is collected. All grouping collectors
+ are abstract and currently have term-based implementations. One can implement
+ collectors that, for example, group on multiple fields.
+
Known limitations:
For the two-pass grouping collector, the group field must be a
single-valued indexed field.
{@link org.apache.lucene.search.FieldCache} is used to load the {@link org.apache.lucene.search.FieldCache.DocTermsIndex} for this field.
-
- Unlike Solr's implementation, this module cannot group by
- function query values nor by arbitrary queries.
+
+ Although Solr supports grouping by function and this module has an abstraction of what a group is, there are currently only
+ implementations for grouping based on terms.
Sharding is not directly supported, though is not too
difficult, if you can merge the top groups and top documents per
group yourself.
@@ -101,14 +83,14 @@ But the downside is it's somewhat slower to run, and requires more RAM
(using the {@link org.apache.lucene.search.CachingCollector}):
- FirstPassGroupingCollector c1 = new FirstPassGroupingCollector("author", groupSort, groupOffset+topNGroups);
+ TermFirstPassGroupingCollector c1 = new TermFirstPassGroupingCollector("author", groupSort, groupOffset+topNGroups);
boolean cacheScores = true;
double maxCacheRAMMB = 4.0;
CachingCollector cachedCollector = CachingCollector.create(c1, cacheScores, maxCacheRAMMB);
s.search(new TermQuery(new Term("content", searchTerm)), cachedCollector);
- Collection topGroups = c1.getTopGroups(groupOffset, fillFields);
+ Collection<SearchGroup<BytesRef>> topGroups = c1.getTopGroups(groupOffset, fillFields);
if (topGroups == null) {
// No groups matched
@@ -118,12 +100,12 @@ But the downside is it's somewhat slower to run, and requires more RAM
boolean getScores = true;
boolean getMaxScores = true;
boolean fillFields = true;
- SecondPassGroupingCollector c2 = new SecondPassGroupingCollector("author", topGroups, groupSort, docSort, docOffset+docsPerGroup, getScores, getMaxScores, fillFields);
+ TermSecondPassGroupingCollector c2 = new TermSecondPassGroupingCollector("author", topGroups, groupSort, docSort, docOffset+docsPerGroup, getScores, getMaxScores, fillFields);
//Optionally compute total group count
- AllGroupsCollector allGroupsCollector = null;
+ TermAllGroupsCollector allGroupsCollector = null;
if (requiredTotalGroupCount) {
- allGroupsCollector = new AllGroupsCollector("author");
+ allGroupsCollector = new TermAllGroupsCollector("author");
c2 = MultiCollector.wrap(c2, allGroupsCollector);
}
@@ -135,9 +117,9 @@ But the downside is it's somewhat slower to run, and requires more RAM
s.search(new TermQuery(new Term("content", searchTerm)), c2);
}
- TopGroups groupsResult = c2.getTopGroups(docOffset);
+ TopGroups<BytesRef> groupsResult = c2.getTopGroups(docOffset);
if (requiredTotalGroupCount) {
- groupResult = new TopGroups(groupsResult, allGroupsCollector.getGroupCount());
+ groupResult = new TopGroups<BytesRef>(groupsResult, allGroupsCollector.getGroupCount());
}
// Render groupsResult...
diff --git a/modules/grouping/src/test/org/apache/lucene/search/grouping/TestGrouping.java b/modules/grouping/src/test/org/apache/lucene/search/grouping/TestGrouping.java
index 9dc49faa71b..87745f64a18 100644
--- a/modules/grouping/src/test/org/apache/lucene/search/grouping/TestGrouping.java
+++ b/modules/grouping/src/test/org/apache/lucene/search/grouping/TestGrouping.java
@@ -17,9 +17,6 @@
package org.apache.lucene.search.grouping;
-import java.util.*;
-import java.io.IOException;
-
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
@@ -33,6 +30,9 @@ import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util._TestUtil;
+import java.io.IOException;
+import java.util.*;
+
// TODO
// - should test relevance sort too
// - test null
@@ -103,10 +103,10 @@ public class TestGrouping extends LuceneTestCase {
w.close();
final Sort groupSort = Sort.RELEVANCE;
- final FirstPassGroupingCollector c1 = new FirstPassGroupingCollector(groupField, groupSort, 10);
+ final TermFirstPassGroupingCollector c1 = new TermFirstPassGroupingCollector(groupField, groupSort, 10);
indexSearcher.search(new TermQuery(new Term("content", "random")), c1);
- final SecondPassGroupingCollector c2 = new SecondPassGroupingCollector(groupField, c1.getTopGroups(0, true), groupSort, null, 5, true, false, true);
+ final TermSecondPassGroupingCollector c2 = new TermSecondPassGroupingCollector(groupField, c1.getTopGroups(0, true), groupSort, null, 5, true, false, true);
indexSearcher.search(new TermQuery(new Term("content", "random")), c2);
final TopGroups groups = c2.getTopGroups(0);
@@ -236,7 +236,7 @@ public class TestGrouping extends LuceneTestCase {
}
*/
- private TopGroups slowGrouping(GroupDoc[] groupDocs,
+ private TopGroups<BytesRef> slowGrouping(GroupDoc[] groupDocs,
String searchTerm,
boolean fillFields,
boolean getScores,
@@ -296,7 +296,8 @@ public class TestGrouping extends LuceneTestCase {
final int limit = Math.min(groupOffset + topNGroups, groups.size());
final Comparator docSortComp = getComparator(docSort);
- final GroupDocs[] result = new GroupDocs[limit-groupOffset];
+ @SuppressWarnings("unchecked")
+ final GroupDocs<BytesRef>[] result = new GroupDocs[limit-groupOffset];
int totalGroupedHitCount = 0;
for(int idx=groupOffset;idx < limit;idx++) {
final BytesRef group = sortedGroups.get(idx);
@@ -321,7 +322,7 @@ public class TestGrouping extends LuceneTestCase {
hits = new ScoreDoc[0];
}
- result[idx-groupOffset] = new GroupDocs(0.0f,
+ result[idx-groupOffset] = new GroupDocs<BytesRef>(0.0f,
docs.size(),
hits,
group,
@@ -329,12 +330,12 @@ public class TestGrouping extends LuceneTestCase {
}
if (doAllGroups) {
- return new TopGroups(
- new TopGroups(groupSort.getSort(), docSort.getSort(), totalHitCount, totalGroupedHitCount, result),
+ return new TopGroups<BytesRef>(
+ new TopGroups<BytesRef>(groupSort.getSort(), docSort.getSort(), totalHitCount, totalGroupedHitCount, result),
knownGroups.size()
);
} else {
- return new TopGroups(groupSort.getSort(), docSort.getSort(), totalHitCount, totalGroupedHitCount, result);
+ return new TopGroups<BytesRef>(groupSort.getSort(), docSort.getSort(), totalHitCount, totalGroupedHitCount, result);
}
}
@@ -525,14 +526,14 @@ public class TestGrouping extends LuceneTestCase {
System.out.println("TEST: groupSort=" + groupSort + " docSort=" + docSort + " searchTerm=" + searchTerm + " topNGroups=" + topNGroups + " groupOffset=" + groupOffset + " docOffset=" + docOffset + " doCache=" + doCache + " docsPerGroup=" + docsPerGroup + " doAllGroups=" + doAllGroups);
}
- final AllGroupsCollector allGroupsCollector;
+ final TermAllGroupsCollector allGroupsCollector;
if (doAllGroups) {
- allGroupsCollector = new AllGroupsCollector("group");
+ allGroupsCollector = new TermAllGroupsCollector("group");
} else {
allGroupsCollector = null;
}
- final FirstPassGroupingCollector c1 = new FirstPassGroupingCollector("group", groupSort, groupOffset+topNGroups);
+ final TermFirstPassGroupingCollector c1 = new TermFirstPassGroupingCollector("group", groupSort, groupOffset+topNGroups);
final CachingCollector cCache;
final Collector c;
@@ -583,19 +584,19 @@ public class TestGrouping extends LuceneTestCase {
}
}
- final Collection topGroups = c1.getTopGroups(groupOffset, fillFields);
+ final Collection<SearchGroup<BytesRef>> topGroups = c1.getTopGroups(groupOffset, fillFields);
final TopGroups groupsResult;
if (topGroups != null) {
if (VERBOSE) {
System.out.println("TEST: topGroups");
- for (SearchGroup searchGroup : topGroups) {
+ for (SearchGroup<BytesRef> searchGroup : topGroups) {
System.out.println(" " + (searchGroup.groupValue == null ? "null" : searchGroup.groupValue.utf8ToString()) + ": " + Arrays.deepToString(searchGroup.sortValues));
}
}
- final SecondPassGroupingCollector c2 = new SecondPassGroupingCollector("group", topGroups, groupSort, docSort, docOffset+docsPerGroup, getScores, getMaxScores, fillFields);
+ final TermSecondPassGroupingCollector c2 = new TermSecondPassGroupingCollector("group", topGroups, groupSort, docSort, docOffset+docsPerGroup, getScores, getMaxScores, fillFields);
if (doCache) {
if (cCache.isCached()) {
if (VERBOSE) {
@@ -613,8 +614,8 @@ public class TestGrouping extends LuceneTestCase {
}
if (doAllGroups) {
- TopGroups tempTopGroups = c2.getTopGroups(docOffset);
- groupsResult = new TopGroups(tempTopGroups, allGroupsCollector.getGroupCount());
+ TopGroups<BytesRef> tempTopGroups = c2.getTopGroups(docOffset);
+ groupsResult = new TopGroups<BytesRef>(tempTopGroups, allGroupsCollector.getGroupCount());
} else {
groupsResult = c2.getTopGroups(docOffset);
}
@@ -625,14 +626,14 @@ public class TestGrouping extends LuceneTestCase {
}
}
- final TopGroups expectedGroups = slowGrouping(groupDocs, searchTerm, fillFields, getScores, getMaxScores, doAllGroups, groupSort, docSort, topNGroups, docsPerGroup, groupOffset, docOffset);
+ final TopGroups<BytesRef> expectedGroups = slowGrouping(groupDocs, searchTerm, fillFields, getScores, getMaxScores, doAllGroups, groupSort, docSort, topNGroups, docsPerGroup, groupOffset, docOffset);
if (VERBOSE) {
if (expectedGroups == null) {
System.out.println("TEST: no expected groups");
} else {
System.out.println("TEST: expected groups");
- for(GroupDocs gd : expectedGroups.groups) {
+ for(GroupDocs<BytesRef> gd : expectedGroups.groups) {
System.out.println(" group=" + (gd.groupValue == null ? "null" : gd.groupValue.utf8ToString()));
for(ScoreDoc sd : gd.scoreDocs) {
System.out.println(" id=" + sd.doc);
@@ -645,21 +646,22 @@ public class TestGrouping extends LuceneTestCase {
final boolean needsScores = getScores || getMaxScores || docSort == null;
final BlockGroupingCollector c3 = new BlockGroupingCollector(groupSort, groupOffset+topNGroups, needsScores, lastDocInBlock);
- final AllGroupsCollector allGroupsCollector2;
+ final TermAllGroupsCollector allGroupsCollector2;
final Collector c4;
if (doAllGroups) {
- allGroupsCollector2 = new AllGroupsCollector("group");
+ allGroupsCollector2 = new TermAllGroupsCollector("group");
c4 = MultiCollector.wrap(c3, allGroupsCollector2);
} else {
allGroupsCollector2 = null;
c4 = c3;
}
s2.search(new TermQuery(new Term("content", searchTerm)), c4);
- final TopGroups tempTopGroups2 = c3.getTopGroups(docSort, groupOffset, docOffset, docOffset+docsPerGroup, fillFields);
+ @SuppressWarnings("unchecked")
+ final TopGroups<BytesRef> tempTopGroups2 = c3.getTopGroups(docSort, groupOffset, docOffset, docOffset+docsPerGroup, fillFields);
final TopGroups groupsResult2;
if (doAllGroups && tempTopGroups2 != null) {
assertEquals((int) tempTopGroups2.totalGroupCount, allGroupsCollector2.getGroupCount());
- groupsResult2 = new TopGroups(tempTopGroups2, allGroupsCollector2.getGroupCount());
+ groupsResult2 = new TopGroups<BytesRef>(tempTopGroups2, allGroupsCollector2.getGroupCount());
} else {
groupsResult2 = tempTopGroups2;
}
From 526d6c937ed439dc3d1644db56cb887d85334cca Mon Sep 17 00:00:00 2001
From: Michael McCandless
Date: Thu, 2 Jun 2011 22:42:27 +0000
Subject: [PATCH 03/29] LUCENE-3099: fix AllGroupsCollectorTest
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1130859 13f79535-47bb-0310-9956-ffa450edef68
---
.../lucene/search/grouping/AllGroupsCollectorTest.java | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/modules/grouping/src/test/org/apache/lucene/search/grouping/AllGroupsCollectorTest.java b/modules/grouping/src/test/org/apache/lucene/search/grouping/AllGroupsCollectorTest.java
index 00153e7f997..cff33202df3 100644
--- a/modules/grouping/src/test/org/apache/lucene/search/grouping/AllGroupsCollectorTest.java
+++ b/modules/grouping/src/test/org/apache/lucene/search/grouping/AllGroupsCollectorTest.java
@@ -91,15 +91,15 @@ public class AllGroupsCollectorTest extends LuceneTestCase {
IndexSearcher indexSearcher = new IndexSearcher(w.getReader());
w.close();
- AllGroupsCollector c1 = new AllGroupsCollector(groupField);
+ TermAllGroupsCollector c1 = new TermAllGroupsCollector(groupField);
indexSearcher.search(new TermQuery(new Term("content", "random")), c1);
assertEquals(4, c1.getGroupCount());
- AllGroupsCollector c2 = new AllGroupsCollector(groupField);
+ TermAllGroupsCollector c2 = new TermAllGroupsCollector(groupField);
indexSearcher.search(new TermQuery(new Term("content", "some")), c2);
assertEquals(3, c2.getGroupCount());
- AllGroupsCollector c3 = new AllGroupsCollector(groupField);
+ TermAllGroupsCollector c3 = new TermAllGroupsCollector(groupField);
indexSearcher.search(new TermQuery(new Term("content", "blob")), c3);
assertEquals(2, c3.getGroupCount());
From 6f4155b808f1a7724283cdf81a82dacd8869a295 Mon Sep 17 00:00:00 2001
From: Michael McCandless
Date: Thu, 2 Jun 2011 22:48:09 +0000
Subject: [PATCH 04/29] add CHANGES entry for bug fix to CachingCollector
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1130861 13f79535-47bb-0310-9956-ffa450edef68
---
lucene/CHANGES.txt | 3 +++
1 file changed, 3 insertions(+)
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 94224583d87..bb247f22793 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -458,6 +458,9 @@ Bug fixes
including locks, and fails if the test fails to release all of them.
(Mike McCandless, Robert Muir, Shai Erera, Simon Willnauer)
+* LUCENE-3102: CachingCollector.replay was failing to call setScorer
+ per-segment (Martijn van Groningen via Mike McCandless)
+
New Features
* LUCENE-3140: Added experimental FST implementation to Lucene.
From c7b59aec0b09aa262433658f998e15960c48012d Mon Sep 17 00:00:00 2001
From: Dawid Weiss
Date: Fri, 3 Jun 2011 09:19:28 +0000
Subject: [PATCH 05/29] Making isFinal public for completeness if somebody
wants to go with Mealy's traversals.
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1130954 13f79535-47bb-0310-9956-ffa450edef68
---
lucene/src/java/org/apache/lucene/util/fst/FST.java | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/lucene/src/java/org/apache/lucene/util/fst/FST.java b/lucene/src/java/org/apache/lucene/util/fst/FST.java
index 04428c6b356..bbed472acee 100644
--- a/lucene/src/java/org/apache/lucene/util/fst/FST.java
+++ b/lucene/src/java/org/apache/lucene/util/fst/FST.java
@@ -147,7 +147,7 @@ public class FST {
return flag(BIT_LAST_ARC);
}
- boolean isFinal() {
+ public boolean isFinal() {
return flag(BIT_FINAL_ARC);
}
};
From 9e4a656592d83f4190858611686f7b833ef58bea Mon Sep 17 00:00:00 2001
From: Dawid Weiss
Date: Fri, 3 Jun 2011 10:39:33 +0000
Subject: [PATCH 06/29] Adding more JavaDoc to FST Builder because the
arguments are rather cryptic right now.
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1130976 13f79535-47bb-0310-9956-ffa450edef68
---
.../org/apache/lucene/util/fst/Builder.java | 37 +++++++++++++++++++
1 file changed, 37 insertions(+)
diff --git a/lucene/src/java/org/apache/lucene/util/fst/Builder.java b/lucene/src/java/org/apache/lucene/util/fst/Builder.java
index b5736264e2d..9d25a1482a5 100644
--- a/lucene/src/java/org/apache/lucene/util/fst/Builder.java
+++ b/lucene/src/java/org/apache/lucene/util/fst/Builder.java
@@ -21,6 +21,7 @@ import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRef;
+import org.apache.lucene.util.fst.FST.INPUT_TYPE;
import java.io.IOException;
@@ -69,6 +70,42 @@ public class Builder {
// current "frontier"
private UnCompiledNode<T>[] frontier;
+ /**
+ * Instantiates an FST/FSA builder without any pruning. A shortcut
+ * to {@link #Builder(INPUT_TYPE, int, int, boolean, Outputs)} with
+ * pruning options turned off.
+ */
+ public Builder(FST.INPUT_TYPE inputType, Outputs<T> outputs)
+ {
+ this(inputType, 0, 0, true, outputs);
+ }
+
+ /**
+ * Instantiates an FST/FSA builder with all the possible tuning and construction
+ * tweaks. Read parameter documentation carefully.
+ *
+ * @param inputType
+ * The input type (transition labels). Can be anything from {@link INPUT_TYPE}
+ * enumeration. Shorter types will consume less memory. Strings (character sequences) are
+ * represented as {@link INPUT_TYPE#BYTE4} (full unicode codepoints).
+ *
+ * @param minSuffixCount1
+ * If pruning the input graph during construction, this threshold is used for telling
+ * if a node is kept or pruned. If transition_count(node) >= minSuffixCount1, the node
+ * is kept.
+ *
+ * @param minSuffixCount2
+ * (Note: only Mike McCandless knows what this one is really doing...)
+ *
+ * @param doMinSuffix
+ * If true, the shared suffixes will be compacted into unique paths.
+ * This requires an additional hash map for lookups in memory. Setting this parameter to
+ * false creates a single path for all input sequences. This will result in a larger
+ * graph, but may require less memory and will speed up construction.
+ * @param outputs The output type for each input sequence. Applies only if building an FST. For
+ * FSA, use {@link NoOutputs#getSingleton()} and {@link NoOutputs#getNoOutput()} as the
+ * singleton output object.
+ */
public Builder(FST.INPUT_TYPE inputType, int minSuffixCount1, int minSuffixCount2, boolean doMinSuffix, Outputs<T> outputs) {
this.minSuffixCount1 = minSuffixCount1;
this.minSuffixCount2 = minSuffixCount2;
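Since the new Javadoc above mostly describes constructor parameters, a brief construction sketch may help. It is not part of the patch; it assumes the trunk Builder API of this period, in particular add(IntsRef, output) and finish(), an IntsRef(int capacity) constructor, and a pre-sorted input collection named sortedTerms:

  // Hypothetical FSA construction using the no-pruning shortcut constructor added above.
  NoOutputs outputs = NoOutputs.getSingleton();
  Builder<Object> builder = new Builder<Object>(FST.INPUT_TYPE.BYTE1, outputs);

  // Inputs must be added in sorted order; each term becomes a sequence of BYTE1 labels.
  for (BytesRef term : sortedTerms) {
    IntsRef input = new IntsRef(term.length);
    for (int i = 0; i < term.length; i++) {
      input.ints[input.length++] = term.bytes[term.offset + i] & 0xFF;
    }
    builder.add(input, outputs.getNoOutput());  // FSA: every input maps to the single no-output
  }
  FST<Object> fst = builder.finish();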
From 896bcea27fc7758e80066ab5fbe11777af7508a9 Mon Sep 17 00:00:00 2001
From: Dawid Weiss
Date: Fri, 3 Jun 2011 10:39:55 +0000
Subject: [PATCH 07/29] Making NO_OUTPUT a true singleton within a single class
 loader. I didn't see the point of it being per-instance, but correct me if I'm
wrong.
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1130977 13f79535-47bb-0310-9956-ffa450edef68
---
lucene/src/java/org/apache/lucene/util/fst/NoOutputs.java | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/lucene/src/java/org/apache/lucene/util/fst/NoOutputs.java b/lucene/src/java/org/apache/lucene/util/fst/NoOutputs.java
index 40404a3fb90..1b1e97e12f5 100644
--- a/lucene/src/java/org/apache/lucene/util/fst/NoOutputs.java
+++ b/lucene/src/java/org/apache/lucene/util/fst/NoOutputs.java
@@ -28,7 +28,7 @@ import org.apache.lucene.store.DataOutput;
public final class NoOutputs extends Outputs<Object> {
- final Object NO_OUTPUT = new Object() {
+ static final Object NO_OUTPUT = new Object() {
// NodeHash calls hashCode for this output; we fix this
// so we get deterministic hashing.
@Override
From 6f607a5fda4abddd60a8160c327b18254a7506ce Mon Sep 17 00:00:00 2001
From: Michael McCandless
Date: Fri, 3 Jun 2011 12:04:19 +0000
Subject: [PATCH 08/29] fix javadocs
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1131005 13f79535-47bb-0310-9956-ffa450edef68
---
lucene/src/java/org/apache/lucene/util/fst/Builder.java | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/lucene/src/java/org/apache/lucene/util/fst/Builder.java b/lucene/src/java/org/apache/lucene/util/fst/Builder.java
index 9d25a1482a5..99378cfb2fa 100644
--- a/lucene/src/java/org/apache/lucene/util/fst/Builder.java
+++ b/lucene/src/java/org/apache/lucene/util/fst/Builder.java
@@ -72,7 +72,7 @@ public class Builder {
/**
* Instantiates an FST/FSA builder without any pruning. A shortcut
- * to {@link #Builder(INPUT_TYPE, int, int, boolean, Outputs)} with
+ * to {@link #Builder(FST.INPUT_TYPE, int, int, boolean, Outputs)} with
* pruning options turned off.
*/
public Builder(FST.INPUT_TYPE inputType, Outputs outputs)
From d90d10e48e6dfeec37643e4bbdee6c154c473df1 Mon Sep 17 00:00:00 2001
From: Yonik Seeley
Date: Fri, 3 Jun 2011 13:30:27 +0000
Subject: [PATCH 09/29] LUCENE-2514: fix NPE in solr's toString when range
endpoints are null
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1131029 13f79535-47bb-0310-9956-ffa450edef68
---
.../org/apache/solr/search/QueryParsing.java | 24 ++++++++++++++++---
1 file changed, 21 insertions(+), 3 deletions(-)
diff --git a/solr/src/java/org/apache/solr/search/QueryParsing.java b/solr/src/java/org/apache/solr/search/QueryParsing.java
index fd977c21b28..22a68f8266a 100644
--- a/solr/src/java/org/apache/solr/search/QueryParsing.java
+++ b/solr/src/java/org/apache/solr/search/QueryParsing.java
@@ -32,6 +32,8 @@ import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TermRangeQuery;
import org.apache.lucene.search.WildcardQuery;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.CharsRef;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.MapSolrParams;
import org.apache.solr.common.params.SolrParams;
@@ -382,6 +384,22 @@ public class QueryParsing {
}
}
+ static void writeFieldVal(BytesRef val, FieldType ft, Appendable out, int flags) throws IOException {
+ if (ft != null) {
+ try {
+ CharsRef readable = new CharsRef();
+ ft.indexedToReadable(val, readable);
+ out.append(readable);
+ } catch (Exception e) {
+ out.append("EXCEPTION(val=");
+ out.append(val.utf8ToString());
+ out.append(")");
+ }
+ } else {
+ out.append(val.utf8ToString());
+ }
+ }
+
/**
* @see #toString(Query,IndexSchema)
*/
@@ -392,14 +410,14 @@ public class QueryParsing {
TermQuery q = (TermQuery) query;
Term t = q.getTerm();
FieldType ft = writeFieldName(t.field(), schema, out, flags);
- writeFieldVal(t.text(), ft, out, flags);
+ writeFieldVal(t.bytes(), ft, out, flags);
} else if (query instanceof TermRangeQuery) {
TermRangeQuery q = (TermRangeQuery) query;
String fname = q.getField();
FieldType ft = writeFieldName(fname, schema, out, flags);
out.append(q.includesLower() ? '[' : '{');
- String lt = q.getLowerTerm().utf8ToString();
- String ut = q.getUpperTerm().utf8ToString();
+ BytesRef lt = q.getLowerTerm();
+ BytesRef ut = q.getUpperTerm();
if (lt == null) {
out.append('*');
} else {
From 6380bda06a06a80c2a6e78e91aa987476332ed70 Mon Sep 17 00:00:00 2001
From: Steven Rowe
Date: Fri, 3 Jun 2011 15:06:17 +0000
Subject: [PATCH 10/29] script to poll release mirrors
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1131062 13f79535-47bb-0310-9956-ffa450edef68
---
dev-tools/scripts/poll-mirrors.pl | 68 +++++++++++++++++++++++++++++++
1 file changed, 68 insertions(+)
create mode 100755 dev-tools/scripts/poll-mirrors.pl
diff --git a/dev-tools/scripts/poll-mirrors.pl b/dev-tools/scripts/poll-mirrors.pl
new file mode 100755
index 00000000000..b7222f9468b
--- /dev/null
+++ b/dev-tools/scripts/poll-mirrors.pl
@@ -0,0 +1,68 @@
+#!/usr/bin/perl
+#
+# poll-mirrors.pl
+#
+# This script is designed to poll download sites after posting a release
+# and print out a notice as each becomes available. The RM can use this
+# script to delay the release announcement until the release can be
+# downloaded.
+#
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+use strict;
+use warnings;
+use Getopt::Long;
+use LWP::Simple;
+
+my $version;
+my $interval = 300;
+my $quiet = 0;
+
+my $result = GetOptions ("version=s" => \$version, "interval=i" => \$interval, "quiet" => \$quiet);
+
+my $usage = "$0 -v version [ -i interval (seconds; default: 300)] [ -quiet ]";
+
+unless ($result) {
+ print STDERR $usage;
+ exit(1);
+}
+unless (defined($version) && $version =~ /\d+(?:\.\d+)+/) {
+ print STDERR "You must specify the release version.\n$usage";
+ exit(1);
+}
+
+my $apache_backup_url = "http://www.apache.org/dist//lucene/java/$version/lucene-$version.tar.gz.asc";
+my $maven_url = "http://repo2.maven.org/maven2/org/apache/lucene/lucene-core/$version/lucene-core-$version.pom";
+
+my $apache_available = 0;
+my $maven_available = 0;
+
+until ($apache_available && $maven_available) {
+ unless ($apache_available) {
+ my $content = get($apache_backup_url);
+ $apache_available = defined($content);
+ print "\nDownloadable: $apache_backup_url\n" if ($apache_available);
+ }
+ unless ($maven_available) {
+ my $content = get($maven_url);
+ $maven_available = defined($content);
+ print "\nDownloadable: $maven_url\n" if ($maven_available);
+ }
+ print "." unless ($quiet);
+ sleep($interval) unless ($apache_available && $maven_available);
+}
From ff838ec92f2c0c6aa349200df810528deaea7ec0 Mon Sep 17 00:00:00 2001
From: Steven Rowe
Date: Fri, 3 Jun 2011 15:13:21 +0000
Subject: [PATCH 11/29] .tar.gz -> .tgz for lucene release tarballs
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1131064 13f79535-47bb-0310-9956-ffa450edef68
---
dev-tools/scripts/poll-mirrors.pl | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/dev-tools/scripts/poll-mirrors.pl b/dev-tools/scripts/poll-mirrors.pl
index b7222f9468b..8ab28d47aed 100755
--- a/dev-tools/scripts/poll-mirrors.pl
+++ b/dev-tools/scripts/poll-mirrors.pl
@@ -46,7 +46,7 @@ unless (defined($version) && $version =~ /\d+(?:\.\d+)+/) {
exit(1);
}
-my $apache_backup_url = "http://www.apache.org/dist//lucene/java/$version/lucene-$version.tar.gz.asc";
+my $apache_backup_url = "http://www.apache.org/dist//lucene/java/$version/lucene-$version.tgz.asc";
my $maven_url = "http://repo2.maven.org/maven2/org/apache/lucene/lucene-core/$version/lucene-core-$version.pom";
my $apache_available = 0;
From b1aa9f6d560b80041ca1529f3d9b4a7994c5d4c7 Mon Sep 17 00:00:00 2001
From: Steven Rowe
Date: Fri, 3 Jun 2011 15:26:37 +0000
Subject: [PATCH 12/29] turn off buffering of '.'s printed while you wait
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1131073 13f79535-47bb-0310-9956-ffa450edef68
---
dev-tools/scripts/poll-mirrors.pl | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/dev-tools/scripts/poll-mirrors.pl b/dev-tools/scripts/poll-mirrors.pl
index 8ab28d47aed..ed9d75ae2b2 100755
--- a/dev-tools/scripts/poll-mirrors.pl
+++ b/dev-tools/scripts/poll-mirrors.pl
@@ -46,6 +46,10 @@ unless (defined($version) && $version =~ /\d+(?:\.\d+)+/) {
exit(1);
}
+my $previously_selected = select STDOUT;
+$| = 1;
+select $previously_selected;
+
my $apache_backup_url = "http://www.apache.org/dist//lucene/java/$version/lucene-$version.tgz.asc";
my $maven_url = "http://repo2.maven.org/maven2/org/apache/lucene/lucene-core/$version/lucene-core-$version.pom";
From 8feab1f620d219a71f488110de1b88b8410ecad8 Mon Sep 17 00:00:00 2001
From: Steven Rowe
Date: Fri, 3 Jun 2011 15:28:35 +0000
Subject: [PATCH 13/29] added comment describing turning off buffering of '.'s
printed while you wait
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1131074 13f79535-47bb-0310-9956-ffa450edef68
---
dev-tools/scripts/poll-mirrors.pl | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/dev-tools/scripts/poll-mirrors.pl b/dev-tools/scripts/poll-mirrors.pl
index ed9d75ae2b2..5a0305bb72f 100755
--- a/dev-tools/scripts/poll-mirrors.pl
+++ b/dev-tools/scripts/poll-mirrors.pl
@@ -47,7 +47,7 @@ unless (defined($version) && $version =~ /\d+(?:\.\d+)+/) {
}
my $previously_selected = select STDOUT;
-$| = 1;
+$| = 1; # turn off buffering of STDOUT, so "."s are printed immediately
select $previously_selected;
my $apache_backup_url = "http://www.apache.org/dist//lucene/java/$version/lucene-$version.tgz.asc";
From ac2c62554d9f572c2371b8a9599fa0a8c379a7cd Mon Sep 17 00:00:00 2001
From: Simon Willnauer
Date: Fri, 3 Jun 2011 16:12:10 +0000
Subject: [PATCH 14/29] LUCENE-3168: Enable Throttling only during nightly
builds
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1131085 13f79535-47bb-0310-9956-ffa450edef68
---
.../org/apache/lucene/util/LuceneTestCase.java | 8 +++++++-
1 file changed, 7 insertions(+), 1 deletion(-)
diff --git a/lucene/src/test-framework/org/apache/lucene/util/LuceneTestCase.java b/lucene/src/test-framework/org/apache/lucene/util/LuceneTestCase.java
index 655d81b092a..322461e7645 100644
--- a/lucene/src/test-framework/org/apache/lucene/util/LuceneTestCase.java
+++ b/lucene/src/test-framework/org/apache/lucene/util/LuceneTestCase.java
@@ -60,6 +60,7 @@ import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.LockFactory;
import org.apache.lucene.store.MockDirectoryWrapper;
+import org.apache.lucene.store.MockDirectoryWrapper.Throttling;
import org.apache.lucene.util.FieldCacheSanityChecker.Insanity;
import org.junit.*;
import org.junit.rules.TestWatchman;
@@ -160,6 +161,8 @@ public abstract class LuceneTestCase extends Assert {
public static final String TEST_LINE_DOCS_FILE = System.getProperty("tests.linedocsfile", "europarl.lines.txt.gz");
/** whether or not to clean threads between test invocations: "false", "perMethod", "perClass" */
public static final String TEST_CLEAN_THREADS = System.getProperty("tests.cleanthreads", "perClass");
+ /** MockDirectoryWrapper throttling for tests: SOMETIMES during nightly builds, NEVER otherwise */
+ public static final Throttling TEST_THROTTLING = TEST_NIGHTLY ? Throttling.SOMETIMES : Throttling.NEVER;
private static final Pattern codecWithParam = Pattern.compile("(.*)\\(\\s*(\\d+)\\s*\\)");
@@ -938,8 +941,9 @@ public abstract class LuceneTestCase extends Assert {
Directory impl = newDirectoryImpl(r, TEST_DIRECTORY);
MockDirectoryWrapper dir = new MockDirectoryWrapper(r, impl);
stores.put(dir, Thread.currentThread().getStackTrace());
+ dir.setThrottling(TEST_THROTTLING);
return dir;
- }
+ }
/**
* Returns a new Directory instance, with contents copied from the
@@ -985,6 +989,7 @@ public abstract class LuceneTestCase extends Assert {
dir.setLockFactory(lf);
}
stores.put(dir, Thread.currentThread().getStackTrace());
+ dir.setThrottling(TEST_THROTTLING);
return dir;
} catch (Exception e) {
throw new RuntimeException(e);
@@ -1003,6 +1008,7 @@ public abstract class LuceneTestCase extends Assert {
}
MockDirectoryWrapper dir = new MockDirectoryWrapper(r, impl);
stores.put(dir, Thread.currentThread().getStackTrace());
+ dir.setThrottling(TEST_THROTTLING);
return dir;
}
From 0c25f245505da154a800cc87f9004553ca73037c Mon Sep 17 00:00:00 2001
From: Ryan McKinley
Date: Fri, 3 Jun 2011 18:51:37 +0000
Subject: [PATCH 15/29] SOLR-2399: Solr Admin Interface, reworked
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1131137 13f79535-47bb-0310-9956-ffa450edef68
---
solr/src/webapp/web/css/screen.css | 105 +-
solr/src/webapp/web/index.jsp | 6 +-
solr/src/webapp/web/js/script.js | 991 +++++++++++-------
solr/src/webapp/web/tpl/analysis.html | 6 +
solr/src/webapp/web/tpl/cores.html | 13 +-
solr/src/webapp/web/tpl/dashboard.html | 104 +-
solr/src/webapp/web/tpl/schema-browser.html | 122 ++-
.../webapp/web/tpl/schema-browser_field.html | 109 --
.../webapp/web/tpl/schema-browser_index.html | 11 -
9 files changed, 787 insertions(+), 680 deletions(-)
delete mode 100644 solr/src/webapp/web/tpl/schema-browser_field.html
delete mode 100644 solr/src/webapp/web/tpl/schema-browser_index.html
diff --git a/solr/src/webapp/web/css/screen.css b/solr/src/webapp/web/css/screen.css
index fc50d0309de..53e0387f8b1 100644
--- a/solr/src/webapp/web/css/screen.css
+++ b/solr/src/webapp/web/css/screen.css
@@ -462,6 +462,7 @@ ul
#content #dashboard .block
{
+ background-image: none;
width: 49%;
}
@@ -550,85 +551,13 @@ ul
display: block;
}
-#content #dashboard #replication.is-master .slave
+#content #dashboard #replication #details table thead td span
{
display: none;
}
-#content #dashboard #replication table
-{
- border-collapse: collapse;
-}
-
-#content #dashboard #replication table th,
-#content #dashboard #replication table td
-{
- border: 1px solid #f0f0f0;
- padding: 2px 5px;
-}
-
-#content #dashboard #replication table thead td
-{
- border: 0;
-}
-
-#content #dashboard #replication table thead th,
-#content #dashboard #replication table tbody td
-{
- border-right: 0;
-}
-
-#content #dashboard #replication table thead th
-{
- border-top: 0;
- font-weight: bold;
-}
-
-#content #dashboard #replication table tbody th,
-#content #dashboard #replication table tbody td
-{
- border-bottom: 0;
- text-align: right;
-}
-
-#content #dashboard #replication table tbody th
-{
- border-left: 0;
-}
-
-#content #dashboard #replication table tbody th,
-#content #dashboard #replication dt
-{
- width: 100px;
-}
-
-#content #dashboard #replication dl
-{
- display: none;
- margin-top: 10px;
-}
-
-#content #dashboard #replication dt,
-#content #dashboard #replication dd
-{
- display: block;
- padding-top: 1px;
- padding-bottom: 1px;
-}
-
-#content #dashboard #replication dt
-{
- border-right: 1px solid #f0f0f0;
- float: left;
- padding-left: 5px;
- padding-right: 5px;
- margin-right: 3px;
- text-align: right;
-}
-
#content #dashboard #dataimport
{
- background-color: #0ff;
float: right;
}
@@ -711,6 +640,19 @@ ul
max-width: 99%;
}
+#content #analysis #analysis-error
+{
+ background-color: #f00;
+ background-image: url( ../img/ico/construction.png );
+ background-position: 10px 50%;
+ color: #fff;
+ display: none;
+ font-weight: bold;
+ margin-bottom: 20px;
+ padding: 10px;
+ padding-left: 35px;
+}
+
#content #analysis .analysis-result h2
{
position: relative;
@@ -1334,6 +1276,12 @@ ul
padding-left: 10px;
}
+#content #schema-browser #related #f-df-t
+{
+ border-bottom: 1px solid #f0f0f0;
+ padding-bottom: 15px;
+}
+
#content #schema-browser #related dl
{
margin-top: 15px;
@@ -1367,7 +1315,9 @@ ul
#content #schema-browser #related .dynamic-field .dynamic-field,
#content #schema-browser #related .dynamic-field .dynamic-field a,
#content #schema-browser #related .type .type,
-#content #schema-browser #related .type .type a
+#content #schema-browser #related .type .type a,
+#content #schema-browser #related .active,
+#content #schema-browser #related .active a
{
color: #333;
}
@@ -1378,6 +1328,11 @@ ul
color: #666;
}
+#content #schema-browser #data
+{
+ display: none;
+}
+
#content #schema-browser #data #index dt
{
display: none;
@@ -1491,6 +1446,7 @@ ul
#content #schema-browser #data #field .topterms-holder
{
+ display: none;
float: left;
}
@@ -2830,6 +2786,7 @@ ul
#content #replication #details table tbody .size
{
text-align: right;
+ white-space: nowrap;
}
#content #replication #details table tbody .generation div
diff --git a/solr/src/webapp/web/index.jsp b/solr/src/webapp/web/index.jsp
index dec2ddc4b34..a632b365327 100644
--- a/solr/src/webapp/web/index.jsp
+++ b/solr/src/webapp/web/index.jsp
@@ -35,14 +35,14 @@
This interface is work in progress. It works best in Chrome.