LUCENE-3191: add TopDocs.merge, TopGroups.merge, SearchGroup.merge, to make sharding easier

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1136080 13f79535-47bb-0310-9956-ffa450edef68
Michael McCandless 2011-06-15 15:12:48 +00:00
parent 3d8fc3de5d
commit a1e74fd6c9
29 changed files with 1304 additions and 171 deletions

View File

@ -520,6 +520,9 @@ New Features
algorithm over objects that implement the new TwoPhaseCommit interface (such
as IndexWriter). (Shai Erera)
* LUCENE-3191: Added TopDocs.merge, to facilitate merging results from
different shards (Uwe Schindler, Mike McCandless)
Build
* LUCENE-1344: Create OSGi bundle using dev-tools/maven.

View File

@ -75,6 +75,10 @@ New Features
allow an app to control which indexing changes must be visible to
which search requests. (Mike McCandless)
* LUCENE-3191: Added SearchGroup.merge and TopGroups.merge, to
facilitate doing grouping in a distributed environment (Uwe
Schindler, Mike McCandless)
API Changes
* LUCENE-3141: add getter method to access fragInfos in FieldFragList.

View File

@ -33,7 +33,7 @@ import org.apache.lucene.util.BytesRef;
* This class will be removed in Lucene 5.0
*/
@Deprecated
public final class SlowCollatedStringComparator extends FieldComparator {
public final class SlowCollatedStringComparator extends FieldComparator<BytesRef> {
private final String[] values;
private DocTerms currentDocTerms;
@ -99,8 +99,22 @@ public final class SlowCollatedStringComparator extends FieldComparator {
}
@Override
public Comparable<?> value(int slot) {
public BytesRef value(int slot) {
final String s = values[slot];
return s == null ? null : new BytesRef(values[slot]);
}
@Override
public int compareValues(BytesRef first, BytesRef second) {
if (first == null) {
if (second == null) {
return 0;
}
return -1;
} else if (second == null) {
return 1;
} else {
return collator.compare(first, second);
}
}
}

View File

@ -35,16 +35,15 @@ public class DistanceFieldComparatorSource extends FieldComparatorSource {
private DistanceScoreDocLookupComparator dsdlc;
public DistanceFieldComparatorSource(Filter distanceFilter) {
this.distanceFilter = (DistanceFilter) distanceFilter;
}
public void cleanUp() {
distanceFilter = null;
if (dsdlc != null)
if (dsdlc != null) {
dsdlc.cleanUp();
}
dsdlc = null;
}
@ -56,7 +55,7 @@ public class DistanceFieldComparatorSource extends FieldComparatorSource {
return dsdlc;
}
private class DistanceScoreDocLookupComparator extends FieldComparator {
private class DistanceScoreDocLookupComparator extends FieldComparator<Double> {
private double[] values;
private double bottom;
@ -103,7 +102,6 @@ public class DistanceFieldComparatorSource extends FieldComparatorSource {
@Override
public void setBottom(int slot) {
this.bottom = values[slot];
}
@Override
@ -116,9 +114,8 @@ public class DistanceFieldComparatorSource extends FieldComparatorSource {
}
@Override
public Comparable<Double> value(int slot) {
public Double value(int slot) {
return values[slot];
}
}
}

View File

@ -60,6 +60,11 @@ public final class SlowMultiReaderWrapper extends FilterIndexReader {
readerContext = new AtomicReaderContext(this); // emulate atomic reader!
}
@Override
public String toString() {
return "SlowMultiReaderWrapper(" + in + ")";
}
@Override
public Fields fields() throws IOException {
return MultiFields.getFields(in);

View File

@ -96,7 +96,7 @@ import org.apache.lucene.util.packed.PackedInts;
*
* @lucene.experimental
*/
public abstract class FieldComparator {
public abstract class FieldComparator<T> {
/**
* Compare hit at slot1 with hit at slot2.
@ -176,13 +176,21 @@ public abstract class FieldComparator {
* Return the actual value in the slot.
*
* @param slot the slot whose value to return
* @return value in this slot upgraded to Comparable
* @return value in this slot
*/
public abstract Comparable<?> value(int slot);
public abstract T value(int slot);
/** Returns -1 if first is less than second. The default
* impl assumes the type implements Comparable and
* invokes .compareTo; be sure to override this method if
* your FieldComparator's type isn't Comparable or
* if your values may sometimes be null */
@SuppressWarnings("unchecked")
public int compareValues(T first, T second) {
return ((Comparable<T>) first).compareTo(second);
}
public static abstract class NumericComparator<T extends CachedArray> extends FieldComparator {
public static abstract class NumericComparator<T extends CachedArray, U extends Number> extends FieldComparator<U> {
protected final CachedArrayCreator<T> creator;
protected T cached;
protected final boolean checkMissing;
@ -203,7 +211,7 @@ public abstract class FieldComparator {
/** Parses field's values as byte (using {@link
* FieldCache#getBytes} and sorts by ascending value */
public static final class ByteComparator extends NumericComparator<ByteValues> {
public static final class ByteComparator extends NumericComparator<ByteValues,Byte> {
private byte[] docValues;
private final byte[] values;
private final byte missingValue;
@ -252,7 +260,7 @@ public abstract class FieldComparator {
}
@Override
public Comparable<?> value(int slot) {
public Byte value(int slot) {
return Byte.valueOf(values[slot]);
}
}
@ -260,13 +268,12 @@ public abstract class FieldComparator {
/** Parses field's values as double (using {@link
* FieldCache#getDoubles} and sorts by ascending value */
public static final class DoubleComparator extends NumericComparator<DoubleValues> {
public static final class DoubleComparator extends NumericComparator<DoubleValues,Double> {
private double[] docValues;
private final double[] values;
private final double missingValue;
private double bottom;
DoubleComparator(int numHits, DoubleValuesCreator creator, Double missingValue ) {
super( creator, missingValue != null );
values = new double[numHits];
@ -324,13 +331,13 @@ public abstract class FieldComparator {
}
@Override
public Comparable<?> value(int slot) {
public Double value(int slot) {
return Double.valueOf(values[slot]);
}
}
/** Uses float index values to sort by ascending value */
public static final class FloatDocValuesComparator extends FieldComparator {
public static final class FloatDocValuesComparator extends FieldComparator<Double> {
private final double[] values;
private Source currentReaderValues;
private final String field;
@ -386,14 +393,14 @@ public abstract class FieldComparator {
}
@Override
public Comparable<Double> value(int slot) {
public Double value(int slot) {
return Double.valueOf(values[slot]);
}
}
/** Parses field's values as float (using {@link
* FieldCache#getFloats} and sorts by ascending value */
public static final class FloatComparator extends NumericComparator<FloatValues> {
public static final class FloatComparator extends NumericComparator<FloatValues,Float> {
private float[] docValues;
private final float[] values;
private final float missingValue;
@ -460,14 +467,14 @@ public abstract class FieldComparator {
}
@Override
public Comparable<?> value(int slot) {
public Float value(int slot) {
return Float.valueOf(values[slot]);
}
}
/** Parses field's values as short (using {@link
* FieldCache#getShorts} and sorts by ascending value */
public static final class ShortComparator extends NumericComparator<ShortValues> {
public static final class ShortComparator extends NumericComparator<ShortValues,Short> {
private short[] docValues;
private final short[] values;
private short bottom;
@ -516,14 +523,14 @@ public abstract class FieldComparator {
}
@Override
public Comparable<?> value(int slot) {
public Short value(int slot) {
return Short.valueOf(values[slot]);
}
}
/** Parses field's values as int (using {@link
* FieldCache#getInts} and sorts by ascending value */
public static final class IntComparator extends NumericComparator<IntValues> {
public static final class IntComparator extends NumericComparator<IntValues,Integer> {
private int[] docValues;
private final int[] values;
private int bottom; // Value of bottom of queue
@ -594,13 +601,13 @@ public abstract class FieldComparator {
}
@Override
public Comparable<?> value(int slot) {
public Integer value(int slot) {
return Integer.valueOf(values[slot]);
}
}
/** Loads int index values and sorts by ascending value. */
public static final class IntDocValuesComparator extends FieldComparator {
public static final class IntDocValuesComparator extends FieldComparator<Long> {
private final long[] values;
private Source currentReaderValues;
private final String field;
@ -660,14 +667,14 @@ public abstract class FieldComparator {
}
@Override
public Comparable<Long> value(int slot) {
public Long value(int slot) {
return Long.valueOf(values[slot]);
}
}
/** Parses field's values as long (using {@link
* FieldCache#getLongs} and sorts by ascending value */
public static final class LongComparator extends NumericComparator<LongValues> {
public static final class LongComparator extends NumericComparator<LongValues,Long> {
private long[] docValues;
private final long[] values;
private long bottom;
@ -735,7 +742,7 @@ public abstract class FieldComparator {
}
@Override
public Comparable<?> value(int slot) {
public Long value(int slot) {
return Long.valueOf(values[slot]);
}
}
@ -746,7 +753,7 @@ public abstract class FieldComparator {
* using {@link TopScoreDocCollector} directly (which {@link
* IndexSearcher#search} uses when no {@link Sort} is
* specified). */
public static final class RelevanceComparator extends FieldComparator {
public static final class RelevanceComparator extends FieldComparator<Float> {
private final float[] scores;
private float bottom;
private Scorer scorer;
@ -791,15 +798,21 @@ public abstract class FieldComparator {
}
@Override
public Comparable<?> value(int slot) {
public Float value(int slot) {
return Float.valueOf(scores[slot]);
}
// Override because we sort reverse of natural Float order:
@Override
public int compareValues(Float first, Float second) {
// Reversed intentionally because relevance by default
// sorts descending:
return second.compareTo(first);
}
}
/** Sorts by ascending docID */
public static final class DocComparator extends FieldComparator {
public static final class DocComparator extends FieldComparator<Integer> {
private final int[] docIDs;
private int docBase;
private int bottom;
@ -840,7 +853,7 @@ public abstract class FieldComparator {
}
@Override
public Comparable<?> value(int slot) {
public Integer value(int slot) {
return Integer.valueOf(docIDs[slot]);
}
}
@ -854,7 +867,7 @@ public abstract class FieldComparator {
* to large results, this comparator will be much faster
* than {@link TermValComparator}. For very small
* result sets it may be slower. */
public static final class TermOrdValComparator extends FieldComparator {
public static final class TermOrdValComparator extends FieldComparator<BytesRef> {
/** @lucene.internal */
final int[] ords;
/** @lucene.internal */
@ -920,7 +933,7 @@ public abstract class FieldComparator {
* the underlying array access when looking up doc->ord
* @lucene.internal
*/
abstract class PerSegmentComparator extends FieldComparator {
abstract class PerSegmentComparator extends FieldComparator<BytesRef> {
@Override
public FieldComparator setNextReader(AtomicReaderContext context) throws IOException {
@ -938,7 +951,7 @@ public abstract class FieldComparator {
}
@Override
public Comparable<?> value(int slot) {
public BytesRef value(int slot) {
return TermOrdValComparator.this.value(slot);
}
}
@ -1244,7 +1257,7 @@ public abstract class FieldComparator {
}
@Override
public Comparable<?> value(int slot) {
public BytesRef value(int slot) {
return values[slot];
}
}
@ -1253,7 +1266,7 @@ public abstract class FieldComparator {
* comparisons are done using BytesRef.compareTo, which is
* slow for medium to large result sets but possibly
* very fast for very small results sets. */
public static final class TermValComparator extends FieldComparator {
public static final class TermValComparator extends FieldComparator<BytesRef> {
private BytesRef[] values;
private DocTerms docTerms;
@ -1316,7 +1329,7 @@ public abstract class FieldComparator {
}
@Override
public Comparable<?> value(int slot) {
public BytesRef value(int slot) {
return values[slot];
}
}
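The new compareValues hook above is the extension point this commit leans on: the merge code compares the Objects returned by value(slot) directly, with no per-segment state. Below is a minimal sketch of a custom comparator written against the new generic API; the "popularity" field, the FieldCache lookup, and the class name are illustrative assumptions, not part of this commit. It routes every comparison through compareValues and overrides it to tolerate nulls, as the new javadoc advises.

import java.io.IOException;
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
import org.apache.lucene.search.FieldCache;
import org.apache.lucene.search.FieldComparator;

// Hypothetical null-tolerant Integer comparator (a sketch, not part
// of this commit):
public class NullSafeIntComparator extends FieldComparator<Integer> {
  private final Integer[] slotValues;  // value per competitive slot
  private int[] docValues;             // current segment's values
  private Integer bottom;

  public NullSafeIntComparator(int numHits) {
    slotValues = new Integer[numHits];
  }

  @Override
  public int compare(int slot1, int slot2) {
    // Route through compareValues so slot comparisons and
    // cross-shard merge comparisons agree:
    return compareValues(slotValues[slot1], slotValues[slot2]);
  }

  @Override
  public void setBottom(int slot) {
    bottom = slotValues[slot];
  }

  @Override
  public int compareBottom(int doc) {
    return compareValues(bottom, Integer.valueOf(docValues[doc]));
  }

  @Override
  public void copy(int slot, int doc) {
    slotValues[slot] = Integer.valueOf(docValues[doc]);
  }

  @Override
  public FieldComparator setNextReader(AtomicReaderContext context) throws IOException {
    docValues = FieldCache.DEFAULT.getInts(context.reader, "popularity");
    return this;
  }

  @Override
  public Integer value(int slot) {
    return slotValues[slot];
  }

  // The point of LUCENE-3191: merging calls compareValues on the
  // Objects from value(), so null-capable comparators must override it:
  @Override
  public int compareValues(Integer first, Integer second) {
    if (first == null) {
      return second == null ? 0 : -1;
    } else if (second == null) {
      return 1;
    } else {
      return first.compareTo(second);
    }
  }
}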

View File

@ -40,12 +40,13 @@ public class FieldDoc extends ScoreDoc {
/** Expert: The values which are used to sort the referenced document.
* The order of these will match the original sort criteria given by a
* Sort object. Each Object will be either an Integer, Float or String,
* depending on the type of values in the terms of the original field.
* Sort object. Each Object will have been returned from
* the <code>value</code> method of the corresponding
* FieldComparator used to sort this field.
* @see Sort
* @see IndexSearcher#search(Query,Filter,int,Sort)
*/
public Comparable[] fields;
public Object[] fields;
/** Expert: Creates one of these objects with empty sort information. */
public FieldDoc (int doc, float score) {
@ -53,7 +54,7 @@ public class FieldDoc extends ScoreDoc {
}
/** Expert: Creates one of these objects with the given sort information. */
public FieldDoc (int doc, float score, Comparable[] fields) {
public FieldDoc (int doc, float score, Object[] fields) {
super (doc, score);
this.fields = fields;
}
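Since fields now holds exactly what each comparator's value method returned, consumers downcast per sort type instead of relying on Comparable. A hedged sketch follows; the searcher, query term, and "price" field are illustrative assumptions (this search overload fills the sort fields):

import java.io.IOException;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.*;

class SortValueDemo {
  // Prints each hit's docID and its first sort-field value:
  static void printPrices(IndexSearcher searcher) throws IOException {
    Query query = new TermQuery(new Term("text", "lucene"));
    TopFieldDocs hits = searcher.search(query, null, 10,
        new Sort(new SortField("price", SortField.INT)));
    for (ScoreDoc sd : hits.scoreDocs) {
      FieldDoc fd = (FieldDoc) sd;
      Integer price = (Integer) fd.fields[0];  // IntComparator returns Integer
      System.out.println("doc=" + fd.doc + " price=" + price);
    }
  }
}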

View File

@ -200,7 +200,7 @@ public abstract class FieldValueHitQueue extends PriorityQueue<FieldValueHitQueu
*/
FieldDoc fillFields(final Entry entry) {
final int n = comparators.length;
final Comparable<?>[] fields = new Comparable[n];
final Object[] fields = new Object[n];
for (int i = 0; i < n; ++i) {
fields[i] = comparators[i].value(entry.slot);
}

View File

@ -91,10 +91,10 @@ public class SortField {
public static final int BYTES = 12;
/** Represents sorting by document score (relevance). */
public static final SortField FIELD_SCORE = new SortField (null, SCORE);
public static final SortField FIELD_SCORE = new SortField(null, SCORE);
/** Represents sorting by document number (index order). */
public static final SortField FIELD_DOC = new SortField (null, DOC);
public static final SortField FIELD_DOC = new SortField(null, DOC);
private String field;
private int type; // defaults to determining type dynamically
@ -111,7 +111,7 @@ public class SortField {
* <code>type</code> is SCORE or DOC.
* @param type Type of values in the terms.
*/
public SortField (String field, int type) {
public SortField(String field, int type) {
initFieldType(field, type);
}
@ -122,7 +122,7 @@ public class SortField {
* @param type Type of values in the terms.
* @param reverse True if natural order should be reversed.
*/
public SortField (String field, int type, boolean reverse) {
public SortField(String field, int type, boolean reverse) {
initFieldType(field, type);
this.reverse = reverse;
}
@ -140,7 +140,7 @@ public class SortField {
* @deprecated (4.0) use EntryCreator version
*/
@Deprecated
public SortField (String field, FieldCache.Parser parser) {
public SortField(String field, FieldCache.Parser parser) {
this(field, parser, false);
}
@ -158,7 +158,7 @@ public class SortField {
* @deprecated (4.0) use EntryCreator version
*/
@Deprecated
public SortField (String field, FieldCache.Parser parser, boolean reverse) {
public SortField(String field, FieldCache.Parser parser, boolean reverse) {
if (field == null) {
throw new IllegalArgumentException("field can only be null when type is SCORE or DOC");
}
@ -225,7 +225,7 @@ public class SortField {
* @param field Name of field to sort by; cannot be <code>null</code>.
* @param comparator Returns a comparator for sorting hits.
*/
public SortField (String field, FieldComparatorSource comparator) {
public SortField(String field, FieldComparatorSource comparator) {
initFieldType(field, CUSTOM);
this.comparatorSource = comparator;
}
@ -235,7 +235,7 @@ public class SortField {
* @param comparator Returns a comparator for sorting hits.
* @param reverse True if natural order should be reversed.
*/
public SortField (String field, FieldComparatorSource comparator, boolean reverse) {
public SortField(String field, FieldComparatorSource comparator, boolean reverse) {
initFieldType(field, CUSTOM);
this.reverse = reverse;
this.comparatorSource = comparator;

View File

@ -89,7 +89,7 @@ public class TermQuery extends Query {
public Scorer scorer(AtomicReaderContext context, ScorerContext scorerContext) throws IOException {
final String field = term.field();
final IndexReader reader = context.reader;
assert termStates.topReaderContext == ReaderUtil.getTopLevelContext(context) : "The top-reader used to create Weight is not the same as the current reader's top-reader";
assert termStates.topReaderContext == ReaderUtil.getTopLevelContext(context) : "The top-reader used to create Weight (" + termStates.topReaderContext + ") is not the same as the current reader's top-reader (" + ReaderUtil.getTopLevelContext(context) + ")";
final TermState state = termStates
.get(context.ord);
if (state == null) { // term is not present in that reader

View File

@ -17,6 +17,10 @@ package org.apache.lucene.search;
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.util.PriorityQueue;
/** Represents hits returned by {@link
* IndexSearcher#search(Query,Filter,int)} and {@link
* IndexSearcher#search(Query,int)}. */
@ -52,4 +56,208 @@ public class TopDocs {
this.scoreDocs = scoreDocs;
this.maxScore = maxScore;
}
// Refers to one hit:
private static class ShardRef {
// Which shard (index into shardHits[]):
final int shardIndex;
// Which hit within the shard:
int hitIndex;
public ShardRef(int shardIndex) {
this.shardIndex = shardIndex;
}
@Override
public String toString() {
return "ShardRef(shardIndex=" + shardIndex + " hitIndex=" + hitIndex + ")";
}
}
// Specialized MergeSortQueue that just merges by
// relevance score, descending:
private static class ScoreMergeSortQueue extends PriorityQueue<ShardRef> {
final ScoreDoc[][] shardHits;
public ScoreMergeSortQueue(TopDocs[] shardHits) {
super(shardHits.length);
this.shardHits = new ScoreDoc[shardHits.length][];
for(int shardIDX=0;shardIDX<shardHits.length;shardIDX++) {
this.shardHits[shardIDX] = shardHits[shardIDX].scoreDocs;
}
}
// Returns true if first is < second
public boolean lessThan(ShardRef first, ShardRef second) {
assert first != second;
final float firstScore = shardHits[first.shardIndex][first.hitIndex].score;
final float secondScore = shardHits[second.shardIndex][second.hitIndex].score;
if (firstScore < secondScore) {
return false;
} else if (firstScore > secondScore) {
return true;
} else {
// Tie break: earlier shard wins
if (first.shardIndex < second.shardIndex) {
return true;
} else if (first.shardIndex > second.shardIndex) {
return false;
} else {
// Tie break in same shard: resolve however the
// shard had resolved it:
assert first.hitIndex != second.hitIndex;
return first.hitIndex < second.hitIndex;
}
}
}
}
private static class MergeSortQueue extends PriorityQueue<ShardRef> {
// These are really FieldDoc instances:
final ScoreDoc[][] shardHits;
final FieldComparator[] comparators;
final int[] reverseMul;
public MergeSortQueue(Sort sort, TopDocs[] shardHits) throws IOException {
super(shardHits.length);
this.shardHits = new ScoreDoc[shardHits.length][];
for(int shardIDX=0;shardIDX<shardHits.length;shardIDX++) {
final ScoreDoc[] shard = shardHits[shardIDX].scoreDocs;
//System.out.println(" init shardIdx=" + shardIDX + " hits=" + shard);
if (shard != null) {
this.shardHits[shardIDX] = shard;
// Fail gracefully if API is misused:
for(int hitIDX=0;hitIDX<shard.length;hitIDX++) {
final ScoreDoc sd = shard[hitIDX];
if (!(sd instanceof FieldDoc)) {
throw new IllegalArgumentException("shard " + shardIDX + " was not sorted by the provided Sort (expected FieldDoc but got ScoreDoc)");
}
final FieldDoc fd = (FieldDoc) sd;
if (fd.fields == null) {
throw new IllegalArgumentException("shard " + shardIDX + " did not set sort field values (FieldDoc.fields is null); you must pass fillFields=true to IndexSearcher.search on each shard");
}
}
}
}
final SortField[] sortFields = sort.getSort();
comparators = new FieldComparator[sortFields.length];
reverseMul = new int[sortFields.length];
for(int compIDX=0;compIDX<sortFields.length;compIDX++) {
final SortField sortField = sortFields[compIDX];
comparators[compIDX] = sortField.getComparator(1, compIDX);
reverseMul[compIDX] = sortField.getReverse() ? -1 : 1;
}
}
// Returns true if first is < second
@SuppressWarnings("unchecked")
public boolean lessThan(ShardRef first, ShardRef second) {
assert first != second;
final FieldDoc firstFD = (FieldDoc) shardHits[first.shardIndex][first.hitIndex];
final FieldDoc secondFD = (FieldDoc) shardHits[second.shardIndex][second.hitIndex];
//System.out.println(" lessThan:\n first=" + first + " doc=" + firstFD.doc + " score=" + firstFD.score + "\n second=" + second + " doc=" + secondFD.doc + " score=" + secondFD.score);
for(int compIDX=0;compIDX<comparators.length;compIDX++) {
final FieldComparator comp = comparators[compIDX];
//System.out.println(" cmp idx=" + compIDX + " cmp1=" + firstFD.fields[compIDX] + " cmp2=" + secondFD.fields[compIDX] + " reverse=" + reverseMul[compIDX]);
final int cmp = reverseMul[compIDX] * comp.compareValues(firstFD.fields[compIDX], secondFD.fields[compIDX]);
if (cmp != 0) {
//System.out.println(" return " + (cmp < 0));
return cmp < 0;
}
}
// Tie break: earlier shard wins
if (first.shardIndex < second.shardIndex) {
//System.out.println(" return tb true");
return true;
} else if (first.shardIndex > second.shardIndex) {
//System.out.println(" return tb false");
return false;
} else {
// Tie break in same shard: resolve however the
// shard had resolved it:
//System.out.println(" return tb " + (first.hitIndex < second.hitIndex));
assert first.hitIndex != second.hitIndex;
return first.hitIndex < second.hitIndex;
}
}
}
/** Returned from {@link #merge}: holds the merged
* TopDocs, plus, for each hit, the index of the
* original TopDocs shard it came from.
*
* @lucene.experimental */
public static class TopDocsAndShards extends TopDocs {
/** Parallel array matching <code>scoreDocs</code>: entry <code>i</code> is the shard index of hit <code>i</code> */
public final int[] shardIndex;
public TopDocsAndShards(int totalHits, ScoreDoc[] scoreDocs, float maxScore, int[] shardIndex) {
super(totalHits, scoreDocs, maxScore);
this.shardIndex = shardIndex;
}
}
/** Returns a new TopDocs, containing topN results across
* the provided TopDocs, sorting by the specified {@link
* Sort}. Each of the TopDocs must have been sorted by
* the same Sort, and sort field values must have been
* filled (ie, <code>fillFields=true</code> must be
* passed to {@link
* TopFieldCollector#create}).
*
* <p>Pass sort=null to merge sort by score descending.
*
* @lucene.experimental */
public static TopDocsAndShards merge(Sort sort, int topN, TopDocs[] shardHits) throws IOException {
final PriorityQueue<ShardRef> queue;
if (sort == null) {
queue = new ScoreMergeSortQueue(shardHits);
} else {
queue = new MergeSortQueue(sort, shardHits);
}
int totalHitCount = 0;
float maxScore = Float.MIN_VALUE;
for(int shardIDX=0;shardIDX<shardHits.length;shardIDX++) {
final TopDocs shard = shardHits[shardIDX];
if (shard.scoreDocs != null && shard.scoreDocs.length > 0) {
totalHitCount += shard.totalHits;
queue.add(new ShardRef(shardIDX));
maxScore = Math.max(maxScore, shard.getMaxScore());
//System.out.println(" maxScore now " + maxScore + " vs " + shard.getMaxScore());
}
}
final ScoreDoc[] hits = new ScoreDoc[Math.min(topN, totalHitCount)];
final int[] shardIndex = new int[hits.length];
int hitUpto = 0;
while(hitUpto < hits.length) {
assert queue.size() > 0;
ShardRef ref = queue.pop();
hits[hitUpto] = shardHits[ref.shardIndex].scoreDocs[ref.hitIndex++];
shardIndex[hitUpto] = ref.shardIndex;
//System.out.println(" hitUpto=" + hitUpto);
//System.out.println(" doc=" + hits[hitUpto].doc + " score=" + hits[hitUpto].score);
hitUpto++;
if (ref.hitIndex < shardHits[ref.shardIndex].scoreDocs.length) {
// Not done with these TopDocs yet:
queue.add(ref);
}
}
return new TopDocsAndShards(totalHitCount, hits, maxScore, shardIndex);
}
}
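Putting the pieces together, here is a minimal sharded-search sketch against the new merge API; the one-IndexSearcher-per-shard setup, the class name, and the collector flags are assumptions, not part of this commit:

import java.io.IOException;
import org.apache.lucene.search.*;

class ShardedSearchDemo {
  // Search every shard with the same Sort, then merge-sort the
  // per-shard TopDocs into one global top-N:
  static TopDocs.TopDocsAndShards searchShards(IndexSearcher[] shards,
                                               Query query, Sort sort,
                                               int topN) throws IOException {
    TopDocs[] shardHits = new TopDocs[shards.length];
    for (int i = 0; i < shards.length; i++) {
      // fillFields=true so each hit is a FieldDoc carrying its sort
      // values; merge throws IllegalArgumentException otherwise:
      TopFieldCollector c = TopFieldCollector.create(sort, topN, true, true, true, true);
      shards[i].search(query, c);
      shardHits[i] = c.topDocs();
    }
    // shardIndex[i] in the result records which shard produced hit i:
    return TopDocs.merge(sort, topN, shardHits);
  }
}

To merge by relevance instead, pass sort=null to merge and gather each shard's hits with a plain searcher.search(query, topN).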

View File

@ -308,7 +308,14 @@ public class RandomIndexWriter implements Closeable {
return getReader(true);
}
private boolean doRandomOptimize = true;
public void setDoRandomOptimize(boolean v) {
doRandomOptimize = v;
}
private void doRandomOptimize() throws IOException {
if (doRandomOptimize) {
final int segCount = w.getSegmentCount();
if (r.nextBoolean() || segCount == 0) {
// full optimize
@ -319,6 +326,7 @@ public class RandomIndexWriter implements Closeable {
w.optimize(limit);
assert w.getSegmentCount() <= limit: "limit=" + limit + " actual=" + w.getSegmentCount();
}
}
switchDoDocValues();
}

View File

@ -27,10 +27,10 @@ import java.io.OutputStream;
import java.io.PrintStream;
import java.lang.reflect.Method;
import java.util.Enumeration;
import java.util.List;
import java.util.Random;
import java.util.Map;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.zip.ZipEntry;
import java.util.zip.ZipFile;
@ -46,6 +46,9 @@ import org.apache.lucene.index.MergeScheduler;
import org.apache.lucene.index.TieredMergePolicy;
import org.apache.lucene.index.codecs.Codec;
import org.apache.lucene.index.codecs.CodecProvider;
import org.apache.lucene.search.FieldDoc;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.junit.Assert;
@ -468,4 +471,24 @@ public class _TestUtil {
newName.append(suffix);
return new File(directory, newName.toString());
}
public static void assertEquals(TopDocs expected, TopDocs actual) {
Assert.assertEquals("wrong total hits", expected.totalHits, actual.totalHits);
Assert.assertEquals("wrong maxScore", expected.getMaxScore(), actual.getMaxScore(), 0.0);
Assert.assertEquals("wrong hit count", expected.scoreDocs.length, actual.scoreDocs.length);
for(int hitIDX=0;hitIDX<expected.scoreDocs.length;hitIDX++) {
final ScoreDoc expectedSD = expected.scoreDocs[hitIDX];
final ScoreDoc actualSD = actual.scoreDocs[hitIDX];
Assert.assertEquals("wrong hit docID", expectedSD.doc, actualSD.doc);
Assert.assertEquals("wrong hit score", expectedSD.score, actualSD.score, 0.0);
if (expectedSD instanceof FieldDoc) {
Assert.assertTrue(actualSD instanceof FieldDoc);
Assert.assertEquals("wrong sort field values",
((FieldDoc) expectedSD).fields,
((FieldDoc) actualSD).fields);
} else {
Assert.assertFalse(actualSD instanceof FieldDoc);
}
}
}
}

View File

@ -103,7 +103,7 @@ final class JustCompileSearch {
}
static final class JustCompileFieldComparator extends FieldComparator {
static final class JustCompileFieldComparator extends FieldComparator<Object> {
@Override
public int compare(int slot1, int slot2) {
@ -132,7 +132,7 @@ final class JustCompileSearch {
}
@Override
public Comparable<?> value(int slot) {
public Object value(int slot) {
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
}

View File

@ -139,7 +139,7 @@ class ElevationComparatorSource extends FieldComparatorSource {
@Override
public FieldComparator newComparator(final String fieldname, final int numHits, int sortPos, boolean reversed) throws IOException {
return new FieldComparator() {
return new FieldComparator<Integer>() {
FieldCache.DocTermsIndex idIndex;
private final int[] values = new int[numHits];
@ -184,7 +184,7 @@ class ElevationComparatorSource extends FieldComparatorSource {
}
@Override
public Comparable<?> value(int slot) {
public Integer value(int slot) {
return Integer.valueOf(values[slot]);
}
};

View File

@ -511,7 +511,7 @@ public class TestSort extends LuceneTestCase {
assertMatches (empty, queryX, sort, "");
}
static class MyFieldComparator extends FieldComparator {
static class MyFieldComparator extends FieldComparator<Integer> {
int[] docValues;
int[] slotValues;
int bottomValue;
@ -527,6 +527,7 @@ public class TestSort extends LuceneTestCase {
@Override
public int compare(int slot1, int slot2) {
// values are small enough that overflow won't happen
return slotValues[slot1] - slotValues[slot2];
}
@ -553,7 +554,7 @@ public class TestSort extends LuceneTestCase {
}
@Override
public Comparable<?> value(int slot) {
public Integer value(int slot) {
return Integer.valueOf(slotValues[slot]);
}
}

View File

@ -0,0 +1,244 @@
package org.apache.lucene.search;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.NumericField;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.ReaderUtil;
import org.apache.lucene.util._TestUtil;
public class TestTopDocsMerge extends LuceneTestCase {
private static class ShardSearcher extends IndexSearcher {
private final IndexReader.AtomicReaderContext[] ctx;
public ShardSearcher(IndexReader.AtomicReaderContext ctx, IndexReader.ReaderContext parent) {
super(parent);
this.ctx = new IndexReader.AtomicReaderContext[] {ctx};
}
public void search(Weight weight, Collector collector) throws IOException {
search(ctx, weight, null, collector);
}
public TopDocs search(Weight weight, int topN) throws IOException {
return search(ctx, weight, null, topN);
}
@Override
public String toString() {
return "ShardSearcher(" + ctx[0] + ")";
}
}
public void testSort() throws Exception {
IndexReader reader = null;
Directory dir = null;
final int numDocs = atLeast(1000);
//final int numDocs = atLeast(50);
final String[] tokens = new String[] {"a", "b", "c", "d", "e"};
if (VERBOSE) {
System.out.println("TEST: make index");
}
{
dir = newDirectory();
final RandomIndexWriter w = new RandomIndexWriter(random, dir);
// w.setDoRandomOptimize(false);
// w.w.getConfig().setMaxBufferedDocs(atLeast(100));
final String[] content = new String[atLeast(20)];
for(int contentIDX=0;contentIDX<content.length;contentIDX++) {
final StringBuilder sb = new StringBuilder();
final int numTokens = _TestUtil.nextInt(random, 1, 10);
for(int tokenIDX=0;tokenIDX<numTokens;tokenIDX++) {
sb.append(tokens[random.nextInt(tokens.length)]).append(' ');
}
content[contentIDX] = sb.toString();
}
for(int docIDX=0;docIDX<numDocs;docIDX++) {
final Document doc = new Document();
doc.add(newField("string", _TestUtil.randomRealisticUnicodeString(random), Field.Index.NOT_ANALYZED));
doc.add(newField("text", content[random.nextInt(content.length)], Field.Index.ANALYZED));
doc.add(new NumericField("float").setFloatValue(random.nextFloat()));
final int intValue;
if (random.nextInt(100) == 17) {
intValue = Integer.MIN_VALUE;
} else if (random.nextInt(100) == 17) {
intValue = Integer.MAX_VALUE;
} else {
intValue = random.nextInt();
}
doc.add(new NumericField("int").setIntValue(intValue));
if (VERBOSE) {
System.out.println(" doc=" + doc);
}
w.addDocument(doc);
}
reader = w.getReader();
w.close();
}
// NOTE: sometimes reader has just one segment, which is
// important to test
final IndexSearcher searcher = newSearcher(reader);
IndexReader[] subReaders = searcher.getIndexReader().getSequentialSubReaders();
if (subReaders == null) {
subReaders = new IndexReader[] {searcher.getIndexReader()};
}
final ShardSearcher[] subSearchers = new ShardSearcher[subReaders.length];
final IndexReader.ReaderContext ctx = searcher.getTopReaderContext();
if (ctx instanceof IndexReader.AtomicReaderContext) {
assert subSearchers.length == 1;
subSearchers[0] = new ShardSearcher((IndexReader.AtomicReaderContext) ctx, ctx);
} else {
final IndexReader.CompositeReaderContext compCTX = (IndexReader.CompositeReaderContext) ctx;
for(int searcherIDX=0;searcherIDX<subSearchers.length;searcherIDX++) {
subSearchers[searcherIDX] = new ShardSearcher(compCTX.leaves[searcherIDX], compCTX);
}
}
final List<SortField> sortFields = new ArrayList<SortField>();
sortFields.add(new SortField("string", SortField.STRING, true));
sortFields.add(new SortField("string", SortField.STRING, false));
sortFields.add(new SortField("int", SortField.INT, true));
sortFields.add(new SortField("int", SortField.INT, false));
sortFields.add(new SortField("float", SortField.FLOAT, true));
sortFields.add(new SortField("float", SortField.FLOAT, false));
sortFields.add(new SortField(null, SortField.SCORE, true));
sortFields.add(new SortField(null, SortField.SCORE, false));
sortFields.add(new SortField(null, SortField.DOC, true));
sortFields.add(new SortField(null, SortField.DOC, false));
final int[] docStarts = new int[subSearchers.length];
int docBase = 0;
for(int subIDX=0;subIDX<docStarts.length;subIDX++) {
docStarts[subIDX] = docBase;
docBase += subReaders[subIDX].maxDoc();
//System.out.println("docStarts[" + subIDX + "]=" + docStarts[subIDX]);
}
for(int iter=0;iter<1000*RANDOM_MULTIPLIER;iter++) {
// TODO: custom FieldComp...
final Query query = new TermQuery(new Term("text", tokens[random.nextInt(tokens.length)]));
final Sort sort;
if (random.nextInt(10) == 4) {
// Sort by score
sort = null;
} else {
final SortField[] randomSortFields = new SortField[_TestUtil.nextInt(random, 1, 3)];
for(int sortIDX=0;sortIDX<randomSortFields.length;sortIDX++) {
randomSortFields[sortIDX] = sortFields.get(random.nextInt(sortFields.size()));
}
sort = new Sort(randomSortFields);
}
final int numHits = _TestUtil.nextInt(random, 1, numDocs+5);
//final int numHits = 5;
if (VERBOSE) {
System.out.println("TEST: search query=" + query + " sort=" + sort + " numHits=" + numHits);
}
// First search on whole index:
final TopDocs topHits;
if (sort == null) {
topHits = searcher.search(query, numHits);
} else {
final TopFieldCollector c = TopFieldCollector.create(sort, numHits, true, true, true, random.nextBoolean());
searcher.search(query, c);
topHits = c.topDocs(0, numHits);
}
if (VERBOSE) {
System.out.println(" top search: " + topHits.totalHits + " totalHits; hits=" + (topHits.scoreDocs == null ? "null" : topHits.scoreDocs.length));
if (topHits.scoreDocs != null) {
for(int hitIDX=0;hitIDX<topHits.scoreDocs.length;hitIDX++) {
final ScoreDoc sd = topHits.scoreDocs[hitIDX];
System.out.println(" doc=" + sd.doc + " score=" + sd.score);
}
}
}
// ... then all shards:
final Weight w = query.weight(searcher);
final TopDocs[] shardHits = new TopDocs[subSearchers.length];
for(int shardIDX=0;shardIDX<subSearchers.length;shardIDX++) {
final TopDocs subHits;
final ShardSearcher subSearcher = subSearchers[shardIDX];
if (sort == null) {
subHits = subSearcher.search(w, numHits);
} else {
final TopFieldCollector c = TopFieldCollector.create(sort, numHits, true, true, true, random.nextBoolean());
subSearcher.search(w, c);
subHits = c.topDocs(0, numHits);
}
shardHits[shardIDX] = subHits;
if (VERBOSE) {
System.out.println(" shard=" + shardIDX + " " + subHits.totalHits + " totalHits hits=" + (subHits.scoreDocs == null ? "null" : subHits.scoreDocs.length));
if (subHits.scoreDocs != null) {
for(ScoreDoc sd : subHits.scoreDocs) {
System.out.println(" doc=" + sd.doc + " score=" + sd.score);
}
}
}
}
// Merge:
final TopDocs.TopDocsAndShards mergedHits = TopDocs.merge(sort, numHits, shardHits);
if (mergedHits.scoreDocs != null) {
// Make sure the returned shards are correct:
for(int hitIDX=0;hitIDX<mergedHits.scoreDocs.length;hitIDX++) {
final ScoreDoc sd = mergedHits.scoreDocs[hitIDX];
assertEquals("doc=" + sd.doc + " wrong shard",
ReaderUtil.subIndex(sd.doc, docStarts),
mergedHits.shardIndex[hitIDX]);
}
}
_TestUtil.assertEquals(topHits, mergedHits);
}
searcher.close();
reader.close();
dir.close();
}
}

View File

@ -122,7 +122,7 @@ abstract public class AbstractFirstPassGroupingCollector<GROUP_VALUE_TYPE> exten
SearchGroup<GROUP_VALUE_TYPE> searchGroup = new SearchGroup<GROUP_VALUE_TYPE>();
searchGroup.groupValue = group.groupValue;
if (fillFields) {
searchGroup.sortValues = new Comparable[sortFieldCount];
searchGroup.sortValues = new Object[sortFieldCount];
for(int sortFieldIDX=0;sortFieldIDX<sortFieldCount;sortFieldIDX++) {
searchGroup.sortValues[sortFieldIDX] = comparators[sortFieldIDX].value(group.comparatorSlot);
}

View File

@ -348,7 +348,7 @@ public class BlockGroupingCollector extends Collector {
}
totalGroupedHitCount += og.count;
final Comparable[] groupSortValues;
final Object[] groupSortValues;
if (fillSortFields) {
groupSortValues = new Comparable[comparators.length];

View File

@ -40,13 +40,13 @@ public class GroupDocs<GROUP_VALUE_TYPE> {
/** Matches the groupSort passed to {@link
* AbstractFirstPassGroupingCollector}. */
public final Comparable[] groupSortValues;
public final Object[] groupSortValues;
public GroupDocs(float maxScore,
int totalHits,
ScoreDoc[] scoreDocs,
GROUP_VALUE_TYPE groupValue,
Comparable[] groupSortValues) {
Object[] groupSortValues) {
this.maxScore = maxScore;
this.totalHits = totalHits;
this.scoreDocs = scoreDocs;

View File

@ -0,0 +1,36 @@
package org.apache.lucene.search.grouping;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.search.ScoreDoc;
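/** Like {@link GroupDocs}, but also records, per hit, the
* index of the shard that produced it.
*
* @lucene.experimental */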
public class GroupDocsAndShards<GROUP_VALUE_TYPE> extends GroupDocs<GROUP_VALUE_TYPE> {
public final int[] shardIndex;
public GroupDocsAndShards(float maxScore,
int totalHits,
ScoreDoc[] scoreDocs,
GROUP_VALUE_TYPE groupValue,
Object[] groupSortValues,
int[] shardIndex) {
super(maxScore, totalHits, scoreDocs, groupValue, groupSortValues);
this.shardIndex = shardIndex;
}
}

View File

@ -17,6 +17,14 @@ package org.apache.lucene.search.grouping;
* limitations under the License.
*/
import java.io.IOException;
import java.util.*;
import org.apache.lucene.search.FieldComparator;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.util.BytesRef;
/**
* Represents a group that is found during the first pass search.
*
@ -27,6 +35,287 @@ public class SearchGroup<GROUP_VALUE_TYPE> {
/** The value that defines this group */
public GROUP_VALUE_TYPE groupValue;
/** The sort values used during sorting. Can be <code>null</code>. */
public Comparable[] sortValues;
/** The sort values used during sorting. These are the
* groupSort field values of the highest rank document
* (by the groupSort) within the group. Can be
* <code>null</code> if <code>fillFields=false</code> had
* been passed to {@link AbstractFirstPassGroupingCollector#getTopGroups} */
public Object[] sortValues;
@Override
public String toString() {
return("SearchGroup(groupValue=" + groupValue + " sortValues=" + Arrays.toString(sortValues) + ")");
}
private static class ShardIter<T> {
public final Iterator<SearchGroup<T>> iter;
public final int shardIndex;
public ShardIter(Collection<SearchGroup<T>> shard, int shardIndex) {
this.shardIndex = shardIndex;
iter = shard.iterator();
assert iter.hasNext();
}
public SearchGroup<T> next() {
assert iter.hasNext();
final SearchGroup<T> group = iter.next();
if (group.sortValues == null) {
throw new IllegalArgumentException("group.sortValues is null; you must pass fillFields=true to the first pass collector");
}
return group;
}
@Override
public String toString() {
return "ShardIter(shard=" + shardIndex + ")";
}
}
// Holds all shard iterators currently positioned on the same group
private static class MergedGroup<T> {
// groupValue may be null!
public final T groupValue;
public Object[] topValues;
public final List<ShardIter<T>> shards = new ArrayList<ShardIter<T>>();
public int minShardIndex;
public boolean processed;
public boolean inQueue;
public MergedGroup(T groupValue) {
this.groupValue = groupValue;
}
// Only for assert
private boolean neverEquals(Object _other) {
if (_other instanceof MergedGroup) {
MergedGroup other = (MergedGroup) _other;
if (groupValue == null) {
assert other.groupValue != null;
} else {
assert !groupValue.equals(other.groupValue);
}
}
return true;
}
@Override
public boolean equals(Object _other) {
// We never have another MergedGroup instance with
// same groupValue
assert neverEquals(_other);
if (_other instanceof MergedGroup) {
MergedGroup other = (MergedGroup) _other;
if (groupValue == null) {
return other.groupValue == null;
} else {
return groupValue.equals(other.groupValue);
}
} else {
return false;
}
}
@Override
public int hashCode() {
if (groupValue == null) {
return 0;
} else {
return groupValue.hashCode();
}
}
}
private static class GroupComparator<T> implements Comparator<MergedGroup<T>> {
public final FieldComparator[] comparators;
public final int[] reversed;
public GroupComparator(Sort groupSort) throws IOException {
final SortField[] sortFields = groupSort.getSort();
comparators = new FieldComparator[sortFields.length];
reversed = new int[sortFields.length];
for (int compIDX = 0; compIDX < sortFields.length; compIDX++) {
final SortField sortField = sortFields[compIDX];
comparators[compIDX] = sortField.getComparator(1, compIDX);
reversed[compIDX] = sortField.getReverse() ? -1 : 1;
}
}
@SuppressWarnings("unchecked")
public int compare(MergedGroup<T> group, MergedGroup<T> other) {
if (group == other) {
return 0;
}
//System.out.println("compare group=" + group + " other=" + other);
final Object[] groupValues = group.topValues;
final Object[] otherValues = other.topValues;
//System.out.println(" groupValues=" + groupValues + " otherValues=" + otherValues);
for (int compIDX = 0;compIDX < comparators.length; compIDX++) {
final int c = reversed[compIDX] * comparators[compIDX].compareValues(groupValues[compIDX],
otherValues[compIDX]);
if (c != 0) {
return c;
}
}
// Tie break by min shard index:
assert group.minShardIndex != other.minShardIndex;
return group.minShardIndex - other.minShardIndex;
}
}
private static class GroupMerger<T> {
private final GroupComparator<T> groupComp;
private final SortedSet<MergedGroup<T>> queue;
private final Map<T,MergedGroup<T>> groupsSeen;
public GroupMerger(Sort groupSort) throws IOException {
groupComp = new GroupComparator<T>(groupSort);
queue = new TreeSet<MergedGroup<T>>(groupComp);
groupsSeen = new HashMap<T,MergedGroup<T>>();
}
@SuppressWarnings("unchecked")
private void updateNextGroup(int topN, ShardIter<T> shard) {
while(shard.iter.hasNext()) {
final SearchGroup<T> group = shard.next();
MergedGroup<T> mergedGroup = groupsSeen.get(group.groupValue);
final boolean isNew = mergedGroup == null;
//System.out.println(" next group=" + (group.groupValue == null ? "null" : ((BytesRef) group.groupValue).utf8ToString()) + " sort=" + Arrays.toString(group.sortValues));
if (isNew) {
// Start a new group:
//System.out.println(" new");
mergedGroup = new MergedGroup<T>(group.groupValue);
mergedGroup.minShardIndex = shard.shardIndex;
assert group.sortValues != null;
mergedGroup.topValues = group.sortValues;
groupsSeen.put(group.groupValue, mergedGroup);
mergedGroup.inQueue = true;
queue.add(mergedGroup);
} else if (mergedGroup.processed) {
// This shard produced a group that we already
// processed; move on to next group...
continue;
} else {
//System.out.println(" old");
boolean competes = false;
for(int compIDX=0;compIDX<groupComp.comparators.length;compIDX++) {
final int cmp = groupComp.reversed[compIDX] * groupComp.comparators[compIDX].compareValues(group.sortValues[compIDX],
mergedGroup.topValues[compIDX]);
if (cmp < 0) {
// Definitely competes
competes = true;
break;
} else if (cmp > 0) {
// Definitely does not compete
break;
} else if (compIDX == groupComp.comparators.length-1) {
if (shard.shardIndex < mergedGroup.minShardIndex) {
competes = true;
}
}
}
//System.out.println(" competes=" + competes);
if (competes) {
// Group's sort changed -- remove & re-insert
if (mergedGroup.inQueue) {
queue.remove(mergedGroup);
}
mergedGroup.topValues = group.sortValues;
mergedGroup.minShardIndex = shard.shardIndex;
queue.add(mergedGroup);
mergedGroup.inQueue = true;
}
}
mergedGroup.shards.add(shard);
break;
}
// Prune un-competitive groups:
while(queue.size() > topN) {
// TODO java 1.6: .pollLast
final MergedGroup<T> group = queue.last();
//System.out.println("PRUNE: " + group);
queue.remove(group);
group.inQueue = false;
}
}
public Collection<SearchGroup<T>> merge(List<Collection<SearchGroup<T>>> shards, int offset, int topN) {
final int maxQueueSize = offset + topN;
//System.out.println("merge");
// Init queue:
for(int shardIDX=0;shardIDX<shards.size();shardIDX++) {
final Collection<SearchGroup<T>> shard = shards.get(shardIDX);
if (!shard.isEmpty()) {
//System.out.println(" insert shard=" + shardIDX);
updateNextGroup(maxQueueSize, new ShardIter<T>(shard, shardIDX));
}
}
// Pull merged topN groups:
final List<SearchGroup<T>> newTopGroups = new ArrayList<SearchGroup<T>>();
int count = 0;
while(queue.size() != 0) {
// TODO Java 1.6: pollFirst()
final MergedGroup<T> group = queue.first();
queue.remove(group);
group.processed = true;
//System.out.println(" pop: shards=" + group.shards + " group=" + (group.groupValue == null ? "null" : (((BytesRef) group.groupValue).utf8ToString())) + " sortValues=" + Arrays.toString(group.topValues));
if (count++ >= offset) {
final SearchGroup<T> newGroup = new SearchGroup<T>();
newGroup.groupValue = group.groupValue;
newGroup.sortValues = group.topValues;
newTopGroups.add(newGroup);
if (newTopGroups.size() == topN) {
break;
}
//} else {
// System.out.println(" skip < offset");
}
// Advance all iters in this group:
for(ShardIter<T> shardIter : group.shards) {
updateNextGroup(maxQueueSize, shardIter);
}
}
if (newTopGroups.size() == 0) {
return null;
} else {
return newTopGroups;
}
}
}
/** Merges multiple collections of top groups, for example
* obtained from separate index shards. The provided
* groupSort must match how the groups were sorted, and
* the provided SearchGroups must have been computed
* with fillFields=true passed to {@link
* AbstractFirstPassGroupingCollector#getTopGroups}.
*
* <p>NOTE: this returns null if <code>topGroups</code> is empty.
*/
public static <T> Collection<SearchGroup<T>> merge(List<Collection<SearchGroup<T>>> topGroups, int offset, int topN, Sort groupSort)
throws IOException {
if (topGroups.size() == 0) {
return null;
} else {
return new GroupMerger<T>(groupSort).merge(topGroups, offset, topN);
}
}
}
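A hedged sketch of driving the first pass across shards with the merge method above; the "category" group field, the class name, and the shard searchers are assumptions:

package org.apache.lucene.search.grouping;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Sort;
import org.apache.lucene.util.BytesRef;

class FirstPassDemo {
  // First pass of distributed grouping: collect top groups per shard,
  // then merge them into the global top groups:
  static Collection<SearchGroup<BytesRef>> firstPass(IndexSearcher[] shards,
                                                     Query query, Sort groupSort,
                                                     int topNGroups) throws IOException {
    List<Collection<SearchGroup<BytesRef>>> shardGroups =
        new ArrayList<Collection<SearchGroup<BytesRef>>>();
    for (IndexSearcher shard : shards) {
      TermFirstPassGroupingCollector c =
          new TermFirstPassGroupingCollector("category", groupSort, topNGroups);
      shard.search(query, c);
      // fillFields=true is required; merge rejects groups without sortValues:
      Collection<SearchGroup<BytesRef>> top = c.getTopGroups(0, true);
      if (top != null) {
        shardGroups.add(top);
      }
    }
    // Returns null if no shard produced any group:
    return SearchGroup.merge(shardGroups, 0, topNGroups, groupSort);
  }
}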

View File

@ -1,7 +1,5 @@
package org.apache.lucene.search.grouping;
import org.apache.lucene.search.SortField;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@ -19,6 +17,14 @@ import org.apache.lucene.search.SortField;
* limitations under the License.
*/
import java.io.IOException;
import java.util.Arrays;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TopDocs;
/** Represents result returned by a grouping search.
*
* @lucene.experimental */
@ -58,4 +64,113 @@ public class TopGroups<GROUP_VALUE_TYPE> {
this.groups = oldTopGroups.groups;
this.totalGroupCount = totalGroupCount;
}
/** Merges an array of TopGroups, for example obtained
* from the second-pass collector across multiple
* shards. Each TopGroups must have been sorted by the
* same groupSort and docSort, and the top groups passed
* to all second-pass collectors must be the same.
*
* <b>NOTE</b>: this cannot merge totalGroupCount; ie the
* returned TopGroups will have null totalGroupCount.
*
* <b>NOTE</b>: the topDocs in each GroupDocs is actually
* an instance of TopDocsAndShards
*/
public static <T> TopGroups<T> merge(TopGroups<T>[] shardGroups, Sort groupSort, Sort docSort, int docOffset, int docTopN)
throws IOException {
//System.out.println("TopGroups.merge");
if (shardGroups.length == 0) {
return null;
}
int totalHitCount = 0;
int totalGroupedHitCount = 0;
final int numGroups = shardGroups[0].groups.length;
for(TopGroups<T> shard : shardGroups) {
if (numGroups != shard.groups.length) {
throw new IllegalArgumentException("number of groups differs across shards; you must pass same top groups to all shards' second-pass collector");
}
totalHitCount += shard.totalHitCount;
totalGroupedHitCount += shard.totalGroupedHitCount;
}
@SuppressWarnings("unchecked")
final GroupDocs<T>[] mergedGroupDocs = new GroupDocs[numGroups];
final TopDocs[] shardTopDocs = new TopDocs[shardGroups.length];
for(int groupIDX=0;groupIDX<numGroups;groupIDX++) {
final T groupValue = shardGroups[0].groups[groupIDX].groupValue;
//System.out.println(" merge groupValue=" + groupValue + " sortValues=" + Arrays.toString(shardGroups[0].groups[groupIDX].groupSortValues));
float maxScore = Float.MIN_VALUE;
int totalHits = 0;
for(int shardIDX=0;shardIDX<shardGroups.length;shardIDX++) {
//System.out.println(" shard=" + shardIDX);
final TopGroups<T> shard = shardGroups[shardIDX];
final GroupDocs shardGroupDocs = shard.groups[groupIDX];
if (groupValue == null) {
if (shardGroupDocs.groupValue != null) {
throw new IllegalArgumentException("group values differ across shards; you must pass same top groups to all shards' second-pass collector");
}
} else if (!groupValue.equals(shardGroupDocs.groupValue)) {
throw new IllegalArgumentException("group values differ across shards; you must pass same top groups to all shards' second-pass collector");
}
/*
for(ScoreDoc sd : shardGroupDocs.scoreDocs) {
System.out.println(" doc=" + sd.doc);
}
*/
shardTopDocs[shardIDX] = new TopDocs(shardGroupDocs.totalHits,
shardGroupDocs.scoreDocs,
shardGroupDocs.maxScore);
maxScore = Math.max(maxScore, shardGroupDocs.maxScore);
totalHits += shardGroupDocs.totalHits;
}
final TopDocs.TopDocsAndShards mergedTopDocs = TopDocs.merge(docSort, docOffset + docTopN, shardTopDocs);
// Slice:
final ScoreDoc[] mergedScoreDocs;
final int[] mergedShardIndex;
if (docOffset == 0) {
mergedScoreDocs = mergedTopDocs.scoreDocs;
mergedShardIndex = mergedTopDocs.shardIndex;
} else if (docOffset >= mergedTopDocs.scoreDocs.length) {
mergedScoreDocs = new ScoreDoc[0];
mergedShardIndex = new int[0];
} else {
mergedScoreDocs = new ScoreDoc[mergedTopDocs.scoreDocs.length - docOffset];
System.arraycopy(mergedTopDocs.scoreDocs,
docOffset,
mergedScoreDocs,
0,
mergedTopDocs.scoreDocs.length - docOffset);
mergedShardIndex = new int[mergedTopDocs.scoreDocs.length - docOffset];
System.arraycopy(mergedTopDocs.shardIndex,
docOffset,
mergedShardIndex,
0,
mergedTopDocs.scoreDocs.length - docOffset);
}
//System.out.println("SHARDS=" + Arrays.toString(mergedTopDocs.shardIndex));
mergedGroupDocs[groupIDX] = new GroupDocsAndShards<T>(maxScore,
totalHits,
mergedScoreDocs,
groupValue,
shardGroups[0].groups[groupIDX].groupSortValues,
mergedShardIndex);
}
return new TopGroups<T>(groupSort.getSort(),
docSort == null ? null : docSort.getSort(),
totalHitCount,
totalGroupedHitCount,
mergedGroupDocs);
}
}
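And the matching second pass, sketched under the same assumptions; every shard must collect against the same merged top groups, and the TermSecondPassGroupingCollector signature is as I understand the grouping module of this era, so treat it as an assumption:

package org.apache.lucene.search.grouping;

import java.io.IOException;
import java.util.Collection;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Sort;
import org.apache.lucene.util.BytesRef;

class SecondPassDemo {
  // Second pass: every shard collects docs for the SAME merged top
  // groups, then TopGroups.merge combines the per-shard results:
  static TopGroups<BytesRef> secondPass(IndexSearcher[] shards, Query query,
                                        Collection<SearchGroup<BytesRef>> mergedTopGroups,
                                        Sort groupSort, Sort docSort,
                                        int docsPerGroup) throws IOException {
    @SuppressWarnings("unchecked")
    TopGroups<BytesRef>[] shardTopGroups = new TopGroups[shards.length];
    for (int i = 0; i < shards.length; i++) {
      TermSecondPassGroupingCollector c = new TermSecondPassGroupingCollector(
          "category", mergedTopGroups, groupSort, docSort, docsPerGroup,
          true /*getScores*/, true /*getMaxScores*/, true /*fillSortFields*/);
      shards[i].search(query, c);
      shardTopGroups[i] = c.getTopGroups(0);
    }
    // docOffset=0 here; the per-group docs are re-merged by docSort:
    return TopGroups.merge(shardTopGroups, groupSort, docSort, 0, docsPerGroup);
  }
}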

View File

@ -17,6 +17,9 @@
package org.apache.lucene.search.grouping;
import java.io.IOException;
import java.util.*;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
@ -28,11 +31,9 @@ import org.apache.lucene.search.*;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.ReaderUtil;
import org.apache.lucene.util._TestUtil;
import java.io.IOException;
import java.util.*;
// TODO
// - should test relevance sort too
// - test null
@ -244,7 +245,6 @@ public class TestGrouping extends LuceneTestCase {
return fields;
}
/*
private String groupToString(BytesRef b) {
if (b == null) {
return "null";
@ -252,7 +252,6 @@ public class TestGrouping extends LuceneTestCase {
return b.utf8ToString();
}
}
*/
private TopGroups<BytesRef> slowGrouping(GroupDoc[] groupDocs,
String searchTerm,
@ -418,6 +417,38 @@ public class TestGrouping extends LuceneTestCase {
return r;
}
private static class ShardState {
public final ShardSearcher[] subSearchers;
public final int[] docStarts;
public ShardState(IndexSearcher s) {
IndexReader[] subReaders = s.getIndexReader().getSequentialSubReaders();
if (subReaders == null) {
subReaders = new IndexReader[] {s.getIndexReader()};
}
subSearchers = new ShardSearcher[subReaders.length];
final IndexReader.ReaderContext ctx = s.getTopReaderContext();
if (ctx instanceof IndexReader.AtomicReaderContext) {
assert subSearchers.length == 1;
subSearchers[0] = new ShardSearcher((IndexReader.AtomicReaderContext) ctx, ctx);
} else {
final IndexReader.CompositeReaderContext compCTX = (IndexReader.CompositeReaderContext) ctx;
for(int searcherIDX=0;searcherIDX<subSearchers.length;searcherIDX++) {
subSearchers[searcherIDX] = new ShardSearcher(compCTX.leaves[searcherIDX], compCTX);
}
}
docStarts = new int[subSearchers.length];
int docBase = 0;
for(int subIDX=0;subIDX<docStarts.length;subIDX++) {
docStarts[subIDX] = docBase;
docBase += subReaders[subIDX].maxDoc();
//System.out.println("docStarts[" + subIDX + "]=" + docStarts[subIDX]);
}
}
}
public void testRandom() throws Exception {
for(int iter=0;iter<3;iter++) {
@ -525,7 +556,8 @@ public class TestGrouping extends LuceneTestCase {
Directory dir2 = null;
try {
final IndexSearcher s = new IndexSearcher(r);
final IndexSearcher s = newSearcher(r);
final ShardState shards = new ShardState(s);
for(int contentID=0;contentID<3;contentID++) {
final ScoreDoc[] hits = s.search(new TermQuery(new Term("content", "real"+contentID)), numDocs).scoreDocs;
@ -549,7 +581,8 @@ public class TestGrouping extends LuceneTestCase {
final Filter lastDocInBlock = new CachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("groupend", "x"))));
final int[] docIDToID2 = FieldCache.DEFAULT.getInts(r2, "id");
final IndexSearcher s2 = new IndexSearcher(r2);
final IndexSearcher s2 = newSearcher(r2);
final ShardState shards2 = new ShardState(s2);
// Reader2 only increases maxDoc() vs reader, which
// means a monotonic shift in scores, so we can
@ -604,7 +637,7 @@ public class TestGrouping extends LuceneTestCase {
}
final int topNGroups = _TestUtil.nextInt(random, 1, 30);
//final int topNGroups = 4;
//final int topNGroups = 10;
final int docsPerGroup = _TestUtil.nextInt(random, 1, 50);
final int groupOffset = _TestUtil.nextInt(random, 0, (topNGroups-1)/2);
@ -658,7 +691,9 @@ public class TestGrouping extends LuceneTestCase {
}
}
s.search(new TermQuery(new Term("content", searchTerm)), c);
// Search top reader:
final Query q = new TermQuery(new Term("content", searchTerm));
s.search(q, c);
if (doCache && !useWrappingCollector) {
if (cCache.isCached()) {
@ -679,6 +714,18 @@ public class TestGrouping extends LuceneTestCase {
final Collection<SearchGroup<BytesRef>> topGroups = c1.getTopGroups(groupOffset, fillFields);
final TopGroups groupsResult;
if (VERBOSE) {
System.out.println("TEST: topGroups:");
if (topGroups == null) {
System.out.println(" null");
} else {
for(SearchGroup<BytesRef> groupx : topGroups) {
System.out.println(" " + groupToString(groupx.groupValue) + " sort=" + Arrays.toString(groupx.sortValues));
}
}
}
final TopGroups<BytesRef> topGroupsShards = searchShards(s, shards.subSearchers, q, groupSort, docSort, groupOffset, topNGroups, docOffset, docsPerGroup, getScores, getMaxScores);
if (topGroups != null) {
@ -734,7 +781,13 @@ public class TestGrouping extends LuceneTestCase {
}
}
}
assertEquals(docIDToID, expectedGroups, groupsResult, true, getScores);
assertEquals(docIDToID, expectedGroups, groupsResult, true, true, true, getScores);
// Confirm merged shards match:
assertEquals(docIDToID, expectedGroups, topGroupsShards, true, false, fillFields, getScores);
if (topGroupsShards != null) {
verifyShards(shards.docStarts, topGroupsShards);
}
final boolean needsScores = getScores || getMaxScores || docSort == null;
final BlockGroupingCollector c3 = new BlockGroupingCollector(groupSort, groupOffset+topNGroups, needsScores, lastDocInBlock);
@ -758,6 +811,8 @@ public class TestGrouping extends LuceneTestCase {
groupsResult2 = tempTopGroups2;
}
final TopGroups<BytesRef> topGroupsBlockShards = searchShards(s2, shards2.subSearchers, q, groupSort, docSort, groupOffset, topNGroups, docOffset, docsPerGroup, getScores, getMaxScores);
if (expectedGroups != null) {
// Fixup scores for reader2
for (GroupDocs groupDocsHits : expectedGroups.groups) {
@ -799,8 +854,11 @@ public class TestGrouping extends LuceneTestCase {
}
}
assertEquals(docIDToID2, expectedGroups, groupsResult2, false, getScores);
assertEquals(docIDToID2, expectedGroups, groupsResult2, false, true, true, getScores);
assertEquals(docIDToID2, expectedGroups, topGroupsBlockShards, false, false, fillFields, getScores);
}
s.close();
s2.close();
} finally {
FieldCache.DEFAULT.purge(r);
if (r2 != null) {
@ -816,7 +874,95 @@ public class TestGrouping extends LuceneTestCase {
}
}
private void assertEquals(int[] docIDtoID, TopGroups expected, TopGroups actual, boolean verifyGroupValues, boolean testScores) {
private void verifyShards(int[] docStarts, TopGroups<BytesRef> topGroups) {
for(GroupDocs group : topGroups.groups) {
assertTrue(group instanceof GroupDocsAndShards);
GroupDocsAndShards withShards = (GroupDocsAndShards) group;
for(int hitIDX=0;hitIDX<withShards.scoreDocs.length;hitIDX++) {
final ScoreDoc sd = withShards.scoreDocs[hitIDX];
assertEquals("doc=" + sd.doc + " wrong shard",
ReaderUtil.subIndex(sd.doc, docStarts),
withShards.shardIndex[hitIDX]);
}
}
}
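verifyShards leans on ReaderUtil.subIndex to map each merged hit back to the shard that produced it: given the docBase starts recorded in ShardState, subIndex returns the slot of the range containing the docID. A minimal sketch of that mapping, with hypothetical docStarts values:
    // Hypothetical starts for three shards holding 40, 60 and 25 docs.
    final int[] docStarts = new int[] {0, 40, 100};
    // ReaderUtil.subIndex returns the index of the range containing the doc:
    final int shard = ReaderUtil.subIndex(57, docStarts);   // -> 1
    final int localDoc = 57 - docStarts[shard];             // -> 17 within shard 1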
private void assertEquals(Collection<SearchGroup<BytesRef>> groups1, Collection<SearchGroup<BytesRef>> groups2, boolean doSortValues) {
assertEquals(groups1.size(), groups2.size());
final Iterator<SearchGroup<BytesRef>> iter1 = groups1.iterator();
final Iterator<SearchGroup<BytesRef>> iter2 = groups2.iterator();
while(iter1.hasNext()) {
assertTrue(iter2.hasNext());
SearchGroup<BytesRef> group1 = iter1.next();
SearchGroup<BytesRef> group2 = iter2.next();
assertEquals(group1.groupValue, group2.groupValue);
if (doSortValues) {
assertArrayEquals(group1.sortValues, group2.sortValues);
}
}
assertFalse(iter2.hasNext());
}
private TopGroups<BytesRef> searchShards(IndexSearcher topSearcher, ShardSearcher[] subSearchers, Query query, Sort groupSort, Sort docSort, int groupOffset, int topNGroups, int docOffset,
int topNDocs, boolean getScores, boolean getMaxScores) throws Exception {
// TODO: swap in caching, all groups collector here
// too...
if (VERBOSE) {
System.out.println("TEST: " + subSearchers.length + " shards: " + Arrays.toString(subSearchers));
}
// Run 1st pass collector to get top groups per shard
final Weight w = query.weight(topSearcher);
final List<Collection<SearchGroup<BytesRef>>> shardGroups = new ArrayList<Collection<SearchGroup<BytesRef>>>();
for(int shardIDX=0;shardIDX<subSearchers.length;shardIDX++) {
final TermFirstPassGroupingCollector c = new TermFirstPassGroupingCollector("group", groupSort, groupOffset+topNGroups);
subSearchers[shardIDX].search(w, c);
final Collection<SearchGroup<BytesRef>> topGroups = c.getTopGroups(0, true);
if (topGroups != null) {
if (VERBOSE) {
System.out.println(" shard " + shardIDX + " s=" + subSearchers[shardIDX] + " " + topGroups.size() + " groups:");
for(SearchGroup<BytesRef> group : topGroups) {
System.out.println(" " + groupToString(group.groupValue) + " sort=" + Arrays.toString(group.sortValues));
}
}
shardGroups.add(topGroups);
}
}
final Collection<SearchGroup<BytesRef>> mergedTopGroups = SearchGroup.merge(shardGroups, groupOffset, topNGroups, groupSort);
if (VERBOSE) {
System.out.println(" merged:");
if (mergedTopGroups == null) {
System.out.println(" null");
} else {
for(SearchGroup<BytesRef> group : mergedTopGroups) {
System.out.println(" " + groupToString(group.groupValue) + " sort=" + Arrays.toString(group.sortValues));
}
}
}
if (mergedTopGroups != null) {
// Now 2nd pass:
@SuppressWarnings("unchecked")
final TopGroups<BytesRef>[] shardTopGroups = new TopGroups[subSearchers.length];
for(int shardIDX=0;shardIDX<subSearchers.length;shardIDX++) {
final TermSecondPassGroupingCollector c = new TermSecondPassGroupingCollector("group", mergedTopGroups, groupSort, docSort,
docOffset + topNDocs, getScores, getMaxScores, true);
subSearchers[shardIDX].search(w, c);
shardTopGroups[shardIDX] = c.getTopGroups(0);
}
return TopGroups.merge(shardTopGroups, groupSort, docSort, docOffset, topNDocs);
} else {
return null;
}
}
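Two details of searchShards are easy to miss. Each shard collects groupOffset+topNGroups groups but retrieves them with getTopGroups(0, true): the offset is applied once, at merge time, because a group outside one shard's local offset..topN window can still land inside the global window. Likewise fillFields is forced to true so that SearchGroup.merge has sort values to compare across shards. A hedged sketch of just that merge boundary, with inputs gathered as in the method above:
    // Per-shard pass: over-fetch by the offset, but do NOT apply it locally.
    final Collection<SearchGroup<BytesRef>> shardTop = c.getTopGroups(0 /* offset */, true /* fillFields */);
    // Global pass: offsets are applied here, where all candidates are visible.
    final Collection<SearchGroup<BytesRef>> mergedGroups = SearchGroup.merge(shardGroups, groupOffset, topNGroups, groupSort);
    final TopGroups<BytesRef> mergedDocs = TopGroups.merge(shardTopGroups, groupSort, docSort, docOffset, topNDocs);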
private void assertEquals(int[] docIDtoID, TopGroups expected, TopGroups actual, boolean verifyGroupValues, boolean verifyTotalGroupCount, boolean verifySortValues, boolean testScores) {
if (expected == null) {
assertNull(actual);
return;
@ -826,7 +972,7 @@ public class TestGrouping extends LuceneTestCase {
assertEquals(expected.groups.length, actual.groups.length);
assertEquals(expected.totalHitCount, actual.totalHitCount);
assertEquals(expected.totalGroupedHitCount, actual.totalGroupedHitCount);
if (expected.totalGroupCount != null) {
if (expected.totalGroupCount != null && verifyTotalGroupCount) {
assertEquals(expected.totalGroupCount, actual.totalGroupCount);
}
@ -839,7 +985,9 @@ public class TestGrouping extends LuceneTestCase {
if (verifyGroupValues) {
assertEquals(expectedGroup.groupValue, actualGroup.groupValue);
}
if (verifySortValues) {
assertArrayEquals(expectedGroup.groupSortValues, actualGroup.groupSortValues);
}
// TODO
// assertEquals(expectedGroup.maxScore, actualGroup.maxScore);
@ -860,8 +1008,32 @@ public class TestGrouping extends LuceneTestCase {
// TODO: too anal for now
//assertEquals(Float.NaN, actualFD.score);
}
if (verifySortValues) {
assertArrayEquals(expectedFD.fields, actualFD.fields);
}
}
}
}
private static class ShardSearcher extends IndexSearcher {
private final IndexReader.AtomicReaderContext[] ctx;
public ShardSearcher(IndexReader.AtomicReaderContext ctx, IndexReader.ReaderContext parent) {
super(parent);
this.ctx = new IndexReader.AtomicReaderContext[] {ctx};
}
public void search(Weight weight, Collector collector) throws IOException {
search(ctx, weight, null, collector);
}
public TopDocs search(Weight weight, int topN) throws IOException {
return search(ctx, weight, null, topN);
}
@Override
public String toString() {
return "ShardSearcher(" + ctx[0] + ")";
}
}
}
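ShardSearcher also makes it easy to exercise the third merge method this issue adds, TopDocs.merge, which the grouping test never reaches. A hedged sketch reusing the subSearchers and Weight from searchShards; the merge signature is assumed from the CHANGES entry rather than shown in this diff:
    // Merge plain (ungrouped) top hits across the same shards.
    final TopDocs[] shardHits = new TopDocs[subSearchers.length];
    for(int shardIDX=0;shardIDX<subSearchers.length;shardIDX++) {
      shardHits[shardIDX] = subSearchers[shardIDX].search(w, 10);   // ShardSearcher.search(Weight, int)
    }
    final TopDocs mergedHits = TopDocs.merge(null, 10, shardHits);  // null Sort = merge by score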

View File

@ -476,8 +476,8 @@ class ElevationComparatorSource extends FieldComparatorSource {
}
@Override
public FieldComparator newComparator(final String fieldname, final int numHits, int sortPos, boolean reversed) throws IOException {
return new FieldComparator() {
public FieldComparator<Integer> newComparator(final String fieldname, final int numHits, int sortPos, boolean reversed) throws IOException {
return new FieldComparator<Integer>() {
FieldCache.DocTermsIndex idIndex;
private final int[] values = new int[numHits];
@ -517,7 +517,7 @@ class ElevationComparatorSource extends FieldComparatorSource {
}
@Override
public Comparable value(int slot) {
public Integer value(int slot) {
return values[slot];
}
};

View File

@ -102,8 +102,8 @@ public class RandomSortField extends FieldType {
private static FieldComparatorSource randomComparatorSource = new FieldComparatorSource() {
@Override
public FieldComparator newComparator(final String fieldname, final int numHits, int sortPos, boolean reversed) throws IOException {
return new FieldComparator() {
public FieldComparator<Integer> newComparator(final String fieldname, final int numHits, int sortPos, boolean reversed) throws IOException {
return new FieldComparator<Integer>() {
int seed;
private final int[] values = new int[numHits];
int bottomVal;
@ -135,7 +135,7 @@ public class RandomSortField extends FieldType {
}
@Override
public Comparable value(int slot) {
public Integer value(int slot) {
return values[slot];
}
};

View File

@ -56,7 +56,7 @@ public class MissingStringLastComparatorSource extends FieldComparatorSource {
// Copied from Lucene's TermOrdValComparator and modified since the Lucene version couldn't
// be extended.
class TermOrdValComparator_SML extends FieldComparator {
class TermOrdValComparator_SML extends FieldComparator<BytesRef> {
private static final int NULL_ORD = Integer.MAX_VALUE;
private final int[] ords;
@ -98,7 +98,7 @@ class TermOrdValComparator_SML extends FieldComparator {
}
@Override
public Comparable<?> value(int slot) {
public BytesRef value(int slot) {
throw new UnsupportedOperationException();
}
@ -111,7 +111,7 @@ class TermOrdValComparator_SML extends FieldComparator {
// ords) per-segment comparator. NOTE: this is messy;
// we do this only because hotspot can't reliably inline
// the underlying array access when looking up doc->ord
private static abstract class PerSegmentComparator extends FieldComparator {
private static abstract class PerSegmentComparator extends FieldComparator<BytesRef> {
protected TermOrdValComparator_SML parent;
protected final int[] ords;
protected final BytesRef[] values;
@ -199,7 +199,7 @@ class TermOrdValComparator_SML extends FieldComparator {
}
@Override
public Comparable<?> value(int slot) {
public BytesRef value(int slot) {
return values==null ? parent.NULL_VAL : values[slot];
}
}

View File

@ -141,7 +141,7 @@ public abstract class ValueSource implements Serializable {
* off of the {@link org.apache.solr.search.function.DocValues} for a ValueSource
* instead of the normal Lucene FieldComparator that works off of a FieldCache.
*/
class ValueSourceComparator extends FieldComparator {
class ValueSourceComparator extends FieldComparator<Double> {
private final double[] values;
private DocValues docVals;
private double bottom;
@ -195,7 +195,7 @@ public abstract class ValueSource implements Serializable {
}
@Override
public Comparable value(int slot) {
public Double value(int slot) {
return values[slot];
}
}
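All four Solr comparators above receive the same mechanical change: extend FieldComparator<T> rather than the raw type, and narrow value(int) from Comparable to T. For callers, and for the merge code that compares sort values produced on different shards, the payoff is type safety; a toy before/after sketch, where both comparator variables are hypothetical placeholders:
    // Before: raw FieldComparator, so callers cast every slot value.
    FieldComparator rawCmp = null;                 // placeholder, e.g. the old ValueSourceComparator
    Double before = (Double) rawCmp.value(0);
    // After: the type parameter carries the value type, and compareValues(T, T)
    // can compare slot values from two different shards without casts
    // (natural ordering unless the comparator overrides it).
    FieldComparator<Double> typedCmp = null;       // placeholder, e.g. the new ValueSourceComparator
    Double after = typedCmp.value(0);
    int order = typedCmp.compareValues(after, before);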