mirror of https://github.com/apache/lucene.git
LUCENE-3191: add TopDocs.merge, TopGroups.merge, SearchGroup.merge, to make sharding easier
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1136080 13f79535-47bb-0310-9956-ffa450edef68
parent 3d8fc3de5d
commit a1e74fd6c9
@@ -520,6 +520,9 @@ New Features
   algorithm over objects that implement the new TwoPhaseCommit interface (such
   as IndexWriter). (Shai Erera)

+* LUCENE-3191: Added TopDocs.merge, to facilitate merging results from
+  different shards (Uwe Schindler, Mike McCandless)
+
 Build

 * LUCENE-1344: Create OSGi bundle using dev-tools/maven.
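A hedged usage sketch of the new call (the per-shard IndexSearcher setup is assumed, not part of this entry; passing sort=null merges by descending score):

  // Collect the top 10 hits from each shard, then merge into a global top 10.
  public static TopDocs searchShards(IndexSearcher[] shards, Query query) throws IOException {
    TopDocs[] shardHits = new TopDocs[shards.length];
    for (int i = 0; i < shards.length; i++) {
      shardHits[i] = shards[i].search(query, 10);
    }
    return TopDocs.merge(null, 10, shardHits);  // returns TopDocs.TopDocsAndShards
  }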
@@ -75,6 +75,10 @@ New Features
   allow an app to control which indexing changes must be visible to
   which search requests. (Mike McCandless)

+* LUCENE-3191: Added SearchGroup.merge and TopGroups.merge, to
+  facilitate doing grouping in a distributed environment (Uwe
+  Schindler, Mike McCandless)
+
 API Changes

 * LUCENE-3141: add getter method to access fragInfos in FieldFragList.
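A hedged sketch of where these calls sit in a distributed grouping flow (the shard-side collectors are elided; only the merge call is from this change):

  // Combine per-shard first-pass results (each computed with fillFields=true)
  // into the global top 10 groups; the same groups are then handed to every
  // shard's second-pass collector, whose outputs TopGroups.merge combines.
  public static Collection<SearchGroup<BytesRef>> mergeFirstPass(
      List<Collection<SearchGroup<BytesRef>>> perShardGroups, Sort groupSort) throws IOException {
    return SearchGroup.merge(perShardGroups, 0, 10, groupSort);
  }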
@@ -33,7 +33,7 @@ import org.apache.lucene.util.BytesRef;
  *  This class will be removed in Lucene 5.0
  */
 @Deprecated
-public final class SlowCollatedStringComparator extends FieldComparator {
+public final class SlowCollatedStringComparator extends FieldComparator<BytesRef> {

   private final String[] values;
   private DocTerms currentDocTerms;

@@ -99,8 +99,22 @@ public final class SlowCollatedStringComparator extends FieldComparator {
   }

   @Override
-  public Comparable<?> value(int slot) {
+  public BytesRef value(int slot) {
     final String s = values[slot];
     return s == null ? null : new BytesRef(values[slot]);
   }
+
+  @Override
+  public int compareValues(BytesRef first, BytesRef second) {
+    if (first == null) {
+      if (second == null) {
+        return 0;
+      }
+      return -1;
+    } else if (second == null) {
+      return 1;
+    } else {
+      return collator.compare(first, second);
+    }
+  }
 }

@@ -35,16 +35,15 @@ public class DistanceFieldComparatorSource extends FieldComparatorSource {
   private DistanceScoreDocLookupComparator dsdlc;

   public DistanceFieldComparatorSource(Filter distanceFilter) {
-
     this.distanceFilter = (DistanceFilter) distanceFilter;
-
   }

   public void cleanUp() {
     distanceFilter = null;
-    if (dsdlc != null)
+    if (dsdlc != null) {
       dsdlc.cleanUp();
+    }
     dsdlc = null;
   }

@@ -56,7 +55,7 @@ public class DistanceFieldComparatorSource extends FieldComparatorSource {
     return dsdlc;
   }

-  private class DistanceScoreDocLookupComparator extends FieldComparator {
+  private class DistanceScoreDocLookupComparator extends FieldComparator<Double> {

     private double[] values;
     private double bottom;

@@ -103,7 +102,6 @@ public class DistanceFieldComparatorSource extends FieldComparatorSource {
     @Override
     public void setBottom(int slot) {
       this.bottom = values[slot];
-
     }

     @Override

@@ -116,9 +114,8 @@ public class DistanceFieldComparatorSource extends FieldComparatorSource {
     }

     @Override
-    public Comparable<Double> value(int slot) {
+    public Double value(int slot) {
       return values[slot];
     }
   }
-
 }
@@ -60,6 +60,11 @@ public final class SlowMultiReaderWrapper extends FilterIndexReader {
     readerContext = new AtomicReaderContext(this); // emulate atomic reader!
   }

+  @Override
+  public String toString() {
+    return "SlowMultiReaderWrapper(" + in + ")";
+  }
+
   @Override
   public Fields fields() throws IOException {
     return MultiFields.getFields(in);
@@ -96,7 +96,7 @@ import org.apache.lucene.util.packed.PackedInts;
  *
  * @lucene.experimental
  */
-public abstract class FieldComparator {
+public abstract class FieldComparator<T> {

   /**
    * Compare hit at slot1 with hit at slot2.

@@ -176,13 +176,21 @@ public abstract class FieldComparator {
    * Return the actual value in the slot.
    *
    * @param slot the value
-   * @return value in this slot upgraded to Comparable
+   * @return value in this slot
    */
-  public abstract Comparable<?> value(int slot);
+  public abstract T value(int slot);
+
+  /** Returns -1 if first is less than second.  Default
+   *  impl to assume the type implements Comparable and
+   *  invoke .compareTo; be sure to override this method if
+   *  your FieldComparator's type isn't a Comparable or
+   *  if your values may sometimes be null */
+  @SuppressWarnings("unchecked")
+  public int compareValues(T first, T second) {
+    return ((Comparable<T>) first).compareTo(second);
+  }

-  public static abstract class NumericComparator<T extends CachedArray> extends FieldComparator {
+  public static abstract class NumericComparator<T extends CachedArray, U extends Number> extends FieldComparator<U> {
    protected final CachedArrayCreator<T> creator;
    protected T cached;
    protected final boolean checkMissing;
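The default above only works for non-null Comparable values. As a sketch of the kind of override the new contract invites (modeled on the SlowCollatedStringComparator change earlier in this commit; this body is illustrative, not part of the commit), a comparator over possibly-null BytesRef values might sort nulls first:

  @Override
  public int compareValues(BytesRef first, BytesRef second) {
    if (first == null) {
      return second == null ? 0 : -1;   // nulls sort before any real value
    } else if (second == null) {
      return 1;
    } else {
      return first.compareTo(second);   // natural byte order otherwise
    }
  }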
@@ -203,7 +211,7 @@ public abstract class FieldComparator {

   /** Parses field's values as byte (using {@link
    *  FieldCache#getBytes} and sorts by ascending value */
-  public static final class ByteComparator extends NumericComparator<ByteValues> {
+  public static final class ByteComparator extends NumericComparator<ByteValues,Byte> {
    private byte[] docValues;
    private final byte[] values;
    private final byte missingValue;

@@ -252,7 +260,7 @@ public abstract class FieldComparator {
    }

    @Override
-   public Comparable<?> value(int slot) {
+   public Byte value(int slot) {
      return Byte.valueOf(values[slot]);
    }
  }

@@ -260,13 +268,12 @@ public abstract class FieldComparator {

  /** Parses field's values as double (using {@link
   *  FieldCache#getDoubles} and sorts by ascending value */
- public static final class DoubleComparator extends NumericComparator<DoubleValues> {
+ public static final class DoubleComparator extends NumericComparator<DoubleValues,Double> {
    private double[] docValues;
    private final double[] values;
    private final double missingValue;
    private double bottom;

-
    DoubleComparator(int numHits, DoubleValuesCreator creator, Double missingValue ) {
      super( creator, missingValue != null );
      values = new double[numHits];

@@ -324,13 +331,13 @@ public abstract class FieldComparator {
    }

    @Override
-   public Comparable<?> value(int slot) {
+   public Double value(int slot) {
      return Double.valueOf(values[slot]);
    }
  }

  /** Uses float index values to sort by ascending value */
- public static final class FloatDocValuesComparator extends FieldComparator {
+ public static final class FloatDocValuesComparator extends FieldComparator<Double> {
    private final double[] values;
    private Source currentReaderValues;
    private final String field;

@@ -386,14 +393,14 @@ public abstract class FieldComparator {
    }

    @Override
-   public Comparable<Double> value(int slot) {
+   public Double value(int slot) {
      return Double.valueOf(values[slot]);
    }
  }

  /** Parses field's values as float (using {@link
   *  FieldCache#getFloats} and sorts by ascending value */
- public static final class FloatComparator extends NumericComparator<FloatValues> {
+ public static final class FloatComparator extends NumericComparator<FloatValues,Float> {
    private float[] docValues;
    private final float[] values;
    private final float missingValue;

@@ -460,14 +467,14 @@ public abstract class FieldComparator {
    }

    @Override
-   public Comparable<?> value(int slot) {
+   public Float value(int slot) {
      return Float.valueOf(values[slot]);
    }
  }

  /** Parses field's values as short (using {@link
   *  FieldCache#getShorts} and sorts by ascending value */
- public static final class ShortComparator extends NumericComparator<ShortValues> {
+ public static final class ShortComparator extends NumericComparator<ShortValues,Short> {
    private short[] docValues;
    private final short[] values;
    private short bottom;

@@ -516,14 +523,14 @@ public abstract class FieldComparator {
    }

    @Override
-   public Comparable<?> value(int slot) {
+   public Short value(int slot) {
      return Short.valueOf(values[slot]);
    }
  }

  /** Parses field's values as int (using {@link
   *  FieldCache#getInts} and sorts by ascending value */
- public static final class IntComparator extends NumericComparator<IntValues> {
+ public static final class IntComparator extends NumericComparator<IntValues,Integer> {
    private int[] docValues;
    private final int[] values;
    private int bottom;                           // Value of bottom of queue

@@ -594,13 +601,13 @@ public abstract class FieldComparator {
    }

    @Override
-   public Comparable<?> value(int slot) {
+   public Integer value(int slot) {
      return Integer.valueOf(values[slot]);
    }
  }

  /** Loads int index values and sorts by ascending value. */
- public static final class IntDocValuesComparator extends FieldComparator {
+ public static final class IntDocValuesComparator extends FieldComparator<Long> {
    private final long[] values;
    private Source currentReaderValues;
    private final String field;

@@ -660,14 +667,14 @@ public abstract class FieldComparator {
    }

    @Override
-   public Comparable<Long> value(int slot) {
+   public Long value(int slot) {
      return Long.valueOf(values[slot]);
    }
  }

  /** Parses field's values as long (using {@link
   *  FieldCache#getLongs} and sorts by ascending value */
- public static final class LongComparator extends NumericComparator<LongValues> {
+ public static final class LongComparator extends NumericComparator<LongValues,Long> {
    private long[] docValues;
    private final long[] values;
    private long bottom;

@@ -735,7 +742,7 @@ public abstract class FieldComparator {
    }

    @Override
-   public Comparable<?> value(int slot) {
+   public Long value(int slot) {
      return Long.valueOf(values[slot]);
    }
  }

@@ -746,7 +753,7 @@ public abstract class FieldComparator {
   *  using {@link TopScoreDocCollector} directly (which {@link
   *  IndexSearcher#search} uses when no {@link Sort} is
   *  specified). */
- public static final class RelevanceComparator extends FieldComparator {
+ public static final class RelevanceComparator extends FieldComparator<Float> {
    private final float[] scores;
    private float bottom;
    private Scorer scorer;

@@ -791,15 +798,21 @@ public abstract class FieldComparator {
    }

    @Override
-   public Comparable<?> value(int slot) {
+   public Float value(int slot) {
      return Float.valueOf(scores[slot]);
    }
+
+   // Override because we sort reverse of natural Float order:
+   @Override
+   public int compareValues(Float first, Float second) {
+     // Reversed intentionally because relevance by default
+     // sorts descending:
+     return second.compareTo(first);
+   }
  }


  /** Sorts by ascending docID */
- public static final class DocComparator extends FieldComparator {
+ public static final class DocComparator extends FieldComparator<Integer> {
    private final int[] docIDs;
    private int docBase;
    private int bottom;

@@ -840,7 +853,7 @@ public abstract class FieldComparator {
    }

    @Override
-   public Comparable<?> value(int slot) {
+   public Integer value(int slot) {
      return Integer.valueOf(docIDs[slot]);
    }
  }

@@ -854,7 +867,7 @@ public abstract class FieldComparator {
   *  to large results, this comparator will be much faster
   *  than {@link TermValComparator}.  For very small
   *  result sets it may be slower. */
- public static final class TermOrdValComparator extends FieldComparator {
+ public static final class TermOrdValComparator extends FieldComparator<BytesRef> {
    /** @lucene.internal */
    final int[] ords;
    /** @lucene.internal */

@@ -920,7 +933,7 @@ public abstract class FieldComparator {
   *  the underlying array access when looking up doc->ord
   *  @lucene.internal
   */
  abstract class PerSegmentComparator extends FieldComparator {
+ abstract class PerSegmentComparator extends FieldComparator<BytesRef> {

    @Override
    public FieldComparator setNextReader(AtomicReaderContext context) throws IOException {

@@ -938,7 +951,7 @@ public abstract class FieldComparator {
    }

    @Override
-   public Comparable<?> value(int slot) {
+   public BytesRef value(int slot) {
      return TermOrdValComparator.this.value(slot);
    }
  }

@@ -1244,7 +1257,7 @@ public abstract class FieldComparator {
    }

    @Override
-   public Comparable<?> value(int slot) {
+   public BytesRef value(int slot) {
      return values[slot];
    }
  }

@@ -1253,7 +1266,7 @@ public abstract class FieldComparator {
   *  comparisons are done using BytesRef.compareTo, which is
   *  slow for medium to large result sets but possibly
   *  very fast for very small results sets. */
- public static final class TermValComparator extends FieldComparator {
+ public static final class TermValComparator extends FieldComparator<BytesRef> {

    private BytesRef[] values;
    private DocTerms docTerms;

@@ -1316,7 +1329,7 @@ public abstract class FieldComparator {
    }

    @Override
-   public Comparable<?> value(int slot) {
+   public BytesRef value(int slot) {
      return values[slot];
    }
  }
@@ -40,12 +40,13 @@ public class FieldDoc extends ScoreDoc {

  /** Expert: The values which are used to sort the referenced document.
   *  The order of these will match the original sort criteria given by a
-  *  Sort object.  Each Object will be either an Integer, Float or String,
-  *  depending on the type of values in the terms of the original field.
+  *  Sort object.  Each Object will have been returned from
+  *  the <code>value</code> method of the corresponding
+  *  FieldComparator used to sort this field.
   *  @see Sort
   *  @see IndexSearcher#search(Query,Filter,int,Sort)
   */
- public Comparable[] fields;
+ public Object[] fields;

  /** Expert: Creates one of these objects with empty sort information. */
  public FieldDoc (int doc, float score) {

@@ -53,7 +54,7 @@ public class FieldDoc extends ScoreDoc {
  }

  /** Expert: Creates one of these objects with the given sort information. */
- public FieldDoc (int doc, float score, Comparable[] fields) {
+ public FieldDoc (int doc, float score, Object[] fields) {
    super (doc, score);
    this.fields = fields;
  }
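Because value(int slot) now returns the comparator's concrete type, the objects stored in fields can be cast directly to that type. A minimal sketch, assuming an indexed int field named "price" (the field name is hypothetical):

  TopFieldDocs hits = searcher.search(query, null, 10, new Sort(new SortField("price", SortField.INT)));
  FieldDoc fd = (FieldDoc) hits.scoreDocs[0];
  Integer price = (Integer) fd.fields[0];  // IntComparator.value(slot) returned an Integer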
@@ -200,7 +200,7 @@ public abstract class FieldValueHitQueue extends PriorityQueue<FieldValueHitQueu
   */
  FieldDoc fillFields(final Entry entry) {
    final int n = comparators.length;
-   final Comparable<?>[] fields = new Comparable[n];
+   final Object[] fields = new Object[n];
    for (int i = 0; i < n; ++i) {
      fields[i] = comparators[i].value(entry.slot);
    }
@@ -91,10 +91,10 @@ public class SortField {
  public static final int BYTES = 12;

  /** Represents sorting by document score (relevance). */
- public static final SortField FIELD_SCORE = new SortField (null, SCORE);
+ public static final SortField FIELD_SCORE = new SortField(null, SCORE);

  /** Represents sorting by document number (index order). */
- public static final SortField FIELD_DOC = new SortField (null, DOC);
+ public static final SortField FIELD_DOC = new SortField(null, DOC);

  private String field;
  private int type;  // defaults to determining type dynamically

@@ -111,7 +111,7 @@ public class SortField {
   *  <code>type</code> is SCORE or DOC.
   *  @param type Type of values in the terms.
   */
- public SortField (String field, int type) {
+ public SortField(String field, int type) {
    initFieldType(field, type);
  }

@@ -122,7 +122,7 @@ public class SortField {
   *  @param type Type of values in the terms.
   *  @param reverse True if natural order should be reversed.
   */
- public SortField (String field, int type, boolean reverse) {
+ public SortField(String field, int type, boolean reverse) {
    initFieldType(field, type);
    this.reverse = reverse;
  }

@@ -140,7 +140,7 @@ public class SortField {
   *  @deprecated (4.0) use EntryCreator version
   */
  @Deprecated
- public SortField (String field, FieldCache.Parser parser) {
+ public SortField(String field, FieldCache.Parser parser) {
    this(field, parser, false);
  }

@@ -158,7 +158,7 @@ public class SortField {
   *  @deprecated (4.0) use EntryCreator version
   */
  @Deprecated
- public SortField (String field, FieldCache.Parser parser, boolean reverse) {
+ public SortField(String field, FieldCache.Parser parser, boolean reverse) {
    if (field == null) {
      throw new IllegalArgumentException("field can only be null when type is SCORE or DOC");
    }

@@ -225,7 +225,7 @@ public class SortField {
   *  @param field Name of field to sort by; cannot be <code>null</code>.
   *  @param comparator Returns a comparator for sorting hits.
   */
- public SortField (String field, FieldComparatorSource comparator) {
+ public SortField(String field, FieldComparatorSource comparator) {
    initFieldType(field, CUSTOM);
    this.comparatorSource = comparator;
  }

@@ -235,7 +235,7 @@ public class SortField {
   *  @param comparator Returns a comparator for sorting hits.
   *  @param reverse True if natural order should be reversed.
   */
- public SortField (String field, FieldComparatorSource comparator, boolean reverse) {
+ public SortField(String field, FieldComparatorSource comparator, boolean reverse) {
    initFieldType(field, CUSTOM);
    this.reverse = reverse;
    this.comparatorSource = comparator;
@@ -89,7 +89,7 @@ public class TermQuery extends Query {
    public Scorer scorer(AtomicReaderContext context, ScorerContext scorerContext) throws IOException {
      final String field = term.field();
      final IndexReader reader = context.reader;
-     assert termStates.topReaderContext == ReaderUtil.getTopLevelContext(context) : "The top-reader used to create Weight is not the same as the current reader's top-reader";
+     assert termStates.topReaderContext == ReaderUtil.getTopLevelContext(context) : "The top-reader used to create Weight (" + termStates.topReaderContext + ") is not the same as the current reader's top-reader (" + ReaderUtil.getTopLevelContext(context);
      final TermState state = termStates.get(context.ord);
      if (state == null) { // term is not present in that reader
@@ -17,6 +17,10 @@ package org.apache.lucene.search;
  * limitations under the License.
  */

+import java.io.IOException;
+
+import org.apache.lucene.util.PriorityQueue;
+
 /** Represents hits returned by {@link
  *  IndexSearcher#search(Query,Filter,int)} and {@link
  *  IndexSearcher#search(Query,int)}. */

@@ -52,4 +56,208 @@ public class TopDocs {
    this.scoreDocs = scoreDocs;
    this.maxScore = maxScore;
  }
+
+ // Refers to one hit:
+ private static class ShardRef {
+   // Which shard (index into shardHits[]):
+   final int shardIndex;
+
+   // Which hit within the shard:
+   int hitIndex;
+
+   public ShardRef(int shardIndex) {
+     this.shardIndex = shardIndex;
+   }
+
+   @Override
+   public String toString() {
+     return "ShardRef(shardIndex=" + shardIndex + " hitIndex=" + hitIndex + ")";
+   }
+ };
+
+ // Specialized MergeSortQueue that just merges by
+ // relevance score, descending:
+ private static class ScoreMergeSortQueue extends PriorityQueue<ShardRef> {
+   final ScoreDoc[][] shardHits;
+
+   public ScoreMergeSortQueue(TopDocs[] shardHits) {
+     super(shardHits.length);
+     this.shardHits = new ScoreDoc[shardHits.length][];
+     for(int shardIDX=0;shardIDX<shardHits.length;shardIDX++) {
+       this.shardHits[shardIDX] = shardHits[shardIDX].scoreDocs;
+     }
+   }
+
+   // Returns true if first is < second
+   public boolean lessThan(ShardRef first, ShardRef second) {
+     assert first != second;
+     final float firstScore = shardHits[first.shardIndex][first.hitIndex].score;
+     final float secondScore = shardHits[second.shardIndex][second.hitIndex].score;
+
+     if (firstScore < secondScore) {
+       return false;
+     } else if (firstScore > secondScore) {
+       return true;
+     } else {
+       // Tie break: earlier shard wins
+       if (first.shardIndex < second.shardIndex) {
+         return true;
+       } else if (first.shardIndex > second.shardIndex) {
+         return false;
+       } else {
+         // Tie break in same shard: resolve however the
+         // shard had resolved it:
+         assert first.hitIndex != second.hitIndex;
+         return first.hitIndex < second.hitIndex;
+       }
+     }
+   }
+ }
+
+ private static class MergeSortQueue extends PriorityQueue<ShardRef> {
+   // These are really FieldDoc instances:
+   final ScoreDoc[][] shardHits;
+   final FieldComparator[] comparators;
+   final int[] reverseMul;
+
+   public MergeSortQueue(Sort sort, TopDocs[] shardHits) throws IOException {
+     super(shardHits.length);
+     this.shardHits = new ScoreDoc[shardHits.length][];
+     for(int shardIDX=0;shardIDX<shardHits.length;shardIDX++) {
+       final ScoreDoc[] shard = shardHits[shardIDX].scoreDocs;
+       //System.out.println("  init shardIdx=" + shardIDX + " hits=" + shard);
+       if (shard != null) {
+         this.shardHits[shardIDX] = shard;
+         // Fail gracefully if API is misused:
+         for(int hitIDX=0;hitIDX<shard.length;hitIDX++) {
+           final ScoreDoc sd = shard[hitIDX];
+           if (!(sd instanceof FieldDoc)) {
+             throw new IllegalArgumentException("shard " + shardIDX + " was not sorted by the provided Sort (expected FieldDoc but got ScoreDoc)");
+           }
+           final FieldDoc fd = (FieldDoc) sd;
+           if (fd.fields == null) {
+             throw new IllegalArgumentException("shard " + shardIDX + " did not set sort field values (FieldDoc.fields is null); you must pass fillFields=true to IndexSearcher.search on each shard");
+           }
+         }
+       }
+     }
+
+     final SortField[] sortFields = sort.getSort();
+     comparators = new FieldComparator[sortFields.length];
+     reverseMul = new int[sortFields.length];
+     for(int compIDX=0;compIDX<sortFields.length;compIDX++) {
+       final SortField sortField = sortFields[compIDX];
+       comparators[compIDX] = sortField.getComparator(1, compIDX);
+       reverseMul[compIDX] = sortField.getReverse() ? -1 : 1;
+     }
+   }
+
+   // Returns true if first is < second
+   @SuppressWarnings("unchecked")
+   public boolean lessThan(ShardRef first, ShardRef second) {
+     assert first != second;
+     final FieldDoc firstFD = (FieldDoc) shardHits[first.shardIndex][first.hitIndex];
+     final FieldDoc secondFD = (FieldDoc) shardHits[second.shardIndex][second.hitIndex];
+     //System.out.println("  lessThan: first=" + first + " doc=" + firstFD.doc + " score=" + firstFD.score + " second=" + second + " doc=" + secondFD.doc + " score=" + secondFD.score);
+
+     for(int compIDX=0;compIDX<comparators.length;compIDX++) {
+       final FieldComparator comp = comparators[compIDX];
+       //System.out.println("    cmp idx=" + compIDX + " cmp1=" + firstFD.fields[compIDX] + " cmp2=" + secondFD.fields[compIDX] + " reverse=" + reverseMul[compIDX]);
+
+       final int cmp = reverseMul[compIDX] * comp.compareValues(firstFD.fields[compIDX], secondFD.fields[compIDX]);
+
+       if (cmp != 0) {
+         //System.out.println("    return " + (cmp < 0));
+         return cmp < 0;
+       }
+     }
+
+     // Tie break: earlier shard wins
+     if (first.shardIndex < second.shardIndex) {
+       //System.out.println("    return tb true");
+       return true;
+     } else if (first.shardIndex > second.shardIndex) {
+       //System.out.println("    return tb false");
+       return false;
+     } else {
+       // Tie break in same shard: resolve however the
+       // shard had resolved it:
+       //System.out.println("    return tb " + (first.hitIndex < second.hitIndex));
+       assert first.hitIndex != second.hitIndex;
+       return first.hitIndex < second.hitIndex;
+     }
+   }
+ }
+
+ /** Returned from {@link #merge}, to include the merged
+  *  TopDocs as well as the reference to which original
+  *  TopDocs shard each hit came from.
+  *
+  *  @lucene.experimental */
+ public static class TopDocsAndShards extends TopDocs {
+
+   /** Parallel array matching <code>hits.scoreDocs</code> */
+   public final int[] shardIndex;
+
+   public TopDocsAndShards(int totalHits, ScoreDoc[] scoreDocs, float maxScore, int[] shardIndex) {
+     super(totalHits, scoreDocs, maxScore);
+     this.shardIndex = shardIndex;
+   }
+ }
+
+ /** Returns a new TopDocs, containing topN results across
+  *  the provided TopDocs, sorting by the specified {@link
+  *  Sort}.  Each of the TopDocs must have been sorted by
+  *  the same Sort, and sort field values must have been
+  *  filled (ie, <code>fillFields=true</code> must be
+  *  passed to {@link TopFieldCollector#create}).
+  *
+  *  <p>Pass sort=null to merge sort by score descending.
+  *
+  *  @lucene.experimental */
+ public static TopDocsAndShards merge(Sort sort, int topN, TopDocs[] shardHits) throws IOException {
+
+   final PriorityQueue<ShardRef> queue;
+   if (sort == null) {
+     queue = new ScoreMergeSortQueue(shardHits);
+   } else {
+     queue = new MergeSortQueue(sort, shardHits);
+   }
+
+   int totalHitCount = 0;
+   float maxScore = Float.MIN_VALUE;
+   for(int shardIDX=0;shardIDX<shardHits.length;shardIDX++) {
+     final TopDocs shard = shardHits[shardIDX];
+     if (shard.scoreDocs != null && shard.scoreDocs.length > 0) {
+       totalHitCount += shard.totalHits;
+       queue.add(new ShardRef(shardIDX));
+       maxScore = Math.max(maxScore, shard.getMaxScore());
+       //System.out.println("  maxScore now " + maxScore + " vs " + shard.getMaxScore());
+     }
+   }
+
+   final ScoreDoc[] hits = new ScoreDoc[Math.min(topN, totalHitCount)];
+   final int[] shardIndex = new int[hits.length];
+
+   int hitUpto = 0;
+   while(hitUpto < hits.length) {
+     assert queue.size() > 0;
+     ShardRef ref = queue.pop();
+     hits[hitUpto] = shardHits[ref.shardIndex].scoreDocs[ref.hitIndex++];
+     shardIndex[hitUpto] = ref.shardIndex;
+
+     //System.out.println("  hitUpto=" + hitUpto);
+     //System.out.println("    doc=" + hits[hitUpto].doc + " score=" + hits[hitUpto].score);
+
+     hitUpto++;
+
+     if (ref.hitIndex < shardHits[ref.shardIndex].scoreDocs.length) {
+       // Not done with these TopDocs yet:
+       queue.add(ref);
+     }
+   }
+
+   return new TopDocsAndShards(totalHitCount, hits, maxScore, shardIndex);
+ }
}
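A hedged sketch of the sorted case from the caller's side (the shard searcher setup is assumed; the collector flags mirror the test added in this commit):

  public static TopDocs.TopDocsAndShards searchShardsSorted(
      IndexSearcher[] shards, Query query, Sort sort, int topN) throws IOException {
    TopDocs[] shardHits = new TopDocs[shards.length];
    for (int i = 0; i < shards.length; i++) {
      // fillFields=true so every hit is a FieldDoc carrying its sort values;
      // otherwise MergeSortQueue throws IllegalArgumentException above.
      TopFieldCollector c = TopFieldCollector.create(sort, topN, true, true, true, false);
      shards[i].search(query, c);
      shardHits[i] = c.topDocs();
    }
    return TopDocs.merge(sort, topN, shardHits);
  }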
@@ -308,7 +308,14 @@ public class RandomIndexWriter implements Closeable {
    return getReader(true);
  }

+ private boolean doRandomOptimize = true;
+
+ public void setDoRandomOptimize(boolean v) {
+   doRandomOptimize = v;
+ }
+
  private void doRandomOptimize() throws IOException {
+   if (doRandomOptimize) {
    final int segCount = w.getSegmentCount();
    if (r.nextBoolean() || segCount == 0) {
      // full optimize

@@ -319,6 +326,7 @@ public class RandomIndexWriter implements Closeable {
      w.optimize(limit);
      assert w.getSegmentCount() <= limit: "limit=" + limit + " actual=" + w.getSegmentCount();
    }
+   }
    switchDoDocValues();
  }

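The new hook lets a test pin the segment structure; TestTopDocsMerge below carries exactly this (commented-out) call:

  RandomIndexWriter w = new RandomIndexWriter(random, dir);
  w.setDoRandomOptimize(false);  // keep multiple segments so each can act as a shard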
@@ -27,10 +27,10 @@ import java.io.OutputStream;
import java.io.PrintStream;
import java.lang.reflect.Method;
import java.util.Enumeration;
-import java.util.List;
-import java.util.Random;
-import java.util.Map;
import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Random;
import java.util.zip.ZipEntry;
import java.util.zip.ZipFile;

@@ -46,6 +46,9 @@ import org.apache.lucene.index.MergeScheduler;
import org.apache.lucene.index.TieredMergePolicy;
import org.apache.lucene.index.codecs.Codec;
import org.apache.lucene.index.codecs.CodecProvider;
+import org.apache.lucene.search.FieldDoc;
+import org.apache.lucene.search.ScoreDoc;
+import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.junit.Assert;
@@ -468,4 +471,24 @@ public class _TestUtil {
    newName.append(suffix);
    return new File(directory, newName.toString());
  }
+
+ public static void assertEquals(TopDocs expected, TopDocs actual) {
+   Assert.assertEquals("wrong total hits", expected.totalHits, actual.totalHits);
+   Assert.assertEquals("wrong maxScore", expected.getMaxScore(), actual.getMaxScore(), 0.0);
+   Assert.assertEquals("wrong hit count", expected.scoreDocs.length, actual.scoreDocs.length);
+   for(int hitIDX=0;hitIDX<expected.scoreDocs.length;hitIDX++) {
+     final ScoreDoc expectedSD = expected.scoreDocs[hitIDX];
+     final ScoreDoc actualSD = actual.scoreDocs[hitIDX];
+     Assert.assertEquals("wrong hit docID", expectedSD.doc, actualSD.doc);
+     Assert.assertEquals("wrong hit score", expectedSD.score, actualSD.score, 0.0);
+     if (expectedSD instanceof FieldDoc) {
+       Assert.assertTrue(actualSD instanceof FieldDoc);
+       Assert.assertEquals("wrong sort field values",
+                           ((FieldDoc) expectedSD).fields,
+                           ((FieldDoc) actualSD).fields);
+     } else {
+       Assert.assertFalse(actualSD instanceof FieldDoc);
+     }
+   }
+ }
}
@@ -103,7 +103,7 @@ final class JustCompileSearch {

  }

- static final class JustCompileFieldComparator extends FieldComparator {
+ static final class JustCompileFieldComparator extends FieldComparator<Object> {

    @Override
    public int compare(int slot1, int slot2) {

@@ -132,7 +132,7 @@ final class JustCompileSearch {
    }

    @Override
-   public Comparable<?> value(int slot) {
+   public Object value(int slot) {
      throw new UnsupportedOperationException(UNSUPPORTED_MSG);
    }

@@ -139,7 +139,7 @@ class ElevationComparatorSource extends FieldComparatorSource {

  @Override
  public FieldComparator newComparator(final String fieldname, final int numHits, int sortPos, boolean reversed) throws IOException {
-   return new FieldComparator() {
+   return new FieldComparator<Integer>() {

      FieldCache.DocTermsIndex idIndex;
      private final int[] values = new int[numHits];

@@ -184,7 +184,7 @@ class ElevationComparatorSource extends FieldComparatorSource {
      }

      @Override
-     public Comparable<?> value(int slot) {
+     public Integer value(int slot) {
        return Integer.valueOf(values[slot]);
      }
    };
@@ -511,7 +511,7 @@ public class TestSort extends LuceneTestCase {
    assertMatches (empty, queryX, sort, "");
  }

- static class MyFieldComparator extends FieldComparator {
+ static class MyFieldComparator extends FieldComparator<Integer> {
    int[] docValues;
    int[] slotValues;
    int bottomValue;

@@ -527,6 +527,7 @@ public class TestSort extends LuceneTestCase {

    @Override
    public int compare(int slot1, int slot2) {
+     // values are small enough that overflow won't happen
      return slotValues[slot1] - slotValues[slot2];
    }

@@ -553,7 +554,7 @@ public class TestSort extends LuceneTestCase {
    }

    @Override
-   public Comparable<?> value(int slot) {
+   public Integer value(int slot) {
      return Integer.valueOf(slotValues[slot]);
    }
  }
@@ -0,0 +1,244 @@
+package org.apache.lucene.search;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.NumericField;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.RandomIndexWriter;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.ReaderUtil;
+import org.apache.lucene.util._TestUtil;
+
+public class TestTopDocsMerge extends LuceneTestCase {
+
+  private static class ShardSearcher extends IndexSearcher {
+    private final IndexReader.AtomicReaderContext[] ctx;
+
+    public ShardSearcher(IndexReader.AtomicReaderContext ctx, IndexReader.ReaderContext parent) {
+      super(parent);
+      this.ctx = new IndexReader.AtomicReaderContext[] {ctx};
+    }
+
+    public void search(Weight weight, Collector collector) throws IOException {
+      search(ctx, weight, null, collector);
+    }
+
+    public TopDocs search(Weight weight, int topN) throws IOException {
+      return search(ctx, weight, null, topN);
+    }
+
+    @Override
+    public String toString() {
+      return "ShardSearcher(" + ctx[0] + ")";
+    }
+  }
+
+  public void testSort() throws Exception {
+
+    IndexReader reader = null;
+    Directory dir = null;
+
+    final int numDocs = atLeast(1000);
+    //final int numDocs = atLeast(50);
+
+    final String[] tokens = new String[] {"a", "b", "c", "d", "e"};
+
+    if (VERBOSE) {
+      System.out.println("TEST: make index");
+    }
+
+    {
+      dir = newDirectory();
+      final RandomIndexWriter w = new RandomIndexWriter(random, dir);
+      // w.setDoRandomOptimize(false);
+
+      // w.w.getConfig().setMaxBufferedDocs(atLeast(100));
+
+      final String[] content = new String[atLeast(20)];
+
+      for(int contentIDX=0;contentIDX<content.length;contentIDX++) {
+        final StringBuilder sb = new StringBuilder();
+        final int numTokens = _TestUtil.nextInt(random, 1, 10);
+        for(int tokenIDX=0;tokenIDX<numTokens;tokenIDX++) {
+          sb.append(tokens[random.nextInt(tokens.length)]).append(' ');
+        }
+        content[contentIDX] = sb.toString();
+      }
+
+      for(int docIDX=0;docIDX<numDocs;docIDX++) {
+        final Document doc = new Document();
+        doc.add(newField("string", _TestUtil.randomRealisticUnicodeString(random), Field.Index.NOT_ANALYZED));
+        doc.add(newField("text", content[random.nextInt(content.length)], Field.Index.ANALYZED));
+        doc.add(new NumericField("float").setFloatValue(random.nextFloat()));
+        final int intValue;
+        if (random.nextInt(100) == 17) {
+          intValue = Integer.MIN_VALUE;
+        } else if (random.nextInt(100) == 17) {
+          intValue = Integer.MAX_VALUE;
+        } else {
+          intValue = random.nextInt();
+        }
+        doc.add(new NumericField("int").setIntValue(intValue));
+        if (VERBOSE) {
+          System.out.println("  doc=" + doc);
+        }
+        w.addDocument(doc);
+      }
+
+      reader = w.getReader();
+      w.close();
+    }
+
+    // NOTE: sometimes reader has just one segment, which is
+    // important to test
+    final IndexSearcher searcher = newSearcher(reader);
+    IndexReader[] subReaders = searcher.getIndexReader().getSequentialSubReaders();
+    if (subReaders == null) {
+      subReaders = new IndexReader[] {searcher.getIndexReader()};
+    }
+    final ShardSearcher[] subSearchers = new ShardSearcher[subReaders.length];
+    final IndexReader.ReaderContext ctx = searcher.getTopReaderContext();
+
+    if (ctx instanceof IndexReader.AtomicReaderContext) {
+      assert subSearchers.length == 1;
+      subSearchers[0] = new ShardSearcher((IndexReader.AtomicReaderContext) ctx, ctx);
+    } else {
+      final IndexReader.CompositeReaderContext compCTX = (IndexReader.CompositeReaderContext) ctx;
+      for(int searcherIDX=0;searcherIDX<subSearchers.length;searcherIDX++) {
+        subSearchers[searcherIDX] = new ShardSearcher(compCTX.leaves[searcherIDX], compCTX);
+      }
+    }
+
+    final List<SortField> sortFields = new ArrayList<SortField>();
+    sortFields.add(new SortField("string", SortField.STRING, true));
+    sortFields.add(new SortField("string", SortField.STRING, false));
+    sortFields.add(new SortField("int", SortField.INT, true));
+    sortFields.add(new SortField("int", SortField.INT, false));
+    sortFields.add(new SortField("float", SortField.FLOAT, true));
+    sortFields.add(new SortField("float", SortField.FLOAT, false));
+    sortFields.add(new SortField(null, SortField.SCORE, true));
+    sortFields.add(new SortField(null, SortField.SCORE, false));
+    sortFields.add(new SortField(null, SortField.DOC, true));
+    sortFields.add(new SortField(null, SortField.DOC, false));
+
+    final int[] docStarts = new int[subSearchers.length];
+    int docBase = 0;
+    for(int subIDX=0;subIDX<docStarts.length;subIDX++) {
+      docStarts[subIDX] = docBase;
+      docBase += subReaders[subIDX].maxDoc();
+      //System.out.println("docStarts[" + subIDX + "]=" + docStarts[subIDX]);
+    }
+
+    for(int iter=0;iter<1000*RANDOM_MULTIPLIER;iter++) {
+
+      // TODO: custom FieldComp...
+      final Query query = new TermQuery(new Term("text", tokens[random.nextInt(tokens.length)]));
+
+      final Sort sort;
+      if (random.nextInt(10) == 4) {
+        // Sort by score
+        sort = null;
+      } else {
+        final SortField[] randomSortFields = new SortField[_TestUtil.nextInt(random, 1, 3)];
+        for(int sortIDX=0;sortIDX<randomSortFields.length;sortIDX++) {
+          randomSortFields[sortIDX] = sortFields.get(random.nextInt(sortFields.size()));
+        }
+        sort = new Sort(randomSortFields);
+      }
+
+      final int numHits = _TestUtil.nextInt(random, 1, numDocs+5);
+      //final int numHits = 5;
+
+      if (VERBOSE) {
+        System.out.println("TEST: search query=" + query + " sort=" + sort + " numHits=" + numHits);
+      }
+
+      // First search on whole index:
+      final TopDocs topHits;
+      if (sort == null) {
+        topHits = searcher.search(query, numHits);
+      } else {
+        final TopFieldCollector c = TopFieldCollector.create(sort, numHits, true, true, true, random.nextBoolean());
+        searcher.search(query, c);
+        topHits = c.topDocs(0, numHits);
+      }
+
+      if (VERBOSE) {
+        System.out.println("  top search: " + topHits.totalHits + " totalHits; hits=" + (topHits.scoreDocs == null ? "null" : topHits.scoreDocs.length));
+        if (topHits.scoreDocs != null) {
+          for(int hitIDX=0;hitIDX<topHits.scoreDocs.length;hitIDX++) {
+            final ScoreDoc sd = topHits.scoreDocs[hitIDX];
+            System.out.println("    doc=" + sd.doc + " score=" + sd.score);
+          }
+        }
+      }
+
+      // ... then all shards:
+      final Weight w = query.weight(searcher);
+
+      final TopDocs[] shardHits = new TopDocs[subSearchers.length];
+      for(int shardIDX=0;shardIDX<subSearchers.length;shardIDX++) {
+        final TopDocs subHits;
+        final ShardSearcher subSearcher = subSearchers[shardIDX];
+        if (sort == null) {
+          subHits = subSearcher.search(w, numHits);
+        } else {
+          final TopFieldCollector c = TopFieldCollector.create(sort, numHits, true, true, true, random.nextBoolean());
+          subSearcher.search(w, c);
+          subHits = c.topDocs(0, numHits);
+        }
+
+        shardHits[shardIDX] = subHits;
+        if (VERBOSE) {
+          System.out.println("  shard=" + shardIDX + " " + subHits.totalHits + " totalHits hits=" + (subHits.scoreDocs == null ? "null" : subHits.scoreDocs.length));
+          if (subHits.scoreDocs != null) {
+            for(ScoreDoc sd : subHits.scoreDocs) {
+              System.out.println("    doc=" + sd.doc + " score=" + sd.score);
+            }
+          }
+        }
+      }
+
+      // Merge:
+      final TopDocs.TopDocsAndShards mergedHits = TopDocs.merge(sort, numHits, shardHits);
+
+      if (mergedHits.scoreDocs != null) {
+        // Make sure the returned shards are correct:
+        for(int hitIDX=0;hitIDX<mergedHits.scoreDocs.length;hitIDX++) {
+          final ScoreDoc sd = mergedHits.scoreDocs[hitIDX];
+          assertEquals("doc=" + sd.doc + " wrong shard",
+                       ReaderUtil.subIndex(sd.doc, docStarts),
+                       mergedHits.shardIndex[hitIDX]);
+        }
+      }
+
+      _TestUtil.assertEquals(topHits, mergedHits);
+    }
+    searcher.close();
+    reader.close();
+    dir.close();
+  }
+}
@@ -122,7 +122,7 @@ abstract public class AbstractFirstPassGroupingCollector<GROUP_VALUE_TYPE> exten
      SearchGroup<GROUP_VALUE_TYPE> searchGroup = new SearchGroup<GROUP_VALUE_TYPE>();
      searchGroup.groupValue = group.groupValue;
      if (fillFields) {
-       searchGroup.sortValues = new Comparable[sortFieldCount];
+       searchGroup.sortValues = new Object[sortFieldCount];
        for(int sortFieldIDX=0;sortFieldIDX<sortFieldCount;sortFieldIDX++) {
          searchGroup.sortValues[sortFieldIDX] = comparators[sortFieldIDX].value(group.comparatorSlot);
        }

@@ -348,7 +348,7 @@ public class BlockGroupingCollector extends Collector {
      }
      totalGroupedHitCount += og.count;

-     final Comparable[] groupSortValues;
+     final Object[] groupSortValues;

      if (fillSortFields) {
        groupSortValues = new Comparable[comparators.length];
@@ -40,13 +40,13 @@ public class GroupDocs<GROUP_VALUE_TYPE> {

  /** Matches the groupSort passed to {@link
   *  AbstractFirstPassGroupingCollector}. */
- public final Comparable[] groupSortValues;
+ public final Object[] groupSortValues;

  public GroupDocs(float maxScore,
                   int totalHits,
                   ScoreDoc[] scoreDocs,
                   GROUP_VALUE_TYPE groupValue,
-                  Comparable[] groupSortValues) {
+                  Object[] groupSortValues) {
    this.maxScore = maxScore;
    this.totalHits = totalHits;
    this.scoreDocs = scoreDocs;
@@ -0,0 +1,36 @@
+package org.apache.lucene.search.grouping;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.search.ScoreDoc;
+
+public class GroupDocsAndShards<GROUP_VALUE_TYPE> extends GroupDocs<GROUP_VALUE_TYPE> {
+
+  public final int[] shardIndex;
+
+  public GroupDocsAndShards(float maxScore,
+                            int totalHits,
+                            ScoreDoc[] scoreDocs,
+                            GROUP_VALUE_TYPE groupValue,
+                            Object[] groupSortValues,
+                            int[] shardIndex) {
+    super(maxScore, totalHits, scoreDocs, groupValue, groupSortValues);
+    this.shardIndex = shardIndex;
+  }
+}
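A hedged consumer-side sketch of the parallel shardIndex array (the helper itself is hypothetical; GroupDocs exposes groupValue and scoreDocs as public fields):

  static <T> void printHitShards(GroupDocsAndShards<T> group) {
    for (int i = 0; i < group.scoreDocs.length; i++) {
      System.out.println("group=" + group.groupValue
          + " doc=" + group.scoreDocs[i].doc
          + " shard=" + group.shardIndex[i]);
    }
  }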
@ -17,6 +17,14 @@ package org.apache.lucene.search.grouping;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.*;
|
||||
|
||||
import org.apache.lucene.search.FieldComparator;
|
||||
import org.apache.lucene.search.Sort;
|
||||
import org.apache.lucene.search.SortField;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
|
||||
/**
|
||||
* Represents a group that is found during the first pass search.
|
||||
*
|
||||
|
@ -27,6 +35,287 @@ public class SearchGroup<GROUP_VALUE_TYPE> {
|
|||
/** The value that defines this group */
|
||||
public GROUP_VALUE_TYPE groupValue;
|
||||
|
||||
/** The sort values used during sorting. Can be <code>null</code>. */
|
||||
public Comparable[] sortValues;
|
||||
/** The sort values used during sorting. These are the
|
||||
* groupSort field values of the highest rank document
|
||||
* (by the groupSort) within the group. Can be
|
||||
* <code>null</code> if <code>fillFields=false</code> had
|
||||
* been passed to {@link AbstractFirstPassGroupingCollector#getTopGroups} */
|
||||
public Object[] sortValues;
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return("SearchGroup(groupValue=" + groupValue + " sortValues=" + Arrays.toString(sortValues) + ")");
|
||||
}
|
||||
|
||||
private static class ShardIter<T> {
|
||||
public final Iterator<SearchGroup<T>> iter;
|
||||
public final int shardIndex;
|
||||
|
||||
public ShardIter(Collection<SearchGroup<T>> shard, int shardIndex) {
|
||||
this.shardIndex = shardIndex;
|
||||
iter = shard.iterator();
|
||||
assert iter.hasNext();
|
||||
}
|
||||
|
||||
public SearchGroup<T> next() {
|
||||
assert iter.hasNext();
|
||||
final SearchGroup<T> group = iter.next();
|
||||
if (group.sortValues == null) {
|
||||
throw new IllegalArgumentException("group.sortValues is null; you must pass fillFields=true to the first pass collector");
|
||||
}
|
||||
return group;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "ShardIter(shard=" + shardIndex + ")";
|
||||
}
|
||||
}
|
||||
|
||||
// Holds all shards currently on the same group
|
||||
private static class MergedGroup<T> {
|
||||
|
||||
// groupValue may be null!
|
||||
public final T groupValue;
|
||||
|
||||
public Object[] topValues;
|
||||
public final List<ShardIter<T>> shards = new ArrayList<ShardIter<T>>();
|
||||
public int minShardIndex;
|
||||
public boolean processed;
|
||||
public boolean inQueue;
|
||||
|
||||
public MergedGroup(T groupValue) {
|
||||
this.groupValue = groupValue;
|
||||
}
|
||||
|
||||
// Only for assert
|
||||
private boolean neverEquals(Object _other) {
|
||||
if (_other instanceof MergedGroup) {
|
||||
MergedGroup other = (MergedGroup) _other;
|
||||
if (groupValue == null) {
|
||||
assert other.groupValue != null;
|
||||
} else {
|
||||
assert !groupValue.equals(other.groupValue);
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object _other) {
|
||||
// We never have another MergedGroup instance with
|
||||
// same groupValue
|
||||
assert neverEquals(_other);
|
||||
|
||||
if (_other instanceof MergedGroup) {
|
||||
MergedGroup other = (MergedGroup) _other;
|
||||
if (groupValue == null) {
|
||||
return other == null;
|
||||
} else {
|
||||
return groupValue.equals(other);
|
||||
}
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
if (groupValue == null) {
|
||||
return 0;
|
||||
} else {
|
||||
return groupValue.hashCode();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private static class GroupComparator<T> implements Comparator<MergedGroup<T>> {
|
||||
|
||||
public final FieldComparator[] comparators;
|
||||
public final int[] reversed;
|
||||
|
||||
public GroupComparator(Sort groupSort) throws IOException {
|
||||
final SortField[] sortFields = groupSort.getSort();
|
||||
comparators = new FieldComparator[sortFields.length];
|
||||
reversed = new int[sortFields.length];
|
||||
for (int compIDX = 0; compIDX < sortFields.length; compIDX++) {
|
||||
final SortField sortField = sortFields[compIDX];
|
||||
comparators[compIDX] = sortField.getComparator(1, compIDX);
|
||||
reversed[compIDX] = sortField.getReverse() ? -1 : 1;
|
||||
}
|
||||
}
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
public int compare(MergedGroup<T> group, MergedGroup<T> other) {
|
||||
if (group == other) {
|
||||
return 0;
|
||||
}
|
||||
//System.out.println("compare group=" + group + " other=" + other);
|
||||
final Object[] groupValues = group.topValues;
|
||||
final Object[] otherValues = other.topValues;
|
||||
//System.out.println(" groupValues=" + groupValues + " otherValues=" + otherValues);
|
||||
for (int compIDX = 0;compIDX < comparators.length; compIDX++) {
|
||||
final int c = reversed[compIDX] * comparators[compIDX].compareValues(groupValues[compIDX],
|
||||
otherValues[compIDX]);
|
||||
if (c != 0) {
|
||||
return c;
|
||||
}
|
||||
}
|
||||
|
||||
// Tie break by min shard index:
|
||||
assert group.minShardIndex != other.minShardIndex;
|
||||
return group.minShardIndex - other.minShardIndex;
|
||||
}
|
||||
}
|
||||
|
||||
private static class GroupMerger<T> {
|
||||
|
||||
private final GroupComparator<T> groupComp;
|
||||
private final SortedSet<MergedGroup<T>> queue;
|
||||
private final Map<T,MergedGroup<T>> groupsSeen;
|
||||
|
||||
public GroupMerger(Sort groupSort) throws IOException {
|
||||
groupComp = new GroupComparator<T>(groupSort);
|
||||
queue = new TreeSet<MergedGroup<T>>(groupComp);
|
||||
groupsSeen = new HashMap<T,MergedGroup<T>>();
|
||||
}
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
private void updateNextGroup(int topN, ShardIter<T> shard) {
|
||||
while(shard.iter.hasNext()) {
|
||||
final SearchGroup<T> group = shard.next();
|
||||
MergedGroup<T> mergedGroup = groupsSeen.get(group.groupValue);
|
||||
final boolean isNew = mergedGroup == null;
|
||||
//System.out.println(" next group=" + (group.groupValue == null ? "null" : ((BytesRef) group.groupValue).utf8ToString()) + " sort=" + Arrays.toString(group.sortValues));
|
||||
|
||||
if (isNew) {
|
||||
// Start a new group:
|
||||
//System.out.println(" new");
|
||||
mergedGroup = new MergedGroup<T>(group.groupValue);
|
||||
mergedGroup.minShardIndex = shard.shardIndex;
|
||||
assert group.sortValues != null;
|
||||
mergedGroup.topValues = group.sortValues;
|
||||
groupsSeen.put(group.groupValue, mergedGroup);
|
||||
          mergedGroup.inQueue = true;
          queue.add(mergedGroup);
        } else if (mergedGroup.processed) {
          // This shard produced a group that we already
          // processed; move on to next group...
          continue;
        } else {
          //System.out.println("  old");
          boolean competes = false;
          for(int compIDX=0;compIDX<groupComp.comparators.length;compIDX++) {
            final int cmp = groupComp.reversed[compIDX] * groupComp.comparators[compIDX].compareValues(group.sortValues[compIDX],
                                                                                                       mergedGroup.topValues[compIDX]);
            if (cmp < 0) {
              // Definitely competes
              competes = true;
              break;
            } else if (cmp > 0) {
              // Definitely does not compete
              break;
            } else if (compIDX == groupComp.comparators.length-1) {
              if (shard.shardIndex < mergedGroup.minShardIndex) {
                competes = true;
              }
            }
          }

          //System.out.println("    competes=" + competes);

          if (competes) {
            // Group's sort changed -- remove & re-insert
            if (mergedGroup.inQueue) {
              queue.remove(mergedGroup);
            }
            mergedGroup.topValues = group.sortValues;
            mergedGroup.minShardIndex = shard.shardIndex;
            queue.add(mergedGroup);
            mergedGroup.inQueue = true;
          }
        }

        mergedGroup.shards.add(shard);
        break;
      }

      // Prune un-competitive groups:
      while(queue.size() > topN) {
        // TODO java 1.6: .pollLast
        final MergedGroup<T> group = queue.last();
        //System.out.println("PRUNE: " + group);
        queue.remove(group);
        group.inQueue = false;
      }
    }

    public Collection<SearchGroup<T>> merge(List<Collection<SearchGroup<T>>> shards, int offset, int topN) {

      final int maxQueueSize = offset + topN;

      //System.out.println("merge");
      // Init queue:
      for(int shardIDX=0;shardIDX<shards.size();shardIDX++) {
        final Collection<SearchGroup<T>> shard = shards.get(shardIDX);
        if (!shard.isEmpty()) {
          //System.out.println("  insert shard=" + shardIDX);
          updateNextGroup(maxQueueSize, new ShardIter<T>(shard, shardIDX));
        }
      }

      // Pull merged topN groups:
      final List<SearchGroup<T>> newTopGroups = new ArrayList<SearchGroup<T>>();

      int count = 0;

      while(queue.size() != 0) {
        // TODO Java 1.6: pollFirst()
        final MergedGroup<T> group = queue.first();
        queue.remove(group);
        group.processed = true;
        //System.out.println("  pop: shards=" + group.shards + " group=" + (group.groupValue == null ? "null" : (((BytesRef) group.groupValue).utf8ToString())) + " sortValues=" + Arrays.toString(group.topValues));
        if (count++ >= offset) {
          final SearchGroup<T> newGroup = new SearchGroup<T>();
          newGroup.groupValue = group.groupValue;
          newGroup.sortValues = group.topValues;
          newTopGroups.add(newGroup);
          if (newTopGroups.size() == topN) {
            break;
          }
        //} else {
        //  System.out.println("  skip < offset");
        }

        // Advance all iters in this group:
        for(ShardIter<T> shardIter : group.shards) {
          updateNextGroup(maxQueueSize, shardIter);
        }
      }

      if (newTopGroups.size() == 0) {
        return null;
      } else {
        return newTopGroups;
      }
    }
  }

  /** Merges multiple collections of top groups, for example
   *  obtained from separate index shards.  The provided
   *  groupSort must match how the groups were sorted, and
   *  the provided SearchGroups must have been computed
   *  with fillFields=true passed to {@link
   *  AbstractFirstPassGroupingCollector#getTopGroups}.
   *
   * <p>NOTE: this returns null if the topGroups is empty.
   */
  public static <T> Collection<SearchGroup<T>> merge(List<Collection<SearchGroup<T>>> topGroups, int offset, int topN, Sort groupSort)
    throws IOException {
    if (topGroups.size() == 0) {
      return null;
    } else {
      return new GroupMerger<T>(groupSort).merge(topGroups, offset, topN);
    }
  }
}
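Taken together with the first-pass collector, the intended call pattern looks like the following sketch, modeled on the searchShards helper in TestGrouping further down in this commit. It is illustrative only: shardSearchers, query, groupSort, groupOffset and topNGroups are assumed to be supplied by the caller, the "group" field name is a stand-in, and the usual org.apache.lucene.search imports are elided.

    // First pass, run independently on every shard:
    final List<Collection<SearchGroup<BytesRef>>> perShardGroups =
      new ArrayList<Collection<SearchGroup<BytesRef>>>();
    for (IndexSearcher shardSearcher : shardSearchers) {
      final TermFirstPassGroupingCollector c =
        new TermFirstPassGroupingCollector("group", groupSort, groupOffset + topNGroups);
      shardSearcher.search(query, c);
      // fillFields must be true so merge() can compare sort values across shards:
      final Collection<SearchGroup<BytesRef>> topGroups = c.getTopGroups(0, true);
      if (topGroups != null) {
        perShardGroups.add(topGroups);
      }
    }
    // Then, on the coordinating node:
    final Collection<SearchGroup<BytesRef>> mergedTopGroups =
      SearchGroup.merge(perShardGroups, groupOffset, topNGroups, groupSort);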
@@ -1,7 +1,5 @@
package org.apache.lucene.search.grouping;

import org.apache.lucene.search.SortField;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
@@ -19,6 +17,14 @@ import org.apache.lucene.search.SortField;
 * limitations under the License.
 */

import java.io.IOException;
import java.util.Arrays;

import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TopDocs;

/** Represents result returned by a grouping search.
 *
 * @lucene.experimental */
@@ -58,4 +64,113 @@ public class TopGroups<GROUP_VALUE_TYPE> {
    this.groups = oldTopGroups.groups;
    this.totalGroupCount = totalGroupCount;
  }

  /** Merges an array of TopGroups, for example obtained
   *  from the second-pass collector across multiple
   *  shards.  Each TopGroups must have been sorted by the
   *  same groupSort and docSort, and the top groups passed
   *  to all second-pass collectors must be the same.
   *
   * <b>NOTE</b>: this cannot merge totalGroupCount; i.e. the
   * returned TopGroups will have null totalGroupCount.
   *
   * <b>NOTE</b>: the topDocs in each GroupDocs is actually
   * an instance of TopDocsAndShards
   */
  public static <T> TopGroups<T> merge(TopGroups<T>[] shardGroups, Sort groupSort, Sort docSort, int docOffset, int docTopN)
    throws IOException {

    //System.out.println("TopGroups.merge");

    if (shardGroups.length == 0) {
      return null;
    }

    int totalHitCount = 0;
    int totalGroupedHitCount = 0;

    final int numGroups = shardGroups[0].groups.length;
    for(TopGroups<T> shard : shardGroups) {
      if (numGroups != shard.groups.length) {
        throw new IllegalArgumentException("number of groups differs across shards; you must pass same top groups to all shards' second-pass collector");
      }
      totalHitCount += shard.totalHitCount;
      totalGroupedHitCount += shard.totalGroupedHitCount;
    }

    @SuppressWarnings("unchecked")
    final GroupDocs<T>[] mergedGroupDocs = new GroupDocs[numGroups];

    final TopDocs[] shardTopDocs = new TopDocs[shardGroups.length];

    for(int groupIDX=0;groupIDX<numGroups;groupIDX++) {
      final T groupValue = shardGroups[0].groups[groupIDX].groupValue;
      //System.out.println("  merge groupValue=" + groupValue + " sortValues=" + Arrays.toString(shardGroups[0].groups[groupIDX].groupSortValues));
      float maxScore = Float.MIN_VALUE;
      int totalHits = 0;
      for(int shardIDX=0;shardIDX<shardGroups.length;shardIDX++) {
        //System.out.println("    shard=" + shardIDX);
        final TopGroups<T> shard = shardGroups[shardIDX];
        final GroupDocs shardGroupDocs = shard.groups[groupIDX];
        if (groupValue == null) {
          if (shardGroupDocs.groupValue != null) {
            throw new IllegalArgumentException("group values differ across shards; you must pass same top groups to all shards' second-pass collector");
          }
        } else if (!groupValue.equals(shardGroupDocs.groupValue)) {
          throw new IllegalArgumentException("group values differ across shards; you must pass same top groups to all shards' second-pass collector");
        }

        /*
        for(ScoreDoc sd : shardGroupDocs.scoreDocs) {
          System.out.println("      doc=" + sd.doc);
        }
        */

        shardTopDocs[shardIDX] = new TopDocs(shardGroupDocs.totalHits,
                                             shardGroupDocs.scoreDocs,
                                             shardGroupDocs.maxScore);
        maxScore = Math.max(maxScore, shardGroupDocs.maxScore);
        totalHits += shardGroupDocs.totalHits;
      }

      final TopDocs.TopDocsAndShards mergedTopDocs = TopDocs.merge(docSort, docOffset + docTopN, shardTopDocs);

      // Slice:
      final ScoreDoc[] mergedScoreDocs;
      final int[] mergedShardIndex;
      if (docOffset == 0) {
        mergedScoreDocs = mergedTopDocs.scoreDocs;
        mergedShardIndex = mergedTopDocs.shardIndex;
      } else if (docOffset >= mergedTopDocs.scoreDocs.length) {
        mergedScoreDocs = new ScoreDoc[0];
        mergedShardIndex = new int[0];
      } else {
        mergedScoreDocs = new ScoreDoc[mergedTopDocs.scoreDocs.length - docOffset];
        System.arraycopy(mergedTopDocs.scoreDocs,
                         docOffset,
                         mergedScoreDocs,
                         0,
                         mergedTopDocs.scoreDocs.length - docOffset);
        mergedShardIndex = new int[mergedTopDocs.scoreDocs.length - docOffset];
        System.arraycopy(mergedTopDocs.shardIndex,
                         docOffset,
                         mergedShardIndex,
                         0,
                         mergedTopDocs.scoreDocs.length - docOffset);
      }
      //System.out.println("SHARDS=" + Arrays.toString(mergedTopDocs.shardIndex));
      mergedGroupDocs[groupIDX] = new GroupDocsAndShards<T>(maxScore,
                                                            totalHits,
                                                            mergedScoreDocs,
                                                            groupValue,
                                                            shardGroups[0].groups[groupIDX].groupSortValues,
                                                            mergedShardIndex);
    }

    return new TopGroups<T>(groupSort.getSort(),
                            docSort == null ? null : docSort.getSort(),
                            totalHitCount,
                            totalGroupedHitCount,
                            mergedGroupDocs);
  }
}
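The second pass mirrors the first: every shard must be handed the same merged top groups, and the per-shard TopGroups are then combined here. A sketch, again following the searchShards helper in TestCrouping below (spelled TestGrouping); mergedTopGroups comes from SearchGroup.merge, while shardSearchers, query, groupSort, docSort, docOffset, topNDocs, getScores and getMaxScores are assumed caller parameters:

    // Second pass, using the SAME merged groups on every shard:
    @SuppressWarnings("unchecked")
    final TopGroups<BytesRef>[] shardTopGroups = new TopGroups[shardSearchers.length];
    for (int shardIDX = 0; shardIDX < shardSearchers.length; shardIDX++) {
      final TermSecondPassGroupingCollector c =
        new TermSecondPassGroupingCollector("group", mergedTopGroups, groupSort, docSort,
                                            docOffset + topNDocs, getScores, getMaxScores, true);
      shardSearchers[shardIDX].search(query, c);
      shardTopGroups[shardIDX] = c.getTopGroups(0);
    }
    // Note: the merged result carries a null totalGroupCount, per the javadoc above.
    final TopGroups<BytesRef> merged =
      TopGroups.merge(shardTopGroups, groupSort, docSort, docOffset, topNDocs);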
@@ -17,6 +17,9 @@

package org.apache.lucene.search.grouping;

import java.io.IOException;
import java.util.*;

import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;

@@ -28,11 +31,9 @@ import org.apache.lucene.search.*;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.ReaderUtil;
import org.apache.lucene.util._TestUtil;

import java.io.IOException;
import java.util.*;

// TODO
//   - should test relevance sort too
//   - test null

@@ -244,7 +245,6 @@ public class TestGrouping extends LuceneTestCase {
    return fields;
  }

  /*
  private String groupToString(BytesRef b) {
    if (b == null) {
      return "null";
@@ -252,7 +252,6 @@
      return b.utf8ToString();
    }
  }
  */

  private TopGroups<BytesRef> slowGrouping(GroupDoc[] groupDocs,
                                           String searchTerm,
@@ -418,6 +417,38 @@
    return r;
  }

  private static class ShardState {

    public final ShardSearcher[] subSearchers;
    public final int[] docStarts;

    public ShardState(IndexSearcher s) {
      IndexReader[] subReaders = s.getIndexReader().getSequentialSubReaders();
      if (subReaders == null) {
        subReaders = new IndexReader[] {s.getIndexReader()};
      }
      subSearchers = new ShardSearcher[subReaders.length];
      final IndexReader.ReaderContext ctx = s.getTopReaderContext();
      if (ctx instanceof IndexReader.AtomicReaderContext) {
        assert subSearchers.length == 1;
        subSearchers[0] = new ShardSearcher((IndexReader.AtomicReaderContext) ctx, ctx);
      } else {
        final IndexReader.CompositeReaderContext compCTX = (IndexReader.CompositeReaderContext) ctx;
        for(int searcherIDX=0;searcherIDX<subSearchers.length;searcherIDX++) {
          subSearchers[searcherIDX] = new ShardSearcher(compCTX.leaves[searcherIDX], compCTX);
        }
      }

      docStarts = new int[subSearchers.length];
      int docBase = 0;
      for(int subIDX=0;subIDX<docStarts.length;subIDX++) {
        docStarts[subIDX] = docBase;
        docBase += subReaders[subIDX].maxDoc();
        //System.out.println("docStarts[" + subIDX + "]=" + docStarts[subIDX]);
      }
    }
  }

  public void testRandom() throws Exception {
    for(int iter=0;iter<3;iter++) {

@@ -525,7 +556,8 @@
      Directory dir2 = null;

      try {
        final IndexSearcher s = new IndexSearcher(r);
        final IndexSearcher s = newSearcher(r);
        final ShardState shards = new ShardState(s);

        for(int contentID=0;contentID<3;contentID++) {
          final ScoreDoc[] hits = s.search(new TermQuery(new Term("content", "real"+contentID)), numDocs).scoreDocs;
@@ -549,7 +581,8 @@
        final Filter lastDocInBlock = new CachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("groupend", "x"))));
        final int[] docIDToID2 = FieldCache.DEFAULT.getInts(r2, "id");

        final IndexSearcher s2 = new IndexSearcher(r2);
        final IndexSearcher s2 = newSearcher(r2);
        final ShardState shards2 = new ShardState(s2);

        // Reader2 only increases maxDoc() vs reader, which
        // means a monotonic shift in scores, so we can
@@ -604,7 +637,7 @@
        }

        final int topNGroups = _TestUtil.nextInt(random, 1, 30);
        //final int topNGroups = 4;
        //final int topNGroups = 10;
        final int docsPerGroup = _TestUtil.nextInt(random, 1, 50);

        final int groupOffset = _TestUtil.nextInt(random, 0, (topNGroups-1)/2);
@@ -658,7 +691,9 @@
          }
        }

        s.search(new TermQuery(new Term("content", searchTerm)), c);
        // Search top reader:
        final Query q = new TermQuery(new Term("content", searchTerm));
        s.search(q, c);

        if (doCache && !useWrappingCollector) {
          if (cCache.isCached()) {
@@ -679,6 +714,18 @@

        final Collection<SearchGroup<BytesRef>> topGroups = c1.getTopGroups(groupOffset, fillFields);
        final TopGroups groupsResult;
        if (VERBOSE) {
          System.out.println("TEST: topGroups:");
          if (topGroups == null) {
            System.out.println("  null");
          } else {
            for(SearchGroup<BytesRef> groupx : topGroups) {
              System.out.println("  " + groupToString(groupx.groupValue) + " sort=" + Arrays.toString(groupx.sortValues));
            }
          }
        }

        final TopGroups<BytesRef> topGroupsShards = searchShards(s, shards.subSearchers, q, groupSort, docSort, groupOffset, topNGroups, docOffset, docsPerGroup, getScores, getMaxScores);

        if (topGroups != null) {

@@ -734,7 +781,13 @@
            }
          }
        }
        assertEquals(docIDToID, expectedGroups, groupsResult, true, getScores);
        assertEquals(docIDToID, expectedGroups, groupsResult, true, true, true, getScores);

        // Confirm merged shards match:
        assertEquals(docIDToID, expectedGroups, topGroupsShards, true, false, fillFields, getScores);
        if (topGroupsShards != null) {
          verifyShards(shards.docStarts, topGroupsShards);
        }

        final boolean needsScores = getScores || getMaxScores || docSort == null;
        final BlockGroupingCollector c3 = new BlockGroupingCollector(groupSort, groupOffset+topNGroups, needsScores, lastDocInBlock);
@@ -758,6 +811,8 @@
          groupsResult2 = tempTopGroups2;
        }

        final TopGroups<BytesRef> topGroupsBlockShards = searchShards(s2, shards2.subSearchers, q, groupSort, docSort, groupOffset, topNGroups, docOffset, docsPerGroup, getScores, getMaxScores);

        if (expectedGroups != null) {
          // Fixup scores for reader2
          for (GroupDocs groupDocsHits : expectedGroups.groups) {
@@ -799,8 +854,11 @@
          }
        }

        assertEquals(docIDToID2, expectedGroups, groupsResult2, false, getScores);
        assertEquals(docIDToID2, expectedGroups, groupsResult2, false, true, true, getScores);
        assertEquals(docIDToID2, expectedGroups, topGroupsBlockShards, false, false, fillFields, getScores);
        }
        s.close();
        s2.close();
      } finally {
        FieldCache.DEFAULT.purge(r);
        if (r2 != null) {
@@ -816,7 +874,95 @@
    }
  }

  private void assertEquals(int[] docIDtoID, TopGroups expected, TopGroups actual, boolean verifyGroupValues, boolean testScores) {
  private void verifyShards(int[] docStarts, TopGroups<BytesRef> topGroups) {
    for(GroupDocs group : topGroups.groups) {
      assertTrue(group instanceof GroupDocsAndShards);
      GroupDocsAndShards withShards = (GroupDocsAndShards) group;
      for(int hitIDX=0;hitIDX<withShards.scoreDocs.length;hitIDX++) {
        final ScoreDoc sd = withShards.scoreDocs[hitIDX];
        assertEquals("doc=" + sd.doc + " wrong shard",
                     ReaderUtil.subIndex(sd.doc, docStarts),
                     withShards.shardIndex[hitIDX]);
      }
    }
  }

  private void assertEquals(Collection<SearchGroup<BytesRef>> groups1, Collection<SearchGroup<BytesRef>> groups2, boolean doSortValues) {
    assertEquals(groups1.size(), groups2.size());
    final Iterator<SearchGroup<BytesRef>> iter1 = groups1.iterator();
    final Iterator<SearchGroup<BytesRef>> iter2 = groups2.iterator();

    while(iter1.hasNext()) {
      assertTrue(iter2.hasNext());

      SearchGroup<BytesRef> group1 = iter1.next();
      SearchGroup<BytesRef> group2 = iter2.next();

      assertEquals(group1.groupValue, group2.groupValue);
      if (doSortValues) {
        assertEquals(group1.sortValues, group2.sortValues);
      }
    }
    assertFalse(iter2.hasNext());
  }

  private TopGroups<BytesRef> searchShards(IndexSearcher topSearcher, ShardSearcher[] subSearchers, Query query, Sort groupSort, Sort docSort, int groupOffset, int topNGroups, int docOffset,
                                           int topNDocs, boolean getScores, boolean getMaxScores) throws Exception {

    // TODO: swap in caching, all groups collector here
    // too...
    if (VERBOSE) {
      System.out.println("TEST: " + subSearchers.length + " shards: " + Arrays.toString(subSearchers));
    }
    // Run 1st pass collector to get top groups per shard
    final Weight w = query.weight(topSearcher);
    final List<Collection<SearchGroup<BytesRef>>> shardGroups = new ArrayList<Collection<SearchGroup<BytesRef>>>();
    for(int shardIDX=0;shardIDX<subSearchers.length;shardIDX++) {
      final TermFirstPassGroupingCollector c = new TermFirstPassGroupingCollector("group", groupSort, groupOffset+topNGroups);
      subSearchers[shardIDX].search(w, c);
      final Collection<SearchGroup<BytesRef>> topGroups = c.getTopGroups(0, true);
      if (topGroups != null) {
        if (VERBOSE) {
          System.out.println(" shard " + shardIDX + " s=" + subSearchers[shardIDX] + " " + topGroups.size() + " groups:");
          for(SearchGroup<BytesRef> group : topGroups) {
            System.out.println("    " + groupToString(group.groupValue) + " sort=" + Arrays.toString(group.sortValues));
          }
        }
        shardGroups.add(topGroups);
      }
    }

    final Collection<SearchGroup<BytesRef>> mergedTopGroups = SearchGroup.merge(shardGroups, groupOffset, topNGroups, groupSort);
    if (VERBOSE) {
      System.out.println(" merged:");
      if (mergedTopGroups == null) {
        System.out.println("  null");
      } else {
        for(SearchGroup<BytesRef> group : mergedTopGroups) {
          System.out.println("  " + groupToString(group.groupValue) + " sort=" + Arrays.toString(group.sortValues));
        }
      }
    }

    if (mergedTopGroups != null) {

      // Now 2nd pass:
      @SuppressWarnings("unchecked")
      final TopGroups<BytesRef>[] shardTopGroups = new TopGroups[subSearchers.length];
      for(int shardIDX=0;shardIDX<subSearchers.length;shardIDX++) {
        final TermSecondPassGroupingCollector c = new TermSecondPassGroupingCollector("group", mergedTopGroups, groupSort, docSort,
                                                                                      docOffset + topNDocs, getScores, getMaxScores, true);
        subSearchers[shardIDX].search(w, c);
        shardTopGroups[shardIDX] = c.getTopGroups(0);
      }

      return TopGroups.merge(shardTopGroups, groupSort, docSort, docOffset, topNDocs);
    } else {
      return null;
    }
  }

  private void assertEquals(int[] docIDtoID, TopGroups expected, TopGroups actual, boolean verifyGroupValues, boolean verifyTotalGroupCount, boolean verifySortValues, boolean testScores) {
    if (expected == null) {
      assertNull(actual);
      return;
@@ -826,7 +972,7 @@
    assertEquals(expected.groups.length, actual.groups.length);
    assertEquals(expected.totalHitCount, actual.totalHitCount);
    assertEquals(expected.totalGroupedHitCount, actual.totalGroupedHitCount);
    if (expected.totalGroupCount != null) {
    if (expected.totalGroupCount != null && verifyTotalGroupCount) {
      assertEquals(expected.totalGroupCount, actual.totalGroupCount);
    }

@@ -839,7 +985,9 @@
      if (verifyGroupValues) {
        assertEquals(expectedGroup.groupValue, actualGroup.groupValue);
      }
      if (verifySortValues) {
        assertArrayEquals(expectedGroup.groupSortValues, actualGroup.groupSortValues);
      }

      // TODO
      // assertEquals(expectedGroup.maxScore, actualGroup.maxScore);
@@ -860,8 +1008,32 @@
          // TODO: too anal for now
          //assertEquals(Float.NaN, actualFD.score);
        }
        if (verifySortValues) {
          assertArrayEquals(expectedFD.fields, actualFD.fields);
        }
      }
    }
  }

  private static class ShardSearcher extends IndexSearcher {
    private final IndexReader.AtomicReaderContext[] ctx;

    public ShardSearcher(IndexReader.AtomicReaderContext ctx, IndexReader.ReaderContext parent) {
      super(parent);
      this.ctx = new IndexReader.AtomicReaderContext[] {ctx};
    }

    public void search(Weight weight, Collector collector) throws IOException {
      search(ctx, weight, null, collector);
    }

    public TopDocs search(Weight weight, int topN) throws IOException {
      return search(ctx, weight, null, topN);
    }

    @Override
    public String toString() {
      return "ShardSearcher(" + ctx[0] + ")";
    }
  }
}
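The verifyShards check above depends on ReaderUtil.subIndex mapping a merged, searcher-global docID back to the shard that produced it, using the ascending docStarts array built in ShardState. For illustration only, a linear-scan equivalent of that lookup (the real ReaderUtil.subIndex performs a binary search; the shardForDoc name is hypothetical):

    // Find the shard whose doc range contains docID; docStarts is
    // ascending and docStarts[0] == 0.
    static int shardForDoc(int docID, int[] docStarts) {
      int shard = 0;
      for (int i = 1; i < docStarts.length; i++) {
        if (docStarts[i] <= docID) {
          shard = i;   // docID is at or beyond this shard's first doc
        } else {
          break;
        }
      }
      return shard;
    }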
@@ -476,8 +476,8 @@ class ElevationComparatorSource extends FieldComparatorSource {
  }

  @Override
  public FieldComparator newComparator(final String fieldname, final int numHits, int sortPos, boolean reversed) throws IOException {
    return new FieldComparator() {
  public FieldComparator<Integer> newComparator(final String fieldname, final int numHits, int sortPos, boolean reversed) throws IOException {
    return new FieldComparator<Integer>() {

      FieldCache.DocTermsIndex idIndex;
      private final int[] values = new int[numHits];

@@ -517,7 +517,7 @@ class ElevationComparatorSource extends FieldComparatorSource {
      }

      @Override
      public Comparable value(int slot) {
      public Integer value(int slot) {
        return values[slot];
      }
    };
@@ -102,8 +102,8 @@ public class RandomSortField extends FieldType {

  private static FieldComparatorSource randomComparatorSource = new FieldComparatorSource() {
    @Override
    public FieldComparator newComparator(final String fieldname, final int numHits, int sortPos, boolean reversed) throws IOException {
      return new FieldComparator() {
    public FieldComparator<Integer> newComparator(final String fieldname, final int numHits, int sortPos, boolean reversed) throws IOException {
      return new FieldComparator<Integer>() {
        int seed;
        private final int[] values = new int[numHits];
        int bottomVal;

@@ -135,7 +135,7 @@ public class RandomSortField extends FieldType {
        }

        @Override
        public Comparable value(int slot) {
        public Integer value(int slot) {
          return values[slot];
        }
      };
@@ -56,7 +56,7 @@ public class MissingStringLastComparatorSource extends FieldComparatorSource {

// Copied from Lucene's TermOrdValComparator and modified since the Lucene version couldn't
// be extended.
class TermOrdValComparator_SML extends FieldComparator {
class TermOrdValComparator_SML extends FieldComparator<BytesRef> {
  private static final int NULL_ORD = Integer.MAX_VALUE;

  private final int[] ords;

@@ -98,7 +98,7 @@ class TermOrdValComparator_SML extends FieldComparator {
  }

  @Override
  public Comparable<?> value(int slot) {
  public BytesRef value(int slot) {
    throw new UnsupportedOperationException();
  }

@@ -111,7 +111,7 @@ class TermOrdValComparator_SML extends FieldComparator {
  // ords) per-segment comparator.  NOTE: this is messy;
  // we do this only because hotspot can't reliably inline
  // the underlying array access when looking up doc->ord
  private static abstract class PerSegmentComparator extends FieldComparator {
  private static abstract class PerSegmentComparator extends FieldComparator<BytesRef> {
    protected TermOrdValComparator_SML parent;
    protected final int[] ords;
    protected final BytesRef[] values;

@@ -199,7 +199,7 @@ class TermOrdValComparator_SML extends FieldComparator {
    }

    @Override
    public Comparable<?> value(int slot) {
    public BytesRef value(int slot) {
      return values==null ? parent.NULL_VAL : values[slot];
    }
  }
@@ -141,7 +141,7 @@ public abstract class ValueSource implements Serializable {
 * off of the {@link org.apache.solr.search.function.DocValues} for a ValueSource
 * instead of the normal Lucene FieldComparator that works off of a FieldCache.
 */
class ValueSourceComparator extends FieldComparator {
class ValueSourceComparator extends FieldComparator<Double> {
  private final double[] values;
  private DocValues docVals;
  private double bottom;

@@ -195,7 +195,7 @@ public abstract class ValueSource implements Serializable {
  }

  @Override
  public Comparable value(int slot) {
  public Double value(int slot) {
    return values[slot];
  }
}