LUCENE-5399: add missingFirst/last support when sorting by Type.STRING; speed up deep paging; fix solr's distributed group sort for certain field types

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1558865 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael McCandless 2014-01-16 17:40:25 +00:00
commit 7b34ac5c61
25 changed files with 853 additions and 649 deletions

View File

@ -97,6 +97,12 @@ New Features
AnalyzingInfixSuggester but boosts suggestions that matched tokens AnalyzingInfixSuggester but boosts suggestions that matched tokens
with lower positions. (Remi Melisson via Mike McCandless) with lower positions. (Remi Melisson via Mike McCandless)
* LUCENE-4399: When sorting by String (SortField.STRING), you can now
specify whether missing values should be sorted first (the default),
using SortField.setMissingValue(SortField.STRING_FIRST), or last,
using SortField.setMissingValue(SortField.STRING_LAST). (Rob Muir,
Mike McCandless)
Build Build
@ -183,6 +189,9 @@ Changes in Runtime Behavior
AlreadyClosedException if the refCount in incremented but AlreadyClosedException if the refCount in incremented but
is less that 1. (Simon Willnauer) is less that 1. (Simon Willnauer)
* LUCENE-4399: Deep paging using IndexSearcher.searchAfter when
sorting by fields is faster (Rob Muir, Mike McCandless)
Documentation Documentation
* LUCENE-5384: Add some tips for making tokenfilters and tokenizers * LUCENE-5384: Add some tips for making tokenfilters and tokenizers

View File

@ -62,6 +62,18 @@ import org.apache.lucene.util.BytesRef;
* <li> {@link #compareBottom} Compare a new hit (docID) * <li> {@link #compareBottom} Compare a new hit (docID)
* against the "weakest" (bottom) entry in the queue. * against the "weakest" (bottom) entry in the queue.
* *
* <li> {@link #setTopValue} This method is called by
* {@link TopFieldCollector} to notify the
* FieldComparator of the top most value, which is
* used by future calls to {@link #compareTop}.
*
* <li> {@link #compareBottom} Compare a new hit (docID)
* against the "weakest" (bottom) entry in the queue.
*
* <li> {@link #compareTop} Compare a new hit (docID)
* against the top value previously set by a call to
* {@link #setTopValue}.
*
* <li> {@link #copy} Installs a new hit into the * <li> {@link #copy} Installs a new hit into the
* priority queue. The {@link FieldValueHitQueue} * priority queue. The {@link FieldValueHitQueue}
* calls this method when a new hit is competitive. * calls this method when a new hit is competitive.
@ -104,7 +116,15 @@ public abstract class FieldComparator<T> {
public abstract void setBottom(final int slot); public abstract void setBottom(final int slot);
/** /**
* Compare the bottom of the queue with doc. This will * Record the top value, for future calls to {@link
* #compareTop}. This is only called for searches that
* use searchAfter (deep paging), and is called before any
* calls to {@link #setNextReader}.
*/
public abstract void setTopValue(T value);
/**
* Compare the bottom of the queue with this doc. This will
* only invoked after setBottom has been called. This * only invoked after setBottom has been called. This
* should return the same result as {@link * should return the same result as {@link
* #compare(int,int)}} as if bottom were slot1 and the new * #compare(int,int)}} as if bottom were slot1 and the new
@ -122,6 +142,22 @@ public abstract class FieldComparator<T> {
*/ */
public abstract int compareBottom(int doc) throws IOException; public abstract int compareBottom(int doc) throws IOException;
/**
* Compare the top value with this doc. This will
* only invoked after setTopValue has been called. This
* should return the same result as {@link
* #compare(int,int)}} as if topValue were slot1 and the new
* document were slot 2. This is only called for searches that
* use searchAfter (deep paging).
*
* @param doc that was hit
* @return any N < 0 if the doc's value is sorted after
* the bottom entry (not competitive), any N > 0 if the
* doc's value is sorted before the bottom entry and 0 if
* they are equal.
*/
public abstract int compareTop(int doc) throws IOException;
/** /**
* This method is called when a new hit is competitive. * This method is called when a new hit is competitive.
* You should copy any state associated with this document * You should copy any state associated with this document
@ -184,10 +220,6 @@ public abstract class FieldComparator<T> {
} }
} }
/** Returns negative result if the doc's value is less
* than the provided value. */
public abstract int compareDocToValue(int doc, T value) throws IOException;
/** /**
* Base FieldComparator class for numeric types * Base FieldComparator class for numeric types
*/ */
@ -223,6 +255,7 @@ public abstract class FieldComparator<T> {
private final DoubleParser parser; private final DoubleParser parser;
private FieldCache.Doubles currentReaderValues; private FieldCache.Doubles currentReaderValues;
private double bottom; private double bottom;
private double topValue;
DoubleComparator(int numHits, String field, FieldCache.Parser parser, Double missingValue) { DoubleComparator(int numHits, String field, FieldCache.Parser parser, Double missingValue) {
super(field, missingValue); super(field, missingValue);
@ -272,21 +305,25 @@ public abstract class FieldComparator<T> {
this.bottom = values[bottom]; this.bottom = values[bottom];
} }
@Override
public void setTopValue(Double value) {
topValue = value;
}
@Override @Override
public Double value(int slot) { public Double value(int slot) {
return Double.valueOf(values[slot]); return Double.valueOf(values[slot]);
} }
@Override @Override
public int compareDocToValue(int doc, Double valueObj) { public int compareTop(int doc) {
final double value = valueObj.doubleValue();
double docValue = currentReaderValues.get(doc); double docValue = currentReaderValues.get(doc);
// Test for docValue == 0 to save Bits.get method call for // Test for docValue == 0 to save Bits.get method call for
// the common case (doc has value and value is non-zero): // the common case (doc has value and value is non-zero):
if (docsWithField != null && docValue == 0 && !docsWithField.get(doc)) { if (docsWithField != null && docValue == 0 && !docsWithField.get(doc)) {
docValue = missingValue; docValue = missingValue;
} }
return Double.compare(docValue, value); return Double.compare(topValue, docValue);
} }
} }
@ -297,6 +334,7 @@ public abstract class FieldComparator<T> {
private final FloatParser parser; private final FloatParser parser;
private FieldCache.Floats currentReaderValues; private FieldCache.Floats currentReaderValues;
private float bottom; private float bottom;
private float topValue;
FloatComparator(int numHits, String field, FieldCache.Parser parser, Float missingValue) { FloatComparator(int numHits, String field, FieldCache.Parser parser, Float missingValue) {
super(field, missingValue); super(field, missingValue);
@ -347,21 +385,25 @@ public abstract class FieldComparator<T> {
this.bottom = values[bottom]; this.bottom = values[bottom];
} }
@Override
public void setTopValue(Float value) {
topValue = value;
}
@Override @Override
public Float value(int slot) { public Float value(int slot) {
return Float.valueOf(values[slot]); return Float.valueOf(values[slot]);
} }
@Override @Override
public int compareDocToValue(int doc, Float valueObj) { public int compareTop(int doc) {
final float value = valueObj.floatValue();
float docValue = currentReaderValues.get(doc); float docValue = currentReaderValues.get(doc);
// Test for docValue == 0 to save Bits.get method call for // Test for docValue == 0 to save Bits.get method call for
// the common case (doc has value and value is non-zero): // the common case (doc has value and value is non-zero):
if (docsWithField != null && docValue == 0 && !docsWithField.get(doc)) { if (docsWithField != null && docValue == 0 && !docsWithField.get(doc)) {
docValue = missingValue; docValue = missingValue;
} }
return Float.compare(docValue, value); return Float.compare(topValue, docValue);
} }
} }
@ -372,6 +414,7 @@ public abstract class FieldComparator<T> {
private final IntParser parser; private final IntParser parser;
private FieldCache.Ints currentReaderValues; private FieldCache.Ints currentReaderValues;
private int bottom; // Value of bottom of queue private int bottom; // Value of bottom of queue
private int topValue;
IntComparator(int numHits, String field, FieldCache.Parser parser, Integer missingValue) { IntComparator(int numHits, String field, FieldCache.Parser parser, Integer missingValue) {
super(field, missingValue); super(field, missingValue);
@ -421,21 +464,25 @@ public abstract class FieldComparator<T> {
this.bottom = values[bottom]; this.bottom = values[bottom];
} }
@Override
public void setTopValue(Integer value) {
topValue = value;
}
@Override @Override
public Integer value(int slot) { public Integer value(int slot) {
return Integer.valueOf(values[slot]); return Integer.valueOf(values[slot]);
} }
@Override @Override
public int compareDocToValue(int doc, Integer valueObj) { public int compareTop(int doc) {
final int value = valueObj.intValue();
int docValue = currentReaderValues.get(doc); int docValue = currentReaderValues.get(doc);
// Test for docValue == 0 to save Bits.get method call for // Test for docValue == 0 to save Bits.get method call for
// the common case (doc has value and value is non-zero): // the common case (doc has value and value is non-zero):
if (docsWithField != null && docValue == 0 && !docsWithField.get(doc)) { if (docsWithField != null && docValue == 0 && !docsWithField.get(doc)) {
docValue = missingValue; docValue = missingValue;
} }
return Integer.compare(docValue, value); return Integer.compare(topValue, docValue);
} }
} }
@ -446,6 +493,7 @@ public abstract class FieldComparator<T> {
private final LongParser parser; private final LongParser parser;
private FieldCache.Longs currentReaderValues; private FieldCache.Longs currentReaderValues;
private long bottom; private long bottom;
private long topValue;
LongComparator(int numHits, String field, FieldCache.Parser parser, Long missingValue) { LongComparator(int numHits, String field, FieldCache.Parser parser, Long missingValue) {
super(field, missingValue); super(field, missingValue);
@ -497,21 +545,25 @@ public abstract class FieldComparator<T> {
this.bottom = values[bottom]; this.bottom = values[bottom];
} }
@Override
public void setTopValue(Long value) {
topValue = value;
}
@Override @Override
public Long value(int slot) { public Long value(int slot) {
return Long.valueOf(values[slot]); return Long.valueOf(values[slot]);
} }
@Override @Override
public int compareDocToValue(int doc, Long valueObj) { public int compareTop(int doc) {
final long value = valueObj.longValue();
long docValue = currentReaderValues.get(doc); long docValue = currentReaderValues.get(doc);
// Test for docValue == 0 to save Bits.get method call for // Test for docValue == 0 to save Bits.get method call for
// the common case (doc has value and value is non-zero): // the common case (doc has value and value is non-zero):
if (docsWithField != null && docValue == 0 && !docsWithField.get(doc)) { if (docsWithField != null && docValue == 0 && !docsWithField.get(doc)) {
docValue = missingValue; docValue = missingValue;
} }
return Long.compare(docValue, value); return Long.compare(topValue, docValue);
} }
} }
@ -525,7 +577,8 @@ public abstract class FieldComparator<T> {
private final float[] scores; private final float[] scores;
private float bottom; private float bottom;
private Scorer scorer; private Scorer scorer;
private float topValue;
RelevanceComparator(int numHits) { RelevanceComparator(int numHits) {
scores = new float[numHits]; scores = new float[numHits];
} }
@ -558,6 +611,11 @@ public abstract class FieldComparator<T> {
this.bottom = scores[bottom]; this.bottom = scores[bottom];
} }
@Override
public void setTopValue(Float value) {
topValue = value;
}
@Override @Override
public void setScorer(Scorer scorer) { public void setScorer(Scorer scorer) {
// wrap with a ScoreCachingWrappingScorer so that successive calls to // wrap with a ScoreCachingWrappingScorer so that successive calls to
@ -584,11 +642,10 @@ public abstract class FieldComparator<T> {
} }
@Override @Override
public int compareDocToValue(int doc, Float valueObj) throws IOException { public int compareTop(int doc) throws IOException {
final float value = valueObj.floatValue();
float docValue = scorer.score(); float docValue = scorer.score();
assert !Float.isNaN(docValue); assert !Float.isNaN(docValue);
return Float.compare(value, docValue); return Float.compare(docValue, topValue);
} }
} }
@ -597,6 +654,7 @@ public abstract class FieldComparator<T> {
private final int[] docIDs; private final int[] docIDs;
private int docBase; private int docBase;
private int bottom; private int bottom;
private int topValue;
DocComparator(int numHits) { DocComparator(int numHits) {
docIDs = new int[numHits]; docIDs = new int[numHits];
@ -633,16 +691,20 @@ public abstract class FieldComparator<T> {
this.bottom = docIDs[bottom]; this.bottom = docIDs[bottom];
} }
@Override
public void setTopValue(Integer value) {
topValue = value;
}
@Override @Override
public Integer value(int slot) { public Integer value(int slot) {
return Integer.valueOf(docIDs[slot]); return Integer.valueOf(docIDs[slot]);
} }
@Override @Override
public int compareDocToValue(int doc, Integer valueObj) { public int compareTop(int doc) {
final int value = valueObj.intValue();
int docValue = docBase + doc; int docValue = docBase + doc;
return Integer.compare(docValue, value); return Integer.compare(topValue, docValue);
} }
} }
@ -700,13 +762,42 @@ public abstract class FieldComparator<T> {
@lucene.internal */ @lucene.internal */
BytesRef bottomValue; BytesRef bottomValue;
/** Set by setTopValue. */
BytesRef topValue;
boolean topSameReader;
int topOrd;
private int docBase;
final BytesRef tempBR = new BytesRef(); final BytesRef tempBR = new BytesRef();
/** -1 if missing values are sorted first, 1 if they are
* sorted last */
final int missingSortCmp;
/** Which ordinal to use for a missing value. */
final int missingOrd;
/** Creates this, sorting missing values first. */
public TermOrdValComparator(int numHits, String field) { public TermOrdValComparator(int numHits, String field) {
this(numHits, field, false);
}
/** Creates this, with control over how missing values
* are sorted. Pass sortMissingLast=true to put
* missing values at the end. */
public TermOrdValComparator(int numHits, String field, boolean sortMissingLast) {
ords = new int[numHits]; ords = new int[numHits];
values = new BytesRef[numHits]; values = new BytesRef[numHits];
readerGen = new int[numHits]; readerGen = new int[numHits];
this.field = field; this.field = field;
if (sortMissingLast) {
missingSortCmp = 1;
missingOrd = Integer.MAX_VALUE;
} else {
missingSortCmp = -1;
missingOrd = -1;
}
} }
@Override @Override
@ -721,140 +812,78 @@ public abstract class FieldComparator<T> {
if (val2 == null) { if (val2 == null) {
return 0; return 0;
} }
return -1; return missingSortCmp;
} else if (val2 == null) { } else if (val2 == null) {
return 1; return -missingSortCmp;
} }
return val1.compareTo(val2); return val1.compareTo(val2);
} }
@Override @Override
public int compareBottom(int doc) { public int compareBottom(int doc) {
throw new UnsupportedOperationException(); assert bottomSlot != -1;
int docOrd = termsIndex.getOrd(doc);
if (docOrd == -1) {
docOrd = missingOrd;
}
if (bottomSameReader) {
// ord is precisely comparable, even in the equal case
return bottomOrd - docOrd;
} else if (bottomOrd >= docOrd) {
// the equals case always means bottom is > doc
// (because we set bottomOrd to the lower bound in
// setBottom):
return 1;
} else {
return -1;
}
} }
@Override @Override
public void copy(int slot, int doc) { public void copy(int slot, int doc) {
throw new UnsupportedOperationException();
}
@Override
public int compareDocToValue(int doc, BytesRef value) {
int ord = termsIndex.getOrd(doc); int ord = termsIndex.getOrd(doc);
if (ord == -1) { if (ord == -1) {
if (value == null) { ord = missingOrd;
return 0; values[slot] = null;
} else {
assert ord >= 0;
if (values[slot] == null) {
values[slot] = new BytesRef();
} }
return -1; termsIndex.lookupOrd(ord, values[slot]);
} else if (value == null) {
return 1;
} }
termsIndex.lookupOrd(ord, tempBR); ords[slot] = ord;
return tempBR.compareTo(value); readerGen[slot] = currentReaderGen;
} }
/** Base class for specialized (per bit width of the
* ords) per-segment comparator. NOTE: this is messy;
* we do this only because hotspot can't reliably inline
* the underlying array access when looking up doc->ord
* @lucene.internal
*/
abstract class PerSegmentComparator extends FieldComparator<BytesRef> {
@Override
public FieldComparator<BytesRef> setNextReader(AtomicReaderContext context) throws IOException {
return TermOrdValComparator.this.setNextReader(context);
}
@Override
public int compare(int slot1, int slot2) {
return TermOrdValComparator.this.compare(slot1, slot2);
}
@Override
public void setBottom(final int bottom) {
TermOrdValComparator.this.setBottom(bottom);
}
@Override
public BytesRef value(int slot) {
return TermOrdValComparator.this.value(slot);
}
@Override
public int compareValues(BytesRef val1, BytesRef val2) {
if (val1 == null) {
if (val2 == null) {
return 0;
}
return -1;
} else if (val2 == null) {
return 1;
}
return val1.compareTo(val2);
}
@Override
public int compareDocToValue(int doc, BytesRef value) {
return TermOrdValComparator.this.compareDocToValue(doc, value);
}
}
// Used per-segment when docToOrd is null:
private final class AnyOrdComparator extends PerSegmentComparator {
private final SortedDocValues termsIndex;
private final int docBase;
public AnyOrdComparator(SortedDocValues termsIndex, int docBase) {
this.termsIndex = termsIndex;
this.docBase = docBase;
}
@Override
public int compareBottom(int doc) {
assert bottomSlot != -1;
final int docOrd = termsIndex.getOrd(doc);
if (bottomSameReader) {
// ord is precisely comparable, even in the equal case
return bottomOrd - docOrd;
} else if (bottomOrd >= docOrd) {
// the equals case always means bottom is > doc
// (because we set bottomOrd to the lower bound in
// setBottom):
return 1;
} else {
return -1;
}
}
@Override
public void copy(int slot, int doc) {
final int ord = termsIndex.getOrd(doc);
ords[slot] = ord;
if (ord == -1) {
values[slot] = null;
} else {
assert ord >= 0;
if (values[slot] == null) {
values[slot] = new BytesRef();
}
termsIndex.lookupOrd(ord, values[slot]);
}
readerGen[slot] = currentReaderGen;
}
}
@Override @Override
public FieldComparator<BytesRef> setNextReader(AtomicReaderContext context) throws IOException { public FieldComparator<BytesRef> setNextReader(AtomicReaderContext context) throws IOException {
final int docBase = context.docBase; docBase = context.docBase;
termsIndex = FieldCache.DEFAULT.getTermsIndex(context.reader(), field); termsIndex = FieldCache.DEFAULT.getTermsIndex(context.reader(), field);
FieldComparator<BytesRef> perSegComp = new AnyOrdComparator(termsIndex, docBase);
currentReaderGen++; currentReaderGen++;
if (topValue != null) {
// Recompute topOrd/SameReader
int ord = termsIndex.lookupTerm(topValue);
if (ord >= 0) {
topSameReader = true;
topOrd = ord;
} else {
topSameReader = false;
topOrd = -ord-2;
}
} else {
topOrd = missingOrd;
topSameReader = true;
}
//System.out.println(" setNextReader topOrd=" + topOrd + " topSameReader=" + topSameReader);
if (bottomSlot != -1) { if (bottomSlot != -1) {
perSegComp.setBottom(bottomSlot); // Recompute bottomOrd/SameReader
setBottom(bottomSlot);
} }
return perSegComp; return this;
} }
@Override @Override
@ -867,18 +896,18 @@ public abstract class FieldComparator<T> {
bottomSameReader = true; bottomSameReader = true;
} else { } else {
if (bottomValue == null) { if (bottomValue == null) {
// -1 ord is null for all segments // missingOrd is null for all segments
assert ords[bottomSlot] == -1; assert ords[bottomSlot] == missingOrd;
bottomOrd = -1; bottomOrd = missingOrd;
bottomSameReader = true; bottomSameReader = true;
readerGen[bottomSlot] = currentReaderGen; readerGen[bottomSlot] = currentReaderGen;
} else { } else {
final int index = termsIndex.lookupTerm(bottomValue); final int ord = termsIndex.lookupTerm(bottomValue);
if (index < 0) { if (ord < 0) {
bottomOrd = -index - 2; bottomOrd = -ord - 2;
bottomSameReader = false; bottomSameReader = false;
} else { } else {
bottomOrd = index; bottomOrd = ord;
// exact value match // exact value match
bottomSameReader = true; bottomSameReader = true;
readerGen[bottomSlot] = currentReaderGen; readerGen[bottomSlot] = currentReaderGen;
@ -888,28 +917,77 @@ public abstract class FieldComparator<T> {
} }
} }
@Override
public void setTopValue(BytesRef value) {
// null is fine: it means the last doc of the prior
// search was missing this value
topValue = value;
//System.out.println("setTopValue " + topValue);
}
@Override @Override
public BytesRef value(int slot) { public BytesRef value(int slot) {
return values[slot]; return values[slot];
} }
@Override
public int compareTop(int doc) {
int ord = termsIndex.getOrd(doc);
if (ord == -1) {
ord = missingOrd;
}
if (topSameReader) {
// ord is precisely comparable, even in the equal
// case
//System.out.println("compareTop doc=" + doc + " ord=" + ord + " ret=" + (topOrd-ord));
return topOrd - ord;
} else if (ord <= topOrd) {
// the equals case always means doc is < value
// (because we set lastOrd to the lower bound)
return 1;
} else {
return -1;
}
}
@Override
public int compareValues(BytesRef val1, BytesRef val2) {
if (val1 == null) {
if (val2 == null) {
return 0;
}
return missingSortCmp;
} else if (val2 == null) {
return -missingSortCmp;
}
return val1.compareTo(val2);
}
} }
// just used internally in this comparator
private static final byte[] MISSING_BYTES = new byte[0];
/** Sorts by field's natural Term sort order. All /** Sorts by field's natural Term sort order. All
* comparisons are done using BytesRef.compareTo, which is * comparisons are done using BytesRef.compareTo, which is
* slow for medium to large result sets but possibly * slow for medium to large result sets but possibly
* very fast for very small results sets. */ * very fast for very small results sets. */
// TODO: should we remove this? who really uses it?
public static final class TermValComparator extends FieldComparator<BytesRef> { public static final class TermValComparator extends FieldComparator<BytesRef> {
// sentinels, just used internally in this comparator
private static final byte[] MISSING_BYTES = new byte[0];
private static final byte[] NON_MISSING_BYTES = new byte[0];
private BytesRef[] values; private BytesRef[] values;
private BinaryDocValues docTerms; private BinaryDocValues docTerms;
private Bits docsWithField; private Bits docsWithField;
private final String field; private final String field;
private BytesRef bottom; private BytesRef bottom;
private BytesRef topValue;
private final BytesRef tempBR = new BytesRef(); private final BytesRef tempBR = new BytesRef();
// TODO: add missing first/last support here?
/** Sole constructor. */
TermValComparator(int numHits, String field) { TermValComparator(int numHits, String field) {
values = new BytesRef[numHits]; values = new BytesRef[numHits];
this.field = field; this.field = field;
@ -919,12 +997,12 @@ public abstract class FieldComparator<T> {
public int compare(int slot1, int slot2) { public int compare(int slot1, int slot2) {
final BytesRef val1 = values[slot1]; final BytesRef val1 = values[slot1];
final BytesRef val2 = values[slot2]; final BytesRef val2 = values[slot2];
if (val1 == null) { if (val1.bytes == MISSING_BYTES) {
if (val2 == null) { if (val2.bytes == MISSING_BYTES) {
return 0; return 0;
} }
return -1; return -1;
} else if (val2 == null) { } else if (val2.bytes == MISSING_BYTES) {
return 1; return 1;
} }
@ -934,18 +1012,8 @@ public abstract class FieldComparator<T> {
@Override @Override
public int compareBottom(int doc) { public int compareBottom(int doc) {
docTerms.get(doc, tempBR); docTerms.get(doc, tempBR);
if (tempBR.length == 0 && docsWithField.get(doc) == false) { setMissingBytes(doc, tempBR);
tempBR.bytes = MISSING_BYTES; return compareValues(bottom, tempBR);
}
if (bottom.bytes == MISSING_BYTES) {
if (tempBR.bytes == MISSING_BYTES) {
return 0;
}
return -1;
} else if (tempBR.bytes == MISSING_BYTES) {
return 1;
}
return bottom.compareTo(tempBR);
} }
@Override @Override
@ -954,9 +1022,7 @@ public abstract class FieldComparator<T> {
values[slot] = new BytesRef(); values[slot] = new BytesRef();
} }
docTerms.get(doc, values[slot]); docTerms.get(doc, values[slot]);
if (values[slot].length == 0 && docsWithField.get(doc) == false) { setMissingBytes(doc, values[slot]);
values[slot].bytes = MISSING_BYTES;
}
} }
@Override @Override
@ -971,6 +1037,14 @@ public abstract class FieldComparator<T> {
this.bottom = values[bottom]; this.bottom = values[bottom];
} }
@Override
public void setTopValue(BytesRef value) {
if (value == null) {
throw new IllegalArgumentException("value cannot be null");
}
topValue = value;
}
@Override @Override
public BytesRef value(int slot) { public BytesRef value(int slot) {
return values[slot]; return values[slot];
@ -978,24 +1052,34 @@ public abstract class FieldComparator<T> {
@Override @Override
public int compareValues(BytesRef val1, BytesRef val2) { public int compareValues(BytesRef val1, BytesRef val2) {
if (val1 == null) { // missing always sorts first:
if (val2 == null) { if (val1.bytes == MISSING_BYTES) {
if (val2.bytes == MISSING_BYTES) {
return 0; return 0;
} }
return -1; return -1;
} else if (val2 == null) { } else if (val2.bytes == MISSING_BYTES) {
return 1; return 1;
} }
return val1.compareTo(val2); return val1.compareTo(val2);
} }
@Override @Override
public int compareDocToValue(int doc, BytesRef value) { public int compareTop(int doc) {
docTerms.get(doc, tempBR); docTerms.get(doc, tempBR);
if (tempBR.length == 0 && docsWithField.get(doc) == false) { setMissingBytes(doc, tempBR);
tempBR.bytes = MISSING_BYTES; return compareValues(topValue, tempBR);
}
private void setMissingBytes(int doc, BytesRef br) {
if (br.length == 0) {
br.offset = 0;
if (docsWithField.get(doc) == false) {
br.bytes = MISSING_BYTES;
} else {
br.bytes = NON_MISSING_BYTES;
}
} }
return tempBR.compareTo(value);
} }
} }
} }

View File

@ -17,6 +17,8 @@ package org.apache.lucene.search;
* limitations under the License. * limitations under the License.
*/ */
import java.util.Arrays;
/** /**
* Expert: A ScoreDoc which also contains information about * Expert: A ScoreDoc which also contains information about
* how to sort the referenced document. In addition to the * how to sort the referenced document. In addition to the
@ -69,14 +71,10 @@ public class FieldDoc extends ScoreDoc {
@Override @Override
public String toString() { public String toString() {
// super.toString returns the doc and score information, so just add the // super.toString returns the doc and score information, so just add the
// fields information // fields information
StringBuilder sb = new StringBuilder(super.toString()); StringBuilder sb = new StringBuilder(super.toString());
sb.append("["); sb.append(" fields=");
for (int i = 0; i < fields.length; i++) { sb.append(Arrays.toString(fields));
sb.append(fields[i]).append(", ");
}
sb.setLength(sb.length() - 2); // discard last ", "
sb.append("]");
return sb.toString(); return sb.toString();
} }
} }

View File

@ -106,6 +106,9 @@ public class SortField {
// Used for 'sortMissingFirst/Last' // Used for 'sortMissingFirst/Last'
public Object missingValue = null; public Object missingValue = null;
// Only used with type=STRING
public boolean sortMissingLast;
/** Creates a sort by terms in the given field with the type of term /** Creates a sort by terms in the given field with the type of term
* values explicitly given. * values explicitly given.
* @param field Name of field to sort by. Can be <code>null</code> if * @param field Name of field to sort by. Can be <code>null</code> if
@ -165,13 +168,34 @@ public class SortField {
this.reverse = reverse; this.reverse = reverse;
this.parser = parser; this.parser = parser;
} }
/** Pass this to {@link #setMissingValue} to have missing
* string values sort first. */
public final static Object STRING_FIRST = new Object() {
@Override
public String toString() {
return "SortField.STRING_FIRST";
}
};
public SortField setMissingValue(Object missingValue) { /** Pass this to {@link #setMissingValue} to have missing
if (type != Type.INT && type != Type.FLOAT && type != Type.LONG && type != Type.DOUBLE) { * string values sort last. */
throw new IllegalArgumentException( "Missing value only works for numeric types" ); public final static Object STRING_LAST = new Object() {
@Override
public String toString() {
return "SortField.STRING_LAST";
}
};
public void setMissingValue(Object missingValue) {
if (type == Type.STRING) {
if (missingValue != STRING_FIRST && missingValue != STRING_LAST) {
throw new IllegalArgumentException("For STRING type, missing value must be either STRING_FIRST or STRING_LAST");
}
} else if (type != Type.INT && type != Type.FLOAT && type != Type.LONG && type != Type.DOUBLE) {
throw new IllegalArgumentException("Missing value only works for numeric or STRING types");
} }
this.missingValue = missingValue; this.missingValue = missingValue;
return this;
} }
/** Creates a sort with a custom comparison function. /** Creates a sort with a custom comparison function.
@ -294,6 +318,10 @@ public class SortField {
} }
if (reverse) buffer.append('!'); if (reverse) buffer.append('!');
if (missingValue != null) {
buffer.append(" missingValue=");
buffer.append(missingValue);
}
return buffer.toString(); return buffer.toString();
} }
@ -376,9 +404,10 @@ public class SortField {
return comparatorSource.newComparator(field, numHits, sortPos, reverse); return comparatorSource.newComparator(field, numHits, sortPos, reverse);
case STRING: case STRING:
return new FieldComparator.TermOrdValComparator(numHits, field); return new FieldComparator.TermOrdValComparator(numHits, field, missingValue == STRING_LAST);
case STRING_VAL: case STRING_VAL:
// TODO: should we remove this? who really uses it?
return new FieldComparator.TermValComparator(numHits, field); return new FieldComparator.TermValComparator(numHits, field);
case REWRITEABLE: case REWRITEABLE:

View File

@ -869,6 +869,13 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
// Must set maxScore to NEG_INF, or otherwise Math.max always returns NaN. // Must set maxScore to NEG_INF, or otherwise Math.max always returns NaN.
maxScore = Float.NEGATIVE_INFINITY; maxScore = Float.NEGATIVE_INFINITY;
// Tell all comparators their top value:
for(int i=0;i<comparators.length;i++) {
@SuppressWarnings("unchecked")
FieldComparator<Object> comparator = (FieldComparator<Object>) comparators[i];
comparator.setTopValue(after.fields[i]);
}
} }
void updateBottom(int doc, float score) { void updateBottom(int doc, float score) {
@ -880,37 +887,9 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
@SuppressWarnings({"unchecked", "rawtypes"}) @SuppressWarnings({"unchecked", "rawtypes"})
@Override @Override
public void collect(int doc) throws IOException { public void collect(int doc) throws IOException {
totalHits++;
//System.out.println(" collect doc=" + doc); //System.out.println(" collect doc=" + doc);
// Check if this hit was already collected on a totalHits++;
// previous page:
boolean sameValues = true;
for(int compIDX=0;compIDX<comparators.length;compIDX++) {
final FieldComparator comp = comparators[compIDX];
final int cmp = reverseMul[compIDX] * comp.compareDocToValue(doc, after.fields[compIDX]);
if (cmp < 0) {
// Already collected on a previous page
//System.out.println(" skip: before");
return;
} else if (cmp > 0) {
// Not yet collected
sameValues = false;
//System.out.println(" keep: after");
break;
}
}
// Tie-break by docID:
if (sameValues && doc <= afterDoc) {
// Already collected on a previous page
//System.out.println(" skip: tie-break");
return;
}
collectedHits++;
float score = Float.NaN; float score = Float.NaN;
if (trackMaxScore) { if (trackMaxScore) {
@ -921,7 +900,8 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
} }
if (queueFull) { if (queueFull) {
// Fastmatch: return if this hit is not competitive // Fastmatch: return if this hit is no better than
// the worst hit currently in the queue:
for (int i = 0;; i++) { for (int i = 0;; i++) {
final int c = reverseMul[i] * comparators[i].compareBottom(doc); final int c = reverseMul[i] * comparators[i].compareBottom(doc);
if (c < 0) { if (c < 0) {
@ -939,7 +919,35 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
break; break;
} }
} }
}
// Check if this hit was already collected on a
// previous page:
boolean sameValues = true;
for(int compIDX=0;compIDX<comparators.length;compIDX++) {
final FieldComparator comp = comparators[compIDX];
final int cmp = reverseMul[compIDX] * comp.compareTop(doc);
if (cmp > 0) {
// Already collected on a previous page
//System.out.println(" skip: before");
return;
} else if (cmp < 0) {
// Not yet collected
sameValues = false;
//System.out.println(" keep: after; reverseMul=" + reverseMul[compIDX]);
break;
}
}
// Tie-break by docID:
if (sameValues && doc <= afterDoc) {
// Already collected on a previous page
//System.out.println(" skip: tie-break");
return;
}
if (queueFull) {
// This hit is competitive - replace bottom element in queue & adjustTop // This hit is competitive - replace bottom element in queue & adjustTop
for (int i = 0; i < comparators.length; i++) { for (int i = 0; i < comparators.length; i++) {
comparators[i].copy(bottom.slot, doc); comparators[i].copy(bottom.slot, doc);
@ -955,6 +963,8 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
comparators[i].setBottom(bottom.slot); comparators[i].setBottom(bottom.slot);
} }
} else { } else {
collectedHits++;
// Startup transient: queue hasn't gathered numHits yet // Startup transient: queue hasn't gathered numHits yet
final int slot = collectedHits - 1; final int slot = collectedHits - 1;
//System.out.println(" slot=" + slot); //System.out.println(" slot=" + slot);

View File

@ -96,7 +96,7 @@ package org.apache.lucene.util;
public final class UnicodeUtil { public final class UnicodeUtil {
/** A binary term consisting of a number of 0xff bytes, likely to be bigger than other terms /** A binary term consisting of a number of 0xff bytes, likely to be bigger than other terms
* one would normally encounter, and definitely bigger than any UTF-8 terms. * (e.g. collation keys) one would normally encounter, and definitely bigger than any UTF-8 terms.
* <p> * <p>
* WARNING: This is not a valid UTF8 Term * WARNING: This is not a valid UTF8 Term
**/ **/

View File

@ -145,6 +145,11 @@ final class JustCompileSearch {
throw new UnsupportedOperationException(UNSUPPORTED_MSG); throw new UnsupportedOperationException(UNSUPPORTED_MSG);
} }
@Override
public void setTopValue(Object value) {
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
}
@Override @Override
public FieldComparator<Object> setNextReader(AtomicReaderContext context) { public FieldComparator<Object> setNextReader(AtomicReaderContext context) {
throw new UnsupportedOperationException(UNSUPPORTED_MSG); throw new UnsupportedOperationException(UNSUPPORTED_MSG);
@ -156,7 +161,7 @@ final class JustCompileSearch {
} }
@Override @Override
public int compareDocToValue(int doc, Object value) { public int compareTop(int doc) {
throw new UnsupportedOperationException(UNSUPPORTED_MSG); throw new UnsupportedOperationException(UNSUPPORTED_MSG);
} }
} }

View File

@ -157,6 +157,11 @@ class ElevationComparatorSource extends FieldComparatorSource {
bottomVal = values[slot]; bottomVal = values[slot];
} }
@Override
public void setTopValue(Integer value) {
throw new UnsupportedOperationException();
}
private int docVal(int doc) { private int docVal(int doc) {
int ord = idIndex.getOrd(doc); int ord = idIndex.getOrd(doc);
if (ord == -1) { if (ord == -1) {
@ -190,11 +195,8 @@ class ElevationComparatorSource extends FieldComparatorSource {
} }
@Override @Override
public int compareDocToValue(int doc, Integer valueObj) { public int compareTop(int doc) {
final int value = valueObj.intValue(); throw new UnsupportedOperationException();
final int docValue = docVal(doc);
// values will be small enough that there is no overflow concern
return value - docValue;
} }
}; };
} }

View File

@ -17,7 +17,10 @@ package org.apache.lucene.search;
* limitations under the License. * limitations under the License.
*/ */
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
import java.util.List;
import org.apache.lucene.document.BinaryDocValuesField; import org.apache.lucene.document.BinaryDocValuesField;
import org.apache.lucene.document.Document; import org.apache.lucene.document.Document;
@ -29,6 +32,7 @@ import org.apache.lucene.document.IntField;
import org.apache.lucene.document.LongField; import org.apache.lucene.document.LongField;
import org.apache.lucene.document.NumericDocValuesField; import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.SortedDocValuesField; import org.apache.lucene.document.SortedDocValuesField;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term; import org.apache.lucene.index.Term;
@ -45,39 +49,127 @@ public class TestSearchAfter extends LuceneTestCase {
private Directory dir; private Directory dir;
private IndexReader reader; private IndexReader reader;
private IndexSearcher searcher; private IndexSearcher searcher;
private int iter;
private List<SortField> allSortFields;
@Override @Override
public void setUp() throws Exception { public void setUp() throws Exception {
super.setUp(); super.setUp();
allSortFields = new ArrayList<SortField>(Arrays.asList(new SortField[] {
new SortField("int", SortField.Type.INT, false),
new SortField("long", SortField.Type.LONG, false),
new SortField("float", SortField.Type.FLOAT, false),
new SortField("double", SortField.Type.DOUBLE, false),
new SortField("bytes", SortField.Type.STRING, false),
new SortField("bytesval", SortField.Type.STRING_VAL, false),
new SortField("intdocvalues", SortField.Type.INT, false),
new SortField("floatdocvalues", SortField.Type.FLOAT, false),
new SortField("sortedbytesdocvalues", SortField.Type.STRING, false),
new SortField("sortedbytesdocvaluesval", SortField.Type.STRING_VAL, false),
new SortField("straightbytesdocvalues", SortField.Type.STRING_VAL, false),
new SortField("int", SortField.Type.INT, true),
new SortField("long", SortField.Type.LONG, true),
new SortField("float", SortField.Type.FLOAT, true),
new SortField("double", SortField.Type.DOUBLE, true),
new SortField("bytes", SortField.Type.STRING, true),
new SortField("bytesval", SortField.Type.STRING_VAL, true),
new SortField("intdocvalues", SortField.Type.INT, true),
new SortField("floatdocvalues", SortField.Type.FLOAT, true),
new SortField("sortedbytesdocvalues", SortField.Type.STRING, true),
new SortField("sortedbytesdocvaluesval", SortField.Type.STRING_VAL, true),
new SortField("straightbytesdocvalues", SortField.Type.STRING_VAL, true),
SortField.FIELD_SCORE,
SortField.FIELD_DOC,
}));
// Also test missing first / last for the "string" sorts:
for(String field : new String[] {"bytes", "sortedbytesdocvalues"}) {
for(int rev=0;rev<2;rev++) {
boolean reversed = rev == 0;
SortField sf = new SortField(field, SortField.Type.STRING, reversed);
sf.setMissingValue(SortField.STRING_FIRST);
allSortFields.add(sf);
sf = new SortField(field, SortField.Type.STRING, reversed);
sf.setMissingValue(SortField.STRING_LAST);
allSortFields.add(sf);
}
}
int limit = allSortFields.size();
for(int i=0;i<limit;i++) {
SortField sf = allSortFields.get(i);
if (sf.getType() == SortField.Type.INT) {
SortField sf2 = new SortField(sf.getField(), SortField.Type.INT, sf.getReverse());
sf2.setMissingValue(random().nextInt());
allSortFields.add(sf2);
} else if (sf.getType() == SortField.Type.LONG) {
SortField sf2 = new SortField(sf.getField(), SortField.Type.LONG, sf.getReverse());
sf2.setMissingValue(random().nextLong());
allSortFields.add(sf2);
} else if (sf.getType() == SortField.Type.FLOAT) {
SortField sf2 = new SortField(sf.getField(), SortField.Type.FLOAT, sf.getReverse());
sf2.setMissingValue(random().nextFloat());
allSortFields.add(sf2);
} else if (sf.getType() == SortField.Type.DOUBLE) {
SortField sf2 = new SortField(sf.getField(), SortField.Type.DOUBLE, sf.getReverse());
sf2.setMissingValue(random().nextDouble());
allSortFields.add(sf2);
}
}
dir = newDirectory(); dir = newDirectory();
RandomIndexWriter iw = new RandomIndexWriter(random(), dir); RandomIndexWriter iw = new RandomIndexWriter(random(), dir);
int numDocs = atLeast(200); int numDocs = atLeast(200);
for (int i = 0; i < numDocs; i++) { for (int i = 0; i < numDocs; i++) {
List<Field> fields = new ArrayList<Field>();
fields.add(newTextField("english", English.intToEnglish(i), Field.Store.NO));
fields.add(newTextField("oddeven", (i % 2 == 0) ? "even" : "odd", Field.Store.NO));
fields.add(newStringField("byte", "" + ((byte) random().nextInt()), Field.Store.NO));
fields.add(newStringField("short", "" + ((short) random().nextInt()), Field.Store.NO));
fields.add(new IntField("int", random().nextInt(), Field.Store.NO));
fields.add(new LongField("long", random().nextLong(), Field.Store.NO));
fields.add(new FloatField("float", random().nextFloat(), Field.Store.NO));
fields.add(new DoubleField("double", random().nextDouble(), Field.Store.NO));
fields.add(newStringField("bytes", _TestUtil.randomRealisticUnicodeString(random()), Field.Store.NO));
fields.add(newStringField("bytesval", _TestUtil.randomRealisticUnicodeString(random()), Field.Store.NO));
fields.add(new DoubleField("double", random().nextDouble(), Field.Store.NO));
fields.add(new NumericDocValuesField("intdocvalues", random().nextInt()));
fields.add(new FloatDocValuesField("floatdocvalues", random().nextFloat()));
fields.add(new SortedDocValuesField("sortedbytesdocvalues", new BytesRef(_TestUtil.randomRealisticUnicodeString(random()))));
fields.add(new SortedDocValuesField("sortedbytesdocvaluesval", new BytesRef(_TestUtil.randomRealisticUnicodeString(random()))));
fields.add(new BinaryDocValuesField("straightbytesdocvalues", new BytesRef(_TestUtil.randomRealisticUnicodeString(random()))));
Document document = new Document(); Document document = new Document();
document.add(newTextField("english", English.intToEnglish(i), Field.Store.NO)); document.add(new StoredField("id", ""+i));
document.add(newTextField("oddeven", (i % 2 == 0) ? "even" : "odd", Field.Store.NO)); if (VERBOSE) {
document.add(newStringField("byte", "" + ((byte) random().nextInt()), Field.Store.NO)); System.out.println(" add doc id=" + i);
document.add(newStringField("short", "" + ((short) random().nextInt()), Field.Store.NO)); }
document.add(new IntField("int", random().nextInt(), Field.Store.NO)); for(Field field : fields) {
document.add(new LongField("long", random().nextLong(), Field.Store.NO)); // So we are sometimes missing that field:
if (random().nextInt(5) != 4) {
document.add(new FloatField("float", random().nextFloat(), Field.Store.NO)); document.add(field);
document.add(new DoubleField("double", random().nextDouble(), Field.Store.NO)); if (VERBOSE) {
document.add(newStringField("bytes", _TestUtil.randomRealisticUnicodeString(random()), Field.Store.NO)); System.out.println(" " + field);
document.add(newStringField("bytesval", _TestUtil.randomRealisticUnicodeString(random()), Field.Store.NO)); }
document.add(new DoubleField("double", random().nextDouble(), Field.Store.NO)); }
}
document.add(new NumericDocValuesField("intdocvalues", random().nextInt()));
document.add(new FloatDocValuesField("floatdocvalues", random().nextFloat()));
document.add(new SortedDocValuesField("sortedbytesdocvalues", new BytesRef(_TestUtil.randomRealisticUnicodeString(random()))));
document.add(new SortedDocValuesField("sortedbytesdocvaluesval", new BytesRef(_TestUtil.randomRealisticUnicodeString(random()))));
document.add(new BinaryDocValuesField("straightbytesdocvalues", new BytesRef(_TestUtil.randomRealisticUnicodeString(random()))));
iw.addDocument(document); iw.addDocument(document);
if (random().nextInt(50) == 17) {
iw.commit();
}
} }
reader = iw.getReader(); reader = iw.getReader();
iw.close(); iw.close();
searcher = newSearcher(reader); searcher = newSearcher(reader);
if (VERBOSE) {
System.out.println(" searcher=" + searcher);
}
} }
@Override @Override
@ -104,25 +196,25 @@ public class TestSearchAfter extends LuceneTestCase {
assertQuery(bq, null); assertQuery(bq, null);
} }
} }
void assertQuery(Query query, Filter filter) throws Exception { void assertQuery(Query query, Filter filter) throws Exception {
assertQuery(query, filter, null); assertQuery(query, filter, null);
assertQuery(query, filter, Sort.RELEVANCE); assertQuery(query, filter, Sort.RELEVANCE);
assertQuery(query, filter, Sort.INDEXORDER); assertQuery(query, filter, Sort.INDEXORDER);
for(int rev=0;rev<2;rev++) { for(SortField sortField : allSortFields) {
boolean reversed = rev == 1; assertQuery(query, filter, new Sort(new SortField[] {sortField}));
assertQuery(query, filter, new Sort(new SortField[] {new SortField("int", SortField.Type.INT, reversed)}));
assertQuery(query, filter, new Sort(new SortField[] {new SortField("long", SortField.Type.LONG, reversed)}));
assertQuery(query, filter, new Sort(new SortField[] {new SortField("float", SortField.Type.FLOAT, reversed)}));
assertQuery(query, filter, new Sort(new SortField[] {new SortField("double", SortField.Type.DOUBLE, reversed)}));
assertQuery(query, filter, new Sort(new SortField[] {new SortField("bytes", SortField.Type.STRING, reversed)}));
assertQuery(query, filter, new Sort(new SortField[] {new SortField("bytesval", SortField.Type.STRING_VAL, reversed)}));
assertQuery(query, filter, new Sort(new SortField[] {new SortField("intdocvalues", SortField.Type.INT, reversed)}));
assertQuery(query, filter, new Sort(new SortField[] {new SortField("floatdocvalues", SortField.Type.FLOAT, reversed)}));
assertQuery(query, filter, new Sort(new SortField[] {new SortField("sortedbytesdocvalues", SortField.Type.STRING, reversed)}));
assertQuery(query, filter, new Sort(new SortField[] {new SortField("sortedbytesdocvaluesval", SortField.Type.STRING_VAL, reversed)}));
assertQuery(query, filter, new Sort(new SortField[] {new SortField("straightbytesdocvalues", SortField.Type.STRING_VAL, reversed)}));
} }
for(int i=0;i<20;i++) {
assertQuery(query, filter, getRandomSort());
}
}
Sort getRandomSort() {
SortField[] sortFields = new SortField[_TestUtil.nextInt(random(), 2, 7)];
for(int i=0;i<sortFields.length;i++) {
sortFields[i] = allSortFields.get(random().nextInt(allSortFields.size()));
}
return new Sort(sortFields);
} }
void assertQuery(Query query, Filter filter, Sort sort) throws Exception { void assertQuery(Query query, Filter filter, Sort sort) throws Exception {
@ -130,18 +222,23 @@ public class TestSearchAfter extends LuceneTestCase {
TopDocs all; TopDocs all;
int pageSize = _TestUtil.nextInt(random(), 1, maxDoc*2); int pageSize = _TestUtil.nextInt(random(), 1, maxDoc*2);
if (VERBOSE) { if (VERBOSE) {
System.out.println("\nassertQuery: query=" + query + " filter=" + filter + " sort=" + sort + " pageSize=" + pageSize); System.out.println("\nassertQuery " + (iter++) + ": query=" + query + " filter=" + filter + " sort=" + sort + " pageSize=" + pageSize);
} }
final boolean doMaxScore = random().nextBoolean(); final boolean doMaxScore = random().nextBoolean();
final boolean doScores = random().nextBoolean();
if (sort == null) { if (sort == null) {
all = searcher.search(query, filter, maxDoc); all = searcher.search(query, filter, maxDoc);
} else if (sort == Sort.RELEVANCE) { } else if (sort == Sort.RELEVANCE) {
all = searcher.search(query, filter, maxDoc, sort, true, doMaxScore); all = searcher.search(query, filter, maxDoc, sort, true, doMaxScore);
} else { } else {
all = searcher.search(query, filter, maxDoc, sort); all = searcher.search(query, filter, maxDoc, sort, doScores, doMaxScore);
} }
if (VERBOSE) { if (VERBOSE) {
System.out.println(" all.totalHits=" + all.totalHits); System.out.println(" all.totalHits=" + all.totalHits);
int upto = 0;
for(ScoreDoc scoreDoc : all.scoreDocs) {
System.out.println(" hit " + (upto++) + ": id=" + searcher.doc(scoreDoc.doc).get("id") + " " + scoreDoc);
}
} }
int pageStart = 0; int pageStart = 0;
ScoreDoc lastBottom = null; ScoreDoc lastBottom = null;
@ -154,14 +251,17 @@ public class TestSearchAfter extends LuceneTestCase {
paged = searcher.searchAfter(lastBottom, query, filter, pageSize); paged = searcher.searchAfter(lastBottom, query, filter, pageSize);
} else { } else {
if (VERBOSE) { if (VERBOSE) {
System.out.println(" iter lastBottom=" + lastBottom + (lastBottom == null ? "" : " fields=" + Arrays.toString(((FieldDoc) lastBottom).fields))); System.out.println(" iter lastBottom=" + lastBottom);
} }
if (sort == Sort.RELEVANCE) { if (sort == Sort.RELEVANCE) {
paged = searcher.searchAfter(lastBottom, query, filter, pageSize, sort, true, doMaxScore); paged = searcher.searchAfter(lastBottom, query, filter, pageSize, sort, true, doMaxScore);
} else { } else {
paged = searcher.searchAfter(lastBottom, query, filter, pageSize, sort); paged = searcher.searchAfter(lastBottom, query, filter, pageSize, sort, doScores, doMaxScore);
} }
} }
if (VERBOSE) {
System.out.println(" " + paged.scoreDocs.length + " hits on page");
}
if (paged.scoreDocs.length == 0) { if (paged.scoreDocs.length == 0) {
break; break;
@ -173,11 +273,16 @@ public class TestSearchAfter extends LuceneTestCase {
assertEquals(all.scoreDocs.length, pageStart); assertEquals(all.scoreDocs.length, pageStart);
} }
static void assertPage(int pageStart, TopDocs all, TopDocs paged) { void assertPage(int pageStart, TopDocs all, TopDocs paged) throws IOException {
assertEquals(all.totalHits, paged.totalHits); assertEquals(all.totalHits, paged.totalHits);
for (int i = 0; i < paged.scoreDocs.length; i++) { for (int i = 0; i < paged.scoreDocs.length; i++) {
ScoreDoc sd1 = all.scoreDocs[pageStart + i]; ScoreDoc sd1 = all.scoreDocs[pageStart + i];
ScoreDoc sd2 = paged.scoreDocs[i]; ScoreDoc sd2 = paged.scoreDocs[i];
if (VERBOSE) {
System.out.println(" hit " + (pageStart + i));
System.out.println(" expected id=" + searcher.doc(sd1.doc).get("id") + " " + sd1);
System.out.println(" actual id=" + searcher.doc(sd2.doc).get("id") + " " + sd2);
}
assertEquals(sd1.doc, sd2.doc); assertEquals(sd1.doc, sd2.doc);
assertEquals(sd1.score, sd2.score, 0f); assertEquals(sd1.score, sd2.score, 0f);
if (sd1 instanceof FieldDoc) { if (sd1 instanceof FieldDoc) {

View File

@ -196,6 +196,132 @@ public class TestSort extends LuceneTestCase {
ir.close(); ir.close();
dir.close(); dir.close();
} }
/** Tests sorting on type string with a missing
* value sorted first */
public void testStringMissingSortedFirst() throws IOException {
Directory dir = newDirectory();
RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
Document doc = new Document();
writer.addDocument(doc);
doc = new Document();
doc.add(newStringField("value", "foo", Field.Store.YES));
writer.addDocument(doc);
doc = new Document();
doc.add(newStringField("value", "bar", Field.Store.YES));
writer.addDocument(doc);
IndexReader ir = writer.getReader();
writer.close();
IndexSearcher searcher = newSearcher(ir);
SortField sf = new SortField("value", SortField.Type.STRING);
Sort sort = new Sort(sf);
TopDocs td = searcher.search(new MatchAllDocsQuery(), 10, sort);
assertEquals(3, td.totalHits);
// null comes first
assertNull(searcher.doc(td.scoreDocs[0].doc).get("value"));
assertEquals("bar", searcher.doc(td.scoreDocs[1].doc).get("value"));
assertEquals("foo", searcher.doc(td.scoreDocs[2].doc).get("value"));
ir.close();
dir.close();
}
/** Tests reverse sorting on type string with a missing
* value sorted first */
public void testStringMissingSortedFirstReverse() throws IOException {
Directory dir = newDirectory();
RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
Document doc = new Document();
writer.addDocument(doc);
doc = new Document();
doc.add(newStringField("value", "foo", Field.Store.YES));
writer.addDocument(doc);
doc = new Document();
doc.add(newStringField("value", "bar", Field.Store.YES));
writer.addDocument(doc);
IndexReader ir = writer.getReader();
writer.close();
IndexSearcher searcher = newSearcher(ir);
SortField sf = new SortField("value", SortField.Type.STRING, true);
Sort sort = new Sort(sf);
TopDocs td = searcher.search(new MatchAllDocsQuery(), 10, sort);
assertEquals(3, td.totalHits);
assertEquals("foo", searcher.doc(td.scoreDocs[0].doc).get("value"));
assertEquals("bar", searcher.doc(td.scoreDocs[1].doc).get("value"));
// null comes last
assertNull(searcher.doc(td.scoreDocs[2].doc).get("value"));
ir.close();
dir.close();
}
/** Tests sorting on type string with a missing
* value sorted last */
public void testStringValMissingSortedLast() throws IOException {
Directory dir = newDirectory();
RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
Document doc = new Document();
writer.addDocument(doc);
doc = new Document();
doc.add(newStringField("value", "foo", Field.Store.YES));
writer.addDocument(doc);
doc = new Document();
doc.add(newStringField("value", "bar", Field.Store.YES));
writer.addDocument(doc);
IndexReader ir = writer.getReader();
writer.close();
IndexSearcher searcher = newSearcher(ir);
SortField sf = new SortField("value", SortField.Type.STRING);
sf.setMissingValue(SortField.STRING_LAST);
Sort sort = new Sort(sf);
TopDocs td = searcher.search(new MatchAllDocsQuery(), 10, sort);
assertEquals(3, td.totalHits);
assertEquals("bar", searcher.doc(td.scoreDocs[0].doc).get("value"));
assertEquals("foo", searcher.doc(td.scoreDocs[1].doc).get("value"));
// null comes last
assertNull(searcher.doc(td.scoreDocs[2].doc).get("value"));
ir.close();
dir.close();
}
/** Tests reverse sorting on type string with a missing
* value sorted last */
public void testStringValMissingSortedLastReverse() throws IOException {
Directory dir = newDirectory();
RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
Document doc = new Document();
writer.addDocument(doc);
doc = new Document();
doc.add(newStringField("value", "foo", Field.Store.YES));
writer.addDocument(doc);
doc = new Document();
doc.add(newStringField("value", "bar", Field.Store.YES));
writer.addDocument(doc);
IndexReader ir = writer.getReader();
writer.close();
IndexSearcher searcher = newSearcher(ir);
SortField sf = new SortField("value", SortField.Type.STRING, true);
sf.setMissingValue(SortField.STRING_LAST);
Sort sort = new Sort(sf);
TopDocs td = searcher.search(new MatchAllDocsQuery(), 10, sort);
assertEquals(3, td.totalHits);
// null comes first
assertNull(searcher.doc(td.scoreDocs[0].doc).get("value"));
assertEquals("foo", searcher.doc(td.scoreDocs[1].doc).get("value"));
assertEquals("bar", searcher.doc(td.scoreDocs[2].doc).get("value"));
ir.close();
dir.close();
}
/** Tests reverse sorting on type string_val */ /** Tests reverse sorting on type string_val */
public void testStringValReverse() throws IOException { public void testStringValReverse() throws IOException {

View File

@ -20,6 +20,7 @@ package org.apache.lucene.search;
import java.io.IOException; import java.io.IOException;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Collections; import java.util.Collections;
import java.util.Comparator;
import java.util.HashSet; import java.util.HashSet;
import java.util.List; import java.util.List;
import java.util.Random; import java.util.Random;
@ -29,6 +30,7 @@ import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field; import org.apache.lucene.document.Field;
import org.apache.lucene.document.NumericDocValuesField; import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.SortedDocValuesField; import org.apache.lucene.document.SortedDocValuesField;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.RandomIndexWriter;
@ -60,30 +62,44 @@ public class TestSortRandom extends LuceneTestCase {
final List<BytesRef> docValues = new ArrayList<BytesRef>(); final List<BytesRef> docValues = new ArrayList<BytesRef>();
// TODO: deletions // TODO: deletions
while (numDocs < NUM_DOCS) { while (numDocs < NUM_DOCS) {
final String s;
if (random.nextBoolean()) {
s = _TestUtil.randomSimpleString(random, maxLength);
} else {
s = _TestUtil.randomUnicodeString(random, maxLength);
}
final BytesRef br = new BytesRef(s);
if (!allowDups) {
if (seen.contains(s)) {
continue;
}
seen.add(s);
}
if (VERBOSE) {
System.out.println(" " + numDocs + ": s=" + s);
}
final Document doc = new Document(); final Document doc = new Document();
doc.add(new SortedDocValuesField("stringdv", br));
doc.add(newStringField("string", s, Field.Store.NO)); // 10% of the time, the document is missing the value:
final BytesRef br;
if (random().nextInt(10) != 7) {
final String s;
if (random.nextBoolean()) {
s = _TestUtil.randomSimpleString(random, maxLength);
} else {
s = _TestUtil.randomUnicodeString(random, maxLength);
}
if (!allowDups) {
if (seen.contains(s)) {
continue;
}
seen.add(s);
}
if (VERBOSE) {
System.out.println(" " + numDocs + ": s=" + s);
}
br = new BytesRef(s);
doc.add(new SortedDocValuesField("stringdv", br));
doc.add(newStringField("string", s, Field.Store.NO));
docValues.add(br);
} else {
br = null;
if (VERBOSE) {
System.out.println(" " + numDocs + ": <missing>");
}
docValues.add(null);
}
doc.add(new NumericDocValuesField("id", numDocs)); doc.add(new NumericDocValuesField("id", numDocs));
docValues.add(br); doc.add(new StoredField("id", numDocs));
writer.addDocument(doc); writer.addDocument(doc);
numDocs++; numDocs++;
@ -103,13 +119,26 @@ public class TestSortRandom extends LuceneTestCase {
final int ITERS = atLeast(100); final int ITERS = atLeast(100);
for(int iter=0;iter<ITERS;iter++) { for(int iter=0;iter<ITERS;iter++) {
final boolean reverse = random.nextBoolean(); final boolean reverse = random.nextBoolean();
final TopFieldDocs hits; final TopFieldDocs hits;
final SortField sf; final SortField sf;
final boolean sortMissingLast;
final boolean missingIsNull;
if (random.nextBoolean()) { if (random.nextBoolean()) {
sf = new SortField("stringdv", SortField.Type.STRING, reverse); sf = new SortField("stringdv", SortField.Type.STRING, reverse);
// Can only use sort missing if the DVFormat
// supports docsWithField:
sortMissingLast = defaultCodecSupportsDocsWithField() && random().nextBoolean();
missingIsNull = defaultCodecSupportsDocsWithField();
} else { } else {
sf = new SortField("string", SortField.Type.STRING, reverse); sf = new SortField("string", SortField.Type.STRING, reverse);
sortMissingLast = random().nextBoolean();
missingIsNull = true;
} }
if (sortMissingLast) {
sf.setMissingValue(SortField.STRING_LAST);
}
final Sort sort; final Sort sort;
if (random.nextBoolean()) { if (random.nextBoolean()) {
sort = new Sort(sf); sort = new Sort(sf);
@ -138,11 +167,34 @@ public class TestSortRandom extends LuceneTestCase {
} }
if (VERBOSE) { if (VERBOSE) {
System.out.println("\nTEST: iter=" + iter + " " + hits.totalHits + " hits; topN=" + hitCount + "; reverse=" + reverse); System.out.println("\nTEST: iter=" + iter + " " + hits.totalHits + " hits; topN=" + hitCount + "; reverse=" + reverse + "; sortMissingLast=" + sortMissingLast + " sort=" + sort);
} }
// Compute expected results: // Compute expected results:
Collections.sort(f.matchValues); Collections.sort(f.matchValues, new Comparator<BytesRef>() {
@Override
public int compare(BytesRef a, BytesRef b) {
if (a == null) {
if (b == null) {
return 0;
}
if (sortMissingLast) {
return 1;
} else {
return -1;
}
} else if (b == null) {
if (sortMissingLast) {
return -1;
} else {
return 1;
}
} else {
return a.compareTo(b);
}
}
});
if (reverse) { if (reverse) {
Collections.reverse(f.matchValues); Collections.reverse(f.matchValues);
} }
@ -150,7 +202,11 @@ public class TestSortRandom extends LuceneTestCase {
if (VERBOSE) { if (VERBOSE) {
System.out.println(" expected:"); System.out.println(" expected:");
for(int idx=0;idx<expected.size();idx++) { for(int idx=0;idx<expected.size();idx++) {
System.out.println(" " + idx + ": " + expected.get(idx).utf8ToString()); BytesRef br = expected.get(idx);
if (br == null && missingIsNull == false) {
br = new BytesRef();
}
System.out.println(" " + idx + ": " + (br == null ? "<missing>" : br.utf8ToString()));
if (idx == hitCount-1) { if (idx == hitCount-1) {
break; break;
} }
@ -161,12 +217,30 @@ public class TestSortRandom extends LuceneTestCase {
System.out.println(" actual:"); System.out.println(" actual:");
for(int hitIDX=0;hitIDX<hits.scoreDocs.length;hitIDX++) { for(int hitIDX=0;hitIDX<hits.scoreDocs.length;hitIDX++) {
final FieldDoc fd = (FieldDoc) hits.scoreDocs[hitIDX]; final FieldDoc fd = (FieldDoc) hits.scoreDocs[hitIDX];
System.out.println(" " + hitIDX + ": " + ((BytesRef) fd.fields[0]).utf8ToString()); BytesRef br = (BytesRef) fd.fields[0];
System.out.println(" " + hitIDX + ": " + (br == null ? "<missing>" : br.utf8ToString()) + " id=" + s.doc(fd.doc).get("id"));
} }
} }
for(int hitIDX=0;hitIDX<hits.scoreDocs.length;hitIDX++) { for(int hitIDX=0;hitIDX<hits.scoreDocs.length;hitIDX++) {
final FieldDoc fd = (FieldDoc) hits.scoreDocs[hitIDX]; final FieldDoc fd = (FieldDoc) hits.scoreDocs[hitIDX];
assertEquals(expected.get(hitIDX), (BytesRef) fd.fields[0]); BytesRef br = expected.get(hitIDX);
if (br == null && missingIsNull == false) {
br = new BytesRef();
}
// Normally, the old codecs (that don't support
// docsWithField via doc values) will always return
// an empty BytesRef for the missing case; however,
// if all docs in a given segment were missing, in
// that case it will return null! So we must map
// null here, too:
BytesRef br2 = (BytesRef) fd.fields[0];
if (br2 == null && missingIsNull == false) {
br2 = new BytesRef();
}
assertEquals(br, br2);
} }
} }

View File

@ -30,6 +30,7 @@ import org.apache.lucene.search.Scorer;
class ExpressionComparator extends FieldComparator<Double> { class ExpressionComparator extends FieldComparator<Double> {
private final double[] values; private final double[] values;
private double bottom; private double bottom;
private double topValue;
private ValueSource source; private ValueSource source;
private FunctionValues scores; private FunctionValues scores;
@ -66,6 +67,11 @@ class ExpressionComparator extends FieldComparator<Double> {
bottom = values[slot]; bottom = values[slot];
} }
@Override
public void setTopValue(Double value) {
topValue = value.doubleValue();
}
@Override @Override
public int compareBottom(int doc) throws IOException { public int compareBottom(int doc) throws IOException {
return Double.compare(bottom, scores.doubleVal(doc)); return Double.compare(bottom, scores.doubleVal(doc));
@ -88,7 +94,7 @@ class ExpressionComparator extends FieldComparator<Double> {
} }
@Override @Override
public int compareDocToValue(int doc, Double valueObj) throws IOException { public int compareTop(int doc) throws IOException {
return Double.compare(scores.doubleVal(doc), valueObj.doubleValue()); return Double.compare(topValue, scores.doubleVal(doc));
} }
} }

View File

@ -59,6 +59,11 @@ public abstract class ToParentBlockJoinFieldComparator extends FieldComparator<O
wrappedComparator.setBottom(slot); wrappedComparator.setBottom(slot);
} }
@Override
public void setTopValue(Object value) {
wrappedComparator.setTopValue(value);
}
@Override @Override
public FieldComparator<Object> setNextReader(AtomicReaderContext context) throws IOException { public FieldComparator<Object> setNextReader(AtomicReaderContext context) throws IOException {
DocIdSet innerDocuments = childFilter.getDocIdSet(context, null); DocIdSet innerDocuments = childFilter.getDocIdSet(context, null);
@ -193,7 +198,7 @@ public abstract class ToParentBlockJoinFieldComparator extends FieldComparator<O
@Override @Override
@SuppressWarnings("unchecked") @SuppressWarnings("unchecked")
public int compareDocToValue(int parentDoc, Object value) throws IOException { public int compareTop(int parentDoc) throws IOException {
if (parentDoc == 0 || parentDocuments == null || childDocuments == null) { if (parentDoc == 0 || parentDocuments == null || childDocuments == null) {
return 0; return 0;
} }
@ -216,7 +221,7 @@ public abstract class ToParentBlockJoinFieldComparator extends FieldComparator<O
if (childDoc >= parentDoc || childDoc == -1) { if (childDoc >= parentDoc || childDoc == -1) {
return cmp; return cmp;
} }
int cmp1 = wrappedComparator.compareDocToValue(childDoc, value); int cmp1 = wrappedComparator.compareTop(childDoc);
if (cmp1 > 0) { if (cmp1 > 0) {
return cmp1; return cmp1;
} else { } else {
@ -309,7 +314,7 @@ public abstract class ToParentBlockJoinFieldComparator extends FieldComparator<O
@Override @Override
@SuppressWarnings("unchecked") @SuppressWarnings("unchecked")
public int compareDocToValue(int parentDoc, Object value) throws IOException { public int compareTop(int parentDoc) throws IOException {
if (parentDoc == 0 || parentDocuments == null || childDocuments == null) { if (parentDoc == 0 || parentDocuments == null || childDocuments == null) {
return 0; return 0;
} }
@ -330,7 +335,7 @@ public abstract class ToParentBlockJoinFieldComparator extends FieldComparator<O
if (childDoc >= parentDoc || childDoc == -1) { if (childDoc >= parentDoc || childDoc == -1) {
return cmp; return cmp;
} }
int cmp1 = wrappedComparator.compareDocToValue(childDoc, value); int cmp1 = wrappedComparator.compareTop(childDoc);
if (cmp1 < 0) { if (cmp1 < 0) {
return cmp1; return cmp1;
} else { } else {

View File

@ -131,6 +131,7 @@ public abstract class ValueSource {
private FunctionValues docVals; private FunctionValues docVals;
private double bottom; private double bottom;
private final Map fcontext; private final Map fcontext;
private double topValue;
ValueSourceComparator(Map fcontext, int numHits) { ValueSourceComparator(Map fcontext, int numHits) {
this.fcontext = fcontext; this.fcontext = fcontext;
@ -163,16 +164,20 @@ public abstract class ValueSource {
this.bottom = values[bottom]; this.bottom = values[bottom];
} }
@Override
public void setTopValue(final Double value) {
this.topValue = value.doubleValue();
}
@Override @Override
public Double value(int slot) { public Double value(int slot) {
return values[slot]; return values[slot];
} }
@Override @Override
public int compareDocToValue(int doc, Double valueObj) { public int compareTop(int doc) {
final double value = valueObj;
final double docValue = docVals.doubleVal(doc); final double docValue = docVals.doubleVal(doc);
return Double.compare(docValue, value); return Double.compare(topValue, docValue);
} }
} }
} }

View File

@ -44,6 +44,7 @@ public final class SlowCollatedStringComparator extends FieldComparator<String>
private final String field; private final String field;
final Collator collator; final Collator collator;
private String bottom; private String bottom;
private String topValue;
private final BytesRef tempBR = new BytesRef(); private final BytesRef tempBR = new BytesRef();
public SlowCollatedStringComparator(int numHits, String field, Collator collator) { public SlowCollatedStringComparator(int numHits, String field, Collator collator) {
@ -104,6 +105,11 @@ public final class SlowCollatedStringComparator extends FieldComparator<String>
this.bottom = values[bottom]; this.bottom = values[bottom];
} }
@Override
public void setTopValue(final String value) {
this.topValue = value;
}
@Override @Override
public String value(int slot) { public String value(int slot) {
return values[slot]; return values[slot];
@ -124,7 +130,7 @@ public final class SlowCollatedStringComparator extends FieldComparator<String>
} }
@Override @Override
public int compareDocToValue(int doc, String value) { public int compareTop(int doc) {
currentDocTerms.get(doc, tempBR); currentDocTerms.get(doc, tempBR);
final String docValue; final String docValue;
if (tempBR.length == 0 && docsWithField.get(doc) == false) { if (tempBR.length == 0 && docsWithField.get(doc) == false) {
@ -132,6 +138,6 @@ public final class SlowCollatedStringComparator extends FieldComparator<String>
} else { } else {
docValue = tempBR.utf8ToString(); docValue = tempBR.utf8ToString();
} }
return compareValues(docValue, value); return compareValues(topValue, docValue);
} }
} }

View File

@ -632,7 +632,7 @@ public class MockDirectoryWrapper extends BaseDirectoryWrapper {
randomIOExceptionRateOnOpen = 0.0; randomIOExceptionRateOnOpen = 0.0;
if (DirectoryReader.indexExists(this)) { if (DirectoryReader.indexExists(this)) {
if (LuceneTestCase.VERBOSE) { if (LuceneTestCase.VERBOSE) {
System.out.println("\nNOTE: MockDirectoryWrapper: now crash"); System.out.println("\nNOTE: MockDirectoryWrapper: now crush");
} }
crash(); // corrupt any unsynced-files crash(); // corrupt any unsynced-files
if (LuceneTestCase.VERBOSE) { if (LuceneTestCase.VERBOSE) {

View File

@ -209,6 +209,10 @@ Bug Fixes
* SOLR-5636: SolrRequestParsers does some xpath lookups on every request, which * SOLR-5636: SolrRequestParsers does some xpath lookups on every request, which
can cause concurrency issues. (Mark Miller) can cause concurrency issues. (Mark Miller)
* LUCENE-5399, SOLR-5354 sort wouldn't work correctly with
distributed searching for some field types such as legacy numeric
types (Rob Muir, Mike McCandless)
Optimizations Optimizations
---------------------- ----------------------

View File

@ -576,6 +576,7 @@ public class QueryElevationComponent extends SearchComponent implements SolrCore
return new FieldComparator<Integer>() { return new FieldComparator<Integer>() {
private final int[] values = new int[numHits]; private final int[] values = new int[numHits];
private int bottomVal; private int bottomVal;
private int topVal;
private TermsEnum termsEnum; private TermsEnum termsEnum;
private DocsEnum docsEnum; private DocsEnum docsEnum;
Set<String> seen = new HashSet<String>(elevations.ids.size()); Set<String> seen = new HashSet<String>(elevations.ids.size());
@ -590,6 +591,11 @@ public class QueryElevationComponent extends SearchComponent implements SolrCore
bottomVal = values[slot]; bottomVal = values[slot];
} }
@Override
public void setTopValue(Integer value) {
topVal = value.intValue();
}
private int docVal(int doc) { private int docVal(int doc) {
if (ordSet.size() > 0) { if (ordSet.size() > 0) {
int slot = ordSet.find(doc); int slot = ordSet.find(doc);
@ -646,10 +652,9 @@ public class QueryElevationComponent extends SearchComponent implements SolrCore
} }
@Override @Override
public int compareDocToValue(int doc, Integer valueObj) { public int compareTop(int doc) {
final int value = valueObj.intValue();
final int docValue = docVal(doc); final int docValue = docVal(doc);
return docValue - value; // values will be small enough that there is no overflow concern return topVal - docValue; // values will be small enough that there is no overflow concern
} }
}; };
} }

View File

@ -178,7 +178,9 @@ public class EnumField extends PrimitiveFieldType {
public SortField getSortField(SchemaField field, boolean top) { public SortField getSortField(SchemaField field, boolean top) {
field.checkSortability(); field.checkSortability();
final Object missingValue = Integer.MIN_VALUE; final Object missingValue = Integer.MIN_VALUE;
return new SortField(field.getName(), FieldCache.NUMERIC_UTILS_INT_PARSER, top).setMissingValue(missingValue); SortField sf = new SortField(field.getName(), FieldCache.NUMERIC_UTILS_INT_PARSER, top);
sf.setMissingValue(missingValue);
return sf;
} }
/** /**

View File

@ -109,6 +109,7 @@ public class RandomSortField extends FieldType {
int seed; int seed;
private final int[] values = new int[numHits]; private final int[] values = new int[numHits];
int bottomVal; int bottomVal;
int topVal;
@Override @Override
public int compare(int slot1, int slot2) { public int compare(int slot1, int slot2) {
@ -120,6 +121,11 @@ public class RandomSortField extends FieldType {
bottomVal = values[slot]; bottomVal = values[slot];
} }
@Override
public void setTopValue(Integer value) {
topVal = value.intValue();
}
@Override @Override
public int compareBottom(int doc) { public int compareBottom(int doc) {
return bottomVal - hash(doc+seed); return bottomVal - hash(doc+seed);
@ -142,9 +148,9 @@ public class RandomSortField extends FieldType {
} }
@Override @Override
public int compareDocToValue(int doc, Integer valueObj) { public int compareTop(int doc) {
// values will be positive... no overflow possible. // values will be positive... no overflow possible.
return hash(doc+seed) - valueObj.intValue(); return topVal - hash(doc+seed);
} }
}; };
} }

View File

@ -142,7 +142,9 @@ public class TrieField extends PrimitiveFieldType {
Object missingValue = null; Object missingValue = null;
boolean sortMissingLast = field.sortMissingLast(); boolean sortMissingLast = field.sortMissingLast();
boolean sortMissingFirst = field.sortMissingFirst(); boolean sortMissingFirst = field.sortMissingFirst();
SortField sf;
switch (type) { switch (type) {
case INTEGER: case INTEGER:
if( sortMissingLast ) { if( sortMissingLast ) {
@ -151,7 +153,9 @@ public class TrieField extends PrimitiveFieldType {
else if( sortMissingFirst ) { else if( sortMissingFirst ) {
missingValue = top ? Integer.MAX_VALUE : Integer.MIN_VALUE; missingValue = top ? Integer.MAX_VALUE : Integer.MIN_VALUE;
} }
return new SortField( field.getName(), FieldCache.NUMERIC_UTILS_INT_PARSER, top).setMissingValue(missingValue); sf = new SortField( field.getName(), FieldCache.NUMERIC_UTILS_INT_PARSER, top);
sf.setMissingValue(missingValue);
return sf;
case FLOAT: case FLOAT:
if( sortMissingLast ) { if( sortMissingLast ) {
@ -160,7 +164,9 @@ public class TrieField extends PrimitiveFieldType {
else if( sortMissingFirst ) { else if( sortMissingFirst ) {
missingValue = top ? Float.POSITIVE_INFINITY : Float.NEGATIVE_INFINITY; missingValue = top ? Float.POSITIVE_INFINITY : Float.NEGATIVE_INFINITY;
} }
return new SortField( field.getName(), FieldCache.NUMERIC_UTILS_FLOAT_PARSER, top).setMissingValue(missingValue); sf = new SortField( field.getName(), FieldCache.NUMERIC_UTILS_FLOAT_PARSER, top);
sf.setMissingValue(missingValue);
return sf;
case DATE: // fallthrough case DATE: // fallthrough
case LONG: case LONG:
@ -170,7 +176,9 @@ public class TrieField extends PrimitiveFieldType {
else if( sortMissingFirst ) { else if( sortMissingFirst ) {
missingValue = top ? Long.MAX_VALUE : Long.MIN_VALUE; missingValue = top ? Long.MAX_VALUE : Long.MIN_VALUE;
} }
return new SortField( field.getName(), FieldCache.NUMERIC_UTILS_LONG_PARSER, top).setMissingValue(missingValue); sf = new SortField( field.getName(), FieldCache.NUMERIC_UTILS_LONG_PARSER, top);
sf.setMissingValue(missingValue);
return sf;
case DOUBLE: case DOUBLE:
if( sortMissingLast ) { if( sortMissingLast ) {
@ -179,7 +187,9 @@ public class TrieField extends PrimitiveFieldType {
else if( sortMissingFirst ) { else if( sortMissingFirst ) {
missingValue = top ? Double.POSITIVE_INFINITY : Double.NEGATIVE_INFINITY; missingValue = top ? Double.POSITIVE_INFINITY : Double.NEGATIVE_INFINITY;
} }
return new SortField( field.getName(), FieldCache.NUMERIC_UTILS_DOUBLE_PARSER, top).setMissingValue(missingValue); sf = new SortField( field.getName(), FieldCache.NUMERIC_UTILS_DOUBLE_PARSER, top);
sf.setMissingValue(missingValue);
return sf;
default: default:
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Unknown type for trie field: " + field.name); throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Unknown type for trie field: " + field.name);

View File

@ -1,310 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.search;
import java.io.IOException;
import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.search.FieldCache;
import org.apache.lucene.search.FieldComparator;
import org.apache.lucene.search.FieldComparatorSource;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.UnicodeUtil;
import org.apache.lucene.util.packed.PackedInts;
public class MissingStringLastComparatorSource extends FieldComparatorSource {
private final BytesRef missingValueProxy;
public MissingStringLastComparatorSource() {
this(UnicodeUtil.BIG_TERM);
}
/** Creates a {@link FieldComparatorSource} that sorts null last in a normal ascending sort.
* <tt>missingValueProxy</tt> as the value to return from FieldComparator.value()
*
* @param missingValueProxy The value returned when sortValue() is called for a document missing the sort field.
* This value is *not* normally used for sorting.
*/
public MissingStringLastComparatorSource(BytesRef missingValueProxy) {
this.missingValueProxy=missingValueProxy;
}
@Override
public FieldComparator newComparator(String fieldname, int numHits, int sortPos, boolean reversed) throws IOException {
return new TermOrdValComparator_SML(numHits, fieldname, sortPos, reversed, missingValueProxy);
}
}
// Copied from Lucene's TermOrdValComparator and modified since the Lucene version couldn't
// be extended.
class TermOrdValComparator_SML extends FieldComparator<Comparable> {
private static final int NULL_ORD = Integer.MAX_VALUE-1;
private final int[] ords;
private final BytesRef[] values;
private final int[] readerGen;
private SortedDocValues termsIndex;
private final String field;
private final BytesRef NULL_VAL;
private PerSegmentComparator current;
public TermOrdValComparator_SML(int numHits, String field, int sortPos, boolean reversed, BytesRef nullVal) {
ords = new int[numHits];
values = new BytesRef[numHits];
readerGen = new int[numHits];
this.field = field;
this.NULL_VAL = nullVal;
}
@Override
public int compare(int slot1, int slot2) {
throw new UnsupportedOperationException();
}
@Override
public void setBottom(int slot) {
throw new UnsupportedOperationException();
}
@Override
public int compareBottom(int doc) {
throw new UnsupportedOperationException();
}
@Override
public void copy(int slot, int doc) {
throw new UnsupportedOperationException();
}
@Override
public BytesRef value(int slot) {
throw new UnsupportedOperationException();
}
@Override
public int compareValues(Comparable first, Comparable second) {
if (first == null) {
if (second == null) {
return 0;
} else {
return 1;
}
} else if (second == null) {
return -1;
} else {
return first.compareTo(second);
}
}
@Override
public FieldComparator setNextReader(AtomicReaderContext context) throws IOException {
return TermOrdValComparator_SML.createComparator(context.reader(), this);
}
@Override
public int compareDocToValue(int doc, Comparable docValue) {
throw new UnsupportedOperationException();
}
// Base class for specialized (per bit width of the
// ords) per-segment comparator. NOTE: this is messy;
// we do this only because hotspot can't reliably inline
// the underlying array access when looking up doc->ord
private static abstract class PerSegmentComparator extends FieldComparator<BytesRef> {
protected TermOrdValComparator_SML parent;
protected final int[] ords;
protected final BytesRef[] values;
protected final int[] readerGen;
protected int currentReaderGen = -1;
protected SortedDocValues termsIndex;
protected int bottomSlot = -1;
protected int bottomOrd;
protected boolean bottomSameReader = false;
protected BytesRef bottomValue;
protected final BytesRef tempBR = new BytesRef();
public PerSegmentComparator(TermOrdValComparator_SML parent) {
this.parent = parent;
PerSegmentComparator previous = parent.current;
if (previous != null) {
currentReaderGen = previous.currentReaderGen;
bottomSlot = previous.bottomSlot;
bottomOrd = previous.bottomOrd;
bottomValue = previous.bottomValue;
}
ords = parent.ords;
values = parent.values;
readerGen = parent.readerGen;
termsIndex = parent.termsIndex;
currentReaderGen++;
}
@Override
public FieldComparator setNextReader(AtomicReaderContext context) throws IOException {
return TermOrdValComparator_SML.createComparator(context.reader(), parent);
}
@Override
public int compare(int slot1, int slot2) {
if (readerGen[slot1] == readerGen[slot2]) {
return ords[slot1] - ords[slot2];
}
final BytesRef val1 = values[slot1];
final BytesRef val2 = values[slot2];
if (val1 == null) {
if (val2 == null) {
return 0;
}
return 1;
} else if (val2 == null) {
return -1;
}
return val1.compareTo(val2);
}
@Override
public void setBottom(final int bottom) {
bottomSlot = bottom;
bottomValue = values[bottomSlot];
if (currentReaderGen == readerGen[bottomSlot]) {
bottomOrd = ords[bottomSlot];
bottomSameReader = true;
} else {
if (bottomValue == null) {
// -1 ord is null for all segments
assert ords[bottomSlot] == NULL_ORD;
bottomOrd = NULL_ORD;
bottomSameReader = true;
readerGen[bottomSlot] = currentReaderGen;
} else {
final int index = termsIndex.lookupTerm(bottomValue);
if (index < 0) {
bottomOrd = -index - 2;
bottomSameReader = false;
} else {
bottomOrd = index;
// exact value match
bottomSameReader = true;
readerGen[bottomSlot] = currentReaderGen;
ords[bottomSlot] = bottomOrd;
}
}
}
}
@Override
public BytesRef value(int slot) {
return values==null ? parent.NULL_VAL : values[slot];
}
@Override
public int compareDocToValue(int doc, BytesRef value) {
int docOrd = termsIndex.getOrd(doc);
if (docOrd == -1) {
if (value == null) {
return 0;
}
return 1;
} else if (value == null) {
return -1;
}
termsIndex.lookupOrd(docOrd, tempBR);
return tempBR.compareTo(value);
}
}
private static final class AnyOrdComparator extends PerSegmentComparator {
public AnyOrdComparator(TermOrdValComparator_SML parent) {
super(parent);
}
@Override
public int compareBottom(int doc) {
assert bottomSlot != -1;
int order = termsIndex.getOrd(doc);
if (order == -1) order = NULL_ORD;
if (bottomSameReader) {
// ord is precisely comparable, even in the equal
// case
return bottomOrd - order;
} else {
// ord is only approx comparable: if they are not
// equal, we can use that; if they are equal, we
// must fallback to compare by value
final int cmp = bottomOrd - order;
if (cmp != 0) {
return cmp;
}
// take care of the case where both vals are null
if (order == NULL_ORD) {
return 0;
}
// and at this point we know that neither value is null, so safe to compare
if (order == NULL_ORD) {
return bottomValue.compareTo(parent.NULL_VAL);
} else {
termsIndex.lookupOrd(order, tempBR);
return bottomValue.compareTo(tempBR);
}
}
}
@Override
public void copy(int slot, int doc) {
int ord = termsIndex.getOrd(doc);
if (ord == -1) {
ords[slot] = NULL_ORD;
values[slot] = null;
} else {
ords[slot] = ord;
assert ord >= 0;
if (values[slot] == null) {
values[slot] = new BytesRef();
}
termsIndex.lookupOrd(ord, values[slot]);
}
readerGen[slot] = currentReaderGen;
}
}
public static FieldComparator createComparator(AtomicReader reader, TermOrdValComparator_SML parent) throws IOException {
parent.termsIndex = FieldCache.DEFAULT.getTermsIndex(reader, parent.field);
PerSegmentComparator perSegComp = new AnyOrdComparator(parent);
if (perSegComp.bottomSlot != -1) {
perSegComp.setBottom(perSegComp.bottomSlot);
}
parent.current = perSegComp;
return perSegComp;
}
}

View File

@ -40,18 +40,25 @@ public class Sorting {
* @return SortField * @return SortField
*/ */
public static SortField getStringSortField(String fieldName, boolean reverse, boolean nullLast, boolean nullFirst) { public static SortField getStringSortField(String fieldName, boolean reverse, boolean nullLast, boolean nullFirst) {
if (nullLast) { if (nullFirst && nullLast) {
if (!reverse) return new SortField(fieldName, nullStringLastComparatorSource); throw new IllegalArgumentException("Cannot specify missing values as both first and last");
else return new SortField(fieldName, SortField.Type.STRING, true);
} else if (nullFirst) {
if (reverse) return new SortField(fieldName, nullStringLastComparatorSource, true);
else return new SortField(fieldName, SortField.Type.STRING, false);
} else {
return new SortField(fieldName, SortField.Type.STRING, reverse);
} }
SortField sortField = new SortField(fieldName, SortField.Type.STRING, reverse);
// 4 cases:
// missingFirst / forward: default lucene behavior
// missingFirst / reverse: set sortMissingLast
// missingLast / forward: set sortMissingLast
// missingLast / reverse: default lucene behavior
if (nullFirst && reverse) {
sortField.setMissingValue(SortField.STRING_LAST);
} else if (nullLast && !reverse) {
sortField.setMissingValue(SortField.STRING_LAST);
}
return sortField;
} }
static final FieldComparatorSource nullStringLastComparatorSource = new MissingStringLastComparatorSource(null);
} }

View File

@ -88,6 +88,15 @@ public class SearchGroupsResultTransformer implements ShardResultTransformer<Lis
SearchGroup<BytesRef> searchGroup = new SearchGroup<BytesRef>(); SearchGroup<BytesRef> searchGroup = new SearchGroup<BytesRef>();
searchGroup.groupValue = rawSearchGroup.getKey() != null ? new BytesRef(rawSearchGroup.getKey()) : null; searchGroup.groupValue = rawSearchGroup.getKey() != null ? new BytesRef(rawSearchGroup.getKey()) : null;
searchGroup.sortValues = rawSearchGroup.getValue().toArray(new Comparable[rawSearchGroup.getValue().size()]); searchGroup.sortValues = rawSearchGroup.getValue().toArray(new Comparable[rawSearchGroup.getValue().size()]);
for (int i = 0; i < searchGroup.sortValues.length; i++) {
SchemaField field = groupSort.getSort()[i].getField() != null ? searcher.getSchema().getFieldOrNull(groupSort.getSort()[i].getField()) : null;
if (field != null) {
FieldType fieldType = field.getType();
if (searchGroup.sortValues[i] != null) {
searchGroup.sortValues[i] = fieldType.unmarshalSortValue(searchGroup.sortValues[i]);
}
}
}
searchGroups.add(searchGroup); searchGroups.add(searchGroup);
} }
} }
@ -99,22 +108,17 @@ public class SearchGroupsResultTransformer implements ShardResultTransformer<Lis
} }
private NamedList serializeSearchGroup(Collection<SearchGroup<BytesRef>> data, Sort groupSort) { private NamedList serializeSearchGroup(Collection<SearchGroup<BytesRef>> data, Sort groupSort) {
NamedList<Comparable[]> result = new NamedList<Comparable[]>(); NamedList<Object[]> result = new NamedList<Object[]>();
CharsRef spare = new CharsRef();
for (SearchGroup<BytesRef> searchGroup : data) { for (SearchGroup<BytesRef> searchGroup : data) {
Comparable[] convertedSortValues = new Comparable[searchGroup.sortValues.length]; Object[] convertedSortValues = new Object[searchGroup.sortValues.length];
for (int i = 0; i < searchGroup.sortValues.length; i++) { for (int i = 0; i < searchGroup.sortValues.length; i++) {
Comparable sortValue = (Comparable) searchGroup.sortValues[i]; Object sortValue = searchGroup.sortValues[i];
SchemaField field = groupSort.getSort()[i].getField() != null ? searcher.getSchema().getFieldOrNull(groupSort.getSort()[i].getField()) : null; SchemaField field = groupSort.getSort()[i].getField() != null ? searcher.getSchema().getFieldOrNull(groupSort.getSort()[i].getField()) : null;
if (field != null) { if (field != null) {
FieldType fieldType = field.getType(); FieldType fieldType = field.getType();
if (sortValue instanceof BytesRef) { if (sortValue != null) {
UnicodeUtil.UTF8toUTF16((BytesRef)sortValue, spare); sortValue = fieldType.marshalSortValue(sortValue);
String indexedValue = spare.toString();
sortValue = (Comparable) fieldType.toObject(field.createField(fieldType.indexedToReadable(indexedValue), 1.0f));
} else if (sortValue instanceof String) {
sortValue = (Comparable) fieldType.toObject(field.createField(fieldType.indexedToReadable((String) sortValue), 1.0f));
} }
} }
convertedSortValues[i] = sortValue; convertedSortValues[i] = sortValue;

View File

@ -93,6 +93,8 @@ public class TopGroupsResultTransformer implements ShardResultTransformer<List<C
public Map<String, ?> transformToNative(NamedList<NamedList> shardResponse, Sort groupSort, Sort sortWithinGroup, String shard) { public Map<String, ?> transformToNative(NamedList<NamedList> shardResponse, Sort groupSort, Sort sortWithinGroup, String shard) {
Map<String, Object> result = new HashMap<String, Object>(); Map<String, Object> result = new HashMap<String, Object>();
final IndexSchema schema = rb.req.getSearcher().getSchema();
for (Map.Entry<String, NamedList> entry : shardResponse) { for (Map.Entry<String, NamedList> entry : shardResponse) {
String key = entry.getKey(); String key = entry.getKey();
NamedList commandResult = entry.getValue(); NamedList commandResult = entry.getValue();
@ -124,6 +126,15 @@ public class TopGroupsResultTransformer implements ShardResultTransformer<List<C
Object sortValuesVal = document.get("sortValues"); Object sortValuesVal = document.get("sortValues");
if (sortValuesVal != null) { if (sortValuesVal != null) {
sortValues = ((List) sortValuesVal).toArray(); sortValues = ((List) sortValuesVal).toArray();
for (int k = 0; k < sortValues.length; k++) {
SchemaField field = groupSort.getSort()[k].getField() != null ? schema.getFieldOrNull(groupSort.getSort()[k].getField()) : null;
if (field != null) {
FieldType fieldType = field.getType();
if (sortValues[k] != null) {
sortValues[k] = fieldType.unmarshalSortValue(sortValues[k]);
}
}
}
} }
else { else {
log.warn("doc {} has null 'sortValues'", document); log.warn("doc {} has null 'sortValues'", document);
@ -158,6 +169,15 @@ public class TopGroupsResultTransformer implements ShardResultTransformer<List<C
score = Float.NaN; score = Float.NaN;
} }
Object[] sortValues = ((List) document.get("sortValues")).toArray(); Object[] sortValues = ((List) document.get("sortValues")).toArray();
for (int k = 0; k < sortValues.length; k++) {
SchemaField field = sortWithinGroup.getSort()[k].getField() != null ? schema.getFieldOrNull(sortWithinGroup.getSort()[k].getField()) : null;
if (field != null) {
FieldType fieldType = field.getType();
if (sortValues[k] != null) {
sortValues[k] = fieldType.unmarshalSortValue(sortValues[k]);
}
}
}
scoreDocs[j++] = new ShardDoc(score, sortValues, uniqueId, shard); scoreDocs[j++] = new ShardDoc(score, sortValues, uniqueId, shard);
} }
@ -217,12 +237,8 @@ public class TopGroupsResultTransformer implements ShardResultTransformer<List<C
SchemaField field = sortWithinGroup.getSort()[j].getField() != null ? schema.getFieldOrNull(sortWithinGroup.getSort()[j].getField()) : null; SchemaField field = sortWithinGroup.getSort()[j].getField() != null ? schema.getFieldOrNull(sortWithinGroup.getSort()[j].getField()) : null;
if (field != null) { if (field != null) {
FieldType fieldType = field.getType(); FieldType fieldType = field.getType();
if (sortValue instanceof BytesRef) { if (sortValue != null) {
UnicodeUtil.UTF8toUTF16((BytesRef)sortValue, spare); sortValue = fieldType.marshalSortValue(sortValue);
String indexedValue = spare.toString();
sortValue = fieldType.toObject(field.createField(fieldType.indexedToReadable(indexedValue), 1.0f));
} else if (sortValue instanceof String) {
sortValue = fieldType.toObject(field.createField(fieldType.indexedToReadable((String) sortValue), 1.0f));
} }
} }
convertedSortValues[j] = sortValue; convertedSortValues[j] = sortValue;
@ -272,12 +288,8 @@ public class TopGroupsResultTransformer implements ShardResultTransformer<List<C
? schema.getFieldOrNull(groupSort.getSort()[j].getField()) : null; ? schema.getFieldOrNull(groupSort.getSort()[j].getField()) : null;
if (field != null) { if (field != null) {
FieldType fieldType = field.getType(); FieldType fieldType = field.getType();
if (sortValue instanceof BytesRef) { if (sortValue != null) {
UnicodeUtil.UTF8toUTF16((BytesRef)sortValue, spare); sortValue = fieldType.marshalSortValue(sortValue);
String indexedValue = spare.toString();
sortValue = fieldType.toObject(field.createField(fieldType.indexedToReadable(indexedValue), 1.0f));
} else if (sortValue instanceof String) {
sortValue = fieldType.toObject(field.createField(fieldType.indexedToReadable((String) sortValue), 1.0f));
} }
} }
convertedSortValues[j] = sortValue; convertedSortValues[j] = sortValue;