Guarantee sorted order for [Double|Long|Bytes]Values

Values returned by [Double|Long|Bytes]Values are sorted today which
is guaranteed by the underlying Lucene index. Several implementations can
make use of this property but the interfaces don't guarantee this behavior.
This commit adds the guarantees and makes use of them in several places.

Note: This change might require sorting for 3rd party implemenations of these
interaces.
This commit is contained in:
Simon Willnauer 2013-10-31 14:18:16 +01:00
parent f5f6259d03
commit a9239f8147
14 changed files with 313 additions and 69 deletions

View File

@ -58,6 +58,12 @@ public abstract class AbstractAtomicNumericFieldData implements AtomicNumericFie
scratch.copyChars(Double.toString(values.nextValue()));
return scratch;
}
@Override
public Order getOrder() {
return values.getOrder();
}
};
} else {
final LongValues values = getLongValues();
@ -74,6 +80,11 @@ public abstract class AbstractAtomicNumericFieldData implements AtomicNumericFie
scratch.copyChars(Long.toString(values.nextValue()));
return scratch;
}
@Override
public Order getOrder() {
return values.getOrder();
}
};
}
}

View File

@ -79,4 +79,36 @@ public interface AtomicFieldData<Script extends ScriptDocValues> {
*/
BytesValues.WithOrdinals getBytesValues(boolean needsHashes);
}
/**
* This enum provides information about the order of the values for
* a given document. For instance {@link BytesValues} by default
* return values in {@link #BYTES} order but if the interface
* wraps a numeric variant the sort order might change to {@link #NUMERIC}.
* In that case the values might not be returned in byte sort order but in numeric
* order instead while maintaining the property of <tt>N < N+1</tt> during the
* value iterations.
*
* @see org.elasticsearch.index.fielddata.BytesValues#getOrder()
* @see org.elasticsearch.index.fielddata.DoubleValues#getOrder()
* @see org.elasticsearch.index.fielddata.LongValues#getOrder()
*/
public enum Order {
/**
* Donates Byte sort order
*/
BYTES,
/**
* Donates Numeric sort order
*/
NUMERIC,
/**
* Donates custom sort order
*/
CUSTOM,
/**
* Donates no sort order
*/
NONE
}
}

View File

@ -87,7 +87,13 @@ public abstract class BytesValues {
* Returns the next value for the current docID set to {@link #setDocument(int)}.
* This method should only be called <tt>N</tt> times where <tt>N</tt> is the number
* returned from {@link #setDocument(int)}. If called more than <tt>N</tt> times the behavior
* is undefined.
* is undefined. This interface guarantees that the values are returned in order.
* <p>
* If this instance returns ordered values the <tt>Nth</tt> value is strictly less than the <tt>N+1</tt> value with
* respect to the {@link AtomicFieldData.Order} returned from {@link #getOrder()}. If this instance returns
* <i>unordered</i> values {@link #getOrder()} must return {@link AtomicFieldData.Order#NONE}
* Note: the values returned are de-duplicated, only unique values are returned.
* </p>
*
* Note: the returned {@link BytesRef} might be shared across invocations.
*
@ -104,6 +110,14 @@ public abstract class BytesValues {
return scratch.hashCode();
}
/**
* Returns the order the values are returned from {@link #nextValue()}.
* <p> Note: {@link BytesValues} have {@link AtomicFieldData.Order#BYTES} by default.</p>
*/
public AtomicFieldData.Order getOrder() {
return AtomicFieldData.Order.BYTES;
}
/**
* Ordinal based {@link BytesValues}.
*/

View File

@ -76,11 +76,25 @@ public abstract class DoubleValues {
* This method should only be called <tt>N</tt> times where <tt>N</tt> is the number
* returned from {@link #setDocument(int)}. If called more than <tt>N</tt> times the behavior
* is undefined.
* <p>
* If this instance returns ordered values the <tt>Nth</tt> value is strictly less than the <tt>N+1</tt> value with
* respect to the {@link AtomicFieldData.Order} returned from {@link #getOrder()}. If this instance returns
* <i>unordered</i> values {@link #getOrder()} must return {@link AtomicFieldData.Order#NONE}
* Note: the values returned are de-duplicated, only unique values are returned.
* </p>
*
* @return the next value for the current docID set to {@link #setDocument(int)}.
*/
public abstract double nextValue();
/**
* Returns the order the values are returned from {@link #nextValue()}.
* <p> Note: {@link DoubleValues} have {@link AtomicFieldData.Order#NUMERIC} by default.</p>
*/
public AtomicFieldData.Order getOrder() {
return AtomicFieldData.Order.NUMERIC;
}
/**
* Ordinal based {@link DoubleValues}.
*/

View File

@ -53,4 +53,9 @@ public abstract class FilterBytesValues extends BytesValues {
public int currentValueHash() {
return delegate.currentValueHash();
}
@Override
public AtomicFieldData.Order getOrder() {
return delegate.getOrder();
}
}

View File

@ -40,4 +40,11 @@ public abstract class FilterDoubleValues extends DoubleValues {
public double nextValue() {
return delegate.nextValue();
}
@Override
public AtomicFieldData.Order getOrder() {
return delegate.getOrder();
}
}

View File

@ -42,4 +42,9 @@ public class FilterLongValues extends LongValues {
return delegate.nextValue();
}
@Override
public AtomicFieldData.Order getOrder() {
return delegate.getOrder();
}
}

View File

@ -72,6 +72,12 @@ public abstract class GeoPointValues {
* This method should only be called <tt>N</tt> times where <tt>N</tt> is the number
* returned from {@link #setDocument(int)}. If called more than <tt>N</tt> times the behavior
* is undefined.
* <p>
* If this instance returns ordered values the <tt>Nth</tt> value is strictly less than the <tt>N+1</tt> value with
* respect to the {@link AtomicFieldData.Order} returned from {@link #getOrder()}. If this instance returns
* <i>unordered</i> values {@link #getOrder()} must return {@link AtomicFieldData.Order#NONE}
* Note: the values returned are de-duplicated, only unique values are returned.
* </p>
*
* Note: the returned {@link GeoPoint} might be shared across invocations.
*
@ -79,6 +85,14 @@ public abstract class GeoPointValues {
*/
public abstract GeoPoint nextValue();
/**
* Returns the order the values are returned from {@link #nextValue()}.
* <p> Note: {@link GeoPointValues} have {@link AtomicFieldData.Order#NONE} by default.</p>
*/
public AtomicFieldData.Order getOrder() {
return AtomicFieldData.Order.NONE;
}
/**
* An empty {@link GeoPointValues} implementation
*/

View File

@ -77,11 +77,25 @@ public abstract class LongValues {
* This method should only be called <tt>N</tt> times where <tt>N</tt> is the number
* returned from {@link #setDocument(int)}. If called more than <tt>N</tt> times the behavior
* is undefined.
* <p>
* If this instance returns ordered values the <tt>Nth</tt> value is strictly less than the <tt>N+1</tt> value with
* respect to the {@link AtomicFieldData.Order} returned from {@link #getOrder()}. If this instance returns
* <i>unordered</i> values {@link #getOrder()} must return {@link AtomicFieldData.Order#NONE}
* Note: the values returned are de-duplicated, only unique values are returned.
* </p>
*
* @return the next value for the current docID set to {@link #setDocument(int)}.
*/
public abstract long nextValue();
/**
* Returns the order the values are returned from {@link #nextValue()}.
* <p> Note: {@link LongValues} have {@link AtomicFieldData.Order#NUMERIC} by default.</p>
*/
public AtomicFieldData.Order getOrder() {
return AtomicFieldData.Order.NUMERIC;
}
/**
* Ordinal based {@link LongValues}.
*/

View File

@ -21,11 +21,8 @@ package org.elasticsearch.index.fielddata.fieldcomparator;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.search.FieldComparator;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.RamUsageEstimator;
import org.elasticsearch.index.fielddata.BytesValues;
import org.elasticsearch.index.fielddata.FilterBytesValues;
import org.elasticsearch.index.fielddata.IndexFieldData;
import java.io.IOException;
@ -62,30 +59,26 @@ public final class BytesRefValComparator extends NestedWrappableComparator<Bytes
@Override
public int compareBottom(int doc) throws IOException {
int length = docTerms.setDocument(doc); // safes one hasValue lookup
BytesRef val2 = length == 0 ? missingValue : docTerms.nextValue();
BytesRef val2 = sortMode.getRelevantValue(docTerms, doc, missingValue);
return compareValues(bottom, val2);
}
@Override
public void copy(int slot, int doc) throws IOException {
int length = docTerms.setDocument(doc); // safes one hasValue lookup
if (length == 0) {
BytesRef relevantValue = sortMode.getRelevantValue(docTerms, doc, missingValue);
if (relevantValue == missingValue) {
values[slot] = missingValue;
} else {
if (values[slot] == null || values[slot] == missingValue) {
values[slot] = new BytesRef();
}
values[slot].copyBytes(docTerms.nextValue());
values[slot].copyBytes(relevantValue);
}
}
@Override
public FieldComparator<BytesRef> setNextReader(AtomicReaderContext context) throws IOException {
docTerms = indexFieldData.load(context).getBytesValues(false);
if (docTerms.isMultiValued()) {
docTerms = new MultiValuedBytesWrapper(docTerms, sortMode);
}
return this;
}
@ -114,53 +107,7 @@ public final class BytesRefValComparator extends NestedWrappableComparator<Bytes
@Override
public int compareDocToValue(int doc, BytesRef value) {
final int length = docTerms.setDocument(doc); // safes one hasValue lookup
return (length == 0 ? missingValue : docTerms.nextValue()).compareTo(value);
}
private static final class MultiValuedBytesWrapper extends FilterBytesValues {
private final SortMode sortMode;
private int numValues;
public MultiValuedBytesWrapper(BytesValues delegate, SortMode sortMode) {
super(delegate);
this.sortMode = sortMode;
}
public int setDocument(int docId) {
// either 0 or 1
return Math.min(1, (numValues = delegate.setDocument(docId)));
}
public BytesRef nextValue() {
BytesRef currentVal = delegate.nextValue();
// We MUST allocate a new byte[] since relevantVal might have been filled by reference by a PagedBytes instance
// meaning that the BytesRef.bytes are shared and shouldn't be overwritten. We can't use the bytes of the iterator
// either because they will be overwritten by subsequent calls in the current thread
scratch.bytes = new byte[ArrayUtil.oversize(currentVal.length, RamUsageEstimator.NUM_BYTES_BYTE)];
scratch.offset = 0;
scratch.length = currentVal.length;
System.arraycopy(currentVal.bytes, currentVal.offset, scratch.bytes, 0, currentVal.length);
for (int i = 1; i < numValues; i++) {
currentVal = delegate.nextValue();
if (sortMode == SortMode.MAX) {
if (currentVal.compareTo(scratch) > 0) {
scratch.grow(currentVal.length);
scratch.length = currentVal.length;
System.arraycopy(currentVal.bytes, currentVal.offset, scratch.bytes, 0, currentVal.length);
}
} else {
if (currentVal.compareTo(scratch) < 0) {
scratch.grow(currentVal.length);
scratch.length = currentVal.length;
System.arraycopy(currentVal.bytes, currentVal.offset, scratch.bytes, 0, currentVal.length);
}
}
}
return scratch;
}
return sortMode.getRelevantValue(docTerms, doc, missingValue).compareTo(value);
}
@Override

View File

@ -20,7 +20,10 @@
package org.elasticsearch.index.fielddata.fieldcomparator;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.ElasticSearchIllegalArgumentException;
import org.elasticsearch.index.fielddata.AtomicFieldData;
import org.elasticsearch.index.fielddata.BytesValues;
import org.elasticsearch.index.fielddata.DoubleValues;
import org.elasticsearch.index.fielddata.LongValues;
@ -125,6 +128,45 @@ public enum SortMode {
public long startLong() {
return Long.MAX_VALUE;
}
/**
* Returns the first value returned for the given <tt>docId</tt> or the <tt>defaultValue</tt> if the document
* has no values.
*/
@Override
public double getRelevantValue(DoubleValues values, int docId, double defaultValue) {
assert values.getOrder() != AtomicFieldData.Order.NONE;
if (values.setDocument(docId) > 0) {
return values.nextValue();
}
return defaultValue;
}
/**
* Returns the first value returned for the given <tt>docId</tt> or the <tt>defaultValue</tt> if the document
* has no values.
*/
@Override
public long getRelevantValue(LongValues values, int docId, long defaultValue) {
assert values.getOrder() != AtomicFieldData.Order.NONE;
if (values.setDocument(docId) > 0) {
return values.nextValue();
}
return defaultValue;
}
/**
* Returns the first value returned for the given <tt>docId</tt> or the <tt>defaultValue</tt> if the document
* has no values.
*/
@Override
public BytesRef getRelevantValue(BytesValues values, int docId, BytesRef defaultValue) {
assert values.getOrder() != AtomicFieldData.Order.NONE;
if (values.setDocument(docId) > 0) {
return values.nextValue();
}
return defaultValue;
}
},
/**
@ -163,6 +205,51 @@ public enum SortMode {
public long startLong() {
return Long.MIN_VALUE;
}
/**
* Returns the last value returned for the given <tt>docId</tt> or the <tt>defaultValue</tt> if the document
* has no values.
*/
@Override
public double getRelevantValue(DoubleValues values, int docId, double defaultValue) {
assert values.getOrder() != AtomicFieldData.Order.NONE;
final int numValues = values.setDocument(docId);
double retVal = defaultValue;
for (int i = 0; i < numValues; i++) {
retVal = values.nextValue();
}
return retVal;
}
/**
* Returns the last value returned for the given <tt>docId</tt> or the <tt>defaultValue</tt> if the document
* has no values.
*/
@Override
public long getRelevantValue(LongValues values, int docId, long defaultValue) {
assert values.getOrder() != AtomicFieldData.Order.NONE;
final int numValues = values.setDocument(docId);
long retVal = defaultValue;
for (int i = 0; i < numValues; i++) {
retVal = values.nextValue();
}
return retVal;
}
/**
* Returns the last value returned for the given <tt>docId</tt> or the <tt>defaultValue</tt> if the document
* has no values.
*/
@Override
public BytesRef getRelevantValue(BytesValues values, int docId, BytesRef defaultValue) {
assert values.getOrder() != AtomicFieldData.Order.NONE;
final int numValues = values.setDocument(docId);
BytesRef currentVal = defaultValue;
for (int i = 0; i < numValues; i++) {
currentVal = values.nextValue();
}
return currentVal;
}
};
/**
@ -265,8 +352,17 @@ public enum SortMode {
}
}
public final double getRelevantValue(DoubleValues values, int docId, double defaultValue) {
/**
* Returns the relevant value for the given document based on the {@link SortMode}. This
* method will apply each value for the given document to {@link #apply(double, double)} and returns
* the reduced value from {@link #reduce(double, int)} if the document has at least one value. Otherwise it will
* return the given default value.
* @param values the values to fetch the relevant value from.
* @param docId the doc id to fetch the relevant value for.
* @param defaultValue the default value if the document has no value
* @return the relevant value or the default value passed to the method.
*/
public double getRelevantValue(DoubleValues values, int docId, double defaultValue) {
final int numValues = values.setDocument(docId);
double relevantVal = startDouble();
double result = defaultValue;
@ -276,7 +372,17 @@ public enum SortMode {
return reduce(result, numValues);
}
public final long getRelevantValue(LongValues values, int docId, long defaultValue) {
/**
* Returns the relevant value for the given document based on the {@link SortMode}. This
* method will apply each value for the given document to {@link #apply(long, long)} and returns
* the reduced value from {@link #reduce(long, int)} if the document has at least one value. Otherwise it will
* return the given default value.
* @param values the values to fetch the relevant value from.
* @param docId the doc id to fetch the relevant value for.
* @param defaultValue the default value if the document has no value
* @return the relevant value or the default value passed to the method.
*/
public long getRelevantValue(LongValues values, int docId, long defaultValue) {
final int numValues = values.setDocument(docId);
long relevantVal = startLong();
long result = defaultValue;
@ -286,4 +392,20 @@ public enum SortMode {
return reduce(result, numValues);
}
/**
* Returns the relevant value for the given document based on the {@link SortMode}
* if the document has at least one value. Otherwise it will return same object given as the default value.
* Note: This method is optional and will throw {@link UnsupportedOperationException} if the sort mode doesn't
* allow a relevant value.
*
* @param values the values to fetch the relevant value from.
* @param docId the doc id to fetch the relevant value for.
* @param defaultValue the default value if the document has no value. This object will never be modified.
* @return the relevant value or the default value passed to the method.
*/
public BytesRef getRelevantValue(BytesValues values, int docId, BytesRef defaultValue) {
throw new UnsupportedOperationException("no relevant bytes value for sort mode: " + this.name());
}
}

View File

@ -93,6 +93,11 @@ public class SortedSetDVNumericAtomicFieldData extends SortedSetDVAtomicFieldDat
public BytesRef getValueByOrd(long ord) {
return convert(values.getValueByOrd(ord), scratch);
}
@Override
public Order getOrder() {
return Order.NUMERIC;
}
};
}
}

View File

@ -34,6 +34,8 @@ import java.util.*;
import java.util.Map.Entry;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.is;
import static org.hamcrest.Matchers.lessThan;
public class DuelFieldDataTests extends AbstractFieldDataTests {
@ -253,6 +255,8 @@ public class DuelFieldDataTests extends AbstractFieldDataTests {
ifdService.clear();
IndexNumericFieldData rightFieldData = ifdService.getForField(new FieldMapper.Names(right.getValue().name().toLowerCase(Locale.ROOT)),
right.getKey(), true);
assertOrder(left.getValue().order(), leftFieldData, context);
assertOrder(right.getValue().order(), rightFieldData, context);
duelFieldDataDouble(random, context, leftFieldData, rightFieldData);
duelFieldDataDouble(random, context, rightFieldData, leftFieldData);
@ -327,6 +331,8 @@ public class DuelFieldDataTests extends AbstractFieldDataTests {
CompositeReaderContext composite = perSegment.getContext();
List<AtomicReaderContext> leaves = composite.leaves();
for (AtomicReaderContext atomicReaderContext : leaves) {
assertOrder(AtomicFieldData.Order.BYTES, leftFieldData, atomicReaderContext);
assertOrder(AtomicFieldData.Order.BYTES, rightFieldData, atomicReaderContext);
duelFieldDataBytes(random, atomicReaderContext, leftFieldData, rightFieldData, pre);
}
perSegment.close();
@ -334,6 +340,11 @@ public class DuelFieldDataTests extends AbstractFieldDataTests {
}
private void assertOrder(AtomicFieldData.Order order, IndexFieldData<?> data, AtomicReaderContext context) throws Exception {
AtomicFieldData<?> leftData = randomBoolean() ? data.load(context) : data.loadDirect(context);
assertThat(leftData.getBytesValues(randomBoolean()).getOrder(), is(order));
}
private int[] getNumbers(Random random, int margin) {
if (random.nextInt(20) == 0) {
int[] num = new int[1 + random.nextInt(10)];
@ -360,11 +371,17 @@ public class DuelFieldDataTests extends AbstractFieldDataTests {
for (int i = 0; i < numDocs; i++) {
int numValues = 0;
assertThat((numValues = leftBytesValues.setDocument(i)), equalTo(rightBytesValues.setDocument(i)));
BytesRef previous = null;
for (int j = 0; j < numValues; j++) {
rightSpare.copyBytes(rightBytesValues.nextValue());
leftSpare.copyBytes(leftBytesValues.nextValue());
assertThat(rightSpare.hashCode(), equalTo(rightBytesValues.currentValueHash()));
assertThat(leftSpare.hashCode(), equalTo(leftBytesValues.currentValueHash()));
if (previous != null && leftBytesValues.getOrder() == rightBytesValues.getOrder()) { // we can only compare the
assertThat(pre.compare(previous, rightSpare), lessThan(0));
}
previous = BytesRef.deepCopyOf(rightSpare);
pre.toString(rightSpare);
pre.toString(leftSpare);
assertThat(pre.toString(leftSpare), equalTo(pre.toString(rightSpare)));
@ -388,8 +405,14 @@ public class DuelFieldDataTests extends AbstractFieldDataTests {
for (int i = 0; i < numDocs; i++) {
int numValues = 0;
assertThat((numValues = leftDoubleValues.setDocument(i)), equalTo(rightDoubleValues.setDocument(i)));
double previous = 0;
for (int j = 0; j < numValues; j++) {
assertThat(leftDoubleValues.nextValue(), equalTo(rightDoubleValues.nextValue()));
double current;
assertThat(leftDoubleValues.nextValue(), equalTo(current = rightDoubleValues.nextValue()));
if (j > 0) {
assertThat(Double.compare(previous,current), lessThan(0));
}
previous = current;
}
}
}
@ -405,9 +428,15 @@ public class DuelFieldDataTests extends AbstractFieldDataTests {
LongValues rightLongValues = rightData.getLongValues();
for (int i = 0; i < numDocs; i++) {
int numValues = 0;
long previous = 0;
assertThat((numValues = leftLongValues.setDocument(i)), equalTo(rightLongValues.setDocument(i)));
for (int j = 0; j < numValues; j++) {
assertThat(leftLongValues.nextValue(), equalTo(rightLongValues.nextValue()));
long current;
assertThat(leftLongValues.nextValue(), equalTo(current = rightLongValues.nextValue()));
if (j > 0) {
assertThat(previous, lessThan(current));
}
previous = current;
}
}
}
@ -418,18 +447,39 @@ public class DuelFieldDataTests extends AbstractFieldDataTests {
public String toString(BytesRef ref) {
return ref.utf8ToString();
}
public int compare(BytesRef a, BytesRef b) {
return a.compareTo(b);
}
}
private static class ToDoublePreprocessor extends Preprocessor {
@Override
public String toString(BytesRef ref) {
assert ref.length > 0;
return Double.toString(Double.parseDouble(super.toString(ref)));
}
@Override
public int compare(BytesRef a, BytesRef b) {
Double _a = Double.parseDouble(super.toString(a));
return _a.compareTo(Double.parseDouble(super.toString(b)));
}
}
private static enum Type {
Float, Double, Integer, Long, Bytes
Float(AtomicFieldData.Order.NUMERIC), Double(AtomicFieldData.Order.NUMERIC), Integer(AtomicFieldData.Order.NUMERIC), Long(AtomicFieldData.Order.NUMERIC), Bytes(AtomicFieldData.Order.BYTES);
private final AtomicFieldData.Order order;
Type(AtomicFieldData.Order order) {
this.order = order;
}
public AtomicFieldData.Order order() {
return order;
}
}
}

View File

@ -35,9 +35,7 @@ import java.util.ArrayList;
import java.util.List;
import java.util.Random;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.instanceOf;
import static org.hamcrest.Matchers.is;
import static org.hamcrest.Matchers.*;
/**
* Tests for all integer types (byte, short, int, long).
@ -358,8 +356,14 @@ public class LongFieldDataTests extends AbstractNumericFieldDataTests {
}
doubleSet.clear();
numValues = doubleData.setDocument(i);
double prev = 0;
for (int j = 0; j < numValues; j++) {
doubleSet.add(doubleData.nextValue());
double current;
doubleSet.add(current = doubleData.nextValue());
if (j > 0) {
assertThat(prev, lessThan(current));
}
prev = current;
}
assertThat(doubleSet, equalTo(doubleV));
}