Skip docs with Docvalues in NumericLeafComparator (#12405)

* Skip document by docValues

* When the queue is full and there is only one comparator, we can tune maxValueAsBytes/minValueAsBytes more tightly. For instance, if the sort is ascending and the bottom value is 5, we will use the range [MIN_VALUE, 4].
---------

Co-authored-by: Adrien Grand <jpountz@gmail.com>
This commit is contained in:
Lu Xugang 2023-11-09 13:05:28 +08:00 committed by GitHub
parent 8665014fa8
commit a71d64a598
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
35 changed files with 421 additions and 106 deletions

View File

@ -262,6 +262,8 @@ Optimizations
* GITHUB#11903: Faster sort on high-cardinality string fields. (Adrien Grand) * GITHUB#11903: Faster sort on high-cardinality string fields. (Adrien Grand)
* GITHUB#12381: Skip docs with DocValues in NumericLeafComparator. (Lu Xugang, Adrien Grand)
Changes in runtime behavior Changes in runtime behavior
--------------------- ---------------------

View File

@ -23,6 +23,7 @@ import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.Terms; import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum; import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.FieldComparator; import org.apache.lucene.search.FieldComparator;
import org.apache.lucene.search.Pruning;
import org.apache.lucene.search.SimpleFieldComparator; import org.apache.lucene.search.SimpleFieldComparator;
import org.apache.lucene.search.SortField; import org.apache.lucene.search.SortField;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
@ -44,7 +45,7 @@ final class FeatureSortField extends SortField {
} }
@Override @Override
public FieldComparator<?> getComparator(int numHits, boolean enableSkipping) { public FieldComparator<?> getComparator(int numHits, Pruning pruning) {
return new FeatureComparator(numHits, getField(), featureName); return new FeatureComparator(numHits, getField(), featureName);
} }

View File

@ -18,6 +18,7 @@ package org.apache.lucene.document;
import org.apache.lucene.geo.GeoUtils; import org.apache.lucene.geo.GeoUtils;
import org.apache.lucene.search.FieldComparator; import org.apache.lucene.search.FieldComparator;
import org.apache.lucene.search.Pruning;
import org.apache.lucene.search.SortField; import org.apache.lucene.search.SortField;
/** Sorts by distance from an origin location. */ /** Sorts by distance from an origin location. */
@ -38,7 +39,7 @@ final class LatLonPointSortField extends SortField {
} }
@Override @Override
public FieldComparator<?> getComparator(int numHits, boolean enableSkipping) { public FieldComparator<?> getComparator(int numHits, Pruning pruning) {
return new LatLonPointDistanceComparator(getField(), latitude, longitude, numHits); return new LatLonPointDistanceComparator(getField(), latitude, longitude, numHits);
} }

View File

@ -17,6 +17,7 @@
package org.apache.lucene.document; package org.apache.lucene.document;
import org.apache.lucene.search.FieldComparator; import org.apache.lucene.search.FieldComparator;
import org.apache.lucene.search.Pruning;
import org.apache.lucene.search.SortField; import org.apache.lucene.search.SortField;
/** Sorts by distance from an origin location. */ /** Sorts by distance from an origin location. */
@ -35,7 +36,7 @@ final class XYPointSortField extends SortField {
} }
@Override @Override
public FieldComparator<?> getComparator(int numHits, boolean enableSkipping) { public FieldComparator<?> getComparator(int numHits, Pruning pruning) {
return new XYPointDistanceComparator(getField(), x, y, numHits); return new XYPointDistanceComparator(getField(), x, y, numHits);
} }

View File

@ -60,6 +60,7 @@ import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.FieldExistsQuery; import org.apache.lucene.search.FieldExistsQuery;
import org.apache.lucene.search.KnnCollector; import org.apache.lucene.search.KnnCollector;
import org.apache.lucene.search.LeafFieldComparator; import org.apache.lucene.search.LeafFieldComparator;
import org.apache.lucene.search.Pruning;
import org.apache.lucene.search.Sort; import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField; import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TopDocs; import org.apache.lucene.search.TopDocs;
@ -1119,7 +1120,7 @@ public final class CheckIndex implements Closeable {
for (int i = 0; i < fields.length; i++) { for (int i = 0; i < fields.length; i++) {
reverseMul[i] = fields[i].getReverse() ? -1 : 1; reverseMul[i] = fields[i].getReverse() ? -1 : 1;
comparators[i] = fields[i].getComparator(1, false).getLeafComparator(readerContext); comparators[i] = fields[i].getComparator(1, Pruning.NONE).getLeafComparator(readerContext);
} }
int maxDoc = reader.maxDoc(); int maxDoc = reader.maxDoc();

View File

@ -528,8 +528,8 @@ public abstract class DoubleValuesSource implements SegmentCacheable {
@Override @Override
public FieldComparator<Double> newComparator( public FieldComparator<Double> newComparator(
String fieldname, int numHits, boolean enableSkipping, boolean reversed) { String fieldname, int numHits, Pruning pruning, boolean reversed) {
return new DoubleComparator(numHits, fieldname, missingValue, reversed, false) { return new DoubleComparator(numHits, fieldname, missingValue, reversed, Pruning.NONE) {
@Override @Override
public LeafFieldComparator getLeafComparator(LeafReaderContext context) throws IOException { public LeafFieldComparator getLeafComparator(LeafReaderContext context) throws IOException {
DoubleValuesHolder holder = new DoubleValuesHolder(); DoubleValuesHolder holder = new DoubleValuesHolder();

View File

@ -30,5 +30,5 @@ public abstract class FieldComparatorSource {
* @return FieldComparator. * @return FieldComparator.
*/ */
public abstract FieldComparator<?> newComparator( public abstract FieldComparator<?> newComparator(
String fieldname, int numHits, boolean enableSkipping, boolean reversed); String fieldname, int numHits, Pruning pruning, boolean reversed);
} }

View File

@ -125,7 +125,7 @@ public abstract class FieldValueHitQueue<T extends FieldValueHitQueue.Entry>
// need to check it again. // need to check it again.
// All these are required by this class's API - need to return arrays. // All these are required by this class's API - need to return arrays.
// Therefore even in the case of a single comparator, create an array // Therefore, even in the case of a single comparator, create an array
// anyway. // anyway.
this.fields = fields; this.fields = fields;
int numComparators = fields.length; int numComparators = fields.length;
@ -134,7 +134,12 @@ public abstract class FieldValueHitQueue<T extends FieldValueHitQueue.Entry>
for (int i = 0; i < numComparators; ++i) { for (int i = 0; i < numComparators; ++i) {
SortField field = fields[i]; SortField field = fields[i];
reverseMul[i] = field.reverse ? -1 : 1; reverseMul[i] = field.reverse ? -1 : 1;
comparators[i] = field.getComparator(size, i == 0); comparators[i] =
field.getComparator(
size,
i == 0
? (numComparators > 1 ? Pruning.GREATER_THAN : Pruning.GREATER_THAN_OR_EQUAL_TO)
: Pruning.NONE);
} }
} }

View File

@ -608,7 +608,7 @@ public class IndexSortSortedNumericDocValuesRangeQuery extends Query {
throws IOException { throws IOException {
@SuppressWarnings("unchecked") @SuppressWarnings("unchecked")
FieldComparator<Number> fieldComparator = FieldComparator<Number> fieldComparator =
(FieldComparator<Number>) sortField.getComparator(1, false); (FieldComparator<Number>) sortField.getComparator(1, Pruning.NONE);
if (type == Type.INT) { if (type == Type.INT) {
fieldComparator.setTopValue((int) topValue); fieldComparator.setTopValue((int) topValue);
} else { } else {

View File

@ -337,8 +337,8 @@ public abstract class LongValuesSource implements SegmentCacheable {
@Override @Override
public FieldComparator<Long> newComparator( public FieldComparator<Long> newComparator(
String fieldname, int numHits, boolean enableSkipping, boolean reversed) { String fieldname, int numHits, Pruning pruning, boolean reversed) {
return new LongComparator(numHits, fieldname, missingValue, reversed, false) { return new LongComparator(numHits, fieldname, missingValue, reversed, Pruning.NONE) {
@Override @Override
public LeafFieldComparator getLeafComparator(LeafReaderContext context) throws IOException { public LeafFieldComparator getLeafComparator(LeafReaderContext context) throws IOException {
LongValuesHolder holder = new LongValuesHolder(); LongValuesHolder holder = new LongValuesHolder();

View File

@ -0,0 +1,33 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.search;
/**
* Controls how a {@link LeafFieldComparator} is allowed to skip documents.
*
* <p>A value of this enum is handed to {@link SortField#getComparator(int, Pruning)} to tell the
* comparator how aggressively it may prune.
*/
public enum Pruning {
/** Not allowed to skip documents. */
NONE,
/**
* Allowed to skip documents that compare strictly better than the top value, or strictly worse
* than the bottom value. Documents that compare equal to the bottom value are not skipped, since
* with several comparators they could still be competitive.
*/
GREATER_THAN,
/**
* Allowed to skip documents that compare better than the top value, or worse than or equal to the
* bottom value. For instance, if the sort is ascending and the bottom value is 5, only values in
* the range [MIN_VALUE, 4] still need to be visited.
*/
GREATER_THAN_OR_EQUAL_TO
}

View File

@ -500,11 +500,11 @@ public class SortField {
* *
* @lucene.experimental * @lucene.experimental
* @param numHits number of top hits the queue will store * @param numHits number of top hits the queue will store
* @param enableSkipping true if the comparator can skip documents via {@link * @param pruning controls how the comparator can skip documents via {@link
* LeafFieldComparator#competitiveIterator()} * LeafFieldComparator#competitiveIterator()}
* @return {@link FieldComparator} to use when sorting * @return {@link FieldComparator} to use when sorting
*/ */
public FieldComparator<?> getComparator(final int numHits, boolean enableSkipping) { public FieldComparator<?> getComparator(final int numHits, Pruning pruning) {
final FieldComparator<?> fieldComparator; final FieldComparator<?> fieldComparator;
switch (type) { switch (type) {
case SCORE: case SCORE:
@ -512,38 +512,36 @@ public class SortField {
break; break;
case DOC: case DOC:
fieldComparator = new DocComparator(numHits, reverse, enableSkipping); fieldComparator = new DocComparator(numHits, reverse, pruning);
break; break;
case INT: case INT:
fieldComparator = fieldComparator =
new IntComparator(numHits, field, (Integer) missingValue, reverse, enableSkipping); new IntComparator(numHits, field, (Integer) missingValue, reverse, pruning);
break; break;
case FLOAT: case FLOAT:
fieldComparator = fieldComparator =
new FloatComparator(numHits, field, (Float) missingValue, reverse, enableSkipping); new FloatComparator(numHits, field, (Float) missingValue, reverse, pruning);
break; break;
case LONG: case LONG:
fieldComparator = fieldComparator = new LongComparator(numHits, field, (Long) missingValue, reverse, pruning);
new LongComparator(numHits, field, (Long) missingValue, reverse, enableSkipping);
break; break;
case DOUBLE: case DOUBLE:
fieldComparator = fieldComparator =
new DoubleComparator(numHits, field, (Double) missingValue, reverse, enableSkipping); new DoubleComparator(numHits, field, (Double) missingValue, reverse, pruning);
break; break;
case CUSTOM: case CUSTOM:
assert comparatorSource != null; assert comparatorSource != null;
fieldComparator = comparatorSource.newComparator(field, numHits, enableSkipping, reverse); fieldComparator = comparatorSource.newComparator(field, numHits, pruning, reverse);
break; break;
case STRING: case STRING:
fieldComparator = fieldComparator =
new TermOrdValComparator( new TermOrdValComparator(numHits, field, missingValue == STRING_LAST, reverse, pruning);
numHits, field, missingValue == STRING_LAST, reverse, enableSkipping);
break; break;
case STRING_VAL: case STRING_VAL:

View File

@ -241,7 +241,7 @@ public class SortedNumericSortField extends SortField {
} }
@Override @Override
public FieldComparator<?> getComparator(int numHits, boolean enableSkipping) { public FieldComparator<?> getComparator(int numHits, Pruning pruning) {
final FieldComparator<?> fieldComparator; final FieldComparator<?> fieldComparator;
// we can use sort optimization with points if selector is MIN or MAX, // we can use sort optimization with points if selector is MIN or MAX,
// because we can still build successful iterator over points in this case. // because we can still build successful iterator over points in this case.
@ -255,7 +255,7 @@ public class SortedNumericSortField extends SortField {
getField(), getField(),
(Integer) missingValue, (Integer) missingValue,
reverse, reverse,
enableSkipping && isMinOrMax) { isMinOrMax ? pruning : Pruning.NONE) {
@Override @Override
public LeafFieldComparator getLeafComparator(LeafReaderContext context) public LeafFieldComparator getLeafComparator(LeafReaderContext context)
throws IOException { throws IOException {
@ -273,7 +273,11 @@ public class SortedNumericSortField extends SortField {
case FLOAT: case FLOAT:
fieldComparator = fieldComparator =
new FloatComparator( new FloatComparator(
numHits, getField(), (Float) missingValue, reverse, enableSkipping && isMinOrMax) { numHits,
getField(),
(Float) missingValue,
reverse,
isMinOrMax ? pruning : Pruning.NONE) {
@Override @Override
public LeafFieldComparator getLeafComparator(LeafReaderContext context) public LeafFieldComparator getLeafComparator(LeafReaderContext context)
throws IOException { throws IOException {
@ -291,7 +295,11 @@ public class SortedNumericSortField extends SortField {
case LONG: case LONG:
fieldComparator = fieldComparator =
new LongComparator( new LongComparator(
numHits, getField(), (Long) missingValue, reverse, enableSkipping && isMinOrMax) { numHits,
getField(),
(Long) missingValue,
reverse,
isMinOrMax ? pruning : Pruning.NONE) {
@Override @Override
public LeafFieldComparator getLeafComparator(LeafReaderContext context) public LeafFieldComparator getLeafComparator(LeafReaderContext context)
throws IOException { throws IOException {
@ -309,7 +317,11 @@ public class SortedNumericSortField extends SortField {
case DOUBLE: case DOUBLE:
fieldComparator = fieldComparator =
new DoubleComparator( new DoubleComparator(
numHits, getField(), (Double) missingValue, reverse, enableSkipping && isMinOrMax) { numHits,
getField(),
(Double) missingValue,
reverse,
isMinOrMax ? pruning : Pruning.NONE) {
@Override @Override
public LeafFieldComparator getLeafComparator(LeafReaderContext context) public LeafFieldComparator getLeafComparator(LeafReaderContext context)
throws IOException { throws IOException {

View File

@ -178,10 +178,10 @@ public class SortedSetSortField extends SortField {
} }
@Override @Override
public FieldComparator<?> getComparator(int numHits, boolean enableSkipping) { public FieldComparator<?> getComparator(int numHits, Pruning pruning) {
boolean finalEnableSkipping = enableSkipping && getOptimizeSortWithIndexedData(); Pruning finalPruning = getOptimizeSortWithIndexedData() ? pruning : Pruning.NONE;
return new TermOrdValComparator( return new TermOrdValComparator(
numHits, getField(), missingValue == STRING_LAST, reverse, finalEnableSkipping) { numHits, getField(), missingValue == STRING_LAST, reverse, finalPruning) {
@Override @Override
protected SortedDocValues getSortedDocValues(LeafReaderContext context, String field) protected SortedDocValues getSortedDocValues(LeafReaderContext context, String field)
throws IOException { throws IOException {

View File

@ -159,7 +159,7 @@ public class TopDocs {
reverseMul = new int[sortFields.length]; reverseMul = new int[sortFields.length];
for (int compIDX = 0; compIDX < sortFields.length; compIDX++) { for (int compIDX = 0; compIDX < sortFields.length; compIDX++) {
final SortField sortField = sortFields[compIDX]; final SortField sortField = sortFields[compIDX];
comparators[compIDX] = sortField.getComparator(1, compIDX == 0); comparators[compIDX] = sortField.getComparator(1, Pruning.NONE);
reverseMul[compIDX] = sortField.getReverse() ? -1 : 1; reverseMul[compIDX] = sortField.getReverse() ? -1 : 1;
} }
} }

View File

@ -22,6 +22,7 @@ import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.FieldComparator; import org.apache.lucene.search.FieldComparator;
import org.apache.lucene.search.LeafFieldComparator; import org.apache.lucene.search.LeafFieldComparator;
import org.apache.lucene.search.Pruning;
import org.apache.lucene.search.Scorable; import org.apache.lucene.search.Scorable;
/** Comparator that sorts by asc _doc */ /** Comparator that sorts by asc _doc */
@ -35,10 +36,10 @@ public class DocComparator extends FieldComparator<Integer> {
private boolean hitsThresholdReached; private boolean hitsThresholdReached;
/** Creates a new comparator based on document ids for {@code numHits} */ /** Creates a new comparator based on document ids for {@code numHits} */
public DocComparator(int numHits, boolean reverse, boolean enableSkipping) { public DocComparator(int numHits, boolean reverse, Pruning pruning) {
this.docIDs = new int[numHits]; this.docIDs = new int[numHits];
// skipping functionality is enabled if we are sorting by _doc in asc order as a primary sort // skipping functionality is enabled if we are sorting by _doc in asc order as a primary sort
this.enableSkipping = (reverse == false && enableSkipping); this.enableSkipping = (reverse == false && pruning != Pruning.NONE);
} }
@Override @Override

View File

@ -21,6 +21,7 @@ import java.io.IOException;
import org.apache.lucene.document.DoublePoint; import org.apache.lucene.document.DoublePoint;
import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.LeafFieldComparator; import org.apache.lucene.search.LeafFieldComparator;
import org.apache.lucene.search.Pruning;
/** /**
* Comparator based on {@link Double#compare} for {@code numHits}. This comparator provides a * Comparator based on {@link Double#compare} for {@code numHits}. This comparator provides a
@ -32,8 +33,8 @@ public class DoubleComparator extends NumericComparator<Double> {
protected double bottom; protected double bottom;
public DoubleComparator( public DoubleComparator(
int numHits, String field, Double missingValue, boolean reverse, boolean enableSkipping) { int numHits, String field, Double missingValue, boolean reverse, Pruning pruning) {
super(field, missingValue != null ? missingValue : 0.0, reverse, enableSkipping, Double.BYTES); super(field, missingValue != null ? missingValue : 0.0, reverse, pruning, Double.BYTES);
values = new double[numHits]; values = new double[numHits];
} }

View File

@ -21,6 +21,7 @@ import java.io.IOException;
import org.apache.lucene.document.FloatPoint; import org.apache.lucene.document.FloatPoint;
import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.LeafFieldComparator; import org.apache.lucene.search.LeafFieldComparator;
import org.apache.lucene.search.Pruning;
/** /**
* Comparator based on {@link Float#compare} for {@code numHits}. This comparator provides a * Comparator based on {@link Float#compare} for {@code numHits}. This comparator provides a
@ -32,8 +33,8 @@ public class FloatComparator extends NumericComparator<Float> {
protected float bottom; protected float bottom;
public FloatComparator( public FloatComparator(
int numHits, String field, Float missingValue, boolean reverse, boolean enableSkipping) { int numHits, String field, Float missingValue, boolean reverse, Pruning pruning) {
super(field, missingValue != null ? missingValue : 0.0f, reverse, enableSkipping, Float.BYTES); super(field, missingValue != null ? missingValue : 0.0f, reverse, pruning, Float.BYTES);
values = new float[numHits]; values = new float[numHits];
} }

View File

@ -21,6 +21,7 @@ import java.io.IOException;
import org.apache.lucene.document.IntPoint; import org.apache.lucene.document.IntPoint;
import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.LeafFieldComparator; import org.apache.lucene.search.LeafFieldComparator;
import org.apache.lucene.search.Pruning;
/** /**
* Comparator based on {@link Integer#compare} for {@code numHits}. This comparator provides a * Comparator based on {@link Integer#compare} for {@code numHits}. This comparator provides a
@ -32,8 +33,8 @@ public class IntComparator extends NumericComparator<Integer> {
protected int bottom; protected int bottom;
public IntComparator( public IntComparator(
int numHits, String field, Integer missingValue, boolean reverse, boolean enableSkipping) { int numHits, String field, Integer missingValue, boolean reverse, Pruning pruning) {
super(field, missingValue != null ? missingValue : 0, reverse, enableSkipping, Integer.BYTES); super(field, missingValue != null ? missingValue : 0, reverse, pruning, Integer.BYTES);
values = new int[numHits]; values = new int[numHits];
} }

View File

@ -21,6 +21,7 @@ import java.io.IOException;
import org.apache.lucene.document.LongPoint; import org.apache.lucene.document.LongPoint;
import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.LeafFieldComparator; import org.apache.lucene.search.LeafFieldComparator;
import org.apache.lucene.search.Pruning;
/** /**
* Comparator based on {@link Long#compare} for {@code numHits}. This comparator provides a skipping * Comparator based on {@link Long#compare} for {@code numHits}. This comparator provides a skipping
@ -32,8 +33,8 @@ public class LongComparator extends NumericComparator<Long> {
protected long bottom; protected long bottom;
public LongComparator( public LongComparator(
int numHits, String field, Long missingValue, boolean reverse, boolean enableSkipping) { int numHits, String field, Long missingValue, boolean reverse, Pruning pruning) {
super(field, missingValue != null ? missingValue : 0L, reverse, enableSkipping, Long.BYTES); super(field, missingValue != null ? missingValue : 0L, reverse, pruning, Long.BYTES);
values = new long[numHits]; values = new long[numHits];
} }

View File

@ -26,11 +26,13 @@ import org.apache.lucene.index.PointValues;
import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.FieldComparator; import org.apache.lucene.search.FieldComparator;
import org.apache.lucene.search.LeafFieldComparator; import org.apache.lucene.search.LeafFieldComparator;
import org.apache.lucene.search.Pruning;
import org.apache.lucene.search.Scorable; import org.apache.lucene.search.Scorable;
import org.apache.lucene.search.Scorer; import org.apache.lucene.search.Scorer;
import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.ArrayUtil.ByteArrayComparator; import org.apache.lucene.util.ArrayUtil.ByteArrayComparator;
import org.apache.lucene.util.DocIdSetBuilder; import org.apache.lucene.util.DocIdSetBuilder;
import org.apache.lucene.util.NumericUtils;
/** /**
* Abstract numeric comparator for comparing numeric values. This comparator provides a skipping * Abstract numeric comparator for comparing numeric values. This comparator provides a skipping
@ -56,15 +58,14 @@ public abstract class NumericComparator<T extends Number> extends FieldComparato
protected boolean singleSort; // singleSort is true, if sort is based on a single sort field. protected boolean singleSort; // singleSort is true, if sort is based on a single sort field.
protected boolean hitsThresholdReached; protected boolean hitsThresholdReached;
protected boolean queueFull; protected boolean queueFull;
private boolean canSkipDocuments; protected Pruning pruning;
protected NumericComparator( protected NumericComparator(
String field, T missingValue, boolean reverse, boolean enableSkipping, int bytesCount) { String field, T missingValue, boolean reverse, Pruning pruning, int bytesCount) {
this.field = field; this.field = field;
this.missingValue = missingValue; this.missingValue = missingValue;
this.reverse = reverse; this.reverse = reverse;
// skipping functionality is only relevant for primary sort this.pruning = pruning;
this.canSkipDocuments = enableSkipping;
this.bytesCount = bytesCount; this.bytesCount = bytesCount;
this.bytesComparator = ArrayUtil.getUnsignedComparator(bytesCount); this.bytesComparator = ArrayUtil.getUnsignedComparator(bytesCount);
} }
@ -81,11 +82,12 @@ public abstract class NumericComparator<T extends Number> extends FieldComparato
@Override @Override
public void disableSkipping() { public void disableSkipping() {
canSkipDocuments = false; pruning = Pruning.NONE;
} }
/** Leaf comparator for {@link NumericComparator} that provides skipping functionality */ /** Leaf comparator for {@link NumericComparator} that provides skipping functionality */
public abstract class NumericLeafComparator implements LeafFieldComparator { public abstract class NumericLeafComparator implements LeafFieldComparator {
private final LeafReaderContext context;
protected final NumericDocValues docValues; protected final NumericDocValues docValues;
private final PointValues pointValues; private final PointValues pointValues;
// if skipping functionality should be enabled on this segment // if skipping functionality should be enabled on this segment
@ -103,8 +105,9 @@ public abstract class NumericComparator<T extends Number> extends FieldComparato
private int tryUpdateFailCount = 0; private int tryUpdateFailCount = 0;
public NumericLeafComparator(LeafReaderContext context) throws IOException { public NumericLeafComparator(LeafReaderContext context) throws IOException {
this.context = context;
this.docValues = getNumericDocValues(context, field); this.docValues = getNumericDocValues(context, field);
this.pointValues = canSkipDocuments ? context.reader().getPointValues(field) : null; this.pointValues = pruning != Pruning.NONE ? context.reader().getPointValues(field) : null;
if (pointValues != null) { if (pointValues != null) {
FieldInfo info = context.reader().getFieldInfos().fieldInfo(field); FieldInfo info = context.reader().getFieldInfos().fieldInfo(field);
if (info == null || info.getPointDimensionCount() == 0) { if (info == null || info.getPointDimensionCount() == 0) {
@ -200,22 +203,22 @@ public abstract class NumericComparator<T extends Number> extends FieldComparato
return; return;
} }
if (reverse == false) { if (reverse == false) {
if (queueFull) { // bottom is avilable only when queue is full if (queueFull) { // bottom is available only when queue is full
maxValueAsBytes = maxValueAsBytes == null ? new byte[bytesCount] : maxValueAsBytes; maxValueAsBytes = maxValueAsBytes == null ? new byte[bytesCount] : maxValueAsBytes;
encodeBottom(maxValueAsBytes); encodeBottom();
} }
if (topValueSet) { if (topValueSet) {
minValueAsBytes = minValueAsBytes == null ? new byte[bytesCount] : minValueAsBytes; minValueAsBytes = minValueAsBytes == null ? new byte[bytesCount] : minValueAsBytes;
encodeTop(minValueAsBytes); encodeTop();
} }
} else { } else {
if (queueFull) { // bottom is avilable only when queue is full if (queueFull) { // bottom is available only when queue is full
minValueAsBytes = minValueAsBytes == null ? new byte[bytesCount] : minValueAsBytes; minValueAsBytes = minValueAsBytes == null ? new byte[bytesCount] : minValueAsBytes;
encodeBottom(minValueAsBytes); encodeBottom();
} }
if (topValueSet) { if (topValueSet) {
maxValueAsBytes = maxValueAsBytes == null ? new byte[bytesCount] : maxValueAsBytes; maxValueAsBytes = maxValueAsBytes == null ? new byte[bytesCount] : maxValueAsBytes;
encodeTop(maxValueAsBytes); encodeTop();
} }
} }
@ -244,15 +247,13 @@ public abstract class NumericComparator<T extends Number> extends FieldComparato
} }
if (maxValueAsBytes != null) { if (maxValueAsBytes != null) {
int cmp = bytesComparator.compare(packedValue, 0, maxValueAsBytes, 0); int cmp = bytesComparator.compare(packedValue, 0, maxValueAsBytes, 0);
// if doc's value is too high or for single sort even equal, it is not competitive
// and the doc can be skipped if (cmp > 0) return;
if (cmp > 0 || (singleSort && cmp == 0)) return;
} }
if (minValueAsBytes != null) { if (minValueAsBytes != null) {
int cmp = bytesComparator.compare(packedValue, 0, minValueAsBytes, 0); int cmp = bytesComparator.compare(packedValue, 0, minValueAsBytes, 0);
// if doc's value is too low or for single sort even equal, it is not competitive
// and the doc can be skipped if (cmp < 0) return;
if (cmp < 0 || (singleSort && cmp == 0)) return;
} }
adder.add(docID); // doc is competitive adder.add(docID); // doc is competitive
} }
@ -261,13 +262,15 @@ public abstract class NumericComparator<T extends Number> extends FieldComparato
public PointValues.Relation compare(byte[] minPackedValue, byte[] maxPackedValue) { public PointValues.Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
if (maxValueAsBytes != null) { if (maxValueAsBytes != null) {
int cmp = bytesComparator.compare(minPackedValue, 0, maxValueAsBytes, 0); int cmp = bytesComparator.compare(minPackedValue, 0, maxValueAsBytes, 0);
if (cmp > 0 || (singleSort && cmp == 0)) // 1. cmp == 0 and pruning == Pruning.GREATER_THAN_OR_EQUAL_TO: if the sort is
return PointValues.Relation.CELL_OUTSIDE_QUERY; // ascending then maxValueAsBytes is the value just below bottom, so it is competitive
// 2. cmp == 0 and pruning == Pruning.GREATER_THAN: maxValueAsBytes equals the
// bottom, but there are multiple comparators, so it could still be competitive
if (cmp > 0) return PointValues.Relation.CELL_OUTSIDE_QUERY;
} }
if (minValueAsBytes != null) { if (minValueAsBytes != null) {
int cmp = bytesComparator.compare(maxPackedValue, 0, minValueAsBytes, 0); int cmp = bytesComparator.compare(maxPackedValue, 0, minValueAsBytes, 0);
if (cmp < 0 || (singleSort && cmp == 0)) if (cmp < 0) return PointValues.Relation.CELL_OUTSIDE_QUERY;
return PointValues.Relation.CELL_OUTSIDE_QUERY;
} }
if ((maxValueAsBytes != null if ((maxValueAsBytes != null
&& bytesComparator.compare(maxPackedValue, 0, maxValueAsBytes, 0) > 0) && bytesComparator.compare(maxPackedValue, 0, maxValueAsBytes, 0) > 0)
@ -285,6 +288,11 @@ public abstract class NumericComparator<T extends Number> extends FieldComparato
// the new range is not selective enough to be worth materializing, it doesn't reduce number // the new range is not selective enough to be worth materializing, it doesn't reduce number
// of docs at least 8x // of docs at least 8x
updateSkipInterval(false); updateSkipInterval(false);
if (pointValues.getDocCount() < iteratorCost) {
// Use the set of doc with values to help drive iteration
competitiveIterator = getNumericDocValues(context, field);
iteratorCost = pointValues.getDocCount();
}
return; return;
} }
pointValues.intersect(visitor); pointValues.intersect(visitor);
@ -309,6 +317,48 @@ public abstract class NumericComparator<T extends Number> extends FieldComparato
} }
} }
/**
 * Writes the current bottom value into the bound it constrains: {@link
 * NumericLeafComparator#maxValueAsBytes} for an ascending sort, {@link
 * NumericLeafComparator#minValueAsBytes} for a descending (reverse) sort.
 *
 * <p>When {@link NumericComparator#pruning} is {@link Pruning#GREATER_THAN_OR_EQUAL_TO}, the
 * encoded bound is then moved one step past the bottom value so that the bottom value itself
 * falls outside the range. For example, with an ascending sort and a bottom value of 5 the
 * resulting range is [MIN_VALUE, 4].
 */
private void encodeBottom() {
  if (reverse) {
    // Descending sort: bottom is the lower end of the range.
    encodeBottom(minValueAsBytes);
    if (pruning == Pruning.GREATER_THAN_OR_EQUAL_TO) {
      NumericUtils.nextUp(minValueAsBytes);
    }
    return;
  }

  // Ascending sort: bottom is the upper end of the range.
  encodeBottom(maxValueAsBytes);
  if (pruning == Pruning.GREATER_THAN_OR_EQUAL_TO) {
    NumericUtils.nextDown(maxValueAsBytes);
  }
}
/**
 * Encodes the current top value into the bound used for pruning: {@link
 * NumericLeafComparator#minValueAsBytes} for an ascending sort, {@link
 * NumericLeafComparator#maxValueAsBytes} for a descending one.
 *
 * <p>The bound is moved one step past the top value only when this is the single sort, {@link
 * NumericComparator#pruning} is {@link Pruning#GREATER_THAN_OR_EQUAL_TO} and the queue is full.
 * For instance, with an ascending sort and a top value of 3, the range becomes [4, MAX_VALUE].
 */
private void encodeTop() {
  if (reverse) {
    encodeTop(maxValueAsBytes);
    // we could not tune the top value in page search
    if (singleSort && pruning == Pruning.GREATER_THAN_OR_EQUAL_TO && queueFull) {
      NumericUtils.nextDown(maxValueAsBytes);
    }
    return;
  }
  encodeTop(minValueAsBytes);
  // we could not tune the top value in page search
  if (singleSort && pruning == Pruning.GREATER_THAN_OR_EQUAL_TO && queueFull) {
    NumericUtils.nextUp(minValueAsBytes);
  }
}
private boolean isMissingValueCompetitive() { private boolean isMissingValueCompetitive() {
// if queue is full, always compare with bottom, // if queue is full, always compare with bottom,
// if not, check if we can compare with topValue // if not, check if we can compare with topValue
@ -316,7 +366,9 @@ public abstract class NumericComparator<T extends Number> extends FieldComparato
int result = compareMissingValueWithBottomValue(); int result = compareMissingValueWithBottomValue();
// in reverse (desc) sort missingValue is competitive when it's greater or equal to bottom, // in reverse (desc) sort missingValue is competitive when it's greater or equal to bottom,
// in asc sort missingValue is competitive when it's smaller or equal to bottom // in asc sort missingValue is competitive when it's smaller or equal to bottom
return reverse ? (result >= 0) : (result <= 0); return reverse
? (pruning == Pruning.GREATER_THAN_OR_EQUAL_TO ? result > 0 : result >= 0)
: (pruning == Pruning.GREATER_THAN_OR_EQUAL_TO ? result < 0 : result <= 0);
} else if (topValueSet) { } else if (topValueSet) {
int result = compareMissingValueWithTopValue(); int result = compareMissingValueWithTopValue();
// in reverse (desc) sort missingValue is competitive when it's smaller or equal to // in reverse (desc) sort missingValue is competitive when it's smaller or equal to
@ -357,6 +409,12 @@ public abstract class NumericComparator<T extends Number> extends FieldComparato
}; };
} }
/**
 * In an ascending sort, the missing value is competitive when it is less than or equal to the
 * bottom value (the "equal" case applies when there may be two or more comparators). If there
 * is only one comparator (see {@link Pruning#GREATER_THAN_OR_EQUAL_TO}), the missing value is
 * competitive only when it is strictly less than the bottom value. The opposite holds in a
 * descending sort.
 */
protected abstract int compareMissingValueWithTopValue(); protected abstract int compareMissingValueWithTopValue();
protected abstract int compareMissingValueWithBottomValue(); protected abstract int compareMissingValueWithBottomValue();

View File

@ -30,6 +30,7 @@ import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.FieldComparator; import org.apache.lucene.search.FieldComparator;
import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.LeafFieldComparator; import org.apache.lucene.search.LeafFieldComparator;
import org.apache.lucene.search.Pruning;
import org.apache.lucene.search.Scorable; import org.apache.lucene.search.Scorable;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder; import org.apache.lucene.util.BytesRefBuilder;
@ -98,8 +99,8 @@ public class TermOrdValComparator extends FieldComparator<BytesRef> {
* missing values at the end. * missing values at the end.
*/ */
public TermOrdValComparator( public TermOrdValComparator(
int numHits, String field, boolean sortMissingLast, boolean reverse, boolean enableSkipping) { int numHits, String field, boolean sortMissingLast, boolean reverse, Pruning pruning) {
canSkipDocuments = enableSkipping; canSkipDocuments = pruning != Pruning.NONE;
ords = new int[numHits]; ords = new int[numHits];
values = new BytesRef[numHits]; values = new BytesRef[numHits];
tempBRs = new BytesRefBuilder[numHits]; tempBRs = new BytesRefBuilder[numHits];

View File

@ -132,6 +132,52 @@ public final class NumericUtils {
} }
} }
/**
 * Increments the given bytes in place, treating them as an unsigned big-endian number of fixed
 * length, so that the array holds the next value that compares greater than the current one.
 *
 * @param bytes the value to advance; modified in place
 * @return {@code true} if the value was advanced, {@code false} if, and only if, every byte is
 *     already {@code 0xff} (the maximum possible value), in which case the array is left
 *     untouched
 */
public static boolean nextUp(byte[] bytes) {
  // Find the least-significant byte that can absorb the carry.
  int pivot = bytes.length - 1;
  while (pivot >= 0 && bytes[pivot] == (byte) 0xff) {
    pivot--;
  }
  if (pivot < 0) {
    // Already at the maximum: nothing to advance to.
    return false;
  }
  bytes[pivot]++;
  // Every byte after the pivot was 0xff and wraps around to zero.
  for (int i = pivot + 1; i < bytes.length; i++) {
    bytes[i] = 0;
  }
  return true;
}
/**
 * Decrements the given bytes in place, treating them as an unsigned big-endian number of fixed
 * length, so that the array holds the previous value that compares less than the current one.
 *
 * @param bytes the value to step back; modified in place
 * @return {@code true} if the value was decremented, {@code false} if, and only if, every byte
 *     is already {@code 0} (the minimum possible value), in which case the array is left
 *     untouched
 */
public static boolean nextDown(byte[] bytes) {
  // Find the least-significant byte that can lend to the bytes after it.
  int pivot = bytes.length - 1;
  while (pivot >= 0 && bytes[pivot] == 0) {
    pivot--;
  }
  if (pivot < 0) {
    // Already at the minimum: nothing to step back to.
    return false;
  }
  bytes[pivot]--;
  // Every byte after the pivot was zero and wraps around to 0xff.
  for (int i = pivot + 1; i < bytes.length; i++) {
    bytes[i] = (byte) 0xff;
  }
  return true;
}
/** /**
* Encodes an integer {@code value} such that unsigned byte order comparison is consistent with * Encodes an integer {@code value} such that unsigned byte order comparison is consistent with
* {@link Integer#compare(int, int)} * {@link Integer#compare(int, int)}

View File

@ -118,7 +118,7 @@ final class JustCompileSearch {
@Override @Override
public FieldComparator<?> newComparator( public FieldComparator<?> newComparator(
String fieldname, int numHits, boolean enableSkipping, boolean reversed) { String fieldname, int numHits, Pruning pruning, boolean reversed) {
throw new UnsupportedOperationException(UNSUPPORTED_MSG); throw new UnsupportedOperationException(UNSUPPORTED_MSG);
} }
} }

View File

@ -162,7 +162,7 @@ class ElevationComparatorSource extends FieldComparatorSource {
@Override @Override
public FieldComparator<Integer> newComparator( public FieldComparator<Integer> newComparator(
final String fieldname, final int numHits, boolean enableSkipping, boolean reversed) { final String fieldname, final int numHits, Pruning pruning, boolean reversed) {
return new FieldComparator<Integer>() { return new FieldComparator<Integer>() {
private final int[] values = new int[numHits]; private final int[] values = new int[numHits];

View File

@ -83,7 +83,7 @@ public class TestSortOptimization extends LuceneTestCase {
} }
final IndexReader reader = DirectoryReader.open(writer); final IndexReader reader = DirectoryReader.open(writer);
writer.close(); writer.close();
// single threaded so totalhits is deterministic // single threaded so totalHits is deterministic
IndexSearcher searcher = newSearcher(reader, true, true, false); IndexSearcher searcher = newSearcher(reader, true, true, false);
final SortField sortField = new SortField("my_field", SortField.Type.LONG); final SortField sortField = new SortField("my_field", SortField.Type.LONG);
final Sort sort = new Sort(sortField); final Sort sort = new Sort(sortField);
@ -165,7 +165,7 @@ public class TestSortOptimization extends LuceneTestCase {
} }
final IndexReader reader = DirectoryReader.open(writer); final IndexReader reader = DirectoryReader.open(writer);
writer.close(); writer.close();
// single threaded so totalhits is deterministic // single threaded so totalHits is deterministic
IndexSearcher searcher = IndexSearcher searcher =
newSearcher(reader, random().nextBoolean(), random().nextBoolean(), false); newSearcher(reader, random().nextBoolean(), random().nextBoolean(), false);
final SortField sortField = new SortField("my_field", SortField.Type.LONG); final SortField sortField = new SortField("my_field", SortField.Type.LONG);
@ -210,13 +210,14 @@ public class TestSortOptimization extends LuceneTestCase {
} }
final IndexReader reader = DirectoryReader.open(writer); final IndexReader reader = DirectoryReader.open(writer);
writer.close(); writer.close();
// single threaded so totalhits is deterministic // single threaded so totalHits is deterministic
IndexSearcher searcher = IndexSearcher searcher =
newSearcher(reader, random().nextBoolean(), random().nextBoolean(), false); newSearcher(reader, random().nextBoolean(), random().nextBoolean(), false);
final int numHits = 3; final int numHits = 3;
final int totalHitsThreshold = 3; final int totalHitsThreshold = 3;
{ // test that optimization is not run when missing value setting of SortField is competitive { // test that optimization is run when missing value setting of SortField is competitive with
// Pruning.GREATER_THAN_OR_EQUAL_TO
final SortField sortField = new SortField("my_field", SortField.Type.LONG); final SortField sortField = new SortField("my_field", SortField.Type.LONG);
sortField.setMissingValue(0L); // set a competitive missing value sortField.setMissingValue(0L); // set a competitive missing value
final Sort sort = new Sort(sortField); final Sort sort = new Sort(sortField);
@ -224,6 +225,19 @@ public class TestSortOptimization extends LuceneTestCase {
TopFieldCollector.createSharedManager(sort, numHits, null, totalHitsThreshold); TopFieldCollector.createSharedManager(sort, numHits, null, totalHitsThreshold);
TopDocs topDocs = searcher.search(new MatchAllDocsQuery(), manager); TopDocs topDocs = searcher.search(new MatchAllDocsQuery(), manager);
assertEquals(topDocs.scoreDocs.length, numHits); assertEquals(topDocs.scoreDocs.length, numHits);
assertNonCompetitiveHitsAreSkipped(topDocs.totalHits.value, numDocs);
}
{ // test that optimization is not run when missing value setting of SortField is competitive
// with Pruning.GREATER_THAN
final SortField sortField1 = new SortField("my_field1", SortField.Type.LONG);
final SortField sortField2 = new SortField("my_field2", SortField.Type.LONG);
sortField1.setMissingValue(0L); // set a competitive missing value
sortField2.setMissingValue(0L); // set a competitive missing value
final Sort sort = new Sort(sortField1, sortField2);
CollectorManager<TopFieldCollector, TopFieldDocs> manager =
TopFieldCollector.createSharedManager(sort, numHits, null, totalHitsThreshold);
TopDocs topDocs = searcher.search(new MatchAllDocsQuery(), manager);
assertEquals(topDocs.scoreDocs.length, numHits);
assertEquals( assertEquals(
topDocs.totalHits.value, topDocs.totalHits.value,
numDocs); // assert that all documents were collected => optimization was not run numDocs); // assert that all documents were collected => optimization was not run
@ -251,9 +265,7 @@ public class TestSortOptimization extends LuceneTestCase {
TopFieldCollector.createSharedManager(sort, numHits, after, totalHitsThreshold); TopFieldCollector.createSharedManager(sort, numHits, after, totalHitsThreshold);
TopDocs topDocs = searcher.search(new MatchAllDocsQuery(), manager); TopDocs topDocs = searcher.search(new MatchAllDocsQuery(), manager);
assertEquals(topDocs.scoreDocs.length, numHits); assertEquals(topDocs.scoreDocs.length, numHits);
assertEquals( assertNonCompetitiveHitsAreSkipped(topDocs.totalHits.value, numDocs);
topDocs.totalHits.value,
numDocs); // assert that all documents were collected => optimization was not run
} }
{ // test that optimization is not run when missing value setting of SortField is competitive { // test that optimization is not run when missing value setting of SortField is competitive
@ -268,9 +280,85 @@ public class TestSortOptimization extends LuceneTestCase {
TopFieldCollector.createSharedManager(sort, numHits, after, totalHitsThreshold); TopFieldCollector.createSharedManager(sort, numHits, after, totalHitsThreshold);
TopDocs topDocs = searcher.search(new MatchAllDocsQuery(), manager); TopDocs topDocs = searcher.search(new MatchAllDocsQuery(), manager);
assertEquals(topDocs.scoreDocs.length, numHits); assertEquals(topDocs.scoreDocs.length, numHits);
assertEquals( assertNonCompetitiveHitsAreSkipped(topDocs.totalHits.value, numDocs);
topDocs.totalHits.value, }
numDocs); // assert that all documents were collected => optimization was not run
reader.close();
dir.close();
}
public void testNumericDocValuesOptimizationWithMissingValues() throws IOException {
final Directory dir = newDirectory();
IndexWriterConfig config =
new IndexWriterConfig()
// Make sure to use the default codec, otherwise some random points formats that have
// large values for maxPointsPerLeaf might not enable skipping with only 10k docs
.setCodec(TestUtil.getDefaultCodec());
final IndexWriter writer = new IndexWriter(dir, config);
final int numDocs = atLeast(10000);
final int missValuesNumDocs = numDocs / 2;
for (int i = 0; i < numDocs; ++i) {
final Document doc = new Document();
if (i <= missValuesNumDocs) { // missing value document
} else {
doc.add(new NumericDocValuesField("my_field", i));
doc.add(new LongPoint("my_field", i));
}
writer.addDocument(doc);
}
final IndexReader reader = DirectoryReader.open(writer);
writer.close();
// single threaded so totalHits is deterministic
IndexSearcher searcher =
newSearcher(reader, random().nextBoolean(), random().nextBoolean(), false);
final int numHits = 3;
final int totalHitsThreshold = 3;
TopDocs topDocs1;
TopDocs topDocs2;
{ // Test that optimization is run with NumericDocValues when missing value is NOT competitive
final SortField sortField = new SortField("my_field", SortField.Type.LONG, true);
sortField.setMissingValue(0L); // missing value is not competitive
final Sort sort = new Sort(sortField);
CollectorManager<TopFieldCollector, TopFieldDocs> manager =
TopFieldCollector.createSharedManager(sort, numHits, null, totalHitsThreshold);
topDocs1 = searcher.search(new MatchAllDocsQuery(), manager);
assertNonCompetitiveHitsAreSkipped(topDocs1.totalHits.value, numDocs);
}
{ // Test that sort on sorted numeric field without sort optimization and with sort optimization
// produce the same results
final SortField sortField = new SortField("my_field", SortField.Type.LONG, true);
sortField.setMissingValue(0L); // missing value is not competitive
final Sort sort = new Sort(sortField);
sortField.setOptimizeSortWithPoints(false);
CollectorManager<TopFieldCollector, TopFieldDocs> manager =
TopFieldCollector.createSharedManager(sort, numHits, null, totalHitsThreshold);
topDocs2 = searcher.search(new MatchAllDocsQuery(), manager);
// assert that the resulting hits are the same
assertEquals(topDocs1.scoreDocs.length, topDocs2.scoreDocs.length);
assertEquals(topDocs1.scoreDocs.length, numHits);
ScoreDoc[] scoreDocs1 = topDocs1.scoreDocs;
ScoreDoc[] scoreDocs2 = topDocs2.scoreDocs;
for (int i = 0; i < numHits; i++) {
FieldDoc fieldDoc = (FieldDoc) scoreDocs1[i];
FieldDoc fieldDoc2 = (FieldDoc) scoreDocs2[i];
assertEquals(fieldDoc.fields[0], fieldDoc2.fields[0]);
assertEquals(fieldDoc.doc, fieldDoc2.doc);
}
assertTrue(topDocs1.totalHits.value < topDocs2.totalHits.value);
}
{ // Test that we can't do optimization via NumericDocValues when there are multiple comparators
final SortField sortField1 = new SortField("my_field", SortField.Type.LONG, true);
final SortField sortField2 = new SortField("other", SortField.Type.LONG, true);
sortField1.setMissingValue(0L); // missing value is not competitive
sortField2.setMissingValue(0L); // missing value is not competitive
final Sort multiSorts = new Sort(new SortField[] {sortField1, sortField2});
CollectorManager<TopFieldCollector, TopFieldDocs> manager =
TopFieldCollector.createSharedManager(multiSorts, numHits, null, totalHitsThreshold);
TopDocs topDocs = searcher.search(new MatchAllDocsQuery(), manager);
// can't optimize with NumericDocValues when there are multiple comparators
assertEquals(topDocs.totalHits.value, numDocs);
} }
reader.close(); reader.close();
@ -295,17 +383,19 @@ public class TestSortOptimization extends LuceneTestCase {
new NumericDocValuesField( new NumericDocValuesField(
"my_field2", numDocs - i)); // diff values for the field my_field2 "my_field2", numDocs - i)); // diff values for the field my_field2
writer.addDocument(doc); writer.addDocument(doc);
if (i == 7000) writer.flush(); // two segments // if there is only one segment, we could test that totalHits must always equal (numHits + 1)
if (i == 7000 && random().nextBoolean()) writer.flush(); // two segments
} }
final IndexReader reader = DirectoryReader.open(writer); final IndexReader reader = DirectoryReader.open(writer);
writer.close(); writer.close();
// single threaded so totalhits is deterministic // single threaded so totalHits is deterministic
IndexSearcher searcher = IndexSearcher searcher =
newSearcher(reader, random().nextBoolean(), random().nextBoolean(), false); newSearcher(reader, random().nextBoolean(), random().nextBoolean(), false);
final int numHits = 3; final int numHits = 3;
final int totalHitsThreshold = 3; final int totalHitsThreshold = 3;
{ // test that sorting on a single field with equal values uses the optimization { // test that sorting on a single field with equal values uses the optimization with
// GREATER_THAN_OR_EQUAL_TO
final SortField sortField = new SortField("my_field1", SortField.Type.INT); final SortField sortField = new SortField("my_field1", SortField.Type.INT);
final Sort sort = new Sort(sortField); final Sort sort = new Sort(sortField);
CollectorManager<TopFieldCollector, TopFieldDocs> manager = CollectorManager<TopFieldCollector, TopFieldDocs> manager =
@ -316,11 +406,15 @@ public class TestSortOptimization extends LuceneTestCase {
FieldDoc fieldDoc = (FieldDoc) topDocs.scoreDocs[i]; FieldDoc fieldDoc = (FieldDoc) topDocs.scoreDocs[i];
assertEquals(100, fieldDoc.fields[0]); assertEquals(100, fieldDoc.fields[0]);
} }
if (reader.leaves().size() == 1) {
// if there is only one segment, totalHits should always equal numHits plus 1
assertEquals(topDocs.totalHits.value, numHits + 1);
}
assertNonCompetitiveHitsAreSkipped(topDocs.totalHits.value, numDocs); assertNonCompetitiveHitsAreSkipped(topDocs.totalHits.value, numDocs);
} }
{ // test that sorting on a single field with equal values and after parameter { // test that sorting on a single field with equal values and after parameter
// doesn't use the optimization // use the optimization with GREATER_THAN_OR_EQUAL_TO
final int afterValue = 100; final int afterValue = 100;
final int afterDocID = 10 + random().nextInt(1000); final int afterDocID = 10 + random().nextInt(1000);
final SortField sortField = new SortField("my_field1", SortField.Type.INT); final SortField sortField = new SortField("my_field1", SortField.Type.INT);
@ -335,11 +429,11 @@ public class TestSortOptimization extends LuceneTestCase {
assertEquals(100, fieldDoc.fields[0]); assertEquals(100, fieldDoc.fields[0]);
assertTrue(fieldDoc.doc > afterDocID); assertTrue(fieldDoc.doc > afterDocID);
} }
assertEquals(topDocs.totalHits.value, numDocs); assertNonCompetitiveHitsAreSkipped(topDocs.totalHits.value, numDocs);
} }
{ // test that sorting on main field with equal values + another field for tie breaks doesn't { // test that sorting on main field with equal values + another field for tie breaks doesn't
// use optimization // use optimization with Pruning.GREATER_THAN
final SortField sortField1 = new SortField("my_field1", SortField.Type.INT); final SortField sortField1 = new SortField("my_field1", SortField.Type.INT);
final SortField sortField2 = new SortField("my_field2", SortField.Type.INT); final SortField sortField2 = new SortField("my_field2", SortField.Type.INT);
final Sort sort = new Sort(sortField1, sortField2); final Sort sort = new Sort(sortField1, sortField2);
@ -375,7 +469,7 @@ public class TestSortOptimization extends LuceneTestCase {
} }
final IndexReader reader = DirectoryReader.open(writer); final IndexReader reader = DirectoryReader.open(writer);
writer.close(); writer.close();
// single threaded so totalhits is deterministic // single threaded so totalHits is deterministic
IndexSearcher searcher = IndexSearcher searcher =
newSearcher(reader, random().nextBoolean(), random().nextBoolean(), false); newSearcher(reader, random().nextBoolean(), random().nextBoolean(), false);
final SortField sortField = new SortField("my_field", SortField.Type.FLOAT); final SortField sortField = new SortField("my_field", SortField.Type.FLOAT);
@ -438,7 +532,7 @@ public class TestSortOptimization extends LuceneTestCase {
int numHits = 0; int numHits = 0;
do { do {
for (int i = 0; i < numIndices; i++) { for (int i = 0; i < numIndices; i++) {
// single threaded so totalhits is deterministic // single threaded so totalHits is deterministic
IndexSearcher searcher = IndexSearcher searcher =
newSearcher(readers[i], random().nextBoolean(), random().nextBoolean(), false); newSearcher(readers[i], random().nextBoolean(), random().nextBoolean(), false);
CollectorManager<TopFieldCollector, TopFieldDocs> manager = CollectorManager<TopFieldCollector, TopFieldDocs> manager =
@ -482,7 +576,7 @@ public class TestSortOptimization extends LuceneTestCase {
final IndexReader reader = DirectoryReader.open(writer); final IndexReader reader = DirectoryReader.open(writer);
writer.close(); writer.close();
// single threaded so totalhits is deterministic // single threaded so totalHits is deterministic
IndexSearcher searcher = IndexSearcher searcher =
newSearcher(reader, random().nextBoolean(), random().nextBoolean(), false); newSearcher(reader, random().nextBoolean(), random().nextBoolean(), false);
final int numHits = 10; final int numHits = 10;
@ -601,7 +695,7 @@ public class TestSortOptimization extends LuceneTestCase {
} }
final IndexReader reader = DirectoryReader.open(writer); final IndexReader reader = DirectoryReader.open(writer);
writer.close(); writer.close();
// single threaded so totalhits is deterministic // single threaded so totalHits is deterministic
IndexSearcher searcher = IndexSearcher searcher =
newSearcher(reader, random().nextBoolean(), random().nextBoolean(), false); newSearcher(reader, random().nextBoolean(), random().nextBoolean(), false);
@ -803,14 +897,20 @@ public class TestSortOptimization extends LuceneTestCase {
writer.flush(); writer.flush();
} }
} }
boolean reverse = random().nextBoolean();
writer.flush(); writer.flush();
seqNos.sort(Long::compare); if (reverse == false) {
seqNos.sort(Long::compare);
} else {
seqNos.sort(Collections.reverseOrder());
}
IndexReader reader = DirectoryReader.open(writer); IndexReader reader = DirectoryReader.open(writer);
writer.close(); writer.close();
IndexSearcher searcher = newSearcher(reader, random().nextBoolean(), random().nextBoolean()); IndexSearcher searcher = newSearcher(reader, random().nextBoolean(), random().nextBoolean());
SortField sortField = new SortField("seq_no", SortField.Type.LONG); SortField sortField = new SortField("seq_no", SortField.Type.LONG, reverse);
int visitedHits = 0; int visitedHits = 0;
ScoreDoc after = null; ScoreDoc after = null;
// test page search
while (visitedHits < seqNos.size()) { while (visitedHits < seqNos.size()) {
int batch = 1 + random().nextInt(100); int batch = 1 + random().nextInt(100);
Query query = Query query =
@ -828,6 +928,17 @@ public class TestSortOptimization extends LuceneTestCase {
visitedHits++; visitedHits++;
} }
} }
// test search
int numHits = 1 + random().nextInt(100);
CollectorManager<TopFieldCollector, TopFieldDocs> manager =
TopFieldCollector.createSharedManager(new Sort(sortField), numHits, null, numHits);
TopDocs topDocs = searcher.search(new MatchAllDocsQuery(), manager);
for (int i = 0; i < topDocs.scoreDocs.length; i++) {
long expectedSeqNo = seqNos.get(i);
FieldDoc fieldDoc = (FieldDoc) topDocs.scoreDocs[i];
assertEquals(expectedSeqNo, ((Long) fieldDoc.fields[0]).intValue());
}
reader.close(); reader.close();
dir.close(); dir.close();
} }
@ -848,7 +959,7 @@ public class TestSortOptimization extends LuceneTestCase {
} }
final IndexReader reader = DirectoryReader.open(writer); final IndexReader reader = DirectoryReader.open(writer);
writer.close(); writer.close();
// single threaded so totalhits is deterministic // single threaded so totalHits is deterministic
IndexSearcher searcher = newSearcher(reader, true, true, false); IndexSearcher searcher = newSearcher(reader, true, true, false);
SortedNumericSelector.Type type = SortedNumericSelector.Type type =
@ -1098,7 +1209,7 @@ public class TestSortOptimization extends LuceneTestCase {
private TopDocs assertSearchHits(DirectoryReader reader, Sort sort, int n, FieldDoc after) private TopDocs assertSearchHits(DirectoryReader reader, Sort sort, int n, FieldDoc after)
throws IOException { throws IOException {
// single threaded so totalhits is deterministic // single threaded so totalHits is deterministic
IndexSearcher searcher = newSearcher(reader, true, true, false); IndexSearcher searcher = newSearcher(reader, true, true, false);
Query query = new MatchAllDocsQuery(); Query query = new MatchAllDocsQuery();
CollectorManager<TopFieldCollector, TopFieldDocs> manager = CollectorManager<TopFieldCollector, TopFieldDocs> manager =

View File

@ -616,4 +616,32 @@ public class TestNumericUtils extends LuceneTestCase {
Integer.signum(leftValue.compareTo(rightValue)), Integer.signum(left.compareTo(right))); Integer.signum(leftValue.compareTo(rightValue)), Integer.signum(left.compareTo(right)));
} }
} }
/**
 * Checks {@link NumericUtils#nextUp(byte[])} on sortable-encoded ints around byte boundaries:
 * every value but {@link Integer#MAX_VALUE} must advance by exactly one, while the maximum must
 * be reported as not advanced and left unchanged.
 */
public void testNextUp() {
  final int[] samples = {Integer.MIN_VALUE, -256, -255, -1, 0, 1, 255, 256, Integer.MAX_VALUE};
  for (int value : samples) {
    byte[] encoded = new byte[Integer.BYTES];
    NumericUtils.intToSortableBytes(value, encoded, 0);
    if (value != Integer.MAX_VALUE) {
      assertTrue(NumericUtils.nextUp(encoded));
      assertEquals(value + 1, NumericUtils.sortableBytesToInt(encoded, 0));
      continue;
    }
    // No greater value exists: the call must fail and keep the bytes as they were.
    assertFalse(NumericUtils.nextUp(encoded));
    assertEquals(value, NumericUtils.sortableBytesToInt(encoded, 0));
  }
}
/**
 * Checks {@link NumericUtils#nextDown(byte[])} on sortable-encoded ints around byte boundaries:
 * every value but {@link Integer#MIN_VALUE} must decrease by exactly one, while the minimum
 * must be reported as not decremented and left unchanged.
 */
public void testNextDown() {
  final int[] samples = {Integer.MIN_VALUE, -256, -255, -1, 0, 1, 255, 256, Integer.MAX_VALUE};
  for (int value : samples) {
    byte[] encoded = new byte[Integer.BYTES];
    NumericUtils.intToSortableBytes(value, encoded, 0);
    if (value != Integer.MIN_VALUE) {
      assertTrue(NumericUtils.nextDown(encoded));
      assertEquals(value - 1, NumericUtils.sortableBytesToInt(encoded, 0));
      continue;
    }
    // No smaller value exists: the call must fail and keep the bytes as they were.
    assertFalse(NumericUtils.nextDown(encoded));
    assertEquals(value, NumericUtils.sortableBytesToInt(encoded, 0));
  }
}
} }

View File

@ -24,6 +24,7 @@ import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.FieldComparator; import org.apache.lucene.search.FieldComparator;
import org.apache.lucene.search.LeafFieldComparator; import org.apache.lucene.search.LeafFieldComparator;
import org.apache.lucene.search.Pruning;
import org.apache.lucene.search.Scorable; import org.apache.lucene.search.Scorable;
import org.apache.lucene.search.ScoreMode; import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.SimpleCollector; import org.apache.lucene.search.SimpleCollector;
@ -260,7 +261,7 @@ public abstract class AllGroupHeadsCollector<T> extends SimpleCollector {
comparators = new FieldComparator[sortFields.length]; comparators = new FieldComparator[sortFields.length];
leafComparators = new LeafFieldComparator[sortFields.length]; leafComparators = new LeafFieldComparator[sortFields.length];
for (int i = 0; i < sortFields.length; i++) { for (int i = 0; i < sortFields.length; i++) {
comparators[i] = sortFields[i].getComparator(1, false); comparators[i] = sortFields[i].getComparator(1, Pruning.NONE);
leafComparators[i] = comparators[i].getLeafComparator(context); leafComparators[i] = comparators[i].getLeafComparator(context);
leafComparators[i].setScorer(scorer); leafComparators[i].setScorer(scorer);
leafComparators[i].copy(0, doc); leafComparators[i].copy(0, doc);

View File

@ -23,6 +23,7 @@ import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.FieldComparator; import org.apache.lucene.search.FieldComparator;
import org.apache.lucene.search.LeafCollector; import org.apache.lucene.search.LeafCollector;
import org.apache.lucene.search.LeafFieldComparator; import org.apache.lucene.search.LeafFieldComparator;
import org.apache.lucene.search.Pruning;
import org.apache.lucene.search.Scorable; import org.apache.lucene.search.Scorable;
import org.apache.lucene.search.ScoreMode; import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.Scorer; import org.apache.lucene.search.Scorer;
@ -240,7 +241,7 @@ public class BlockGroupingCollector extends SimpleCollector {
reversed = new int[sortFields.length]; reversed = new int[sortFields.length];
for (int i = 0; i < sortFields.length; i++) { for (int i = 0; i < sortFields.length; i++) {
final SortField sortField = sortFields[i]; final SortField sortField = sortFields[i];
comparators[i] = sortField.getComparator(topNGroups, false); comparators[i] = sortField.getComparator(topNGroups, Pruning.NONE);
reversed[i] = sortField.getReverse() ? -1 : 1; reversed[i] = sortField.getReverse() ? -1 : 1;
} }
} }

View File

@ -25,6 +25,7 @@ import java.util.TreeSet;
import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.FieldComparator; import org.apache.lucene.search.FieldComparator;
import org.apache.lucene.search.LeafFieldComparator; import org.apache.lucene.search.LeafFieldComparator;
import org.apache.lucene.search.Pruning;
import org.apache.lucene.search.Scorable; import org.apache.lucene.search.Scorable;
import org.apache.lucene.search.ScoreMode; import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.SimpleCollector; import org.apache.lucene.search.SimpleCollector;
@ -93,7 +94,7 @@ public class FirstPassGroupingCollector<T> extends SimpleCollector {
// use topNGroups + 1 so we have a spare slot to use for comparing (tracked by // use topNGroups + 1 so we have a spare slot to use for comparing (tracked by
// this.spareSlot): // this.spareSlot):
comparators[i] = sortField.getComparator(topNGroups + 1, false); comparators[i] = sortField.getComparator(topNGroups + 1, Pruning.NONE);
reversed[i] = sortField.getReverse() ? -1 : 1; reversed[i] = sortField.getReverse() ? -1 : 1;
} }

View File

@ -27,6 +27,7 @@ import java.util.Map;
import java.util.NavigableSet; import java.util.NavigableSet;
import java.util.TreeSet; import java.util.TreeSet;
import org.apache.lucene.search.FieldComparator; import org.apache.lucene.search.FieldComparator;
import org.apache.lucene.search.Pruning;
import org.apache.lucene.search.Sort; import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField; import org.apache.lucene.search.SortField;
@ -176,7 +177,7 @@ public class SearchGroup<T> {
reversed = new int[sortFields.length]; reversed = new int[sortFields.length];
for (int compIDX = 0; compIDX < sortFields.length; compIDX++) { for (int compIDX = 0; compIDX < sortFields.length; compIDX++) {
final SortField sortField = sortFields[compIDX]; final SortField sortField = sortFields[compIDX];
comparators[compIDX] = sortField.getComparator(1, false); comparators[compIDX] = sortField.getComparator(1, Pruning.NONE);
reversed[compIDX] = sortField.getReverse() ? -1 : 1; reversed[compIDX] = sortField.getReverse() ? -1 : 1;
} }
} }

View File

@ -28,6 +28,7 @@ import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.index.SortedSetDocValues; import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.search.FieldComparator; import org.apache.lucene.search.FieldComparator;
import org.apache.lucene.search.LeafFieldComparator; import org.apache.lucene.search.LeafFieldComparator;
import org.apache.lucene.search.Pruning;
import org.apache.lucene.search.SortField; import org.apache.lucene.search.SortField;
import org.apache.lucene.search.comparators.DoubleComparator; import org.apache.lucene.search.comparators.DoubleComparator;
import org.apache.lucene.search.comparators.FloatComparator; import org.apache.lucene.search.comparators.FloatComparator;
@ -112,7 +113,7 @@ public class ToParentBlockJoinSortField extends SortField {
} }
@Override @Override
public FieldComparator<?> getComparator(int numHits, boolean enableSkipping) { public FieldComparator<?> getComparator(int numHits, Pruning pruning) {
switch (getType()) { switch (getType()) {
case STRING: case STRING:
return getStringComparator(numHits); return getStringComparator(numHits);
@ -136,7 +137,7 @@ public class ToParentBlockJoinSortField extends SortField {
private FieldComparator<?> getStringComparator(int numHits) { private FieldComparator<?> getStringComparator(int numHits) {
return new TermOrdValComparator( return new TermOrdValComparator(
numHits, getField(), missingValue == STRING_LAST, getReverse(), false) { numHits, getField(), missingValue == STRING_LAST, getReverse(), Pruning.NONE) {
@Override @Override
protected SortedDocValues getSortedDocValues(LeafReaderContext context, String field) protected SortedDocValues getSortedDocValues(LeafReaderContext context, String field)
throws IOException { throws IOException {
@ -154,7 +155,8 @@ public class ToParentBlockJoinSortField extends SortField {
} }
private FieldComparator<?> getIntComparator(int numHits) { private FieldComparator<?> getIntComparator(int numHits) {
return new IntComparator(numHits, getField(), (Integer) missingValue, getReverse(), false) { return new IntComparator(
numHits, getField(), (Integer) missingValue, getReverse(), Pruning.NONE) {
@Override @Override
public LeafFieldComparator getLeafComparator(LeafReaderContext context) throws IOException { public LeafFieldComparator getLeafComparator(LeafReaderContext context) throws IOException {
return new IntLeafComparator(context) { return new IntLeafComparator(context) {
@ -178,7 +180,8 @@ public class ToParentBlockJoinSortField extends SortField {
} }
private FieldComparator<?> getLongComparator(int numHits) { private FieldComparator<?> getLongComparator(int numHits) {
return new LongComparator(numHits, getField(), (Long) missingValue, getReverse(), false) { return new LongComparator(
numHits, getField(), (Long) missingValue, getReverse(), Pruning.NONE) {
@Override @Override
public LeafFieldComparator getLeafComparator(LeafReaderContext context) throws IOException { public LeafFieldComparator getLeafComparator(LeafReaderContext context) throws IOException {
return new LongLeafComparator(context) { return new LongLeafComparator(context) {
@ -202,7 +205,8 @@ public class ToParentBlockJoinSortField extends SortField {
} }
private FieldComparator<?> getFloatComparator(int numHits) { private FieldComparator<?> getFloatComparator(int numHits) {
return new FloatComparator(numHits, getField(), (Float) missingValue, getReverse(), false) { return new FloatComparator(
numHits, getField(), (Float) missingValue, getReverse(), Pruning.NONE) {
@Override @Override
public LeafFieldComparator getLeafComparator(LeafReaderContext context) throws IOException { public LeafFieldComparator getLeafComparator(LeafReaderContext context) throws IOException {
return new FloatLeafComparator(context) { return new FloatLeafComparator(context) {
@ -234,7 +238,8 @@ public class ToParentBlockJoinSortField extends SortField {
} }
private FieldComparator<?> getDoubleComparator(int numHits) { private FieldComparator<?> getDoubleComparator(int numHits) {
return new DoubleComparator(numHits, getField(), (Double) missingValue, getReverse(), false) { return new DoubleComparator(
numHits, getField(), (Double) missingValue, getReverse(), Pruning.NONE) {
@Override @Override
public LeafFieldComparator getLeafComparator(LeafReaderContext context) throws IOException { public LeafFieldComparator getLeafComparator(LeafReaderContext context) throws IOException {
return new DoubleLeafComparator(context) { return new DoubleLeafComparator(context) {

View File

@ -29,6 +29,7 @@ import org.apache.lucene.search.FieldComparatorSource;
import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.LongValues; import org.apache.lucene.search.LongValues;
import org.apache.lucene.search.LongValuesSource; import org.apache.lucene.search.LongValuesSource;
import org.apache.lucene.search.Pruning;
import org.apache.lucene.search.Scorable; import org.apache.lucene.search.Scorable;
import org.apache.lucene.search.SimpleFieldComparator; import org.apache.lucene.search.SimpleFieldComparator;
import org.apache.lucene.search.SortField; import org.apache.lucene.search.SortField;
@ -376,7 +377,7 @@ public abstract class ValueSource {
@Override @Override
public FieldComparator<Double> newComparator( public FieldComparator<Double> newComparator(
String fieldname, int numHits, boolean enableSkipping, boolean reversed) { String fieldname, int numHits, Pruning pruning, boolean reversed) {
return new ValueSourceComparator(context, numHits); return new ValueSourceComparator(context, numHits);
} }
} }

View File

@ -17,6 +17,7 @@
package org.apache.lucene.spatial3d; package org.apache.lucene.spatial3d;
import org.apache.lucene.search.FieldComparator; import org.apache.lucene.search.FieldComparator;
import org.apache.lucene.search.Pruning;
import org.apache.lucene.search.SortField; import org.apache.lucene.search.SortField;
import org.apache.lucene.spatial3d.geom.GeoOutsideDistance; import org.apache.lucene.spatial3d.geom.GeoOutsideDistance;
import org.apache.lucene.spatial3d.geom.PlanetModel; import org.apache.lucene.spatial3d.geom.PlanetModel;
@ -41,7 +42,7 @@ final class Geo3DPointOutsideSortField extends SortField {
} }
@Override @Override
public FieldComparator<?> getComparator(int numHits, boolean enableSkipping) { public FieldComparator<?> getComparator(int numHits, Pruning pruning) {
return new Geo3DPointOutsideDistanceComparator(getField(), planetModel, distanceShape, numHits); return new Geo3DPointOutsideDistanceComparator(getField(), planetModel, distanceShape, numHits);
} }

View File

@ -17,6 +17,7 @@
package org.apache.lucene.spatial3d; package org.apache.lucene.spatial3d;
import org.apache.lucene.search.FieldComparator; import org.apache.lucene.search.FieldComparator;
import org.apache.lucene.search.Pruning;
import org.apache.lucene.search.SortField; import org.apache.lucene.search.SortField;
import org.apache.lucene.spatial3d.geom.GeoDistanceShape; import org.apache.lucene.spatial3d.geom.GeoDistanceShape;
import org.apache.lucene.spatial3d.geom.PlanetModel; import org.apache.lucene.spatial3d.geom.PlanetModel;
@ -41,7 +42,7 @@ final class Geo3DPointSortField extends SortField {
} }
@Override @Override
public FieldComparator<?> getComparator(int numHits, boolean enableSkipping) { public FieldComparator<?> getComparator(int numHits, Pruning pruning) {
return new Geo3DPointDistanceComparator(getField(), planetModel, distanceShape, numHits); return new Geo3DPointDistanceComparator(getField(), planetModel, distanceShape, numHits);
} }