mirror of https://github.com/apache/lucene.git
LUCENE-9280: Collectors to skip noncompetitive documents (#1351)
Similar to how scorers can update their iterators to skip non-competitive documents, collectors and comparators should also provide and update iterators that allow them to skip non-competitive documents.
This commit is contained in:
parent
ea1bb9f2e9
commit
b0333ab5c8
|
@ -107,6 +107,14 @@ Improvements
|
|||
|
||||
* LUCENE-9074: Introduce Slice Executor For Dynamic Runtime Execution Of Slices (Atri Sharma)
|
||||
|
||||
* LUCENE-9280: Add an ability for field comparators to skip non-competitive documents.
|
||||
Creating a TopFieldCollector with totalHitsThreshold less than Integer.MAX_VALUE
|
||||
instructs Lucene to skip non-competitive documents whenever possible. For numeric
|
||||
sort fields the skipping functionality works when the same field is indexed both
|
||||
with doc values and points. In this case, there is an assumption that the same data is
|
||||
stored in these points and doc values (Mayya Sharipova, Jim Ferenczi, Adrien Grand)
|
||||
|
||||
|
||||
Bug fixes
|
||||
|
||||
* LUCENE-8663: NRTCachingDirectory.slowFileExists may open a file while
|
||||
|
|
|
@ -287,3 +287,9 @@ TopDocsCollector shall no longer return an empty TopDocs for malformed arguments
|
|||
Rather, an IllegalArgumentException shall be thrown. This is introduced for better
|
||||
defence and to ensure that there is no bubbling up of errors when Lucene is
|
||||
used in multi level applications
|
||||
|
||||
## Assumption of data consistency between different data-structures sharing the same field name
|
||||
|
||||
Sorting on a numeric field that is indexed with both doc values and points may use an
|
||||
optimization to skip non-competitive documents. This optimization relies on the assumption
|
||||
that the same data is stored in these points and doc values.
|
||||
|
|
|
@ -115,7 +115,7 @@ public final class ConstantScoreQuery extends Query {
|
|||
return new ConstantScoreWeight(this, boost) {
|
||||
@Override
|
||||
public BulkScorer bulkScorer(LeafReaderContext context) throws IOException {
|
||||
if (scoreMode == ScoreMode.TOP_SCORES) {
|
||||
if (scoreMode.isExhaustive() == false) {
|
||||
return super.bulkScorer(context);
|
||||
}
|
||||
final BulkScorer innerScorer = innerWeight.bulkScorer(context);
|
||||
|
|
|
@ -165,8 +165,8 @@ public abstract class FieldComparator<T> {
|
|||
* org.apache.lucene.index.LeafReader#getNumericDocValues} and sorts by ascending value */
|
||||
public static class DoubleComparator extends NumericComparator<Double> {
|
||||
private final double[] values;
|
||||
private double bottom;
|
||||
private double topValue;
|
||||
protected double bottom;
|
||||
protected double topValue;
|
||||
|
||||
/**
|
||||
* Creates a new comparator based on {@link Double#compare} for {@code numHits}.
|
||||
|
@ -225,8 +225,8 @@ public abstract class FieldComparator<T> {
|
|||
* org.apache.lucene.index.LeafReader#getNumericDocValues(String)} and sorts by ascending value */
|
||||
public static class FloatComparator extends NumericComparator<Float> {
|
||||
private final float[] values;
|
||||
private float bottom;
|
||||
private float topValue;
|
||||
protected float bottom;
|
||||
protected float topValue;
|
||||
|
||||
/**
|
||||
* Creates a new comparator based on {@link Float#compare} for {@code numHits}.
|
||||
|
@ -285,8 +285,8 @@ public abstract class FieldComparator<T> {
|
|||
* org.apache.lucene.index.LeafReader#getNumericDocValues(String)} and sorts by ascending value */
|
||||
public static class IntComparator extends NumericComparator<Integer> {
|
||||
private final int[] values;
|
||||
private int bottom; // Value of bottom of queue
|
||||
private int topValue;
|
||||
protected int bottom; // Value of bottom of queue
|
||||
protected int topValue;
|
||||
|
||||
/**
|
||||
* Creates a new comparator based on {@link Integer#compare} for {@code numHits}.
|
||||
|
@ -347,8 +347,8 @@ public abstract class FieldComparator<T> {
|
|||
* org.apache.lucene.index.LeafReader#getNumericDocValues(String)} and sorts by ascending value */
|
||||
public static class LongComparator extends NumericComparator<Long> {
|
||||
private final long[] values;
|
||||
private long bottom;
|
||||
private long topValue;
|
||||
protected long bottom;
|
||||
protected long topValue;
|
||||
|
||||
/**
|
||||
* Creates a new comparator based on {@link Long#compare} for {@code numHits}.
|
||||
|
|
|
@ -58,8 +58,8 @@ public abstract class FieldValueHitQueue<T extends FieldValueHitQueue.Entry> ext
|
|||
private final int oneReverseMul;
|
||||
private final FieldComparator<?> oneComparator;
|
||||
|
||||
public OneComparatorFieldValueHitQueue(SortField[] fields, int size) {
|
||||
super(fields, size);
|
||||
public OneComparatorFieldValueHitQueue(SortField[] fields, int size, boolean filterNonCompetitiveDocs) {
|
||||
super(fields, size, filterNonCompetitiveDocs);
|
||||
|
||||
assert fields.length == 1;
|
||||
oneComparator = comparators[0];
|
||||
|
@ -95,8 +95,8 @@ public abstract class FieldValueHitQueue<T extends FieldValueHitQueue.Entry> ext
|
|||
*/
|
||||
private static final class MultiComparatorsFieldValueHitQueue<T extends FieldValueHitQueue.Entry> extends FieldValueHitQueue<T> {
|
||||
|
||||
public MultiComparatorsFieldValueHitQueue(SortField[] fields, int size) {
|
||||
super(fields, size);
|
||||
public MultiComparatorsFieldValueHitQueue(SortField[] fields, int size, boolean filterNonCompetitiveDocs) {
|
||||
super(fields, size, filterNonCompetitiveDocs);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -121,7 +121,7 @@ public abstract class FieldValueHitQueue<T extends FieldValueHitQueue.Entry> ext
|
|||
}
|
||||
|
||||
// prevent instantiation and extension.
|
||||
private FieldValueHitQueue(SortField[] fields, int size) {
|
||||
private FieldValueHitQueue(SortField[] fields, int size, boolean filterNonCompetitiveDocs) {
|
||||
super(size);
|
||||
// When we get here, fields.length is guaranteed to be > 0, therefore no
|
||||
// need to check it again.
|
||||
|
@ -135,9 +135,15 @@ public abstract class FieldValueHitQueue<T extends FieldValueHitQueue.Entry> ext
|
|||
reverseMul = new int[numComparators];
|
||||
for (int i = 0; i < numComparators; ++i) {
|
||||
SortField field = fields[i];
|
||||
|
||||
reverseMul[i] = field.reverse ? -1 : 1;
|
||||
comparators[i] = field.getComparator(size, i);
|
||||
if (i == 0 && filterNonCompetitiveDocs) {
|
||||
// try to rewrite the 1st comparator to the comparator that can skip non-competitive documents
|
||||
// skipping functionality is beneficial only for the 1st comparator
|
||||
comparators[i] = FilteringFieldComparator.wrapToFilteringComparator(field.getComparator(size, i),
|
||||
field.reverse, numComparators == 1);
|
||||
} else {
|
||||
comparators[i] = field.getComparator(size, i);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -152,17 +158,20 @@ public abstract class FieldValueHitQueue<T extends FieldValueHitQueue.Entry> ext
|
|||
* priority first); cannot be <code>null</code> or empty
|
||||
* @param size
|
||||
* The number of hits to retain. Must be greater than zero.
|
||||
* @param filterNonCompetitiveDocs
|
||||
* {@code true} if comparators should be allowed to filter non-competitive documents, {@code false} otherwise
|
||||
*/
|
||||
public static <T extends FieldValueHitQueue.Entry> FieldValueHitQueue<T> create(SortField[] fields, int size) {
|
||||
public static <T extends FieldValueHitQueue.Entry> FieldValueHitQueue<T> create(SortField[] fields, int size,
|
||||
boolean filterNonCompetitiveDocs) {
|
||||
|
||||
if (fields.length == 0) {
|
||||
throw new IllegalArgumentException("Sort must contain at least one field");
|
||||
}
|
||||
|
||||
if (fields.length == 1) {
|
||||
return new OneComparatorFieldValueHitQueue<>(fields, size);
|
||||
return new OneComparatorFieldValueHitQueue<>(fields, size, filterNonCompetitiveDocs);
|
||||
} else {
|
||||
return new MultiComparatorsFieldValueHitQueue<>(fields, size);
|
||||
return new MultiComparatorsFieldValueHitQueue<>(fields, size, filterNonCompetitiveDocs);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -0,0 +1,93 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.search;
|
||||
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
/**
|
||||
* A wrapper over {@code FieldComparator} that provides a leaf comparator that can filter non-competitive docs.
|
||||
*/
|
||||
abstract class FilteringFieldComparator<T> extends FieldComparator<T> {
|
||||
protected final FieldComparator<T> in;
|
||||
protected final boolean reverse;
|
||||
// singleSort is true, if sort is based on a single sort field. As there are no other sorts configured
|
||||
// as tie breakers, we can filter out docs with equal values.
|
||||
protected final boolean singleSort;
|
||||
protected boolean hasTopValue = false;
|
||||
|
||||
public FilteringFieldComparator(FieldComparator<T> in, boolean reverse, boolean singleSort) {
|
||||
this.in = in;
|
||||
this.reverse = reverse;
|
||||
this.singleSort = singleSort;
|
||||
}
|
||||
|
||||
@Override
|
||||
public abstract FilteringLeafFieldComparator getLeafComparator(LeafReaderContext context) throws IOException;
|
||||
|
||||
@Override
|
||||
public int compare(int slot1, int slot2) {
|
||||
return in.compare(slot1, slot2);
|
||||
}
|
||||
|
||||
@Override
|
||||
public T value(int slot) {
|
||||
return in.value(slot);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setTopValue(T value) {
|
||||
in.setTopValue(value);
|
||||
hasTopValue = true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int compareValues(T first, T second) {
|
||||
return in.compareValues(first, second);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Try to wrap a given field comparator to add to it a functionality to skip over non-competitive docs.
|
||||
* If for the given comparator the skip functionality is not implemented, return the comparator itself.
|
||||
* @param comparator – comparator to wrap
|
||||
* @param reverse – if this sort is reverse
|
||||
* @param singleSort – true if this sort is based on a single field and there are no other sort fields for tie breaking
|
||||
* @return comparator wrapped as a filtering comparator or the original comparator if the filtering functionality
|
||||
* is not implemented for it
|
||||
*/
|
||||
public static FieldComparator<?> wrapToFilteringComparator(FieldComparator<?> comparator, boolean reverse, boolean singleSort) {
|
||||
Class<?> comparatorClass = comparator.getClass();
|
||||
if (comparatorClass == FieldComparator.LongComparator.class){
|
||||
return new FilteringNumericComparator<>((FieldComparator.LongComparator) comparator, reverse, singleSort);
|
||||
}
|
||||
if (comparatorClass == FieldComparator.IntComparator.class){
|
||||
return new FilteringNumericComparator<>((FieldComparator.IntComparator) comparator, reverse, singleSort);
|
||||
}
|
||||
if (comparatorClass == FieldComparator.DoubleComparator.class){
|
||||
return new FilteringNumericComparator<>((FieldComparator.DoubleComparator) comparator, reverse, singleSort);
|
||||
}
|
||||
if (comparatorClass == FieldComparator.FloatComparator.class){
|
||||
return new FilteringNumericComparator<>((FieldComparator.FloatComparator) comparator, reverse, singleSort);
|
||||
}
|
||||
return comparator;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
|
@ -0,0 +1,39 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.search;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
/**
|
||||
* Decorates a wrapped LeafFieldComparator to add a functionality to skip over non-competitive docs.
|
||||
* FilteringLeafFieldComparator provides two additional functions to a LeafFieldComparator:
|
||||
* {@code competitiveIterator()} and {@code setCanUpdateIterator()}.
|
||||
*/
|
||||
public interface FilteringLeafFieldComparator extends LeafFieldComparator {
|
||||
/**
|
||||
* Returns a competitive iterator
|
||||
* @return an iterator over competitive docs that are stronger than already collected docs
|
||||
* or {@code null} if such an iterator is not available for the current segment.
|
||||
*/
|
||||
DocIdSetIterator competitiveIterator() throws IOException;
|
||||
|
||||
/**
|
||||
* Informs this leaf comparator that it is allowed to start updating its competitive iterator.
|
||||
* This method is called from a collector when queue becomes full and threshold is reached.
|
||||
*/
|
||||
void setCanUpdateIterator() throws IOException;
|
||||
}
|
|
@ -0,0 +1,52 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.search;
|
||||
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
/**
|
||||
* A wrapper over {@code NumericComparator} that provides a leaf comparator that can filter non-competitive docs.
|
||||
*/
|
||||
class FilteringNumericComparator<T extends Number> extends FilteringFieldComparator<T> {
|
||||
public FilteringNumericComparator(NumericComparator<T> in, boolean reverse, boolean singleSort) {
|
||||
super(in, reverse, singleSort);
|
||||
}
|
||||
|
||||
@Override
|
||||
public final FilteringLeafFieldComparator getLeafComparator(LeafReaderContext context) throws IOException {
|
||||
LeafFieldComparator inLeafComparator = in.getLeafComparator(context);
|
||||
Class<?> comparatorClass = inLeafComparator.getClass();
|
||||
if (comparatorClass == FieldComparator.LongComparator.class) {
|
||||
return new FilteringNumericLeafComparator.FilteringLongLeafComparator((FieldComparator.LongComparator) inLeafComparator, context,
|
||||
((LongComparator) inLeafComparator).field, reverse, singleSort, hasTopValue);
|
||||
} if (comparatorClass == FieldComparator.IntComparator.class) {
|
||||
return new FilteringNumericLeafComparator.FilteringIntLeafComparator((FieldComparator.IntComparator) inLeafComparator, context,
|
||||
((IntComparator) inLeafComparator).field, reverse, singleSort, hasTopValue);
|
||||
} else if (comparatorClass == FieldComparator.DoubleComparator.class) {
|
||||
return new FilteringNumericLeafComparator.FilteringDoubleLeafComparator((FieldComparator.DoubleComparator) inLeafComparator, context,
|
||||
((DoubleComparator) inLeafComparator).field, reverse, singleSort, hasTopValue);
|
||||
} else if (comparatorClass == FieldComparator.FloatComparator.class) {
|
||||
return new FilteringNumericLeafComparator.FilteringFloatLeafComparator((FieldComparator.FloatComparator) inLeafComparator, context,
|
||||
((FloatComparator) inLeafComparator).field, reverse, singleSort, hasTopValue);
|
||||
} else {
|
||||
throw new IllegalStateException("Unexpected numeric class of ["+ comparatorClass + "] for [FieldComparator]!");
|
||||
}
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,336 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.search;
|
||||
|
||||
import org.apache.lucene.document.DoublePoint;
|
||||
import org.apache.lucene.document.FloatPoint;
|
||||
import org.apache.lucene.document.IntPoint;
|
||||
import org.apache.lucene.document.LongPoint;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.PointValues;
|
||||
import org.apache.lucene.util.DocIdSetBuilder;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
|
||||
/**
|
||||
* A {@code FilteringLeafFieldComparator} that provides a functionality to skip over non-competitive documents
|
||||
* for numeric fields indexed with points.
|
||||
*/
|
||||
abstract class FilteringNumericLeafComparator implements FilteringLeafFieldComparator {
|
||||
protected final LeafFieldComparator in;
|
||||
protected final boolean reverse;
|
||||
protected final boolean singleSort; //if sort is based on a single sort field as opposed to multiple sort fields
|
||||
private final boolean hasTopValue;
|
||||
private final PointValues pointValues;
|
||||
private final int bytesCount;
|
||||
private final int maxDoc;
|
||||
private final byte[] minValueAsBytes;
|
||||
private final byte[] maxValueAsBytes;
|
||||
|
||||
private long iteratorCost;
|
||||
private int maxDocVisited = 0;
|
||||
private int updateCounter = 0;
|
||||
private boolean canUpdateIterator = false; // set to true when queue becomes full and hitsThreshold is reached
|
||||
private DocIdSetIterator competitiveIterator;
|
||||
|
||||
public FilteringNumericLeafComparator(LeafFieldComparator in, LeafReaderContext context, String field,
|
||||
boolean reverse, boolean singleSort, boolean hasTopValue, int bytesCount) throws IOException {
|
||||
this.in = in;
|
||||
this.pointValues = context.reader().getPointValues(field);
|
||||
this.reverse = reverse;
|
||||
this.singleSort = singleSort;
|
||||
this.hasTopValue = hasTopValue;
|
||||
this.maxDoc = context.reader().maxDoc();
|
||||
this.bytesCount = bytesCount;
|
||||
this.maxValueAsBytes = reverse == false ? new byte[bytesCount] : hasTopValue ? new byte[bytesCount] : null;
|
||||
this.minValueAsBytes = reverse ? new byte[bytesCount] : hasTopValue ? new byte[bytesCount] : null;
|
||||
|
||||
// TODO: optimize a case when pointValues are missing only on this segment
|
||||
this.competitiveIterator = pointValues == null ? null : DocIdSetIterator.all(maxDoc);
|
||||
this.iteratorCost = maxDoc;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setBottom(int slot) throws IOException {
|
||||
in.setBottom(slot);
|
||||
updateCompetitiveIterator(); // update an iterator if we set a new bottom
|
||||
}
|
||||
|
||||
@Override
|
||||
public int compareBottom(int doc) throws IOException {
|
||||
return in.compareBottom(doc);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int compareTop(int doc) throws IOException {
|
||||
return in.compareTop(doc);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void copy(int slot, int doc) throws IOException {
|
||||
in.copy(slot, doc);
|
||||
maxDocVisited = doc;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setScorer(Scorable scorer) throws IOException {
|
||||
in.setScorer(scorer);
|
||||
if (scorer instanceof Scorer) {
|
||||
iteratorCost = ((Scorer) scorer).iterator().cost(); // starting iterator cost is the scorer's cost
|
||||
updateCompetitiveIterator(); // update an iterator when we have a new segment
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setCanUpdateIterator() throws IOException {
|
||||
this.canUpdateIterator = true;
|
||||
updateCompetitiveIterator();
|
||||
}
|
||||
|
||||
@Override
|
||||
public DocIdSetIterator competitiveIterator() {
|
||||
if (competitiveIterator == null) return null;
|
||||
return new DocIdSetIterator() {
|
||||
private int doc;
|
||||
|
||||
@Override
|
||||
public int nextDoc() throws IOException {
|
||||
return doc = competitiveIterator.nextDoc();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int docID() {
|
||||
return doc;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long cost() {
|
||||
return competitiveIterator.cost();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int advance(int target) throws IOException {
|
||||
return doc = competitiveIterator.advance(target);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
// update its iterator to include possibly only docs that are "stronger" than the current bottom entry
|
||||
private void updateCompetitiveIterator() throws IOException {
|
||||
if (canUpdateIterator == false) return;
|
||||
if (pointValues == null) return;
|
||||
// if some documents have missing points, check that missing values prohibits optimization
|
||||
if ((pointValues.getDocCount() < maxDoc) && isMissingValueCompetitive()) {
|
||||
return; // we can't filter out documents, as documents with missing values are competitive
|
||||
}
|
||||
|
||||
updateCounter++;
|
||||
if (updateCounter > 256 && (updateCounter & 0x1f) != 0x1f) { // Start sampling if we get called too much
|
||||
return;
|
||||
}
|
||||
if (reverse == false) {
|
||||
encodeBottom(maxValueAsBytes);
|
||||
if (hasTopValue) {
|
||||
encodeTop(minValueAsBytes);
|
||||
}
|
||||
} else {
|
||||
encodeBottom(minValueAsBytes);
|
||||
if (hasTopValue) {
|
||||
encodeTop(maxValueAsBytes);
|
||||
}
|
||||
}
|
||||
|
||||
DocIdSetBuilder result = new DocIdSetBuilder(maxDoc);
|
||||
PointValues.IntersectVisitor visitor = new PointValues.IntersectVisitor() {
|
||||
DocIdSetBuilder.BulkAdder adder;
|
||||
|
||||
@Override
|
||||
public void grow(int count) {
|
||||
adder = result.grow(count);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void visit(int docID) {
|
||||
if (docID <= maxDocVisited) {
|
||||
return; // Already visited or skipped
|
||||
}
|
||||
adder.add(docID);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void visit(int docID, byte[] packedValue) {
|
||||
if (docID <= maxDocVisited) {
|
||||
return; // already visited or skipped
|
||||
}
|
||||
if (maxValueAsBytes != null) {
|
||||
int cmp = Arrays.compareUnsigned(packedValue, 0, bytesCount, maxValueAsBytes, 0, bytesCount);
|
||||
// if doc's value is too high or for single sort even equal, it is not competitive and the doc can be skipped
|
||||
if (cmp > 0 || (singleSort && cmp == 0)) return;
|
||||
}
|
||||
if (minValueAsBytes != null) {
|
||||
int cmp = Arrays.compareUnsigned(packedValue, 0, bytesCount, minValueAsBytes, 0, bytesCount);
|
||||
// if doc's value is too low or for single sort even equal, it is not competitive and the doc can be skipped
|
||||
if (cmp < 0 || (singleSort && cmp == 0)) return;
|
||||
}
|
||||
adder.add(docID); // doc is competitive
|
||||
}
|
||||
|
||||
@Override
|
||||
public PointValues.Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
|
||||
if (maxValueAsBytes != null) {
|
||||
int cmp = Arrays.compareUnsigned(minPackedValue, 0, bytesCount, maxValueAsBytes, 0, bytesCount);
|
||||
if (cmp > 0 || (singleSort && cmp == 0)) return PointValues.Relation.CELL_OUTSIDE_QUERY;
|
||||
}
|
||||
if (minValueAsBytes != null) {
|
||||
int cmp = Arrays.compareUnsigned(maxPackedValue, 0, bytesCount, minValueAsBytes, 0, bytesCount);
|
||||
if (cmp < 0 || (singleSort && cmp == 0)) return PointValues.Relation.CELL_OUTSIDE_QUERY;
|
||||
}
|
||||
if ((maxValueAsBytes != null && Arrays.compareUnsigned(maxPackedValue, 0, bytesCount, maxValueAsBytes, 0, bytesCount) > 0) ||
|
||||
(minValueAsBytes != null && Arrays.compareUnsigned(minPackedValue, 0, bytesCount, minValueAsBytes, 0, bytesCount) < 0)) {
|
||||
return PointValues.Relation.CELL_CROSSES_QUERY;
|
||||
}
|
||||
return PointValues.Relation.CELL_INSIDE_QUERY;
|
||||
}
|
||||
};
|
||||
final long threshold = iteratorCost >>> 3;
|
||||
long estimatedNumberOfMatches = pointValues.estimatePointCount(visitor); // runs in O(log(numPoints))
|
||||
if (estimatedNumberOfMatches >= threshold) {
|
||||
// the new range is not selective enough to be worth materializing, it doesn't reduce number of docs at least 8x
|
||||
return;
|
||||
}
|
||||
pointValues.intersect(visitor);
|
||||
competitiveIterator = result.build().iterator();
|
||||
iteratorCost = competitiveIterator.cost();
|
||||
}
|
||||
|
||||
protected abstract boolean isMissingValueCompetitive();
|
||||
|
||||
protected abstract void encodeBottom(byte[] packedValue);
|
||||
|
||||
protected abstract void encodeTop(byte[] packedValue);
|
||||
|
||||
|
||||
/**
|
||||
* A wrapper over double long comparator that adds a functionality to filter non-competitive docs.
|
||||
*/
|
||||
static class FilteringLongLeafComparator extends FilteringNumericLeafComparator {
|
||||
public FilteringLongLeafComparator(FieldComparator.LongComparator in, LeafReaderContext context,
|
||||
String field, boolean reverse, boolean singleSort, boolean hasTopValue) throws IOException {
|
||||
super(in, context, field, reverse, singleSort, hasTopValue, Long.BYTES);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected boolean isMissingValueCompetitive() {
|
||||
int result = Long.compare(((FieldComparator.LongComparator) in).missingValue, ((FieldComparator.LongComparator) in).bottom);
|
||||
// in reverse (desc) sort missingValue is competitive when it's greater or equal to bottom,
|
||||
// in asc sort missingValue is competitive when it's smaller or equal to bottom
|
||||
return reverse ? (result >= 0) : (result <= 0);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void encodeBottom(byte[] packedValue) {
|
||||
LongPoint.encodeDimension(((FieldComparator.LongComparator) in).bottom, packedValue, 0);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void encodeTop(byte[] packedValue) {
|
||||
LongPoint.encodeDimension(((FieldComparator.LongComparator) in).topValue, packedValue, 0);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* A wrapper over integer leaf comparator that adds a functionality to filter non-competitive docs.
|
||||
*/
|
||||
static class FilteringIntLeafComparator extends FilteringNumericLeafComparator {
|
||||
public FilteringIntLeafComparator(FieldComparator.IntComparator in, LeafReaderContext context,
|
||||
String field, boolean reverse, boolean singleSort, boolean hasTopValue) throws IOException {
|
||||
super(in, context, field, reverse, singleSort, hasTopValue, Integer.BYTES);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected boolean isMissingValueCompetitive() {
|
||||
int result = Integer.compare(((FieldComparator.IntComparator) in).missingValue, ((FieldComparator.IntComparator) in).bottom);
|
||||
// in reverse (desc) sort missingValue is competitive when it's greater or equal to bottom,
|
||||
// in asc sort missingValue is competitive when it's smaller or equal to bottom
|
||||
return reverse ? (result >= 0) : (result <= 0);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void encodeBottom(byte[] packedValue) {
|
||||
IntPoint.encodeDimension(((FieldComparator.IntComparator) in).bottom, packedValue, 0);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void encodeTop(byte[] packedValue) {
|
||||
IntPoint.encodeDimension(((FieldComparator.IntComparator) in).topValue, packedValue, 0);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* A wrapper over double leaf comparator that adds a functionality to filter non-competitive docs.
|
||||
*/
|
||||
static class FilteringDoubleLeafComparator extends FilteringNumericLeafComparator {
|
||||
public FilteringDoubleLeafComparator(FieldComparator.DoubleComparator in, LeafReaderContext context,
|
||||
String field, boolean reverse, boolean singleSort, boolean hasTopValue) throws IOException {
|
||||
super(in, context, field, reverse, singleSort, hasTopValue, Double.BYTES);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected boolean isMissingValueCompetitive() {
|
||||
int result = Double.compare(((FieldComparator.DoubleComparator) in).missingValue, ((FieldComparator.DoubleComparator) in).bottom);
|
||||
return reverse ? (result >= 0) : (result <= 0);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void encodeBottom(byte[] packedValue) {
|
||||
DoublePoint.encodeDimension(((FieldComparator.DoubleComparator) in).bottom, packedValue, 0);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void encodeTop(byte[] packedValue) {
|
||||
DoublePoint.encodeDimension(((FieldComparator.DoubleComparator) in).topValue, packedValue, 0);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* A wrapper over float leaf comparator that adds a functionality to filter non-competitive docs.
|
||||
*/
|
||||
static class FilteringFloatLeafComparator extends FilteringNumericLeafComparator {
|
||||
public FilteringFloatLeafComparator(FieldComparator.FloatComparator in, LeafReaderContext context,
|
||||
String field, boolean reverse, boolean singleSort, boolean hasTopValue) throws IOException {
|
||||
super(in, context, field, reverse, singleSort, hasTopValue, Float.BYTES);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected boolean isMissingValueCompetitive() {
|
||||
int result = Float.compare(((FieldComparator.FloatComparator) in).missingValue, ((FieldComparator.FloatComparator) in).bottom);
|
||||
return reverse ? (result >= 0) : (result <= 0);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void encodeBottom(byte[] packedValue) {
|
||||
FloatPoint.encodeDimension(((FieldComparator.FloatComparator) in).bottom, packedValue, 0);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void encodeTop(byte[] packedValue) {
|
||||
FloatPoint.encodeDimension(((FieldComparator.FloatComparator) in).topValue, packedValue, 0);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
|
@ -93,4 +93,16 @@ public interface LeafCollector {
|
|||
*/
|
||||
void collect(int doc) throws IOException;
|
||||
|
||||
/**
|
||||
* Optionally returns an iterator over competitive documents.
|
||||
*
|
||||
* Collectors should delegate this method to their comparators if
|
||||
* their comparators provide the skipping functionality over non-competitive docs.
|
||||
*
|
||||
* The default is to return {@code null}, which is interpreted as the collector not providing any competitive iterator.
|
||||
*/
|
||||
default DocIdSetIterator competitiveIterator() throws IOException {
|
||||
return null;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -46,7 +46,7 @@ public final class MatchAllDocsQuery extends Query {
|
|||
|
||||
@Override
|
||||
public BulkScorer bulkScorer(LeafReaderContext context) throws IOException {
|
||||
if (scoreMode == ScoreMode.TOP_SCORES) {
|
||||
if (scoreMode.isExhaustive() == false) {
|
||||
return super.bulkScorer(context);
|
||||
}
|
||||
final float score = score();
|
||||
|
|
|
@ -24,37 +24,53 @@ public enum ScoreMode {
|
|||
/**
|
||||
* Produced scorers will allow visiting all matches and get their score.
|
||||
*/
|
||||
COMPLETE {
|
||||
@Override
|
||||
public boolean needsScores() {
|
||||
return true;
|
||||
}
|
||||
},
|
||||
COMPLETE(true, true),
|
||||
|
||||
/**
|
||||
* Produced scorers will allow visiting all matches but scores won't be
|
||||
* available.
|
||||
*/
|
||||
COMPLETE_NO_SCORES {
|
||||
@Override
|
||||
public boolean needsScores() {
|
||||
return false;
|
||||
}
|
||||
},
|
||||
COMPLETE_NO_SCORES(true, false),
|
||||
|
||||
/**
|
||||
* Produced scorers will optionally allow skipping over non-competitive
|
||||
* hits using the {@link Scorer#setMinCompetitiveScore(float)} API.
|
||||
*/
|
||||
TOP_SCORES {
|
||||
@Override
|
||||
public boolean needsScores() {
|
||||
return true;
|
||||
}
|
||||
};
|
||||
TOP_SCORES(false, true),
|
||||
|
||||
/**
|
||||
* ScoreMode for top field collectors that can provide their own iterators,
|
||||
* to optionally allow to skip for non-competitive docs
|
||||
*/
|
||||
TOP_DOCS(false, false),
|
||||
|
||||
/**
|
||||
* ScoreMode for top field collectors that can provide their own iterators,
|
||||
* to optionally allow to skip for non-competitive docs.
|
||||
* This mode is used when there is a secondary sort by _score.
|
||||
*/
|
||||
TOP_DOCS_WITH_SCORES(false, true);
|
||||
|
||||
private final boolean needsScores;
|
||||
private final boolean isExhaustive;
|
||||
|
||||
ScoreMode(boolean isExhaustive, boolean needsScores) {
|
||||
this.isExhaustive = isExhaustive;
|
||||
this.needsScores = needsScores;
|
||||
}
|
||||
|
||||
/**
|
||||
* Whether this {@link ScoreMode} needs to compute scores.
|
||||
*/
|
||||
public abstract boolean needsScores();
|
||||
public boolean needsScores() {
|
||||
return needsScores;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns {@code true} if for this {@link ScoreMode} it is necessary to process all documents,
|
||||
* or {@code false} if it is enough to go through top documents only.
|
||||
*/
|
||||
public boolean isExhaustive() {
|
||||
return isExhaustive;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -33,6 +33,10 @@ import org.apache.lucene.util.NumericUtils;
|
|||
* Stores information about how to sort documents by terms in an individual
|
||||
* field. Fields must be indexed in order to sort by them.
|
||||
*
|
||||
* Sorting on a numeric field that is indexed with both doc values and points may use an
|
||||
* optimization to skip non-competitive documents. This optimization relies on the assumption
|
||||
* that the same data is stored in these points and doc values.
|
||||
*
|
||||
* <p>Created: Feb 11, 2004 1:25:29 PM
|
||||
*
|
||||
* @since lucene 1.4
|
||||
|
|
|
@ -49,10 +49,13 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
|
|||
private static abstract class MultiComparatorLeafCollector implements LeafCollector {
|
||||
|
||||
final LeafFieldComparator comparator;
|
||||
final FilteringLeafFieldComparator filteringLeafComparator;
|
||||
final int reverseMul;
|
||||
Scorable scorer;
|
||||
|
||||
MultiComparatorLeafCollector(LeafFieldComparator[] comparators, int[] reverseMul) {
|
||||
this.filteringLeafComparator = comparators[0] instanceof FilteringLeafFieldComparator ?
|
||||
(FilteringLeafFieldComparator) comparators[0] : null;
|
||||
if (comparators.length == 1) {
|
||||
this.reverseMul = reverseMul[0];
|
||||
this.comparator = comparators[0];
|
||||
|
@ -87,6 +90,14 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
|
|||
if (minScoreAcc != null && (totalHits & minScoreAcc.modInterval) == 0) {
|
||||
updateGlobalMinCompetitiveScore(scorer);
|
||||
}
|
||||
|
||||
if (filteringLeafComparator != null && queueFull &&
|
||||
hitsThresholdChecker.isThresholdReached() && totalHitsRelation == TotalHits.Relation.EQUAL_TO) {
|
||||
// for the first time queue becomes full and hitsThreshold is reached,
|
||||
// notify leaf comparator that its competitive iterator can be updated
|
||||
filteringLeafComparator.setCanUpdateIterator();
|
||||
totalHitsRelation = TotalHits.Relation.GREATER_THAN_OR_EQUAL_TO;
|
||||
}
|
||||
}
|
||||
|
||||
boolean thresholdCheck(int doc) throws IOException {
|
||||
|
@ -139,7 +150,23 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
|
|||
if (minScoreAcc != null) {
|
||||
updateGlobalMinCompetitiveScore(scorer);
|
||||
}
|
||||
if (filteringLeafComparator != null && queueFull && hitsThresholdChecker.isThresholdReached()) {
|
||||
// if queue became full and hitsThreshold was reached in previous segments,
|
||||
// notify this segment's leaf comparator that its competitive iterator can be updated
|
||||
filteringLeafComparator.setCanUpdateIterator();
|
||||
totalHitsRelation = TotalHits.Relation.GREATER_THAN_OR_EQUAL_TO;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public DocIdSetIterator competitiveIterator() throws IOException {
|
||||
if (filteringLeafComparator == null) {
|
||||
return null;
|
||||
} else {
|
||||
return filteringLeafComparator.competitiveIterator();
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
static boolean canEarlyTerminate(Sort searchSort, Sort indexSort) {
|
||||
|
@ -246,18 +273,16 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
|
|||
return;
|
||||
}
|
||||
}
|
||||
|
||||
final int topCmp = reverseMul * comparator.compareTop(doc);
|
||||
if (topCmp > 0 || (topCmp == 0 && doc <= afterDoc)) {
|
||||
// Already collected on a previous page
|
||||
if (totalHitsRelation == TotalHits.Relation.EQUAL_TO) {
|
||||
// we just reached totalHitsThreshold, we can start setting the min
|
||||
// competitive score now
|
||||
// check if totalHitsThreshold is reached and we can update competitive score
|
||||
// necessary to account for possible update to global min competitive score
|
||||
updateMinCompetitiveScore(scorer);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
if (queueFull) {
|
||||
collectCompetitiveHit(doc);
|
||||
} else {
|
||||
|
@ -274,7 +299,7 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
|
|||
|
||||
final int numHits;
|
||||
final HitsThresholdChecker hitsThresholdChecker;
|
||||
final FieldComparator.RelevanceComparator firstComparator;
|
||||
final FieldComparator.RelevanceComparator relevanceComparator;
|
||||
final boolean canSetMinScore;
|
||||
|
||||
// an accumulator that maintains the maximum of the segment's minimum competitive scores
|
||||
|
@ -302,18 +327,24 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
|
|||
this.numHits = numHits;
|
||||
this.hitsThresholdChecker = hitsThresholdChecker;
|
||||
this.numComparators = pq.getComparators().length;
|
||||
FieldComparator<?> fieldComparator = pq.getComparators()[0];
|
||||
FieldComparator<?> firstComparator = pq.getComparators()[0];
|
||||
int reverseMul = pq.reverseMul[0];
|
||||
if (fieldComparator.getClass().equals(FieldComparator.RelevanceComparator.class)
|
||||
&& reverseMul == 1 // if the natural sort is preserved (sort by descending relevance)
|
||||
&& hitsThresholdChecker.getHitsThreshold() != Integer.MAX_VALUE) {
|
||||
firstComparator = (FieldComparator.RelevanceComparator) fieldComparator;
|
||||
|
||||
if (firstComparator.getClass().equals(FieldComparator.RelevanceComparator.class)
|
||||
&& reverseMul == 1 // if the natural sort is preserved (sort by descending relevance)
|
||||
&& hitsThresholdChecker.getHitsThreshold() != Integer.MAX_VALUE) {
|
||||
relevanceComparator = (FieldComparator.RelevanceComparator) firstComparator;
|
||||
scoreMode = ScoreMode.TOP_SCORES;
|
||||
canSetMinScore = true;
|
||||
} else {
|
||||
firstComparator = null;
|
||||
scoreMode = needsScores ? ScoreMode.COMPLETE : ScoreMode.COMPLETE_NO_SCORES;
|
||||
relevanceComparator = null;
|
||||
canSetMinScore = false;
|
||||
if (firstComparator instanceof FilteringFieldComparator) {
|
||||
assert hitsThresholdChecker.getHitsThreshold() != Integer.MAX_VALUE;
|
||||
scoreMode = needsScores ? ScoreMode.TOP_DOCS_WITH_SCORES : ScoreMode.TOP_DOCS;
|
||||
} else {
|
||||
scoreMode = needsScores ? ScoreMode.COMPLETE : ScoreMode.COMPLETE_NO_SCORES;
|
||||
}
|
||||
}
|
||||
this.minScoreAcc = minScoreAcc;
|
||||
}
|
||||
|
@ -343,8 +374,8 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
|
|||
if (canSetMinScore
|
||||
&& queueFull
|
||||
&& hitsThresholdChecker.isThresholdReached()) {
|
||||
assert bottom != null && firstComparator != null;
|
||||
float minScore = firstComparator.value(bottom.slot);
|
||||
assert bottom != null && relevanceComparator != null;
|
||||
float minScore = relevanceComparator.value(bottom.slot);
|
||||
if (minScore > minCompetitiveScore) {
|
||||
scorer.setMinCompetitiveScore(minScore);
|
||||
minCompetitiveScore = minScore;
|
||||
|
@ -356,6 +387,7 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Creates a new {@link TopFieldCollector} from the given
|
||||
* arguments.
|
||||
|
@ -403,6 +435,11 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
|
|||
* {@code totalHitsThreshold} hits then the hit count of the result will
|
||||
* be accurate. {@link Integer#MAX_VALUE} may be used to make the hit
|
||||
* count accurate, but this will also make query processing slower.
|
||||
* Setting totalHitsThreshold less than {@link Integer#MAX_VALUE}
|
||||
* instructs Lucene to skip non-competitive documents whenever possible. For numeric
|
||||
* sort fields the skipping functionality works when the same field is indexed both
|
||||
* with doc values and points. In this case, there is an assumption that the same data is
|
||||
* stored in these points and doc values.
|
||||
* @return a {@link TopFieldCollector} instance which will sort the results by
|
||||
* the sort criteria.
|
||||
*/
|
||||
|
@ -432,7 +469,9 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
|
|||
throw new IllegalArgumentException("hitsThresholdChecker should not be null");
|
||||
}
|
||||
|
||||
FieldValueHitQueue<Entry> queue = FieldValueHitQueue.create(sort.fields, numHits);
|
||||
// if a hits threshold was set (i.e. it is not Integer.MAX_VALUE), allow the comparator to skip non-competitive docs
|
||||
boolean filterNonCompetitiveDocs = hitsThresholdChecker.getHitsThreshold() == Integer.MAX_VALUE ? false : true;
|
||||
FieldValueHitQueue<Entry> queue = FieldValueHitQueue.create(sort.fields, numHits, filterNonCompetitiveDocs);
|
||||
|
||||
if (after == null) {
|
||||
return new SimpleFieldCollector(sort, queue, numHits, hitsThresholdChecker, minScoreAcc);
|
||||
|
|
|
@ -18,6 +18,7 @@ package org.apache.lucene.search;
|
|||
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
|
||||
import org.apache.lucene.index.IndexReaderContext;
|
||||
import org.apache.lucene.index.LeafReader;
|
||||
|
@ -201,19 +202,20 @@ public abstract class Weight implements SegmentCacheable {
|
|||
@Override
|
||||
public int score(LeafCollector collector, Bits acceptDocs, int min, int max) throws IOException {
|
||||
collector.setScorer(scorer);
|
||||
DocIdSetIterator scorerIterator = twoPhase == null ? iterator : twoPhase.approximation();
|
||||
DocIdSetIterator collectorIterator = collector.competitiveIterator();
|
||||
// if possible filter scorerIterator to keep only competitive docs as defined by collector
|
||||
DocIdSetIterator filteredIterator = collectorIterator == null ? scorerIterator :
|
||||
ConjunctionDISI.intersectIterators(Arrays.asList(scorerIterator, collectorIterator));
|
||||
if (scorer.docID() == -1 && min == 0 && max == DocIdSetIterator.NO_MORE_DOCS) {
|
||||
scoreAll(collector, iterator, twoPhase, acceptDocs);
|
||||
scoreAll(collector, filteredIterator, twoPhase, acceptDocs);
|
||||
return DocIdSetIterator.NO_MORE_DOCS;
|
||||
} else {
|
||||
int doc = scorer.docID();
|
||||
if (doc < min) {
|
||||
if (twoPhase == null) {
|
||||
doc = iterator.advance(min);
|
||||
} else {
|
||||
doc = twoPhase.approximation().advance(min);
|
||||
}
|
||||
doc = scorerIterator.advance(min);
|
||||
}
|
||||
return scoreRange(collector, iterator, twoPhase, acceptDocs, doc, max);
|
||||
return scoreRange(collector, filteredIterator, twoPhase, acceptDocs, doc, max);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -242,7 +244,7 @@ public abstract class Weight implements SegmentCacheable {
|
|||
return currentDoc;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/** Specialized method to bulk-score all hits; we
|
||||
* separate this from {@link #scoreRange} to help out
|
||||
* hotspot.
|
||||
|
|
|
@ -0,0 +1,294 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.search;
|
||||
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.FloatDocValuesField;
|
||||
import org.apache.lucene.document.LongPoint;
|
||||
import org.apache.lucene.document.IntPoint;
|
||||
import org.apache.lucene.document.FloatPoint;
|
||||
import org.apache.lucene.document.NumericDocValuesField;
|
||||
import org.apache.lucene.index.DirectoryReader;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.IndexWriter;
|
||||
import org.apache.lucene.index.IndexWriterConfig;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import static org.apache.lucene.search.SortField.FIELD_SCORE;
|
||||
|
||||
public class TestFieldSortOptimizationSkipping extends LuceneTestCase {
|
||||
|
||||
public void testLongSortOptimization() throws IOException {
|
||||
|
||||
final Directory dir = newDirectory();
|
||||
final IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig());
|
||||
final int numDocs = atLeast(10000);
|
||||
for (int i = 0; i < numDocs; ++i) {
|
||||
final Document doc = new Document();
|
||||
doc.add(new NumericDocValuesField("my_field", i));
|
||||
doc.add(new LongPoint("my_field", i));
|
||||
writer.addDocument(doc);
|
||||
if (i == 7000) writer.flush(); // two segments
|
||||
}
|
||||
final IndexReader reader = DirectoryReader.open(writer);
|
||||
IndexSearcher searcher = new IndexSearcher(reader);
|
||||
final SortField sortField = new SortField("my_field", SortField.Type.LONG);
|
||||
final Sort sort = new Sort(sortField);
|
||||
final int numHits = 3;
|
||||
final int totalHitsThreshold = 3;
|
||||
|
||||
{ // simple sort
|
||||
final TopFieldCollector collector = TopFieldCollector.create(sort, numHits, null, totalHitsThreshold);
|
||||
searcher.search(new MatchAllDocsQuery(), collector);
|
||||
TopDocs topDocs = collector.topDocs();
|
||||
assertEquals(topDocs.scoreDocs.length, numHits);
|
||||
for (int i = 0; i < numHits; i++) {
|
||||
FieldDoc fieldDoc = (FieldDoc) topDocs.scoreDocs[i];
|
||||
assertEquals(i, ((Long) fieldDoc.fields[0]).intValue());
|
||||
}
|
||||
assertTrue(collector.isEarlyTerminated());
|
||||
assertTrue(topDocs.totalHits.value < numDocs);
|
||||
}
|
||||
|
||||
{ // paging sort with after
|
||||
long afterValue = 2;
|
||||
FieldDoc after = new FieldDoc(2, Float.NaN, new Long[] {afterValue});
|
||||
final TopFieldCollector collector = TopFieldCollector.create(sort, numHits, after, totalHitsThreshold);
|
||||
searcher.search(new MatchAllDocsQuery(), collector);
|
||||
TopDocs topDocs = collector.topDocs();
|
||||
assertEquals(topDocs.scoreDocs.length, numHits);
|
||||
for (int i = 0; i < numHits; i++) {
|
||||
FieldDoc fieldDoc = (FieldDoc) topDocs.scoreDocs[i];
|
||||
assertEquals(afterValue + 1 + i, fieldDoc.fields[0]);
|
||||
}
|
||||
assertTrue(collector.isEarlyTerminated());
|
||||
assertTrue(topDocs.totalHits.value < numDocs);
|
||||
}
|
||||
|
||||
{ // test that if there is the secondary sort on _score, scores are filled correctly
|
||||
final TopFieldCollector collector = TopFieldCollector.create(new Sort(sortField, FIELD_SCORE), numHits, null, totalHitsThreshold);
|
||||
searcher.search(new MatchAllDocsQuery(), collector);
|
||||
TopDocs topDocs = collector.topDocs();
|
||||
assertEquals(topDocs.scoreDocs.length, numHits);
|
||||
for (int i = 0; i < numHits; i++) {
|
||||
FieldDoc fieldDoc = (FieldDoc) topDocs.scoreDocs[i];
|
||||
assertEquals(i, ((Long) fieldDoc.fields[0]).intValue());
|
||||
float score = (float) fieldDoc.fields[1];
|
||||
assertEquals(1.0, score, 0.001);
|
||||
}
|
||||
assertTrue(collector.isEarlyTerminated());
|
||||
assertTrue(topDocs.totalHits.value < numDocs);
|
||||
}
|
||||
|
||||
writer.close();
|
||||
reader.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* test that even if a field is not indexed with points, optimized sort still works as expected,
|
||||
* although no optimization will be run
|
||||
*/
|
||||
public void testLongSortOptimizationOnFieldNotIndexedWithPoints() throws IOException {
|
||||
final Directory dir = newDirectory();
|
||||
final IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig());
|
||||
final int numDocs = atLeast(100);
|
||||
// my_field is not indexed with points
|
||||
for (int i = 0; i < numDocs; ++i) {
|
||||
final Document doc = new Document();
|
||||
doc.add(new NumericDocValuesField("my_field", i));
|
||||
writer.addDocument(doc);
|
||||
}
|
||||
final IndexReader reader = DirectoryReader.open(writer);
|
||||
IndexSearcher searcher = new IndexSearcher(reader);
|
||||
final SortField sortField = new SortField("my_field", SortField.Type.LONG);
|
||||
final Sort sort = new Sort(sortField);
|
||||
final int numHits = 3;
|
||||
final int totalHitsThreshold = 3;
|
||||
|
||||
final TopFieldCollector collector = TopFieldCollector.create(sort, numHits, null, totalHitsThreshold);
|
||||
searcher.search(new MatchAllDocsQuery(), collector);
|
||||
TopDocs topDocs = collector.topDocs();
|
||||
assertEquals(topDocs.scoreDocs.length, numHits); // sort still works and returns expected number of docs
|
||||
for (int i = 0; i < numHits; i++) {
|
||||
FieldDoc fieldDoc = (FieldDoc) topDocs.scoreDocs[i];
|
||||
assertEquals(i, ((Long) fieldDoc.fields[0]).intValue()); // returns expected values
|
||||
}
|
||||
assertEquals(topDocs.totalHits.value, numDocs); // assert that all documents were collected => optimization was not run
|
||||
|
||||
writer.close();
|
||||
reader.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
|
||||
public void testSortOptimizationWithMissingValues() throws IOException {
|
||||
final Directory dir = newDirectory();
|
||||
final IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig());
|
||||
final int numDocs = atLeast(10000);
|
||||
for (int i = 0; i < numDocs; ++i) {
|
||||
final Document doc = new Document();
|
||||
if ((i % 500) != 0) { // miss values on every 500th document
|
||||
doc.add(new NumericDocValuesField("my_field", i));
|
||||
doc.add(new LongPoint("my_field", i));
|
||||
}
|
||||
writer.addDocument(doc);
|
||||
if (i == 7000) writer.flush(); // two segments
|
||||
}
|
||||
final IndexReader reader = DirectoryReader.open(writer);
|
||||
IndexSearcher searcher = new IndexSearcher(reader);
|
||||
final int numHits = 3;
|
||||
final int totalHitsThreshold = 3;
|
||||
|
||||
{ // test that optimization is not run when missing value setting of SortField is competitive
|
||||
final SortField sortField = new SortField("my_field", SortField.Type.LONG);
|
||||
sortField.setMissingValue(0L); // set a competitive missing value
|
||||
final Sort sort = new Sort(sortField);
|
||||
final TopFieldCollector collector = TopFieldCollector.create(sort, numHits, null, totalHitsThreshold);
|
||||
searcher.search(new MatchAllDocsQuery(), collector);
|
||||
TopDocs topDocs = collector.topDocs();
|
||||
assertEquals(topDocs.scoreDocs.length, numHits);
|
||||
assertEquals(topDocs.totalHits.value, numDocs); // assert that all documents were collected => optimization was not run
|
||||
}
|
||||
{ // test that optimization is run when missing value setting of SortField is NOT competitive
|
||||
final SortField sortField = new SortField("my_field", SortField.Type.LONG);
|
||||
sortField.setMissingValue(100L); // set a NON competitive missing value
|
||||
final Sort sort = new Sort(sortField);
|
||||
final TopFieldCollector collector = TopFieldCollector.create(sort, numHits, null, totalHitsThreshold);
|
||||
searcher.search(new MatchAllDocsQuery(), collector);
|
||||
TopDocs topDocs = collector.topDocs();
|
||||
assertEquals(topDocs.scoreDocs.length, numHits);
|
||||
assertTrue(topDocs.totalHits.value < numDocs); // assert that some docs were skipped => optimization was run
|
||||
}
|
||||
|
||||
writer.close();
|
||||
reader.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
public void testSortOptimizationEqualValues() throws IOException {
|
||||
final Directory dir = newDirectory();
|
||||
final IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig());
|
||||
final int numDocs = atLeast(10000);
|
||||
for (int i = 1; i <= numDocs; ++i) {
|
||||
final Document doc = new Document();
|
||||
doc.add(new NumericDocValuesField("my_field1", 100)); // all docs have the same value of my_field1
|
||||
doc.add(new IntPoint("my_field1", 100));
|
||||
doc.add(new NumericDocValuesField("my_field2", numDocs - i)); // diff values for the field my_field2
|
||||
writer.addDocument(doc);
|
||||
if (i == 7000) writer.flush(); // two segments
|
||||
}
|
||||
final IndexReader reader = DirectoryReader.open(writer);
|
||||
IndexSearcher searcher = new IndexSearcher(reader);
|
||||
final int numHits = 3;
|
||||
final int totalHitsThreshold = 3;
|
||||
|
||||
{ // test that sorting on a single field with equal values uses the optimization
|
||||
final SortField sortField = new SortField("my_field1", SortField.Type.INT);
|
||||
final Sort sort = new Sort(sortField);
|
||||
final TopFieldCollector collector = TopFieldCollector.create(sort, numHits, null, totalHitsThreshold);
|
||||
searcher.search(new MatchAllDocsQuery(), collector);
|
||||
TopDocs topDocs = collector.topDocs();
|
||||
assertEquals(topDocs.scoreDocs.length, numHits);
|
||||
for (int i = 0; i < numHits; i++) {
|
||||
FieldDoc fieldDoc = (FieldDoc) topDocs.scoreDocs[i];
|
||||
assertEquals(100, fieldDoc.fields[0]);
|
||||
}
|
||||
assertTrue(topDocs.totalHits.value < numDocs); // assert that some docs were skipped => optimization was run
|
||||
}
|
||||
|
||||
{ // test that sorting on a single field with equal values and after parameter uses the optimization
|
||||
final int afterValue = 100;
|
||||
final SortField sortField = new SortField("my_field1", SortField.Type.INT);
|
||||
final Sort sort = new Sort(sortField);
|
||||
FieldDoc after = new FieldDoc(10, Float.NaN, new Integer[] {afterValue});
|
||||
final TopFieldCollector collector = TopFieldCollector.create(sort, numHits, after, totalHitsThreshold);
|
||||
searcher.search(new MatchAllDocsQuery(), collector);
|
||||
TopDocs topDocs = collector.topDocs();
|
||||
assertEquals(topDocs.scoreDocs.length, numHits);
|
||||
for (int i = 0; i < numHits; i++) {
|
||||
FieldDoc fieldDoc = (FieldDoc) topDocs.scoreDocs[i];
|
||||
assertEquals(100, fieldDoc.fields[0]);
|
||||
}
|
||||
assertTrue(topDocs.totalHits.value < numDocs); // assert that some docs were skipped => optimization was run
|
||||
}
|
||||
|
||||
{ // test that sorting on main field with equal values + another field for tie breaks doesn't use optimization
|
||||
final SortField sortField1 = new SortField("my_field1", SortField.Type.INT);
|
||||
final SortField sortField2 = new SortField("my_field2", SortField.Type.INT);
|
||||
final Sort sort = new Sort(sortField1, sortField2);
|
||||
final TopFieldCollector collector = TopFieldCollector.create(sort, numHits, null, totalHitsThreshold);
|
||||
searcher.search(new MatchAllDocsQuery(), collector);
|
||||
TopDocs topDocs = collector.topDocs();
|
||||
assertEquals(topDocs.scoreDocs.length, numHits);
|
||||
for (int i = 0; i < numHits; i++) {
|
||||
FieldDoc fieldDoc = (FieldDoc) topDocs.scoreDocs[i];
|
||||
assertEquals(100, fieldDoc.fields[0]); // sort on 1st field as expected
|
||||
assertEquals(i, fieldDoc.fields[1]); // sort on 2nd field as expected
|
||||
}
|
||||
assertEquals(topDocs.scoreDocs.length, numHits);
|
||||
assertEquals(topDocs.totalHits.value, numDocs); // assert that all documents were collected => optimization was not run
|
||||
}
|
||||
|
||||
writer.close();
|
||||
reader.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
|
||||
public void testFloatSortOptimization() throws IOException {
|
||||
final Directory dir = newDirectory();
|
||||
final IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig());
|
||||
final int numDocs = atLeast(10000);
|
||||
for (int i = 0; i < numDocs; ++i) {
|
||||
final Document doc = new Document();
|
||||
float f = 1f * i;
|
||||
doc.add(new FloatDocValuesField("my_field", f));
|
||||
doc.add(new FloatPoint("my_field", i));
|
||||
writer.addDocument(doc);
|
||||
}
|
||||
final IndexReader reader = DirectoryReader.open(writer);
|
||||
IndexSearcher searcher = new IndexSearcher(reader);
|
||||
final SortField sortField = new SortField("my_field", SortField.Type.FLOAT);
|
||||
final Sort sort = new Sort(sortField);
|
||||
final int numHits = 3;
|
||||
final int totalHitsThreshold = 3;
|
||||
|
||||
{ // simple sort
|
||||
final TopFieldCollector collector = TopFieldCollector.create(sort, numHits, null, totalHitsThreshold);
|
||||
searcher.search(new MatchAllDocsQuery(), collector);
|
||||
TopDocs topDocs = collector.topDocs();
|
||||
assertEquals(topDocs.scoreDocs.length, numHits);
|
||||
for (int i = 0; i < numHits; i++) {
|
||||
FieldDoc fieldDoc = (FieldDoc) topDocs.scoreDocs[i];
|
||||
assertEquals(1f * i, fieldDoc.fields[0]);
|
||||
}
|
||||
assertTrue(collector.isEarlyTerminated());
|
||||
assertTrue(topDocs.totalHits.value < numDocs);
|
||||
}
|
||||
|
||||
writer.close();
|
||||
reader.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
|
||||
}
|
|
@ -50,5 +50,9 @@ class AssertingLeafCollector extends FilterLeafCollector {
|
|||
lastCollected = doc;
|
||||
}
|
||||
|
||||
}
|
||||
@Override
|
||||
public DocIdSetIterator competitiveIterator() throws IOException {
|
||||
return in.competitiveIterator();
|
||||
}
|
||||
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue