LUCENE-9449 Skip docs with _doc sort and "after" (#1725)

- Enhance DocComparator to provide an iterator over competitive
documents when searching with "after". This iterator can quickly position
on the desired "after" document skipping all documents and segments before
"after".

- Redesign numeric comparators to provide skipping functionality
by default.

Relates to LUCENE-9280
This commit is contained in:
Mayya Sharipova 2020-09-08 14:16:27 -04:00 committed by GitHub
parent 98e55f0ea8
commit 99220677fe
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
25 changed files with 1391 additions and 1009 deletions

View File

@ -122,6 +122,13 @@ Improvements
* LUCENE-9313: Add SerbianAnalyzer based on the snowball stemmer. (Dragan Ivanovic) * LUCENE-9313: Add SerbianAnalyzer based on the snowball stemmer. (Dragan Ivanovic)
* LUCENE-9449: Enhance DocComparator to provide an iterator over competitive
documents when searching with "after". This iterator can quickly position
on the desired "after" document skipping all documents and segments before
"after". Also redesign numeric comparators to provide skipping functionality
by default. (Mayya Sharipova, Jim Ferenczi)
Bug fixes Bug fixes
* LUCENE-8663: NRTCachingDirectory.slowFileExists may open a file while * LUCENE-8663: NRTCachingDirectory.slowFileExists may open a file while

View File

@ -26,6 +26,7 @@ import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.NumericDocValues; import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.search.comparators.DoubleComparator;
/** /**
* Base class for producing {@link DoubleValues} * Base class for producing {@link DoubleValues}
@ -488,20 +489,26 @@ public abstract class DoubleValuesSource implements SegmentCacheable {
@Override @Override
public FieldComparator<Double> newComparator(String fieldname, int numHits, public FieldComparator<Double> newComparator(String fieldname, int numHits,
int sortPos, boolean reversed) { int sortPos, boolean reversed) {
return new FieldComparator.DoubleComparator(numHits, fieldname, missingValue){ return new DoubleComparator(numHits, fieldname, missingValue, reversed, sortPos) {
LeafReaderContext ctx;
DoubleValuesHolder holder = new DoubleValuesHolder();
@Override @Override
protected NumericDocValues getNumericDocValues(LeafReaderContext context, String field) throws IOException { public LeafFieldComparator getLeafComparator(LeafReaderContext context) throws IOException {
ctx = context; DoubleValuesHolder holder = new DoubleValuesHolder();
return asNumericDocValues(holder, Double::doubleToLongBits);
}
@Override return new DoubleComparator.DoubleLeafComparator(context) {
public void setScorer(Scorable scorer) throws IOException { LeafReaderContext ctx;
holder.values = producer.getValues(ctx, fromScorer(scorer));
@Override
protected NumericDocValues getNumericDocValues(LeafReaderContext context, String field) {
ctx = context;
return asNumericDocValues(holder, Double::doubleToLongBits);
}
@Override
public void setScorer(Scorable scorer) throws IOException {
holder.values = producer.getValues(ctx, fromScorer(scorer));
super.setScorer(scorer);
}
};
} }
}; };
} }

View File

@ -23,7 +23,6 @@ import java.io.IOException;
import org.apache.lucene.index.BinaryDocValues; import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.DocValues; import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.SortedDocValues; import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder; import org.apache.lucene.util.BytesRefBuilder;
@ -136,271 +135,11 @@ public abstract class FieldComparator<T> {
} }
} }
/** /**
* Base FieldComparator class for numeric types * Informs the comparator that sort is done on this single field.
* This is useful to enable some optimizations for skipping non-competitive documents.
*/ */
public static abstract class NumericComparator<T extends Number> extends SimpleFieldComparator<T> { public void setSingleSort() {
protected final T missingValue;
protected final String field;
protected NumericDocValues currentReaderValues;
/**
 * Stores the name of the field to sort on and the value to substitute for
 * documents that have no value for that field.
 *
 * @param field name of the field whose doc values are compared
 * @param missingValue value kept in {@link #missingValue} for later use by subclasses
 */
protected NumericComparator(String field, T missingValue) {
  this.missingValue = missingValue;
  this.field = field;
}
/** Loads the numeric doc values of the new segment and makes them the current ones. */
@Override
protected void doSetNextReader(LeafReaderContext context) throws IOException {
  final NumericDocValues segmentValues = getNumericDocValues(context, field);
  this.currentReaderValues = segmentValues;
}
/**
 * Retrieves the NumericDocValues for the field in this segment.
 * This implementation delegates to {@link DocValues#getNumeric} on the reader of
 * the given context; being protected and non-final, it can be overridden by
 * subclasses that obtain their values from another source.
 *
 * @param context the segment to read from
 * @param field name of the doc values field
 * @return the numeric doc values used by {@code doSetNextReader} for this segment
 */
protected NumericDocValues getNumericDocValues(LeafReaderContext context, String field) throws IOException {
return DocValues.getNumeric(context.reader(), field);
}
}
/**
 * Comparator for fields whose values are doubles, read through
 * {@link org.apache.lucene.index.LeafReader#getNumericDocValues} as long bits;
 * sorts by ascending value.
 */
public static class DoubleComparator extends NumericComparator<Double> {
  private final double[] values;
  protected double bottom;
  protected double topValue;

  /**
   * Creates a new comparator based on {@link Double#compare} for {@code numHits}.
   * When a document has no value for the field, {@code missingValue} is substituted;
   * a {@code null} {@code missingValue} is replaced by {@code 0.0}.
   */
  public DoubleComparator(int numHits, String field, Double missingValue) {
    super(field, missingValue == null ? 0.0 : missingValue);
    this.values = new double[numHits];
  }

  /** Returns the value of {@code doc} in the current segment, or the missing value if it has none. */
  private double getValueForDoc(int doc) throws IOException {
    if (currentReaderValues.advanceExact(doc) == false) {
      return missingValue;
    }
    return Double.longBitsToDouble(currentReaderValues.longValue());
  }

  @Override
  public int compare(int slot1, int slot2) {
    final double first = values[slot1];
    final double second = values[slot2];
    return Double.compare(first, second);
  }

  @Override
  public int compareBottom(int doc) throws IOException {
    final double docValue = getValueForDoc(doc);
    return Double.compare(bottom, docValue);
  }

  @Override
  public void copy(int slot, int doc) throws IOException {
    final double docValue = getValueForDoc(doc);
    values[slot] = docValue;
  }

  @Override
  public void setBottom(final int bottom) {
    // the parameter is a slot index; the field caches the value held in that slot
    final double slotValue = values[bottom];
    this.bottom = slotValue;
  }

  @Override
  public void setTopValue(Double value) {
    this.topValue = value;
  }

  @Override
  public Double value(int slot) {
    return values[slot];
  }

  @Override
  public int compareTop(int doc) throws IOException {
    final double docValue = getValueForDoc(doc);
    return Double.compare(topValue, docValue);
  }
}
/**
 * Comparator for fields whose values are floats, read through
 * {@link org.apache.lucene.index.LeafReader#getNumericDocValues(String)} as int bits;
 * sorts by ascending value.
 */
public static class FloatComparator extends NumericComparator<Float> {
  private final float[] values;
  protected float bottom;
  protected float topValue;

  /**
   * Creates a new comparator based on {@link Float#compare} for {@code numHits}.
   * When a document has no value for the field, {@code missingValue} is substituted;
   * a {@code null} {@code missingValue} is replaced by {@code 0.0f}.
   */
  public FloatComparator(int numHits, String field, Float missingValue) {
    super(field, missingValue == null ? 0.0f : missingValue);
    this.values = new float[numHits];
  }

  /** Returns the value of {@code doc} in the current segment, or the missing value if it has none. */
  private float getValueForDoc(int doc) throws IOException {
    if (currentReaderValues.advanceExact(doc) == false) {
      return missingValue;
    }
    final int bits = (int) currentReaderValues.longValue();
    return Float.intBitsToFloat(bits);
  }

  @Override
  public int compare(int slot1, int slot2) {
    final float first = values[slot1];
    final float second = values[slot2];
    return Float.compare(first, second);
  }

  @Override
  public int compareBottom(int doc) throws IOException {
    final float docValue = getValueForDoc(doc);
    return Float.compare(bottom, docValue);
  }

  @Override
  public void copy(int slot, int doc) throws IOException {
    final float docValue = getValueForDoc(doc);
    values[slot] = docValue;
  }

  @Override
  public void setBottom(final int bottom) {
    // the parameter is a slot index; the field caches the value held in that slot
    final float slotValue = values[bottom];
    this.bottom = slotValue;
  }

  @Override
  public void setTopValue(Float value) {
    this.topValue = value;
  }

  @Override
  public Float value(int slot) {
    return values[slot];
  }

  @Override
  public int compareTop(int doc) throws IOException {
    final float docValue = getValueForDoc(doc);
    return Float.compare(topValue, docValue);
  }
}
/**
 * Comparator for fields whose values are ints, read through
 * {@link org.apache.lucene.index.LeafReader#getNumericDocValues(String)};
 * sorts by ascending value.
 */
public static class IntComparator extends NumericComparator<Integer> {
  private final int[] values;
  /** Value of bottom of queue. */
  protected int bottom;
  protected int topValue;

  /**
   * Creates a new comparator based on {@link Integer#compare} for {@code numHits}.
   * When a document has no value for the field, {@code missingValue} is substituted;
   * a {@code null} {@code missingValue} is replaced by {@code 0}.
   */
  public IntComparator(int numHits, String field, Integer missingValue) {
    super(field, missingValue == null ? 0 : missingValue);
    this.values = new int[numHits];
  }

  /** Returns the value of {@code doc} in the current segment, or the missing value if it has none. */
  private int getValueForDoc(int doc) throws IOException {
    if (currentReaderValues.advanceExact(doc) == false) {
      return missingValue;
    }
    return (int) currentReaderValues.longValue();
  }

  @Override
  public int compare(int slot1, int slot2) {
    final int first = values[slot1];
    final int second = values[slot2];
    return Integer.compare(first, second);
  }

  @Override
  public int compareBottom(int doc) throws IOException {
    final int docValue = getValueForDoc(doc);
    return Integer.compare(bottom, docValue);
  }

  @Override
  public void copy(int slot, int doc) throws IOException {
    final int docValue = getValueForDoc(doc);
    values[slot] = docValue;
  }

  @Override
  public void setBottom(final int bottom) {
    // the parameter is a slot index; the field caches the value held in that slot
    final int slotValue = values[bottom];
    this.bottom = slotValue;
  }

  @Override
  public void setTopValue(Integer value) {
    this.topValue = value;
  }

  @Override
  public Integer value(int slot) {
    return values[slot];
  }

  @Override
  public int compareTop(int doc) throws IOException {
    final int docValue = getValueForDoc(doc);
    return Integer.compare(topValue, docValue);
  }
}
/**
 * Comparator for fields whose values are longs, read through
 * {@link org.apache.lucene.index.LeafReader#getNumericDocValues(String)};
 * sorts by ascending value.
 */
public static class LongComparator extends NumericComparator<Long> {
// one collected value per queue slot
private final long[] values;
// value of the slot last passed to setBottom
protected long bottom;
// value last passed to setTopValue
protected long topValue;
/**
 * Creates a new comparator based on {@link Long#compare} for {@code numHits}.
 * When a document has no value for the field, {@code missingValue} is substituted;
 * a {@code null} {@code missingValue} is replaced by {@code 0L}.
 */
public LongComparator(int numHits, String field, Long missingValue) {
super(field, missingValue != null ? missingValue : 0L);
values = new long[numHits];
}
/** Returns the value of {@code doc} in the current segment, or the missing value if it has none. */
private long getValueForDoc(int doc) throws IOException {
if (currentReaderValues.advanceExact(doc)) {
return currentReaderValues.longValue();
} else {
return missingValue;
}
}
/** Compares the values stored in two slots with {@link Long#compare}. */
@Override
public int compare(int slot1, int slot2) {
return Long.compare(values[slot1], values[slot2]);
}
/** Compares the cached bottom value with the value of {@code doc}. */
@Override
public int compareBottom(int doc) throws IOException {
return Long.compare(bottom, getValueForDoc(doc));
}
/** Stores the value of {@code doc} into {@code slot}. */
@Override
public void copy(int slot, int doc) throws IOException {
values[slot] = getValueForDoc(doc);
}
/** Caches the value held in the given slot as the bottom value; the parameter is a slot index. */
@Override
public void setBottom(final int bottom) {
this.bottom = values[bottom];
}
/** Records the top value; a {@code null} argument fails on unboxing. */
@Override
public void setTopValue(Long value) {
topValue = value;
}
/** Returns the boxed value stored in {@code slot}. */
@Override
public Long value(int slot) {
return Long.valueOf(values[slot]);
}
/** Compares the recorded top value with the value of {@code doc}. */
@Override
public int compareTop(int doc) throws IOException {
return Long.compare(topValue, getValueForDoc(doc));
}
} }
/** Sorts by descending relevance. NOTE: if you are /** Sorts by descending relevance. NOTE: if you are
@ -485,69 +224,6 @@ public abstract class FieldComparator<T> {
return Float.compare(docValue, topValue); return Float.compare(docValue, topValue);
} }
} }
/**
 * Sorts by ascending docID. Slots hold index-wide doc ids, i.e. the segment-local
 * doc id plus the {@code docBase} of the segment it was collected from.
 */
public static final class DocComparator extends FieldComparator<Integer> implements LeafFieldComparator {
  private final int[] docIDs;
  private int docBase;
  private int bottom;
  private int topValue;

  /** Creates a new comparator based on document ids for {@code numHits} */
  public DocComparator(int numHits) {
    this.docIDs = new int[numHits];
  }

  @Override
  public int compare(int slot1, int slot2) {
    final int first = docIDs[slot1];
    final int second = docIDs[slot2];
    // Subtraction cannot overflow: docIDs are non-negative
    return first - second;
  }

  @Override
  public int compareBottom(int doc) {
    final int globalDoc = docBase + doc;
    // Subtraction cannot overflow: docIDs are non-negative
    return bottom - globalDoc;
  }

  @Override
  public void copy(int slot, int doc) {
    final int globalDoc = docBase + doc;
    docIDs[slot] = globalDoc;
  }

  @Override
  public LeafFieldComparator getLeafComparator(LeafReaderContext context) {
    // TODO: can we "map" our docIDs to the current
    // reader? saves having to then subtract on every
    // compare call
    docBase = context.docBase;
    return this;
  }

  @Override
  public void setBottom(final int bottom) {
    // the parameter is a slot index; the field caches the doc id held in that slot
    final int slotDoc = docIDs[bottom];
    this.bottom = slotDoc;
  }

  @Override
  public void setTopValue(Integer value) {
    this.topValue = value;
  }

  @Override
  public Integer value(int slot) {
    return docIDs[slot];
  }

  @Override
  public int compareTop(int doc) {
    return Integer.compare(topValue, docBase + doc);
  }

  @Override
  public void setScorer(Scorable scorer) {
    // intentionally empty: none of the comparisons above reads the scorer
  }
}
/** Sorts by field's natural Term sort order, using /** Sorts by field's natural Term sort order, using
* ordinals. This is functionally equivalent to {@link * ordinals. This is functionally equivalent to {@link

View File

@ -58,8 +58,8 @@ public abstract class FieldValueHitQueue<T extends FieldValueHitQueue.Entry> ext
private final int oneReverseMul; private final int oneReverseMul;
private final FieldComparator<?> oneComparator; private final FieldComparator<?> oneComparator;
public OneComparatorFieldValueHitQueue(SortField[] fields, int size, boolean filterNonCompetitiveDocs) { public OneComparatorFieldValueHitQueue(SortField[] fields, int size) {
super(fields, size, filterNonCompetitiveDocs); super(fields, size);
assert fields.length == 1; assert fields.length == 1;
oneComparator = comparators[0]; oneComparator = comparators[0];
@ -95,8 +95,8 @@ public abstract class FieldValueHitQueue<T extends FieldValueHitQueue.Entry> ext
*/ */
private static final class MultiComparatorsFieldValueHitQueue<T extends FieldValueHitQueue.Entry> extends FieldValueHitQueue<T> { private static final class MultiComparatorsFieldValueHitQueue<T extends FieldValueHitQueue.Entry> extends FieldValueHitQueue<T> {
public MultiComparatorsFieldValueHitQueue(SortField[] fields, int size, boolean filterNonCompetitiveDocs) { public MultiComparatorsFieldValueHitQueue(SortField[] fields, int size) {
super(fields, size, filterNonCompetitiveDocs); super(fields, size);
} }
@Override @Override
@ -119,9 +119,9 @@ public abstract class FieldValueHitQueue<T extends FieldValueHitQueue.Entry> ext
} }
} }
// prevent instantiation and extension. // prevent instantiation and extension.
private FieldValueHitQueue(SortField[] fields, int size, boolean filterNonCompetitiveDocs) { private FieldValueHitQueue(SortField[] fields, int size) {
super(size); super(size);
// When we get here, fields.length is guaranteed to be > 0, therefore no // When we get here, fields.length is guaranteed to be > 0, therefore no
// need to check it again. // need to check it again.
@ -136,14 +136,12 @@ public abstract class FieldValueHitQueue<T extends FieldValueHitQueue.Entry> ext
for (int i = 0; i < numComparators; ++i) { for (int i = 0; i < numComparators; ++i) {
SortField field = fields[i]; SortField field = fields[i];
reverseMul[i] = field.reverse ? -1 : 1; reverseMul[i] = field.reverse ? -1 : 1;
if (i == 0 && filterNonCompetitiveDocs) { comparators[i] = field.getComparator(size, i);
// try to rewrite the 1st comparator to the comparator that can skip non-competitive documents }
// skipping functionality is beneficial only for the 1st comparator if (numComparators == 1) {
comparators[i] = FilteringFieldComparator.wrapToFilteringComparator(field.getComparator(size, i), // inform a comparator that sort is based on this single field
field.reverse, numComparators == 1); // to enable some optimizations for skipping over non-competitive documents
} else { comparators[0].setSingleSort();
comparators[i] = field.getComparator(size, i);
}
} }
} }
@ -158,20 +156,17 @@ public abstract class FieldValueHitQueue<T extends FieldValueHitQueue.Entry> ext
* priority first); cannot be <code>null</code> or empty * priority first); cannot be <code>null</code> or empty
* @param size * @param size
* The number of hits to retain. Must be greater than zero. * The number of hits to retain. Must be greater than zero.
* @param filterNonCompetitiveDocs
* {@code true} If comparators should be allowed to filter non-competitive documents, {@code false} otherwise
*/ */
public static <T extends FieldValueHitQueue.Entry> FieldValueHitQueue<T> create(SortField[] fields, int size, public static <T extends FieldValueHitQueue.Entry> FieldValueHitQueue<T> create(SortField[] fields, int size) {
boolean filterNonCompetitiveDocs) {
if (fields.length == 0) { if (fields.length == 0) {
throw new IllegalArgumentException("Sort must contain at least one field"); throw new IllegalArgumentException("Sort must contain at least one field");
} }
if (fields.length == 1) { if (fields.length == 1) {
return new OneComparatorFieldValueHitQueue<>(fields, size, filterNonCompetitiveDocs); return new OneComparatorFieldValueHitQueue<>(fields, size);
} else { } else {
return new MultiComparatorsFieldValueHitQueue<>(fields, size, filterNonCompetitiveDocs); return new MultiComparatorsFieldValueHitQueue<>(fields, size);
} }
} }

View File

@ -1,93 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.search;
import org.apache.lucene.index.LeafReaderContext;
import java.io.IOException;
/**
 * Wraps a {@code FieldComparator} and delegates slot-level operations to it, while
 * requiring subclasses to supply a leaf comparator that can filter non-competitive docs.
 */
abstract class FilteringFieldComparator<T> extends FieldComparator<T> {
  /** The wrapped comparator that performs the actual comparisons. */
  protected final FieldComparator<T> in;
  protected final boolean reverse;
  // singleSort is true, if sort is based on a single sort field. As there are no other sorts configured
  // as tie breakers, we can filter out docs with equal values.
  protected final boolean singleSort;
  /** Becomes {@code true} once {@link #setTopValue} has completed. */
  protected boolean hasTopValue = false;

  public FilteringFieldComparator(FieldComparator<T> in, boolean reverse, boolean singleSort) {
    this.singleSort = singleSort;
    this.reverse = reverse;
    this.in = in;
  }

  @Override
  public abstract FilteringLeafFieldComparator getLeafComparator(LeafReaderContext context) throws IOException;

  @Override
  public int compare(int slot1, int slot2) {
    final int result = in.compare(slot1, slot2);
    return result;
  }

  @Override
  public T value(int slot) {
    final T slotValue = in.value(slot);
    return slotValue;
  }

  @Override
  public void setTopValue(T value) {
    // delegate first, so the flag is only raised when the wrapped comparator accepted the value
    in.setTopValue(value);
    this.hasTopValue = true;
  }

  @Override
  public int compareValues(T first, T second) {
    final int result = in.compareValues(first, second);
    return result;
  }

  /**
   * Try to wrap a given field comparator to add to it a functionality to skip over non-competitive docs.
   * Only comparators whose class is exactly one of the four numeric comparators of
   * {@code FieldComparator} are wrapped; any other comparator (including subclasses) is returned as is.
   * @param comparator comparator to wrap
   * @param reverse if this sort is reverse
   * @param singleSort true if this sort is based on a single field and there are no other sort fields for tie breaking
   * @return comparator wrapped as a filtering comparator or the original comparator if the filtering functionality
   * is not implemented for it
   */
  public static FieldComparator<?> wrapToFilteringComparator(FieldComparator<?> comparator, boolean reverse, boolean singleSort) {
    final Class<?> type = comparator.getClass();
    if (type == FieldComparator.LongComparator.class) {
      final FieldComparator.LongComparator longComparator = (FieldComparator.LongComparator) comparator;
      return new FilteringNumericComparator<>(longComparator, reverse, singleSort);
    } else if (type == FieldComparator.IntComparator.class) {
      final FieldComparator.IntComparator intComparator = (FieldComparator.IntComparator) comparator;
      return new FilteringNumericComparator<>(intComparator, reverse, singleSort);
    } else if (type == FieldComparator.DoubleComparator.class) {
      final FieldComparator.DoubleComparator doubleComparator = (FieldComparator.DoubleComparator) comparator;
      return new FilteringNumericComparator<>(doubleComparator, reverse, singleSort);
    } else if (type == FieldComparator.FloatComparator.class) {
      final FieldComparator.FloatComparator floatComparator = (FieldComparator.FloatComparator) comparator;
      return new FilteringNumericComparator<>(floatComparator, reverse, singleSort);
    } else {
      return comparator;
    }
  }
}

View File

@ -1,39 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.search;
import java.io.IOException;
/**
 * A {@link LeafFieldComparator} that can additionally skip over non-competitive docs.
 * On top of the {@code LeafFieldComparator} contract it declares two methods:
 * {@code competitiveIterator()} and {@code setCanUpdateIterator()}.
 */
public interface FilteringLeafFieldComparator extends LeafFieldComparator {
/**
 * Returns a competitive iterator
 * @return an iterator over competitive docs that are stronger than already collected docs
 * or {@code null} if such an iterator is not available for the current segment.
 * @throws IOException declared for implementations that need to read the index
 */
DocIdSetIterator competitiveIterator() throws IOException;
/**
 * Informs this leaf comparator that it is allowed to start updating its competitive iterator.
 * This method is called from a collector when queue becomes full and threshold is reached.
 * @throws IOException declared for implementations that need to read the index
 */
void setCanUpdateIterator() throws IOException;
}

View File

@ -1,52 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.search;
import org.apache.lucene.index.LeafReaderContext;
import java.io.IOException;
/**
 * A wrapper over {@code NumericComparator} that provides a leaf comparator that can filter non-competitive docs.
 */
class FilteringNumericComparator<T extends Number> extends FilteringFieldComparator<T> {

  public FilteringNumericComparator(NumericComparator<T> in, boolean reverse, boolean singleSort) {
    super(in, reverse, singleSort);
  }

  /**
   * Obtains the leaf comparator of the wrapped comparator and wraps it in the filtering
   * leaf comparator matching its exact class.
   *
   * @throws IllegalStateException if the wrapped leaf comparator is not exactly one of the
   *         long, int, double or float comparators of {@code FieldComparator}
   */
  @Override
  public final FilteringLeafFieldComparator getLeafComparator(LeafReaderContext context) throws IOException {
    final LeafFieldComparator leaf = in.getLeafComparator(context);
    final Class<?> leafClass = leaf.getClass();

    if (leafClass == FieldComparator.LongComparator.class) {
      final FieldComparator.LongComparator longLeaf = (FieldComparator.LongComparator) leaf;
      return new FilteringNumericLeafComparator.FilteringLongLeafComparator(
          longLeaf, context, longLeaf.field, reverse, singleSort, hasTopValue);
    }
    if (leafClass == FieldComparator.IntComparator.class) {
      final FieldComparator.IntComparator intLeaf = (FieldComparator.IntComparator) leaf;
      return new FilteringNumericLeafComparator.FilteringIntLeafComparator(
          intLeaf, context, intLeaf.field, reverse, singleSort, hasTopValue);
    }
    if (leafClass == FieldComparator.DoubleComparator.class) {
      final FieldComparator.DoubleComparator doubleLeaf = (FieldComparator.DoubleComparator) leaf;
      return new FilteringNumericLeafComparator.FilteringDoubleLeafComparator(
          doubleLeaf, context, doubleLeaf.field, reverse, singleSort, hasTopValue);
    }
    if (leafClass == FieldComparator.FloatComparator.class) {
      final FieldComparator.FloatComparator floatLeaf = (FieldComparator.FloatComparator) leaf;
      return new FilteringNumericLeafComparator.FilteringFloatLeafComparator(
          floatLeaf, context, floatLeaf.field, reverse, singleSort, hasTopValue);
    }
    throw new IllegalStateException("Unexpected numeric class of ["+ leafClass + "] for [FieldComparator]!");
  }
}

View File

@ -1,336 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.search;
import org.apache.lucene.document.DoublePoint;
import org.apache.lucene.document.FloatPoint;
import org.apache.lucene.document.IntPoint;
import org.apache.lucene.document.LongPoint;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.PointValues;
import org.apache.lucene.util.DocIdSetBuilder;
import java.io.IOException;
import java.util.Arrays;
/**
* A {@code FilteringLeafFieldComparator} that provides a functionality to skip over non-competitive documents
* for numeric fields indexed with points.
*/
abstract class FilteringNumericLeafComparator implements FilteringLeafFieldComparator {
protected final LeafFieldComparator in;
protected final boolean reverse;
protected final boolean singleSort; //if sort is based on a single sort field as opposed to multiple sort fields
private final boolean hasTopValue;
private final PointValues pointValues;
private final int bytesCount;
private final int maxDoc;
private final byte[] minValueAsBytes;
private final byte[] maxValueAsBytes;
private long iteratorCost;
private int maxDocVisited = 0;
private int updateCounter = 0;
private boolean canUpdateIterator = false; // set to true when queue becomes full and hitsThreshold is reached
private DocIdSetIterator competitiveIterator;
public FilteringNumericLeafComparator(LeafFieldComparator in, LeafReaderContext context, String field,
boolean reverse, boolean singleSort, boolean hasTopValue, int bytesCount) throws IOException {
this.in = in;
this.pointValues = context.reader().getPointValues(field);
this.reverse = reverse;
this.singleSort = singleSort;
this.hasTopValue = hasTopValue;
this.maxDoc = context.reader().maxDoc();
this.bytesCount = bytesCount;
this.maxValueAsBytes = reverse == false ? new byte[bytesCount] : hasTopValue ? new byte[bytesCount] : null;
this.minValueAsBytes = reverse ? new byte[bytesCount] : hasTopValue ? new byte[bytesCount] : null;
// TODO: optimize a case when pointValues are missing only on this segment
this.competitiveIterator = pointValues == null ? null : DocIdSetIterator.all(maxDoc);
this.iteratorCost = maxDoc;
}
/** Forwards the new bottom slot to the wrapped comparator, then refreshes the competitive iterator. */
@Override
public void setBottom(int slot) throws IOException {
  this.in.setBottom(slot);
  // a new bottom changes the bound that competitive docs are checked against
  this.updateCompetitiveIterator();
}
/** Delegates to the wrapped comparator. */
@Override
public int compareBottom(int doc) throws IOException {
  final int result = in.compareBottom(doc);
  return result;
}
/** Delegates to the wrapped comparator. */
@Override
public int compareTop(int doc) throws IOException {
  final int result = in.compareTop(doc);
  return result;
}
/**
 * Delegates to the wrapped comparator and records {@code doc} as the last doc copied,
 * which later lets the point visitor ignore docs at or before it.
 */
@Override
public void copy(int slot, int doc) throws IOException {
  this.in.copy(slot, doc);
  this.maxDocVisited = doc;
}
/**
 * Forwards the scorer to the wrapped comparator. When the scorable is a {@code Scorer},
 * its iterator cost becomes the starting cost and the competitive iterator is refreshed.
 */
@Override
public void setScorer(Scorable scorer) throws IOException {
  in.setScorer(scorer);
  if ((scorer instanceof Scorer) == false) {
    return;
  }
  final Scorer concreteScorer = (Scorer) scorer;
  // starting iterator cost is the scorer's cost
  iteratorCost = concreteScorer.iterator().cost();
  // update an iterator when we have a new segment
  updateCompetitiveIterator();
}
/** Enables updates of the competitive iterator and immediately attempts one. */
@Override
public void setCanUpdateIterator() throws IOException {
  canUpdateIterator = true;
  this.updateCompetitiveIterator();
}
/**
 * Returns a view over the current competitive iterator, or {@code null} when no
 * competitive iterator exists for this segment.
 *
 * <p>The returned view reads the {@code competitiveIterator} field on every call instead of
 * capturing its value, so it follows the replacement iterator installed by later updates.
 *
 * <p>The view's position starts at the wrapped iterator's current position rather than at
 * the field default {@code 0}, so that {@code docID()} does not claim to be positioned on
 * doc 0 before {@code nextDoc()} or {@code advance(int)} has been called.
 */
@Override
public DocIdSetIterator competitiveIterator() {
  if (competitiveIterator == null) return null;
  return new DocIdSetIterator() {
    // mirror the wrapped iterator's position (-1 for a fresh iterator) instead of defaulting to 0
    private int doc = competitiveIterator.docID();

    @Override
    public int nextDoc() throws IOException {
      return doc = competitiveIterator.nextDoc();
    }

    @Override
    public int docID() {
      return doc;
    }

    @Override
    public long cost() {
      return competitiveIterator.cost();
    }

    @Override
    public int advance(int target) throws IOException {
      return doc = competitiveIterator.advance(target);
    }
  };
}
// Rebuilds the competitive iterator so that it includes possibly only docs that are
// "stronger" than the current bottom entry (and, when a top value is set, not beyond it).
// The method is a no-op when updates are not yet allowed, when the segment has no points
// for the field, when docs with a missing value are competitive, when the call is sampled
// out, or when the new range is not selective enough.
private void updateCompetitiveIterator() throws IOException {
if (canUpdateIterator == false) return;
if (pointValues == null) return;
// if some documents have missing points, check that missing values prohibits optimization
if ((pointValues.getDocCount() < maxDoc) && isMissingValueCompetitive()) {
return; // we can't filter out documents, as documents with missing values are competitive
}
updateCounter++;
// after the first 256 calls only every 32nd call (low five bits all set) proceeds
if (updateCounter > 256 && (updateCounter & 0x1f) != 0x1f) { // Start sampling if we get called too much
return;
}
// the bottom value bounds the range on the side the sort moves towards:
// upper bound for ascending sort, lower bound for reverse sort; the top value,
// when present, bounds the opposite side
if (reverse == false) {
encodeBottom(maxValueAsBytes);
if (hasTopValue) {
encodeTop(minValueAsBytes);
}
} else {
encodeBottom(minValueAsBytes);
if (hasTopValue) {
encodeTop(maxValueAsBytes);
}
}
DocIdSetBuilder result = new DocIdSetBuilder(maxDoc);
PointValues.IntersectVisitor visitor = new PointValues.IntersectVisitor() {
DocIdSetBuilder.BulkAdder adder;
@Override
public void grow(int count) {
adder = result.grow(count);
}
// doc-only callback: no value check is performed here, only the position check
@Override
public void visit(int docID) {
if (docID <= maxDocVisited) {
return; // Already visited or skipped
}
adder.add(docID);
}
// doc + value callback: the value is checked against whichever bounds are allocated
@Override
public void visit(int docID, byte[] packedValue) {
if (docID <= maxDocVisited) {
return; // already visited or skipped
}
if (maxValueAsBytes != null) {
int cmp = Arrays.compareUnsigned(packedValue, 0, bytesCount, maxValueAsBytes, 0, bytesCount);
// if doc's value is too high or for single sort even equal, it is not competitive and the doc can be skipped
if (cmp > 0 || (singleSort && cmp == 0)) return;
}
if (minValueAsBytes != null) {
int cmp = Arrays.compareUnsigned(packedValue, 0, bytesCount, minValueAsBytes, 0, bytesCount);
// if doc's value is too low or for single sort even equal, it is not competitive and the doc can be skipped
if (cmp < 0 || (singleSort && cmp == 0)) return;
}
adder.add(docID); // doc is competitive
}
// classifies a cell by its min/max packed values relative to the allocated bounds
@Override
public PointValues.Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
// cell entirely above the upper bound (or touching it, for single sort): nothing competitive inside
if (maxValueAsBytes != null) {
int cmp = Arrays.compareUnsigned(minPackedValue, 0, bytesCount, maxValueAsBytes, 0, bytesCount);
if (cmp > 0 || (singleSort && cmp == 0)) return PointValues.Relation.CELL_OUTSIDE_QUERY;
}
// cell entirely below the lower bound (or touching it, for single sort): nothing competitive inside
if (minValueAsBytes != null) {
int cmp = Arrays.compareUnsigned(maxPackedValue, 0, bytesCount, minValueAsBytes, 0, bytesCount);
if (cmp < 0 || (singleSort && cmp == 0)) return PointValues.Relation.CELL_OUTSIDE_QUERY;
}
// cell extends past either bound: its values must be checked one by one
if ((maxValueAsBytes != null && Arrays.compareUnsigned(maxPackedValue, 0, bytesCount, maxValueAsBytes, 0, bytesCount) > 0) ||
(minValueAsBytes != null && Arrays.compareUnsigned(minPackedValue, 0, bytesCount, minValueAsBytes, 0, bytesCount) < 0)) {
return PointValues.Relation.CELL_CROSSES_QUERY;
}
return PointValues.Relation.CELL_INSIDE_QUERY;
}
};
// one eighth of the current iterator cost
final long threshold = iteratorCost >>> 3;
long estimatedNumberOfMatches = pointValues.estimatePointCount(visitor); // runs in O(log(numPoints))
if (estimatedNumberOfMatches >= threshold) {
// the new range is not selective enough to be worth materializing, it doesn't reduce number of docs at least 8x
return;
}
// materialize the competitive docs and make them the current iterator and cost baseline
pointValues.intersect(visitor);
competitiveIterator = result.build().iterator();
iteratorCost = competitiveIterator.cost();
}
/**
 * Tells whether the comparator's missing value is competitive against the current bottom value.
 * The iterator update code consults this when some documents of the segment have no points:
 * if the missing value is competitive, no documents are filtered out.
 *
 * @return {@code true} if documents without a value are still competitive
 */
protected abstract boolean isMissingValueCompetitive();
/**
 * Encodes the comparator's current bottom value into {@code packedValue}.
 * The iterator update code passes the max-value buffer for a non-reversed sort
 * and the min-value buffer for a reversed sort.
 *
 * @param packedValue destination buffer that receives the encoded bottom value
 */
protected abstract void encodeBottom(byte[] packedValue);
/**
 * Encodes the comparator's top value into {@code packedValue}.
 * The iterator update code only calls this when a top value is present; it passes the min-value buffer
 * for a non-reversed sort and the max-value buffer for a reversed sort.
 *
 * @param packedValue destination buffer that receives the encoded top value
 */
protected abstract void encodeTop(byte[] packedValue);
/**
 * Long flavour of {@link FilteringNumericLeafComparator}: wraps a long leaf comparator and adds
 * the ability to filter out non-competitive docs.
 */
static class FilteringLongLeafComparator extends FilteringNumericLeafComparator {

  /**
   * Hands every argument to the base class, with {@code Long.BYTES} as the final argument.
   */
  public FilteringLongLeafComparator(FieldComparator.LongComparator in, LeafReaderContext context,
                                     String field, boolean reverse, boolean singleSort, boolean hasTopValue) throws IOException {
    super(in, context, field, reverse, singleSort, hasTopValue, Long.BYTES);
  }

  /**
   * For a reversed (desc) sort the missing value is competitive when it is greater than or equal to bottom;
   * for an asc sort it is competitive when it is smaller than or equal to bottom.
   */
  @Override
  protected boolean isMissingValueCompetitive() {
    final FieldComparator.LongComparator wrapped = (FieldComparator.LongComparator) in;
    final int order = Long.compare(wrapped.missingValue, wrapped.bottom);
    if (reverse) {
      return order >= 0;
    }
    return order <= 0;
  }

  /** Writes the wrapped comparator's bottom value into {@code packedValue} at offset 0. */
  @Override
  protected void encodeBottom(byte[] packedValue) {
    final FieldComparator.LongComparator wrapped = (FieldComparator.LongComparator) in;
    LongPoint.encodeDimension(wrapped.bottom, packedValue, 0);
  }

  /** Writes the wrapped comparator's top value into {@code packedValue} at offset 0. */
  @Override
  protected void encodeTop(byte[] packedValue) {
    final FieldComparator.LongComparator wrapped = (FieldComparator.LongComparator) in;
    LongPoint.encodeDimension(wrapped.topValue, packedValue, 0);
  }
}
/**
 * Integer flavour of {@link FilteringNumericLeafComparator}: wraps an integer leaf comparator and adds
 * the ability to filter out non-competitive docs.
 */
static class FilteringIntLeafComparator extends FilteringNumericLeafComparator {

  /**
   * Hands every argument to the base class, with {@code Integer.BYTES} as the final argument.
   */
  public FilteringIntLeafComparator(FieldComparator.IntComparator in, LeafReaderContext context,
                                    String field, boolean reverse, boolean singleSort, boolean hasTopValue) throws IOException {
    super(in, context, field, reverse, singleSort, hasTopValue, Integer.BYTES);
  }

  /**
   * For a reversed (desc) sort the missing value is competitive when it is greater than or equal to bottom;
   * for an asc sort it is competitive when it is smaller than or equal to bottom.
   */
  @Override
  protected boolean isMissingValueCompetitive() {
    final FieldComparator.IntComparator wrapped = (FieldComparator.IntComparator) in;
    final int order = Integer.compare(wrapped.missingValue, wrapped.bottom);
    if (reverse) {
      return order >= 0;
    }
    return order <= 0;
  }

  /** Writes the wrapped comparator's bottom value into {@code packedValue} at offset 0. */
  @Override
  protected void encodeBottom(byte[] packedValue) {
    final FieldComparator.IntComparator wrapped = (FieldComparator.IntComparator) in;
    IntPoint.encodeDimension(wrapped.bottom, packedValue, 0);
  }

  /** Writes the wrapped comparator's top value into {@code packedValue} at offset 0. */
  @Override
  protected void encodeTop(byte[] packedValue) {
    final FieldComparator.IntComparator wrapped = (FieldComparator.IntComparator) in;
    IntPoint.encodeDimension(wrapped.topValue, packedValue, 0);
  }
}
/**
 * Double flavour of {@link FilteringNumericLeafComparator}: wraps a double leaf comparator and adds
 * the ability to filter out non-competitive docs.
 */
static class FilteringDoubleLeafComparator extends FilteringNumericLeafComparator {

  /**
   * Hands every argument to the base class, with {@code Double.BYTES} as the final argument.
   */
  public FilteringDoubleLeafComparator(FieldComparator.DoubleComparator in, LeafReaderContext context,
                                       String field, boolean reverse, boolean singleSort, boolean hasTopValue) throws IOException {
    super(in, context, field, reverse, singleSort, hasTopValue, Double.BYTES);
  }

  /**
   * For a reversed sort the missing value is competitive when it compares greater than or equal to bottom;
   * otherwise it is competitive when it compares smaller than or equal to bottom.
   */
  @Override
  protected boolean isMissingValueCompetitive() {
    final FieldComparator.DoubleComparator wrapped = (FieldComparator.DoubleComparator) in;
    final int order = Double.compare(wrapped.missingValue, wrapped.bottom);
    if (reverse) {
      return order >= 0;
    }
    return order <= 0;
  }

  /** Writes the wrapped comparator's bottom value into {@code packedValue} at offset 0. */
  @Override
  protected void encodeBottom(byte[] packedValue) {
    final FieldComparator.DoubleComparator wrapped = (FieldComparator.DoubleComparator) in;
    DoublePoint.encodeDimension(wrapped.bottom, packedValue, 0);
  }

  /** Writes the wrapped comparator's top value into {@code packedValue} at offset 0. */
  @Override
  protected void encodeTop(byte[] packedValue) {
    final FieldComparator.DoubleComparator wrapped = (FieldComparator.DoubleComparator) in;
    DoublePoint.encodeDimension(wrapped.topValue, packedValue, 0);
  }
}
/**
 * Float flavour of {@link FilteringNumericLeafComparator}: wraps a float leaf comparator and adds
 * the ability to filter out non-competitive docs.
 */
static class FilteringFloatLeafComparator extends FilteringNumericLeafComparator {

  /**
   * Hands every argument to the base class, with {@code Float.BYTES} as the final argument.
   */
  public FilteringFloatLeafComparator(FieldComparator.FloatComparator in, LeafReaderContext context,
                                      String field, boolean reverse, boolean singleSort, boolean hasTopValue) throws IOException {
    super(in, context, field, reverse, singleSort, hasTopValue, Float.BYTES);
  }

  /**
   * For a reversed sort the missing value is competitive when it compares greater than or equal to bottom;
   * otherwise it is competitive when it compares smaller than or equal to bottom.
   */
  @Override
  protected boolean isMissingValueCompetitive() {
    final FieldComparator.FloatComparator wrapped = (FieldComparator.FloatComparator) in;
    final int order = Float.compare(wrapped.missingValue, wrapped.bottom);
    if (reverse) {
      return order >= 0;
    }
    return order <= 0;
  }

  /** Writes the wrapped comparator's bottom value into {@code packedValue} at offset 0. */
  @Override
  protected void encodeBottom(byte[] packedValue) {
    final FieldComparator.FloatComparator wrapped = (FieldComparator.FloatComparator) in;
    FloatPoint.encodeDimension(wrapped.bottom, packedValue, 0);
  }

  /** Writes the wrapped comparator's top value into {@code packedValue} at offset 0. */
  @Override
  protected void encodeTop(byte[] packedValue) {
    final FieldComparator.FloatComparator wrapped = (FieldComparator.FloatComparator) in;
    FloatPoint.encodeDimension(wrapped.topValue, packedValue, 0);
  }
}
}

View File

@ -116,4 +116,20 @@ public interface LeafFieldComparator {
* obtain the current hit's score, if necessary. */ * obtain the current hit's score, if necessary. */
void setScorer(Scorable scorer) throws IOException; void setScorer(Scorable scorer) throws IOException;
/**
 * Returns an iterator over the documents that are still competitive for this leaf comparator.
 * <p>
 * The default implementation returns {@code null}, i.e. it offers no competitive iterator.
 *
 * @return an iterator over competitive docs that are stronger than already collected docs,
 * or {@code null} if such an iterator is not available for the current comparator or segment.
 * @throws IOException declared so that implementations may perform I/O
 */
default DocIdSetIterator competitiveIterator() throws IOException {
return null;
}
/**
 * Informs this leaf comparator that the hits threshold has been reached.
 * This method is called from a collector when the hits threshold is reached.
 * <p>
 * The default implementation does nothing.
 *
 * @throws IOException declared so that implementations may perform I/O
 */
default void setHitsThresholdReached() throws IOException{
}
} }

View File

@ -23,6 +23,7 @@ import java.util.Objects;
import org.apache.lucene.index.DocValues; import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.NumericDocValues; import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.search.comparators.LongComparator;
/** /**
* Base class for producing {@link LongValues} * Base class for producing {@link LongValues}
@ -337,20 +338,26 @@ public abstract class LongValuesSource implements SegmentCacheable {
@Override @Override
public FieldComparator<Long> newComparator(String fieldname, int numHits, public FieldComparator<Long> newComparator(String fieldname, int numHits,
int sortPos, boolean reversed) { int sortPos, boolean reversed) {
return new FieldComparator.LongComparator(numHits, fieldname, missingValue) { return new LongComparator(numHits, fieldname, missingValue, reversed, sortPos) {
LeafReaderContext ctx;
LongValuesHolder holder = new LongValuesHolder();
@Override @Override
protected NumericDocValues getNumericDocValues(LeafReaderContext context, String field) throws IOException { public LeafFieldComparator getLeafComparator(LeafReaderContext context) throws IOException {
ctx = context; LongValuesHolder holder = new LongValuesHolder();
return asNumericDocValues(holder);
}
@Override return new LongComparator.LongLeafComparator(context) {
public void setScorer(Scorable scorer) throws IOException { LeafReaderContext ctx;
holder.values = producer.getValues(ctx, DoubleValuesSource.fromScorer(scorer));
@Override
protected NumericDocValues getNumericDocValues(LeafReaderContext context, String field) {
ctx = context;
return asNumericDocValues(holder);
}
@Override
public void setScorer(Scorable scorer) throws IOException {
holder.values = producer.getValues(ctx, DoubleValuesSource.fromScorer(scorer));
super.setScorer(scorer);
}
};
} }
}; };
} }

View File

@ -89,4 +89,15 @@ final class MultiLeafFieldComparator implements LeafFieldComparator {
} }
} }
/**
 * Forwards the "hits threshold reached" notification to the first comparator only;
 * the remaining comparators are not notified.
 */
@Override
public void setHitsThresholdReached() throws IOException {
// this is needed for skipping functionality that is only relevant for the 1st comparator
firstComparator.setHitsThresholdReached();
}
/**
 * Returns whatever competitive iterator the first comparator provides;
 * the remaining comparators are not consulted.
 */
@Override
public DocIdSetIterator competitiveIterator() throws IOException {
// this is needed for skipping functionality that is only relevant for the 1st comparator
return firstComparator.competitiveIterator();
}
} }

View File

@ -24,6 +24,11 @@ import java.util.Objects;
import org.apache.lucene.index.DocValues; import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.IndexSorter; import org.apache.lucene.index.IndexSorter;
import org.apache.lucene.index.SortFieldProvider; import org.apache.lucene.index.SortFieldProvider;
import org.apache.lucene.search.comparators.DocComparator;
import org.apache.lucene.search.comparators.DoubleComparator;
import org.apache.lucene.search.comparators.FloatComparator;
import org.apache.lucene.search.comparators.IntComparator;
import org.apache.lucene.search.comparators.LongComparator;
import org.apache.lucene.store.DataInput; import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.DataOutput; import org.apache.lucene.store.DataOutput;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
@ -452,19 +457,19 @@ public class SortField {
return new FieldComparator.RelevanceComparator(numHits); return new FieldComparator.RelevanceComparator(numHits);
case DOC: case DOC:
return new FieldComparator.DocComparator(numHits); return new DocComparator(numHits, reverse, sortPos);
case INT: case INT:
return new FieldComparator.IntComparator(numHits, field, (Integer) missingValue); return new IntComparator(numHits, field, (Integer) missingValue, reverse, sortPos);
case FLOAT: case FLOAT:
return new FieldComparator.FloatComparator(numHits, field, (Float) missingValue); return new FloatComparator(numHits, field, (Float) missingValue, reverse, sortPos);
case LONG: case LONG:
return new FieldComparator.LongComparator(numHits, field, (Long) missingValue); return new LongComparator(numHits, field, (Long) missingValue, reverse, sortPos);
case DOUBLE: case DOUBLE:
return new FieldComparator.DoubleComparator(numHits, field, (Double) missingValue); return new DoubleComparator(numHits, field, (Double) missingValue, reverse, sortPos);
case CUSTOM: case CUSTOM:
assert comparatorSource != null; assert comparatorSource != null;

View File

@ -26,6 +26,10 @@ import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.NumericDocValues; import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.SortFieldProvider; import org.apache.lucene.index.SortFieldProvider;
import org.apache.lucene.index.SortedNumericDocValues; import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.search.comparators.DoubleComparator;
import org.apache.lucene.search.comparators.FloatComparator;
import org.apache.lucene.search.comparators.IntComparator;
import org.apache.lucene.search.comparators.LongComparator;
import org.apache.lucene.store.DataInput; import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.DataOutput; import org.apache.lucene.store.DataOutput;
import org.apache.lucene.util.NumericUtils; import org.apache.lucene.util.NumericUtils;
@ -225,32 +229,52 @@ public class SortedNumericSortField extends SortField {
public FieldComparator<?> getComparator(int numHits, int sortPos) { public FieldComparator<?> getComparator(int numHits, int sortPos) {
switch(type) { switch(type) {
case INT: case INT:
return new FieldComparator.IntComparator(numHits, getField(), (Integer) missingValue) { return new IntComparator(numHits, getField(), (Integer) missingValue, reverse, sortPos) {
@Override @Override
protected NumericDocValues getNumericDocValues(LeafReaderContext context, String field) throws IOException { public LeafFieldComparator getLeafComparator(LeafReaderContext context) throws IOException {
return SortedNumericSelector.wrap(DocValues.getSortedNumeric(context.reader(), field), selector, type); return new IntLeafComparator(context) {
} @Override
protected NumericDocValues getNumericDocValues(LeafReaderContext context, String field) throws IOException {
return SortedNumericSelector.wrap(DocValues.getSortedNumeric(context.reader(), field), selector, type);
}
};
}
}; };
case FLOAT: case FLOAT:
return new FieldComparator.FloatComparator(numHits, getField(), (Float) missingValue) { return new FloatComparator(numHits, getField(), (Float) missingValue, reverse, sortPos) {
@Override @Override
protected NumericDocValues getNumericDocValues(LeafReaderContext context, String field) throws IOException { public LeafFieldComparator getLeafComparator(LeafReaderContext context) throws IOException {
return SortedNumericSelector.wrap(DocValues.getSortedNumeric(context.reader(), field), selector, type); return new FloatLeafComparator(context) {
} @Override
protected NumericDocValues getNumericDocValues(LeafReaderContext context, String field) throws IOException {
return SortedNumericSelector.wrap(DocValues.getSortedNumeric(context.reader(), field), selector, type);
}
};
}
}; };
case LONG: case LONG:
return new FieldComparator.LongComparator(numHits, getField(), (Long) missingValue) { return new LongComparator(numHits, getField(), (Long) missingValue, reverse, sortPos) {
@Override @Override
protected NumericDocValues getNumericDocValues(LeafReaderContext context, String field) throws IOException { public LeafFieldComparator getLeafComparator(LeafReaderContext context) throws IOException {
return SortedNumericSelector.wrap(DocValues.getSortedNumeric(context.reader(), field), selector, type); return new LongLeafComparator(context) {
@Override
protected NumericDocValues getNumericDocValues(LeafReaderContext context, String field) throws IOException {
return SortedNumericSelector.wrap(DocValues.getSortedNumeric(context.reader(), field), selector, type);
}
};
} }
}; };
case DOUBLE: case DOUBLE:
return new FieldComparator.DoubleComparator(numHits, getField(), (Double) missingValue) { return new DoubleComparator(numHits, getField(), (Double) missingValue, reverse, sortPos) {
@Override @Override
protected NumericDocValues getNumericDocValues(LeafReaderContext context, String field) throws IOException { public LeafFieldComparator getLeafComparator(LeafReaderContext context) throws IOException {
return SortedNumericSelector.wrap(DocValues.getSortedNumeric(context.reader(), field), selector, type); return new DoubleLeafComparator(context) {
} @Override
protected NumericDocValues getNumericDocValues(LeafReaderContext context, String field) throws IOException {
return SortedNumericSelector.wrap(DocValues.getSortedNumeric(context.reader(), field), selector, type);
}
};
}
}; };
default: default:
throw new AssertionError(); throw new AssertionError();

View File

@ -49,13 +49,10 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
private static abstract class MultiComparatorLeafCollector implements LeafCollector { private static abstract class MultiComparatorLeafCollector implements LeafCollector {
final LeafFieldComparator comparator; final LeafFieldComparator comparator;
final FilteringLeafFieldComparator filteringLeafComparator;
final int reverseMul; final int reverseMul;
Scorable scorer; Scorable scorer;
MultiComparatorLeafCollector(LeafFieldComparator[] comparators, int[] reverseMul) { MultiComparatorLeafCollector(LeafFieldComparator[] comparators, int[] reverseMul) {
this.filteringLeafComparator = comparators[0] instanceof FilteringLeafFieldComparator ?
(FilteringLeafFieldComparator) comparators[0] : null;
if (comparators.length == 1) { if (comparators.length == 1) {
this.reverseMul = reverseMul[0]; this.reverseMul = reverseMul[0];
this.comparator = comparators[0]; this.comparator = comparators[0];
@ -90,12 +87,10 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
if (minScoreAcc != null && (totalHits & minScoreAcc.modInterval) == 0) { if (minScoreAcc != null && (totalHits & minScoreAcc.modInterval) == 0) {
updateGlobalMinCompetitiveScore(scorer); updateGlobalMinCompetitiveScore(scorer);
} }
if (scoreMode.isExhaustive() == false && totalHitsRelation == TotalHits.Relation.EQUAL_TO &&
if (filteringLeafComparator != null && queueFull && hitsThresholdChecker.isThresholdReached()) {
hitsThresholdChecker.isThresholdReached() && totalHitsRelation == TotalHits.Relation.EQUAL_TO) { // for the first time hitsThreshold is reached, notify comparator about this
// for the first time queue becomes full and hitsThreshold is reached, comparator.setHitsThresholdReached();
// notify leaf comparator that its competitive iterator can be updated
filteringLeafComparator.setCanUpdateIterator();
totalHitsRelation = TotalHits.Relation.GREATER_THAN_OR_EQUAL_TO; totalHitsRelation = TotalHits.Relation.GREATER_THAN_OR_EQUAL_TO;
} }
} }
@ -150,25 +145,17 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
if (minScoreAcc != null) { if (minScoreAcc != null) {
updateGlobalMinCompetitiveScore(scorer); updateGlobalMinCompetitiveScore(scorer);
} }
if (filteringLeafComparator != null && queueFull && hitsThresholdChecker.isThresholdReached()) {
// if queue became full and hitsThreshold was reached in previous segments,
// notify this segment's leaf comparator that its competitive iterator can be updated
filteringLeafComparator.setCanUpdateIterator();
totalHitsRelation = TotalHits.Relation.GREATER_THAN_OR_EQUAL_TO;
}
} }
@Override @Override
public DocIdSetIterator competitiveIterator() throws IOException { public DocIdSetIterator competitiveIterator() throws IOException {
if (filteringLeafComparator == null) { return comparator.competitiveIterator();
return null;
} else {
return filteringLeafComparator.competitiveIterator();
}
} }
} }
// TODO: remove this code when all bulk scorers similar to {@code DefaultBulkScorer} use the collector's iterator,
// as early termination should be implemented in the respective comparators and removed from the collector
static boolean canEarlyTerminate(Sort searchSort, Sort indexSort) { static boolean canEarlyTerminate(Sort searchSort, Sort indexSort) {
return canEarlyTerminateOnDocId(searchSort) || return canEarlyTerminateOnDocId(searchSort) ||
canEarlyTerminateOnPrefix(searchSort, indexSort); canEarlyTerminateOnPrefix(searchSort, indexSort);
@ -339,8 +326,7 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
} else { } else {
relevanceComparator = null; relevanceComparator = null;
canSetMinScore = false; canSetMinScore = false;
if (firstComparator instanceof FilteringFieldComparator) { if (hitsThresholdChecker.getHitsThreshold() != Integer.MAX_VALUE) {
assert hitsThresholdChecker.getHitsThreshold() != Integer.MAX_VALUE;
scoreMode = needsScores ? ScoreMode.TOP_DOCS_WITH_SCORES : ScoreMode.TOP_DOCS; scoreMode = needsScores ? ScoreMode.TOP_DOCS_WITH_SCORES : ScoreMode.TOP_DOCS;
} else { } else {
scoreMode = needsScores ? ScoreMode.COMPLETE : ScoreMode.COMPLETE_NO_SCORES; scoreMode = needsScores ? ScoreMode.COMPLETE : ScoreMode.COMPLETE_NO_SCORES;
@ -469,9 +455,7 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
throw new IllegalArgumentException("hitsThresholdChecker should not be null"); throw new IllegalArgumentException("hitsThresholdChecker should not be null");
} }
// here we assume that if hitsThreshold was set, we let a comparator to skip non-competitive docs FieldValueHitQueue<Entry> queue = FieldValueHitQueue.create(sort.fields, numHits);
boolean filterNonCompetitiveDocs = hitsThresholdChecker.getHitsThreshold() == Integer.MAX_VALUE ? false : true;
FieldValueHitQueue<Entry> queue = FieldValueHitQueue.create(sort.fields, numHits, filterNonCompetitiveDocs);
if (after == null) { if (after == null) {
return new SimpleFieldCollector(sort, queue, numHits, hitsThresholdChecker, minScoreAcc); return new SimpleFieldCollector(sort, queue, numHits, hitsThresholdChecker, minScoreAcc);

View File

@ -0,0 +1,185 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.search.comparators;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.FieldComparator;
import org.apache.lucene.search.LeafFieldComparator;
import org.apache.lucene.search.Scorable;
import java.io.IOException;
/**
 * Comparator that sorts by asc _doc.
 * <p>
 * When it is the primary sort and the order is ascending, its leaf comparators additionally expose a
 * competitive iterator that lets the search skip documents which can no longer enter the result.
 */
public class DocComparator extends FieldComparator<Integer> {
  private final int[] docIDs;
  private final boolean enableSkipping; // if skipping functionality should be enabled
  private int bottom;
  private int topValue;
  private boolean topValueSet;
  private boolean bottomValueSet;
  private boolean hitsThresholdReached;

  /**
   * Creates a new comparator based on document ids for {@code numHits}.
   *
   * @param numHits  number of slots to allocate
   * @param reverse  whether the sort order is reversed; skipping is disabled when {@code true}
   * @param sortPost position of this comparator among the sort fields; skipping is only enabled at position 0
   */
  public DocComparator(int numHits, boolean reverse, int sortPost) {
    this.docIDs = new int[numHits];
    // skipping functionality is enabled if we are sorting by _doc in asc order as a primary sort
    this.enableSkipping = (reverse == false && sortPost == 0);
  }

  @Override
  public int compare(int slot1, int slot2) {
    // No overflow risk because docIDs are non-negative
    return docIDs[slot1] - docIDs[slot2];
  }

  @Override
  public LeafFieldComparator getLeafComparator(LeafReaderContext context) {
    // TODO: can we "map" our docIDs to the current
    // reader? saves having to then subtract on every
    // compare call
    return new DocLeafComparator(context);
  }

  @Override
  public void setTopValue(Integer value) {
    topValue = value;
    topValueSet = true;
  }

  @Override
  public Integer value(int slot) {
    return Integer.valueOf(docIDs[slot]);
  }

  /**
   * DocLeafComparator with skipping functionality.
   * When sort by _doc asc, after collecting top N matches and enough hits, the comparator
   * can skip all the following documents.
   * When sort by _doc asc and "top" document is set after which search should start,
   * the comparator provides an iterator that can quickly skip to the desired "top" document.
   */
  private class DocLeafComparator implements LeafFieldComparator {
    private final int docBase;
    private final int minDoc;
    private final int maxDoc;
    private DocIdSetIterator competitiveIterator; // iterator that starts from topValue

    public DocLeafComparator(LeafReaderContext context) {
      this.docBase = context.docBase;
      if (enableSkipping) {
        this.minDoc = topValue + 1;
        this.maxDoc = context.reader().maxDoc();
        this.competitiveIterator = DocIdSetIterator.all(maxDoc);
      } else {
        this.minDoc = -1;
        this.maxDoc = -1;
        this.competitiveIterator = null;
      }
    }

    @Override
    public void setBottom(int slot) {
      bottom = docIDs[slot];
      bottomValueSet = true;
      updateIterator();
    }

    @Override
    public int compareBottom(int doc) {
      // No overflow risk because docIDs are non-negative
      return bottom - (docBase + doc);
    }

    @Override
    public int compareTop(int doc) {
      int docValue = docBase + doc;
      return Integer.compare(topValue, docValue);
    }

    @Override
    public void copy(int slot, int doc) throws IOException {
      docIDs[slot] = docBase + doc;
    }

    @Override
    public void setScorer(Scorable scorer) throws IOException {
      // update an iterator on a new segment
      updateIterator();
    }

    /**
     * Returns a view over the current competitive iterator, or {@code null} when skipping is disabled.
     * The view always delegates to the latest iterator installed by {@link #updateIterator()}.
     */
    @Override
    public DocIdSetIterator competitiveIterator() {
      if (enableSkipping == false) {
        return null;
      } else {
        return new DocIdSetIterator() {
          // Start from the delegate's own position rather than the int default of 0, so that
          // docID() does not claim to be positioned on doc 0 before nextDoc()/advance() was called.
          private int doc = competitiveIterator.docID();

          @Override
          public int nextDoc() throws IOException {
            // Go through advance(): updateIterator() may have swapped in a fresh delegate, and calling
            // nextDoc() on it could return a doc that lies before the current position of this view.
            return advance(doc + 1);
          }

          @Override
          public int docID() {
            return doc;
          }

          @Override
          public long cost() {
            return competitiveIterator.cost();
          }

          @Override
          public int advance(int target) throws IOException {
            return doc = competitiveIterator.advance(target);
          }
        };
      }
    }

    @Override
    public void setHitsThresholdReached() {
      hitsThresholdReached = true;
      updateIterator();
    }

    /**
     * Replaces the competitive iterator once skipping is enabled and the hits threshold was reached:
     * with an empty iterator when a bottom value is set, otherwise (when a top value is set) with an
     * iterator that starts at the first doc after the top value, or an empty one if this segment ends before it.
     */
    private void updateIterator() {
      if (enableSkipping == false || hitsThresholdReached == false) return;
      if (bottomValueSet) {
        // since we've collected top N matches, we can early terminate
        // Currently early termination on _doc is also implemented in TopFieldCollector, but this will be removed
        // once all bulk scorers use collectors' iterators
        competitiveIterator = DocIdSetIterator.empty();
      } else if (topValueSet) {
        // skip to the desired top doc
        if (docBase + maxDoc <= minDoc) {
          competitiveIterator = DocIdSetIterator.empty(); // skip this segment
        } else {
          int segmentMinDoc = Math.max(0, minDoc - docBase);
          competitiveIterator = new MinDocIterator(segmentMinDoc, maxDoc);
        }
      }
    }
  }
}

View File

@ -0,0 +1,117 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.search.comparators;
import org.apache.lucene.document.DoublePoint;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.LeafFieldComparator;
import java.io.IOException;
/**
 * Comparator based on {@link Double#compare} for {@code numHits}.
 * It also provides skipping functionality: an iterator that can skip over non-competitive documents.
 */
public class DoubleComparator extends NumericComparator<Double> {
  private final double[] values;
  protected double topValue;
  protected double bottom;

  /**
   * Creates the comparator with {@code numHits} value slots; a {@code null} missing value is replaced by {@code 0.0}.
   */
  public DoubleComparator(int numHits, String field, Double missingValue, boolean reverse, int sortPos) {
    super(field, missingValue == null ? 0.0 : missingValue, reverse, sortPos, Double.BYTES);
    values = new double[numHits];
  }

  @Override
  public int compare(int slot1, int slot2) {
    final double first = values[slot1];
    final double second = values[slot2];
    return Double.compare(first, second);
  }

  @Override
  public void setTopValue(Double value) {
    super.setTopValue(value);
    topValue = value;
  }

  @Override
  public Double value(int slot) {
    return values[slot];
  }

  @Override
  public LeafFieldComparator getLeafComparator(LeafReaderContext context) throws IOException {
    return new DoubleLeafComparator(context);
  }

  /**
   * Leaf comparator for {@link DoubleComparator} that provides skipping functionality
   */
  public class DoubleLeafComparator extends NumericLeafComparator {

    public DoubleLeafComparator(LeafReaderContext context) throws IOException {
      super(context);
    }

    /** Reads the double stored for {@code doc}, falling back to the missing value when the doc has none. */
    private double getValueForDoc(int doc) throws IOException {
      if (docValues.advanceExact(doc) == false) {
        return missingValue;
      }
      return Double.longBitsToDouble(docValues.longValue());
    }

    @Override
    public void setBottom(int slot) throws IOException {
      // record the new bottom before delegating to the base class
      bottom = values[slot];
      super.setBottom(slot);
    }

    @Override
    public int compareBottom(int doc) throws IOException {
      final double docValue = getValueForDoc(doc);
      return Double.compare(bottom, docValue);
    }

    @Override
    public int compareTop(int doc) throws IOException {
      final double docValue = getValueForDoc(doc);
      return Double.compare(topValue, docValue);
    }

    @Override
    public void copy(int slot, int doc) throws IOException {
      // store the value before delegating to the base class
      values[slot] = getValueForDoc(doc);
      super.copy(slot, doc);
    }

    /**
     * For a reversed sort the missing value is competitive when it compares greater than or equal to bottom;
     * otherwise it is competitive when it compares smaller than or equal to bottom.
     */
    @Override
    protected boolean isMissingValueCompetitive() {
      final int order = Double.compare(missingValue, bottom);
      if (reverse) {
        return order >= 0;
      }
      return order <= 0;
    }

    @Override
    protected void encodeBottom(byte[] packedValue) {
      DoublePoint.encodeDimension(bottom, packedValue, 0);
    }

    @Override
    protected void encodeTop(byte[] packedValue) {
      DoublePoint.encodeDimension(topValue, packedValue, 0);
    }
  }
}

View File

@ -0,0 +1,117 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.search.comparators;
import org.apache.lucene.document.FloatPoint;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.LeafFieldComparator;
import java.io.IOException;
/**
 * Comparator based on {@link Float#compare} for {@code numHits}.
 * It also provides skipping functionality: an iterator that can skip over non-competitive documents.
 */
public class FloatComparator extends NumericComparator<Float> {
  private final float[] values;
  protected float topValue;
  protected float bottom;

  /**
   * Creates the comparator with {@code numHits} value slots; a {@code null} missing value is replaced by {@code 0.0f}.
   */
  public FloatComparator(int numHits, String field, Float missingValue, boolean reverse, int sortPos) {
    super(field, missingValue == null ? 0.0f : missingValue, reverse, sortPos, Float.BYTES);
    values = new float[numHits];
  }

  @Override
  public int compare(int slot1, int slot2) {
    final float first = values[slot1];
    final float second = values[slot2];
    return Float.compare(first, second);
  }

  @Override
  public void setTopValue(Float value) {
    super.setTopValue(value);
    topValue = value;
  }

  @Override
  public Float value(int slot) {
    return values[slot];
  }

  @Override
  public LeafFieldComparator getLeafComparator(LeafReaderContext context) throws IOException {
    return new FloatLeafComparator(context);
  }

  /**
   * Leaf comparator for {@link FloatComparator} that provides skipping functionality
   */
  public class FloatLeafComparator extends NumericLeafComparator {

    public FloatLeafComparator(LeafReaderContext context) throws IOException {
      super(context);
    }

    /** Reads the float stored for {@code doc}, falling back to the missing value when the doc has none. */
    private float getValueForDoc(int doc) throws IOException {
      if (docValues.advanceExact(doc) == false) {
        return missingValue;
      }
      return Float.intBitsToFloat((int) docValues.longValue());
    }

    @Override
    public void setBottom(int slot) throws IOException {
      // record the new bottom before delegating to the base class
      bottom = values[slot];
      super.setBottom(slot);
    }

    @Override
    public int compareBottom(int doc) throws IOException {
      final float docValue = getValueForDoc(doc);
      return Float.compare(bottom, docValue);
    }

    @Override
    public int compareTop(int doc) throws IOException {
      final float docValue = getValueForDoc(doc);
      return Float.compare(topValue, docValue);
    }

    @Override
    public void copy(int slot, int doc) throws IOException {
      // store the value before delegating to the base class
      values[slot] = getValueForDoc(doc);
      super.copy(slot, doc);
    }

    /**
     * For a reversed sort the missing value is competitive when it compares greater than or equal to bottom;
     * otherwise it is competitive when it compares smaller than or equal to bottom.
     */
    @Override
    protected boolean isMissingValueCompetitive() {
      final int order = Float.compare(missingValue, bottom);
      if (reverse) {
        return order >= 0;
      }
      return order <= 0;
    }

    @Override
    protected void encodeBottom(byte[] packedValue) {
      FloatPoint.encodeDimension(bottom, packedValue, 0);
    }

    @Override
    protected void encodeTop(byte[] packedValue) {
      FloatPoint.encodeDimension(topValue, packedValue, 0);
    }
  }
}

View File

@ -0,0 +1,119 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.search.comparators;
import org.apache.lucene.document.IntPoint;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.LeafFieldComparator;
import java.io.IOException;
/**
 * Comparator based on {@link Integer#compare} for {@code numHits}.
 * This comparator provides skipping functionality: an iterator that can skip over non-competitive documents.
 */
public class IntComparator extends NumericComparator<Integer> {
  /** Per-slot values of the hits currently held by the priority queue. */
  private final int[] values;
  protected int topValue;
  protected int bottom;

  /**
   * Creates a comparator over {@code numHits} slots.
   *
   * @param numHits number of slots to allocate
   * @param field name of the field to sort on
   * @param missingValue value used for documents without a value; {@code null} is treated as {@code 0}
   * @param reverse whether the sort is reversed
   * @param sortPos position of this comparator among the sort fields
   */
  public IntComparator(int numHits, String field, Integer missingValue, boolean reverse, int sortPos) {
    super(field, missingValue == null ? 0 : missingValue, reverse, sortPos, Integer.BYTES);
    this.values = new int[numHits];
  }

  @Override
  public int compare(int slot1, int slot2) {
    final int first = values[slot1];
    final int second = values[slot2];
    return Integer.compare(first, second);
  }

  @Override
  public void setTopValue(Integer value) {
    super.setTopValue(value);
    this.topValue = value;
  }

  @Override
  public Integer value(int slot) {
    return values[slot];
  }

  @Override
  public LeafFieldComparator getLeafComparator(LeafReaderContext context) throws IOException {
    return new IntLeafComparator(context);
  }

  /**
   * Leaf comparator for {@link IntComparator} that provides skipping functionality
   */
  public class IntLeafComparator extends NumericLeafComparator {

    public IntLeafComparator(LeafReaderContext context) throws IOException {
      super(context);
    }

    /** Returns the doc's value narrowed to int, or the missing value if the doc has none. */
    private int getValueForDoc(int doc) throws IOException {
      if (docValues.advanceExact(doc) == false) {
        return missingValue;
      }
      return (int) docValues.longValue();
    }

    @Override
    public void setBottom(int slot) throws IOException {
      // bottom must be assigned before delegating: the parent may encode it right away
      bottom = values[slot];
      super.setBottom(slot);
    }

    @Override
    public int compareBottom(int doc) throws IOException {
      final int docValue = getValueForDoc(doc);
      return Integer.compare(bottom, docValue);
    }

    @Override
    public int compareTop(int doc) throws IOException {
      final int docValue = getValueForDoc(doc);
      return Integer.compare(topValue, docValue);
    }

    @Override
    public void copy(int slot, int doc) throws IOException {
      values[slot] = getValueForDoc(doc);
      super.copy(slot, doc);
    }

    @Override
    protected boolean isMissingValueCompetitive() {
      final int cmp = Integer.compare(missingValue, bottom);
      // desc (reverse) sort: a missing value competes when it is >= bottom;
      // asc sort: a missing value competes when it is <= bottom
      if (reverse) {
        return cmp >= 0;
      }
      return cmp <= 0;
    }

    @Override
    protected void encodeBottom(byte[] packedValue) {
      IntPoint.encodeDimension(bottom, packedValue, 0);
    }

    @Override
    protected void encodeTop(byte[] packedValue) {
      IntPoint.encodeDimension(topValue, packedValue, 0);
    }
  }
}

View File

@ -0,0 +1,119 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.search.comparators;
import org.apache.lucene.document.LongPoint;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.LeafFieldComparator;
import java.io.IOException;
/**
 * Comparator based on {@link Long#compare} for {@code numHits}.
 * This comparator provides skipping functionality: an iterator that can skip over non-competitive documents.
 */
public class LongComparator extends NumericComparator<Long> {
  /** Per-slot values of the hits currently held by the priority queue. */
  private final long[] values;
  protected long topValue;
  protected long bottom;

  /**
   * Creates a comparator over {@code numHits} slots.
   *
   * @param numHits number of slots to allocate
   * @param field name of the field to sort on
   * @param missingValue value used for documents without a value; {@code null} is treated as {@code 0L}
   * @param reverse whether the sort is reversed
   * @param sortPos position of this comparator among the sort fields
   */
  public LongComparator(int numHits, String field, Long missingValue, boolean reverse, int sortPos) {
    super(field, missingValue == null ? 0L : missingValue, reverse, sortPos, Long.BYTES);
    this.values = new long[numHits];
  }

  @Override
  public int compare(int slot1, int slot2) {
    final long first = values[slot1];
    final long second = values[slot2];
    return Long.compare(first, second);
  }

  @Override
  public void setTopValue(Long value) {
    super.setTopValue(value);
    this.topValue = value;
  }

  @Override
  public Long value(int slot) {
    return values[slot];
  }

  @Override
  public LeafFieldComparator getLeafComparator(LeafReaderContext context) throws IOException {
    return new LongLeafComparator(context);
  }

  /**
   * Leaf comparator for {@link LongComparator} that provides skipping functionality
   */
  public class LongLeafComparator extends NumericLeafComparator {

    public LongLeafComparator(LeafReaderContext context) throws IOException {
      super(context);
    }

    /** Returns the doc's value, or the missing value if the doc has none. */
    private long getValueForDoc(int doc) throws IOException {
      if (docValues.advanceExact(doc) == false) {
        return missingValue;
      }
      return docValues.longValue();
    }

    @Override
    public void setBottom(int slot) throws IOException {
      // bottom must be assigned before delegating: the parent may encode it right away
      bottom = values[slot];
      super.setBottom(slot);
    }

    @Override
    public int compareBottom(int doc) throws IOException {
      final long docValue = getValueForDoc(doc);
      return Long.compare(bottom, docValue);
    }

    @Override
    public int compareTop(int doc) throws IOException {
      final long docValue = getValueForDoc(doc);
      return Long.compare(topValue, docValue);
    }

    @Override
    public void copy(int slot, int doc) throws IOException {
      values[slot] = getValueForDoc(doc);
      super.copy(slot, doc);
    }

    @Override
    protected boolean isMissingValueCompetitive() {
      final int cmp = Long.compare(missingValue, bottom);
      // desc (reverse) sort: a missing value competes when it is >= bottom;
      // asc sort: a missing value competes when it is <= bottom
      if (reverse) {
        return cmp >= 0;
      }
      return cmp <= 0;
    }

    @Override
    protected void encodeBottom(byte[] packedValue) {
      LongPoint.encodeDimension(bottom, packedValue, 0);
    }

    @Override
    protected void encodeTop(byte[] packedValue) {
      LongPoint.encodeDimension(topValue, packedValue, 0);
    }
  }
}

View File

@ -0,0 +1,66 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.search.comparators;
import org.apache.lucene.search.DocIdSetIterator;
import java.io.IOException;
/**
 * Docs iterator that starts iterating from a configurable minimum document
 */
public class MinDocIterator extends DocIdSetIterator {
  final int segmentMinDoc;
  final int maxDoc;
  int doc = -1;

  /**
   * @param segmentMinDoc first document the iterator lands on when it is positioned for the first time
   * @param maxDoc exclusive upper bound; any position at or beyond it yields {@code NO_MORE_DOCS}
   */
  MinDocIterator(int segmentMinDoc, int maxDoc) {
    this.segmentMinDoc = segmentMinDoc;
    this.maxDoc = maxDoc;
  }

  @Override
  public int docID() {
    return doc;
  }

  @Override
  public int nextDoc() throws IOException {
    final int following = doc + 1;
    return advance(following);
  }

  @Override
  public int advance(int target) throws IOException {
    assert target > doc;
    // only the very first positioning jumps ahead to segmentMinDoc; afterwards target is used as is
    final int candidate = (doc == -1) ? Math.max(target, segmentMinDoc) : target;
    doc = (candidate >= maxDoc) ? NO_MORE_DOCS : candidate;
    return doc;
  }

  @Override
  public long cost() {
    final int remaining = maxDoc - segmentMinDoc;
    return remaining;
  }
}

View File

@ -0,0 +1,253 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.search.comparators;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.PointValues;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.FieldComparator;
import org.apache.lucene.search.LeafFieldComparator;
import org.apache.lucene.search.Scorable;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.util.DocIdSetBuilder;
import java.io.IOException;
import java.util.Arrays;
/**
* Abstract numeric comparator for comparing numeric values.
* This comparator provides a skipping functionality an iterator that can skip over non-competitive documents.
*/
public abstract class NumericComparator<T extends Number> extends FieldComparator<T> {
protected final T missingValue;
protected final String field;
protected final boolean reverse;
protected final boolean primarySort;
private final int bytesCount; // how many bytes are used to encode this number
protected boolean topValueSet;
protected boolean singleSort; // singleSort is true, if sort is based on a single sort field.
protected boolean hitsThresholdReached;
protected boolean queueFull;
protected NumericComparator(String field, T missingValue, boolean reverse, int sortPos, int bytesCount) {
this.field = field;
this.missingValue = missingValue;
this.reverse = reverse;
this.primarySort = (sortPos == 0);
this.bytesCount = bytesCount;
}
@Override
public void setTopValue(T value) {
topValueSet = true;
}
@Override
public void setSingleSort() {
singleSort = true;
}
/**
* Leaf comparator for {@link NumericComparator} that provides skipping functionality
*/
public abstract class NumericLeafComparator implements LeafFieldComparator {
protected final NumericDocValues docValues;
private final PointValues pointValues;
private final boolean enableSkipping; // if skipping functionality should be enabled
private final int maxDoc;
private final byte[] minValueAsBytes;
private final byte[] maxValueAsBytes;
private DocIdSetIterator competitiveIterator;
private long iteratorCost;
private int maxDocVisited = 0;
private int updateCounter = 0;
public NumericLeafComparator(LeafReaderContext context) throws IOException {
this.docValues = getNumericDocValues(context, field);
this.pointValues = primarySort ? context.reader().getPointValues(field) : null;
if (pointValues != null) {
this.enableSkipping = true; // skipping is enabled on primarySort and when points are available
this.maxDoc = context.reader().maxDoc();
this.maxValueAsBytes = reverse == false ? new byte[bytesCount] : topValueSet ? new byte[bytesCount] : null;
this.minValueAsBytes = reverse ? new byte[bytesCount] : topValueSet ? new byte[bytesCount] : null;
this.competitiveIterator = DocIdSetIterator.all(maxDoc);
this.iteratorCost = maxDoc;
} else {
this.enableSkipping = false;
this.maxDoc = 0;
this.maxValueAsBytes = null;
this.minValueAsBytes = null;
}
}
/** Retrieves the NumericDocValues for the field in this segment */
protected NumericDocValues getNumericDocValues(LeafReaderContext context, String field) throws IOException {
return DocValues.getNumeric(context.reader(), field);
}
@Override
public void setBottom(int slot) throws IOException {
queueFull = true; // if we are setting bottom, it means that we have collected enough hits
updateCompetitiveIterator(); // update an iterator if we set a new bottom
}
@Override
public void copy(int slot, int doc) throws IOException {
maxDocVisited = doc;
}
@Override
public void setScorer(Scorable scorer) throws IOException {
if (scorer instanceof Scorer) {
iteratorCost = ((Scorer) scorer).iterator().cost(); // starting iterator cost is the scorer's cost
updateCompetitiveIterator(); // update an iterator when we have a new segment
}
}
@Override
public void setHitsThresholdReached() throws IOException {
hitsThresholdReached = true;
updateCompetitiveIterator();
}
// update its iterator to include possibly only docs that are "stronger" than the current bottom entry
private void updateCompetitiveIterator() throws IOException {
if (enableSkipping == false || hitsThresholdReached == false || queueFull == false) return;
// if some documents have missing points, check that missing values prohibits optimization
if ((pointValues.getDocCount() < maxDoc) && isMissingValueCompetitive()) {
return; // we can't filter out documents, as documents with missing values are competitive
}
updateCounter++;
if (updateCounter > 256 && (updateCounter & 0x1f) != 0x1f) { // Start sampling if we get called too much
return;
}
if (reverse == false) {
encodeBottom(maxValueAsBytes);
if (topValueSet) {
encodeTop(minValueAsBytes);
}
} else {
encodeBottom(minValueAsBytes);
if (topValueSet) {
encodeTop(maxValueAsBytes);
}
}
DocIdSetBuilder result = new DocIdSetBuilder(maxDoc);
PointValues.IntersectVisitor visitor = new PointValues.IntersectVisitor() {
DocIdSetBuilder.BulkAdder adder;
@Override
public void grow(int count) {
adder = result.grow(count);
}
@Override
public void visit(int docID) {
if (docID <= maxDocVisited) {
return; // Already visited or skipped
}
adder.add(docID);
}
@Override
public void visit(int docID, byte[] packedValue) {
if (docID <= maxDocVisited) {
return; // already visited or skipped
}
if (maxValueAsBytes != null) {
int cmp = Arrays.compareUnsigned(packedValue, 0, bytesCount, maxValueAsBytes, 0, bytesCount);
// if doc's value is too high or for single sort even equal, it is not competitive and the doc can be skipped
if (cmp > 0 || (singleSort && cmp == 0)) return;
}
if (minValueAsBytes != null) {
int cmp = Arrays.compareUnsigned(packedValue, 0, bytesCount, minValueAsBytes, 0, bytesCount);
// if doc's value is too low or for single sort even equal, it is not competitive and the doc can be skipped
if (cmp < 0 || (singleSort && cmp == 0)) return;
}
adder.add(docID); // doc is competitive
}
@Override
public PointValues.Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
if (maxValueAsBytes != null) {
int cmp = Arrays.compareUnsigned(minPackedValue, 0, bytesCount, maxValueAsBytes, 0, bytesCount);
if (cmp > 0 || (singleSort && cmp == 0)) return PointValues.Relation.CELL_OUTSIDE_QUERY;
}
if (minValueAsBytes != null) {
int cmp = Arrays.compareUnsigned(maxPackedValue, 0, bytesCount, minValueAsBytes, 0, bytesCount);
if (cmp < 0 || (singleSort && cmp == 0)) return PointValues.Relation.CELL_OUTSIDE_QUERY;
}
if ((maxValueAsBytes != null && Arrays.compareUnsigned(maxPackedValue, 0, bytesCount, maxValueAsBytes, 0, bytesCount) > 0) ||
(minValueAsBytes != null && Arrays.compareUnsigned(minPackedValue, 0, bytesCount, minValueAsBytes, 0, bytesCount) < 0)) {
return PointValues.Relation.CELL_CROSSES_QUERY;
}
return PointValues.Relation.CELL_INSIDE_QUERY;
}
};
final long threshold = iteratorCost >>> 3;
long estimatedNumberOfMatches = pointValues.estimatePointCount(visitor); // runs in O(log(numPoints))
if (estimatedNumberOfMatches >= threshold) {
// the new range is not selective enough to be worth materializing, it doesn't reduce number of docs at least 8x
return;
}
pointValues.intersect(visitor);
competitiveIterator = result.build().iterator();
iteratorCost = competitiveIterator.cost();
}
@Override
public DocIdSetIterator competitiveIterator() {
if (enableSkipping == false) return null;
return new DocIdSetIterator() {
private int doc;
@Override
public int nextDoc() throws IOException {
return doc = competitiveIterator.nextDoc();
}
@Override
public int docID() {
return doc;
}
@Override
public long cost() {
return competitiveIterator.cost();
}
@Override
public int advance(int target) throws IOException {
return doc = competitiveIterator.advance(target);
}
};
}
protected abstract boolean isMissingValueCompetitive();
protected abstract void encodeBottom(byte[] packedValue);
protected abstract void encodeTop(byte[] packedValue);
}
}

View File

@ -0,0 +1,23 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* Comparators, used to compare hits so as to determine their
* sort order when collecting the top results with
* {@link org.apache.lucene.search.TopFieldCollector}.
*/
package org.apache.lucene.search.comparators;

View File

@ -17,20 +17,25 @@
package org.apache.lucene.search; package org.apache.lucene.search;
import org.apache.lucene.document.Document; import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FloatDocValuesField; import org.apache.lucene.document.FloatDocValuesField;
import org.apache.lucene.document.LongPoint; import org.apache.lucene.document.LongPoint;
import org.apache.lucene.document.IntPoint; import org.apache.lucene.document.IntPoint;
import org.apache.lucene.document.FloatPoint; import org.apache.lucene.document.FloatPoint;
import org.apache.lucene.document.NumericDocValuesField; import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.Directory; import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.LuceneTestCase;
import java.io.IOException; import java.io.IOException;
import static org.apache.lucene.search.SortField.FIELD_DOC;
import static org.apache.lucene.search.SortField.FIELD_SCORE; import static org.apache.lucene.search.SortField.FIELD_SCORE;
public class TestFieldSortOptimizationSkipping extends LuceneTestCase { public class TestFieldSortOptimizationSkipping extends LuceneTestCase {
@ -97,6 +102,14 @@ public class TestFieldSortOptimizationSkipping extends LuceneTestCase {
assertTrue(topDocs.totalHits.value < numDocs); assertTrue(topDocs.totalHits.value < numDocs);
} }
{ // test that if numeric field is a secondary sort, no optimization is run
final TopFieldCollector collector = TopFieldCollector.create(new Sort(FIELD_SCORE, sortField), numHits, null, totalHitsThreshold);
searcher.search(new MatchAllDocsQuery(), collector);
TopDocs topDocs = collector.topDocs();
assertEquals(topDocs.scoreDocs.length, numHits);
assertEquals(topDocs.totalHits.value, numDocs); // assert that all documents were collected => optimization was not run
}
writer.close(); writer.close();
reader.close(); reader.close();
dir.close(); dir.close();
@ -290,5 +303,138 @@ public class TestFieldSortOptimizationSkipping extends LuceneTestCase {
dir.close(); dir.close();
} }
/**
 * Sorting by {@code _doc} (alone or followed by {@code _score}) together with a search-after
 * document is expected to terminate early and count only a few hits, while a reversed
 * {@code _doc} sort is expected to count every document.
 */
public void testDocSortOptimizationWithAfter() throws IOException {
  // try-with-resources releases directory, writer and reader even when an assertion below fails
  try (Directory dir = newDirectory();
       IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig())) {
    final int numDocs = atLeast(150);
    for (int i = 0; i < numDocs; ++i) {
      final Document doc = new Document();
      writer.addDocument(doc);
      if ((i > 0) && (i % 50 == 0)) {
        writer.commit();
      }
    }

    try (IndexReader reader = DirectoryReader.open(writer)) {
      IndexSearcher searcher = new IndexSearcher(reader);
      final int numHits = 3;
      final int totalHitsThreshold = 3;
      final int[] searchAfters = {10, 140, numDocs - 4};

      for (int searchAfter : searchAfters) {
        // sort by _doc with search after should trigger optimization
        {
          final Sort sort = new Sort(FIELD_DOC);
          FieldDoc after = new FieldDoc(searchAfter, Float.NaN, new Integer[]{searchAfter});
          final TopFieldCollector collector = TopFieldCollector.create(sort, numHits, after, totalHitsThreshold);
          searcher.search(new MatchAllDocsQuery(), collector);
          TopDocs topDocs = collector.topDocs();
          assertEquals(numHits, topDocs.scoreDocs.length);
          for (int i = 0; i < numHits; i++) {
            int expectedDocID = searchAfter + 1 + i;
            assertEquals(expectedDocID, topDocs.scoreDocs[i].doc);
          }
          assertTrue(collector.isEarlyTerminated());
          // check that very few docs were collected
          assertTrue(topDocs.totalHits.value < 10);
        }

        // sort by _doc + _score with search after should trigger optimization
        {
          final Sort sort = new Sort(FIELD_DOC, FIELD_SCORE);
          FieldDoc after = new FieldDoc(searchAfter, Float.NaN, new Object[]{searchAfter, 1.0f});
          final TopFieldCollector collector = TopFieldCollector.create(sort, numHits, after, totalHitsThreshold);
          searcher.search(new MatchAllDocsQuery(), collector);
          TopDocs topDocs = collector.topDocs();
          assertEquals(numHits, topDocs.scoreDocs.length);
          for (int i = 0; i < numHits; i++) {
            int expectedDocID = searchAfter + 1 + i;
            assertEquals(expectedDocID, topDocs.scoreDocs[i].doc);
          }
          assertTrue(collector.isEarlyTerminated());
          // assert that very few docs were collected
          assertTrue(topDocs.totalHits.value < 10);
        }

        // sort by _doc desc should not trigger optimization
        {
          final Sort sort = new Sort(new SortField(null, SortField.Type.DOC, true));
          FieldDoc after = new FieldDoc(searchAfter, Float.NaN, new Integer[]{searchAfter});
          final TopFieldCollector collector = TopFieldCollector.create(sort, numHits, after, totalHitsThreshold);
          searcher.search(new MatchAllDocsQuery(), collector);
          TopDocs topDocs = collector.topDocs();
          assertEquals(numHits, topDocs.scoreDocs.length);
          for (int i = 0; i < numHits; i++) {
            int expectedDocID = searchAfter - 1 - i;
            assertEquals(expectedDocID, topDocs.scoreDocs[i].doc);
          }
          // assert that all documents were collected
          assertEquals(numDocs, topDocs.totalHits.value);
        }
      }
    }
  }
}
/**
 * Sorting by {@code _doc} without a search-after document is expected to terminate early and
 * count only a few hits, both for a match-all query and for a boolean query that combines a
 * point range with a term clause.
 */
public void testDocSortOptimization() throws IOException {
  // try-with-resources releases directory, writer and reader even when an assertion below fails
  try (Directory dir = newDirectory();
       IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig())) {
    final int numDocs = atLeast(100);
    int seg = 1;
    for (int i = 0; i < numDocs; ++i) {
      final Document doc = new Document();
      doc.add(new LongPoint("lf", i));
      doc.add(new StoredField("slf", i));
      doc.add(new StringField("tf", "seg" + seg, Field.Store.YES));
      writer.addDocument(doc);
      if ((i > 0) && (i % 50 == 0)) {
        // documents added after this commit are tagged with the next segment label
        writer.commit();
        seg++;
      }
    }

    try (IndexReader reader = DirectoryReader.open(writer)) {
      IndexSearcher searcher = new IndexSearcher(reader);
      final int numHits = 3;
      final int totalHitsThreshold = 3;
      final Sort sort = new Sort(FIELD_DOC);

      // sort by _doc should skip all non-competitive documents
      {
        final TopFieldCollector collector = TopFieldCollector.create(sort, numHits, null, totalHitsThreshold);
        searcher.search(new MatchAllDocsQuery(), collector);
        TopDocs topDocs = collector.topDocs();
        assertEquals(numHits, topDocs.scoreDocs.length);
        for (int i = 0; i < numHits; i++) {
          assertEquals(i, topDocs.scoreDocs[i].doc);
        }
        assertTrue(collector.isEarlyTerminated());
        assertTrue(topDocs.totalHits.value < 10); // assert that very few docs were collected
      }

      // sort by _doc with a bool query should skip all non-competitive documents
      {
        final TopFieldCollector collector = TopFieldCollector.create(sort, numHits, null, totalHitsThreshold);
        int lowerRange = 40;
        BooleanQuery.Builder bq = new BooleanQuery.Builder();
        bq.add(LongPoint.newRangeQuery("lf", lowerRange, Long.MAX_VALUE), BooleanClause.Occur.MUST);
        bq.add(new TermQuery(new Term("tf", "seg1")), BooleanClause.Occur.MUST);
        searcher.search(bq.build(), collector);

        TopDocs topDocs = collector.topDocs();
        assertEquals(numHits, topDocs.scoreDocs.length);
        for (int i = 0; i < numHits; i++) {
          Document d = searcher.doc(topDocs.scoreDocs[i].doc);
          assertEquals(Integer.toString(i + lowerRange), d.get("slf"));
          assertEquals("seg1", d.get("tf"));
        }
        assertTrue(collector.isEarlyTerminated());
        assertTrue(topDocs.totalHits.value < 10); // assert that very few docs were collected
      }
    }
  }
}
} }

View File

@ -76,7 +76,7 @@ public class TestNeedsScores extends LuceneTestCase {
/** when not sorting by score */ /** when not sorting by score */
public void testSortByField() throws Exception { public void testSortByField() throws Exception {
Query query = new AssertNeedsScores(new MatchAllDocsQuery(), ScoreMode.COMPLETE_NO_SCORES); Query query = new AssertNeedsScores(new MatchAllDocsQuery(), ScoreMode.TOP_DOCS);
assertEquals(5, searcher.search(query, 5, Sort.INDEXORDER).totalHits.value); assertEquals(5, searcher.search(query, 5, Sort.INDEXORDER).totalHits.value);
} }

View File

@ -26,7 +26,12 @@ import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedNumericDocValues; import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.index.SortedSetDocValues; import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.search.FieldComparator; import org.apache.lucene.search.FieldComparator;
import org.apache.lucene.search.LeafFieldComparator;
import org.apache.lucene.search.SortField; import org.apache.lucene.search.SortField;
import org.apache.lucene.search.comparators.DoubleComparator;
import org.apache.lucene.search.comparators.FloatComparator;
import org.apache.lucene.search.comparators.IntComparator;
import org.apache.lucene.search.comparators.LongComparator;
import org.apache.lucene.util.BitSet; import org.apache.lucene.util.BitSet;
import org.apache.lucene.util.NumericUtils; import org.apache.lucene.util.NumericUtils;
@ -94,13 +99,13 @@ public class ToParentBlockJoinSortField extends SortField {
case STRING: case STRING:
return getStringComparator(numHits); return getStringComparator(numHits);
case DOUBLE: case DOUBLE:
return getDoubleComparator(numHits); return getDoubleComparator(numHits, sortPos);
case FLOAT: case FLOAT:
return getFloatComparator(numHits); return getFloatComparator(numHits, sortPos);
case LONG: case LONG:
return getLongComparator(numHits); return getLongComparator(numHits, sortPos);
case INT: case INT:
return getIntComparator(numHits); return getIntComparator(numHits, sortPos);
default: default:
throw new UnsupportedOperationException("Sort type " + getType() + " is not supported"); throw new UnsupportedOperationException("Sort type " + getType() + " is not supported");
} }
@ -126,84 +131,104 @@ public class ToParentBlockJoinSortField extends SortField {
}; };
} }
private FieldComparator<?> getIntComparator(int numHits) { private FieldComparator<?> getIntComparator(int numHits, int sortPos) {
return new FieldComparator.IntComparator(numHits, getField(), (Integer) missingValue) { return new IntComparator(numHits, getField(), (Integer) missingValue, getReverse(), sortPos) {
@Override @Override
protected NumericDocValues getNumericDocValues(LeafReaderContext context, String field) throws IOException { public LeafFieldComparator getLeafComparator(LeafReaderContext context) throws IOException {
SortedNumericDocValues sortedNumeric = DocValues.getSortedNumeric(context.reader(), field); return new IntLeafComparator(context) {
final BlockJoinSelector.Type type = order
? BlockJoinSelector.Type.MAX
: BlockJoinSelector.Type.MIN;
final BitSet parents = parentFilter.getBitSet(context);
final BitSet children = childFilter.getBitSet(context);
if (children == null) {
return DocValues.emptyNumeric();
}
return BlockJoinSelector.wrap(sortedNumeric, type, parents, toIter(children));
}
};
}
private FieldComparator<?> getLongComparator(int numHits) {
return new FieldComparator.LongComparator(numHits, getField(), (Long) missingValue) {
@Override
protected NumericDocValues getNumericDocValues(LeafReaderContext context, String field) throws IOException {
SortedNumericDocValues sortedNumeric = DocValues.getSortedNumeric(context.reader(), field);
final BlockJoinSelector.Type type = order
? BlockJoinSelector.Type.MAX
: BlockJoinSelector.Type.MIN;
final BitSet parents = parentFilter.getBitSet(context);
final BitSet children = childFilter.getBitSet(context);
if (children == null) {
return DocValues.emptyNumeric();
}
return BlockJoinSelector.wrap(sortedNumeric, type, parents, toIter(children));
}
};
}
private FieldComparator<?> getFloatComparator(int numHits) {
return new FieldComparator.FloatComparator(numHits, getField(), (Float) missingValue) {
@Override
protected NumericDocValues getNumericDocValues(LeafReaderContext context, String field) throws IOException {
SortedNumericDocValues sortedNumeric = DocValues.getSortedNumeric(context.reader(), field);
final BlockJoinSelector.Type type = order
? BlockJoinSelector.Type.MAX
: BlockJoinSelector.Type.MIN;
final BitSet parents = parentFilter.getBitSet(context);
final BitSet children = childFilter.getBitSet(context);
if (children == null) {
return DocValues.emptyNumeric();
}
return new FilterNumericDocValues(BlockJoinSelector.wrap(sortedNumeric, type, parents, toIter(children))) {
@Override @Override
public long longValue() throws IOException { protected NumericDocValues getNumericDocValues(LeafReaderContext context, String field) throws IOException {
// undo the numericutils sortability SortedNumericDocValues sortedNumeric = DocValues.getSortedNumeric(context.reader(), field);
return NumericUtils.sortableFloatBits((int) super.longValue()); final BlockJoinSelector.Type type = order
? BlockJoinSelector.Type.MAX
: BlockJoinSelector.Type.MIN;
final BitSet parents = parentFilter.getBitSet(context);
final BitSet children = childFilter.getBitSet(context);
if (children == null) {
return DocValues.emptyNumeric();
}
return BlockJoinSelector.wrap(sortedNumeric, type, parents, toIter(children));
} }
}; };
} }
}; };
} }
private FieldComparator<?> getDoubleComparator(int numHits) { private FieldComparator<?> getLongComparator(int numHits, int sortPos) {
return new FieldComparator.DoubleComparator(numHits, getField(), (Double) missingValue) { return new LongComparator(numHits, getField(), (Long) missingValue, getReverse(), sortPos) {
@Override @Override
protected NumericDocValues getNumericDocValues(LeafReaderContext context, String field) throws IOException { public LeafFieldComparator getLeafComparator(LeafReaderContext context) throws IOException {
SortedNumericDocValues sortedNumeric = DocValues.getSortedNumeric(context.reader(), field); return new LongLeafComparator(context) {
final BlockJoinSelector.Type type = order
? BlockJoinSelector.Type.MAX
: BlockJoinSelector.Type.MIN;
final BitSet parents = parentFilter.getBitSet(context);
final BitSet children = childFilter.getBitSet(context);
if (children == null) {
return DocValues.emptyNumeric();
}
return new FilterNumericDocValues(BlockJoinSelector.wrap(sortedNumeric, type, parents, toIter(children))) {
@Override @Override
public long longValue() throws IOException { protected NumericDocValues getNumericDocValues(LeafReaderContext context, String field) throws IOException {
// undo the numericutils sortability SortedNumericDocValues sortedNumeric = DocValues.getSortedNumeric(context.reader(), field);
return NumericUtils.sortableDoubleBits(super.longValue()); final BlockJoinSelector.Type type = order
? BlockJoinSelector.Type.MAX
: BlockJoinSelector.Type.MIN;
final BitSet parents = parentFilter.getBitSet(context);
final BitSet children = childFilter.getBitSet(context);
if (children == null) {
return DocValues.emptyNumeric();
}
return BlockJoinSelector.wrap(sortedNumeric, type, parents, toIter(children));
}
};
}
};
}
/**
 * Creates the comparator used when this sort field holds float values.
 *
 * <p>The returned {@code FloatComparator} overrides {@code getLeafComparator} so that each
 * per-segment comparator reads its values through {@code BlockJoinSelector.wrap}, using the
 * parent and child bit sets of that segment, instead of reading the field directly.
 *
 * @param numHits forwarded unchanged to the {@code FloatComparator} constructor
 * @param sortPos forwarded unchanged to the {@code FloatComparator} constructor
 * @return a float comparator whose leaf comparators read block-join selected values
 */
private FieldComparator<?> getFloatComparator(int numHits, int sortPos) {
  return new FloatComparator(numHits, getField(), (Float) missingValue, getReverse(), sortPos) {
    @Override
    public LeafFieldComparator getLeafComparator(LeafReaderContext context) throws IOException {
      return new FloatLeafComparator(context) {
        @Override
        protected NumericDocValues getNumericDocValues(LeafReaderContext leafContext, String field) throws IOException {
          final SortedNumericDocValues values = DocValues.getSortedNumeric(leafContext.reader(), field);

          // The selector type follows the `order` flag of the enclosing sort field.
          final BlockJoinSelector.Type selection;
          if (order) {
            selection = BlockJoinSelector.Type.MAX;
          } else {
            selection = BlockJoinSelector.Type.MIN;
          }

          final BitSet parentBits = parentFilter.getBitSet(leafContext);
          final BitSet childBits = childFilter.getBitSet(leafContext);
          if (childBits == null) {
            // No child bit set for this segment: expose an empty doc-values instance.
            return DocValues.emptyNumeric();
          }

          final NumericDocValues selected =
              BlockJoinSelector.wrap(values, selection, parentBits, toIter(childBits));
          return new FilterNumericDocValues(selected) {
            @Override
            public long longValue() throws IOException {
              // Reverse the sortable encoding applied by NumericUtils to the stored float bits.
              final int sortableBits = (int) super.longValue();
              return NumericUtils.sortableFloatBits(sortableBits);
            }
          };
        }
      };
    }
  };
}
private FieldComparator<?> getDoubleComparator(int numHits, int sortPost) {
return new DoubleComparator(numHits, getField(), (Double) missingValue, getReverse(), sortPost) {
@Override
public LeafFieldComparator getLeafComparator(LeafReaderContext context) throws IOException {
return new DoubleLeafComparator(context) {
@Override
protected NumericDocValues getNumericDocValues(LeafReaderContext context, String field) throws IOException {
SortedNumericDocValues sortedNumeric = DocValues.getSortedNumeric(context.reader(), field);
final BlockJoinSelector.Type type = order
? BlockJoinSelector.Type.MAX
: BlockJoinSelector.Type.MIN;
final BitSet parents = parentFilter.getBitSet(context);
final BitSet children = childFilter.getBitSet(context);
if (children == null) {
return DocValues.emptyNumeric();
}
return new FilterNumericDocValues(BlockJoinSelector.wrap(sortedNumeric, type, parents, toIter(children))) {
@Override
public long longValue() throws IOException {
// undo the numericutils sortability
return NumericUtils.sortableDoubleBits(super.longValue());
}
};
} }
}; };
} }