mirror of https://github.com/apache/lucene.git
LUCENE-9449 Skip docs with _doc sort and "after" (#1725)
- Enhance DocComparator to provide an iterator over competitive documents when searching with "after". This iterator can quickly position on the desired "after" document, skipping all documents and segments before "after".
- Redesign numeric comparators to provide skipping functionality by default.

Relates to LUCENE-9280.
This commit is contained in:
parent
98e55f0ea8
commit
99220677fe
|
@ -122,6 +122,13 @@ Improvements
|
|||
|
||||
* LUCENE-9313: Add SerbianAnalyzer based on the snowball stemmer. (Dragan Ivanovic)
|
||||
|
||||
* LUCENE-9449: Enhance DocComparator to provide an iterator over competitive
|
||||
documents when searching with "after". This iterator can quickly position
|
||||
on the desired "after" document skipping all documents and segments before
|
||||
"after". Also redesign numeric comparators to provide skipping functionality
|
||||
by default. (Mayya Sharipova, Jim Ferenczi)
|
||||
|
||||
|
||||
Bug fixes
|
||||
|
||||
* LUCENE-8663: NRTCachingDirectory.slowFileExists may open a file while
|
||||
|
|
|
@ -26,6 +26,7 @@ import org.apache.lucene.index.DocValues;
|
|||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.NumericDocValues;
|
||||
import org.apache.lucene.search.comparators.DoubleComparator;
|
||||
|
||||
/**
|
||||
* Base class for producing {@link DoubleValues}
|
||||
|
@ -488,13 +489,16 @@ public abstract class DoubleValuesSource implements SegmentCacheable {
|
|||
@Override
|
||||
public FieldComparator<Double> newComparator(String fieldname, int numHits,
|
||||
int sortPos, boolean reversed) {
|
||||
return new FieldComparator.DoubleComparator(numHits, fieldname, missingValue){
|
||||
|
||||
LeafReaderContext ctx;
|
||||
return new DoubleComparator(numHits, fieldname, missingValue, reversed, sortPos) {
|
||||
@Override
|
||||
public LeafFieldComparator getLeafComparator(LeafReaderContext context) throws IOException {
|
||||
DoubleValuesHolder holder = new DoubleValuesHolder();
|
||||
|
||||
return new DoubleComparator.DoubleLeafComparator(context) {
|
||||
LeafReaderContext ctx;
|
||||
|
||||
@Override
|
||||
protected NumericDocValues getNumericDocValues(LeafReaderContext context, String field) throws IOException {
|
||||
protected NumericDocValues getNumericDocValues(LeafReaderContext context, String field) {
|
||||
ctx = context;
|
||||
return asNumericDocValues(holder, Double::doubleToLongBits);
|
||||
}
|
||||
|
@ -502,6 +506,9 @@ public abstract class DoubleValuesSource implements SegmentCacheable {
|
|||
@Override
|
||||
public void setScorer(Scorable scorer) throws IOException {
|
||||
holder.values = producer.getValues(ctx, fromScorer(scorer));
|
||||
super.setScorer(scorer);
|
||||
}
|
||||
};
|
||||
}
|
||||
};
|
||||
}
|
||||
|
|
|
@ -23,7 +23,6 @@ import java.io.IOException;
|
|||
import org.apache.lucene.index.BinaryDocValues;
|
||||
import org.apache.lucene.index.DocValues;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.NumericDocValues;
|
||||
import org.apache.lucene.index.SortedDocValues;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.BytesRefBuilder;
|
||||
|
@ -136,271 +135,11 @@ public abstract class FieldComparator<T> {
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Base FieldComparator class for numeric types
|
||||
* Informs the comparator that sort is done on this single field.
|
||||
* This is useful to enable some optimizations for skipping non-competitive documents.
|
||||
*/
|
||||
public static abstract class NumericComparator<T extends Number> extends SimpleFieldComparator<T> {
|
||||
protected final T missingValue;
|
||||
protected final String field;
|
||||
protected NumericDocValues currentReaderValues;
|
||||
|
||||
protected NumericComparator(String field, T missingValue) {
|
||||
this.field = field;
|
||||
this.missingValue = missingValue;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void doSetNextReader(LeafReaderContext context) throws IOException {
|
||||
currentReaderValues = getNumericDocValues(context, field);
|
||||
}
|
||||
|
||||
/** Retrieves the NumericDocValues for the field in this segment */
|
||||
protected NumericDocValues getNumericDocValues(LeafReaderContext context, String field) throws IOException {
|
||||
return DocValues.getNumeric(context.reader(), field);
|
||||
}
|
||||
}
|
||||
|
||||
/** Parses field's values as double (using {@link
|
||||
* org.apache.lucene.index.LeafReader#getNumericDocValues}) and sorts by ascending value */
|
||||
public static class DoubleComparator extends NumericComparator<Double> {
|
||||
private final double[] values;
|
||||
protected double bottom;
|
||||
protected double topValue;
|
||||
|
||||
/**
|
||||
* Creates a new comparator based on {@link Double#compare} for {@code numHits}.
|
||||
* When a document has no value for the field, {@code missingValue} is substituted.
|
||||
*/
|
||||
public DoubleComparator(int numHits, String field, Double missingValue) {
|
||||
super(field, missingValue != null ? missingValue : 0.0);
|
||||
values = new double[numHits];
|
||||
}
|
||||
|
||||
private double getValueForDoc(int doc) throws IOException {
|
||||
if (currentReaderValues.advanceExact(doc)) {
|
||||
return Double.longBitsToDouble(currentReaderValues.longValue());
|
||||
} else {
|
||||
return missingValue;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public int compare(int slot1, int slot2) {
|
||||
return Double.compare(values[slot1], values[slot2]);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int compareBottom(int doc) throws IOException {
|
||||
return Double.compare(bottom, getValueForDoc(doc));
|
||||
}
|
||||
|
||||
@Override
|
||||
public void copy(int slot, int doc) throws IOException {
|
||||
values[slot] = getValueForDoc(doc);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setBottom(final int bottom) {
|
||||
this.bottom = values[bottom];
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setTopValue(Double value) {
|
||||
topValue = value;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Double value(int slot) {
|
||||
return Double.valueOf(values[slot]);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int compareTop(int doc) throws IOException {
|
||||
return Double.compare(topValue, getValueForDoc(doc));
|
||||
}
|
||||
}
|
||||
|
||||
/** Parses field's values as float (using {@link
|
||||
* org.apache.lucene.index.LeafReader#getNumericDocValues(String)}) and sorts by ascending value */
|
||||
public static class FloatComparator extends NumericComparator<Float> {
|
||||
private final float[] values;
|
||||
protected float bottom;
|
||||
protected float topValue;
|
||||
|
||||
/**
|
||||
* Creates a new comparator based on {@link Float#compare} for {@code numHits}.
|
||||
* When a document has no value for the field, {@code missingValue} is substituted.
|
||||
*/
|
||||
public FloatComparator(int numHits, String field, Float missingValue) {
|
||||
super(field, missingValue != null ? missingValue : 0.0f);
|
||||
values = new float[numHits];
|
||||
}
|
||||
|
||||
private float getValueForDoc(int doc) throws IOException {
|
||||
if (currentReaderValues.advanceExact(doc)) {
|
||||
return Float.intBitsToFloat((int) currentReaderValues.longValue());
|
||||
} else {
|
||||
return missingValue;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public int compare(int slot1, int slot2) {
|
||||
return Float.compare(values[slot1], values[slot2]);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int compareBottom(int doc) throws IOException {
|
||||
return Float.compare(bottom, getValueForDoc(doc));
|
||||
}
|
||||
|
||||
@Override
|
||||
public void copy(int slot, int doc) throws IOException {
|
||||
values[slot] = getValueForDoc(doc);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setBottom(final int bottom) {
|
||||
this.bottom = values[bottom];
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setTopValue(Float value) {
|
||||
topValue = value;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Float value(int slot) {
|
||||
return Float.valueOf(values[slot]);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int compareTop(int doc) throws IOException {
|
||||
return Float.compare(topValue, getValueForDoc(doc));
|
||||
}
|
||||
}
|
||||
|
||||
/** Parses field's values as int (using {@link
|
||||
* org.apache.lucene.index.LeafReader#getNumericDocValues(String)}) and sorts by ascending value */
|
||||
public static class IntComparator extends NumericComparator<Integer> {
|
||||
private final int[] values;
|
||||
protected int bottom; // Value of bottom of queue
|
||||
protected int topValue;
|
||||
|
||||
/**
|
||||
* Creates a new comparator based on {@link Integer#compare} for {@code numHits}.
|
||||
* When a document has no value for the field, {@code missingValue} is substituted.
|
||||
*/
|
||||
public IntComparator(int numHits, String field, Integer missingValue) {
|
||||
super(field, missingValue != null ? missingValue : 0);
|
||||
//System.out.println("IntComparator.init");
|
||||
//new Throwable().printStackTrace(System.out);
|
||||
values = new int[numHits];
|
||||
}
|
||||
|
||||
private int getValueForDoc(int doc) throws IOException {
|
||||
if (currentReaderValues.advanceExact(doc)) {
|
||||
return (int) currentReaderValues.longValue();
|
||||
} else {
|
||||
return missingValue;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public int compare(int slot1, int slot2) {
|
||||
return Integer.compare(values[slot1], values[slot2]);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int compareBottom(int doc) throws IOException {
|
||||
return Integer.compare(bottom, getValueForDoc(doc));
|
||||
}
|
||||
|
||||
@Override
|
||||
public void copy(int slot, int doc) throws IOException {
|
||||
values[slot] = getValueForDoc(doc);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setBottom(final int bottom) {
|
||||
this.bottom = values[bottom];
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setTopValue(Integer value) {
|
||||
topValue = value;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Integer value(int slot) {
|
||||
return Integer.valueOf(values[slot]);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int compareTop(int doc) throws IOException {
|
||||
return Integer.compare(topValue, getValueForDoc(doc));
|
||||
}
|
||||
}
|
||||
|
||||
/** Parses field's values as long (using {@link
|
||||
* org.apache.lucene.index.LeafReader#getNumericDocValues(String)}) and sorts by ascending value */
|
||||
public static class LongComparator extends NumericComparator<Long> {
|
||||
private final long[] values;
|
||||
protected long bottom;
|
||||
protected long topValue;
|
||||
|
||||
/**
|
||||
* Creates a new comparator based on {@link Long#compare} for {@code numHits}.
|
||||
* When a document has no value for the field, {@code missingValue} is substituted.
|
||||
*/
|
||||
public LongComparator(int numHits, String field, Long missingValue) {
|
||||
super(field, missingValue != null ? missingValue : 0L);
|
||||
values = new long[numHits];
|
||||
}
|
||||
|
||||
private long getValueForDoc(int doc) throws IOException {
|
||||
if (currentReaderValues.advanceExact(doc)) {
|
||||
return currentReaderValues.longValue();
|
||||
} else {
|
||||
return missingValue;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public int compare(int slot1, int slot2) {
|
||||
return Long.compare(values[slot1], values[slot2]);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int compareBottom(int doc) throws IOException {
|
||||
return Long.compare(bottom, getValueForDoc(doc));
|
||||
}
|
||||
|
||||
@Override
|
||||
public void copy(int slot, int doc) throws IOException {
|
||||
values[slot] = getValueForDoc(doc);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setBottom(final int bottom) {
|
||||
this.bottom = values[bottom];
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setTopValue(Long value) {
|
||||
topValue = value;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Long value(int slot) {
|
||||
return Long.valueOf(values[slot]);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int compareTop(int doc) throws IOException {
|
||||
return Long.compare(topValue, getValueForDoc(doc));
|
||||
}
|
||||
public void setSingleSort() {
|
||||
}
|
||||
|
||||
/** Sorts by descending relevance. NOTE: if you are
|
||||
|
@ -486,69 +225,6 @@ public abstract class FieldComparator<T> {
|
|||
}
|
||||
}
|
||||
|
||||
/** Sorts by ascending docID */
|
||||
public static final class DocComparator extends FieldComparator<Integer> implements LeafFieldComparator {
|
||||
private final int[] docIDs;
|
||||
private int docBase;
|
||||
private int bottom;
|
||||
private int topValue;
|
||||
|
||||
/** Creates a new comparator based on document ids for {@code numHits} */
|
||||
public DocComparator(int numHits) {
|
||||
docIDs = new int[numHits];
|
||||
}
|
||||
|
||||
@Override
|
||||
public int compare(int slot1, int slot2) {
|
||||
// No overflow risk because docIDs are non-negative
|
||||
return docIDs[slot1] - docIDs[slot2];
|
||||
}
|
||||
|
||||
@Override
|
||||
public int compareBottom(int doc) {
|
||||
// No overflow risk because docIDs are non-negative
|
||||
return bottom - (docBase + doc);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void copy(int slot, int doc) {
|
||||
docIDs[slot] = docBase + doc;
|
||||
}
|
||||
|
||||
@Override
|
||||
public LeafFieldComparator getLeafComparator(LeafReaderContext context) {
|
||||
// TODO: can we "map" our docIDs to the current
|
||||
// reader? saves having to then subtract on every
|
||||
// compare call
|
||||
this.docBase = context.docBase;
|
||||
return this;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setBottom(final int bottom) {
|
||||
this.bottom = docIDs[bottom];
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setTopValue(Integer value) {
|
||||
topValue = value;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Integer value(int slot) {
|
||||
return Integer.valueOf(docIDs[slot]);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int compareTop(int doc) {
|
||||
int docValue = docBase + doc;
|
||||
return Integer.compare(topValue, docValue);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setScorer(Scorable scorer) {}
|
||||
}
|
||||
|
||||
/** Sorts by field's natural Term sort order, using
|
||||
* ordinals. This is functionally equivalent to {@link
|
||||
* org.apache.lucene.search.FieldComparator.TermValComparator}, but it first resolves the string
|
||||
|
|
|
@ -58,8 +58,8 @@ public abstract class FieldValueHitQueue<T extends FieldValueHitQueue.Entry> ext
|
|||
private final int oneReverseMul;
|
||||
private final FieldComparator<?> oneComparator;
|
||||
|
||||
public OneComparatorFieldValueHitQueue(SortField[] fields, int size, boolean filterNonCompetitiveDocs) {
|
||||
super(fields, size, filterNonCompetitiveDocs);
|
||||
public OneComparatorFieldValueHitQueue(SortField[] fields, int size) {
|
||||
super(fields, size);
|
||||
|
||||
assert fields.length == 1;
|
||||
oneComparator = comparators[0];
|
||||
|
@ -95,8 +95,8 @@ public abstract class FieldValueHitQueue<T extends FieldValueHitQueue.Entry> ext
|
|||
*/
|
||||
private static final class MultiComparatorsFieldValueHitQueue<T extends FieldValueHitQueue.Entry> extends FieldValueHitQueue<T> {
|
||||
|
||||
public MultiComparatorsFieldValueHitQueue(SortField[] fields, int size, boolean filterNonCompetitiveDocs) {
|
||||
super(fields, size, filterNonCompetitiveDocs);
|
||||
public MultiComparatorsFieldValueHitQueue(SortField[] fields, int size) {
|
||||
super(fields, size);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -121,7 +121,7 @@ public abstract class FieldValueHitQueue<T extends FieldValueHitQueue.Entry> ext
|
|||
}
|
||||
|
||||
// prevent instantiation and extension.
|
||||
private FieldValueHitQueue(SortField[] fields, int size, boolean filterNonCompetitiveDocs) {
|
||||
private FieldValueHitQueue(SortField[] fields, int size) {
|
||||
super(size);
|
||||
// When we get here, fields.length is guaranteed to be > 0, therefore no
|
||||
// need to check it again.
|
||||
|
@ -136,14 +136,12 @@ public abstract class FieldValueHitQueue<T extends FieldValueHitQueue.Entry> ext
|
|||
for (int i = 0; i < numComparators; ++i) {
|
||||
SortField field = fields[i];
|
||||
reverseMul[i] = field.reverse ? -1 : 1;
|
||||
if (i == 0 && filterNonCompetitiveDocs) {
|
||||
// try to rewrite the 1st comparator to the comparator that can skip non-competitive documents
|
||||
// skipping functionality is beneficial only for the 1st comparator
|
||||
comparators[i] = FilteringFieldComparator.wrapToFilteringComparator(field.getComparator(size, i),
|
||||
field.reverse, numComparators == 1);
|
||||
} else {
|
||||
comparators[i] = field.getComparator(size, i);
|
||||
}
|
||||
if (numComparators == 1) {
|
||||
// inform a comparator that sort is based on this single field
|
||||
// to enable some optimizations for skipping over non-competitive documents
|
||||
comparators[0].setSingleSort();
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -158,20 +156,17 @@ public abstract class FieldValueHitQueue<T extends FieldValueHitQueue.Entry> ext
|
|||
* priority first); cannot be <code>null</code> or empty
|
||||
* @param size
|
||||
* The number of hits to retain. Must be greater than zero.
|
||||
* @param filterNonCompetitiveDocs
|
||||
* {@code true} if comparators should be allowed to filter non-competitive documents, {@code false} otherwise
|
||||
*/
|
||||
public static <T extends FieldValueHitQueue.Entry> FieldValueHitQueue<T> create(SortField[] fields, int size,
|
||||
boolean filterNonCompetitiveDocs) {
|
||||
public static <T extends FieldValueHitQueue.Entry> FieldValueHitQueue<T> create(SortField[] fields, int size) {
|
||||
|
||||
if (fields.length == 0) {
|
||||
throw new IllegalArgumentException("Sort must contain at least one field");
|
||||
}
|
||||
|
||||
if (fields.length == 1) {
|
||||
return new OneComparatorFieldValueHitQueue<>(fields, size, filterNonCompetitiveDocs);
|
||||
return new OneComparatorFieldValueHitQueue<>(fields, size);
|
||||
} else {
|
||||
return new MultiComparatorsFieldValueHitQueue<>(fields, size, filterNonCompetitiveDocs);
|
||||
return new MultiComparatorsFieldValueHitQueue<>(fields, size);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -1,93 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.search;
|
||||
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
/**
|
||||
* A wrapper over {@code FieldComparator} that provides a leaf comparator that can filter non-competitive docs.
|
||||
*/
|
||||
abstract class FilteringFieldComparator<T> extends FieldComparator<T> {
|
||||
protected final FieldComparator<T> in;
|
||||
protected final boolean reverse;
|
||||
// singleSort is true, if sort is based on a single sort field. As there are no other sorts configured
|
||||
// as tie breakers, we can filter out docs with equal values.
|
||||
protected final boolean singleSort;
|
||||
protected boolean hasTopValue = false;
|
||||
|
||||
public FilteringFieldComparator(FieldComparator<T> in, boolean reverse, boolean singleSort) {
|
||||
this.in = in;
|
||||
this.reverse = reverse;
|
||||
this.singleSort = singleSort;
|
||||
}
|
||||
|
||||
@Override
|
||||
public abstract FilteringLeafFieldComparator getLeafComparator(LeafReaderContext context) throws IOException;
|
||||
|
||||
@Override
|
||||
public int compare(int slot1, int slot2) {
|
||||
return in.compare(slot1, slot2);
|
||||
}
|
||||
|
||||
@Override
|
||||
public T value(int slot) {
|
||||
return in.value(slot);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setTopValue(T value) {
|
||||
in.setTopValue(value);
|
||||
hasTopValue = true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int compareValues(T first, T second) {
|
||||
return in.compareValues(first, second);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Try to wrap a given field comparator to add to it a functionality to skip over non-competitive docs.
|
||||
* If for the given comparator the skip functionality is not implemented, return the comparator itself.
|
||||
* @param comparator – comparator to wrap
|
||||
* @param reverse – if this sort is reverse
|
||||
* @param singleSort – true if this sort is based on a single field and there are no other sort fields for tie breaking
|
||||
* @return comparator wrapped as a filtering comparator or the original comparator if the filtering functionality
|
||||
* is not implemented for it
|
||||
*/
|
||||
public static FieldComparator<?> wrapToFilteringComparator(FieldComparator<?> comparator, boolean reverse, boolean singleSort) {
|
||||
Class<?> comparatorClass = comparator.getClass();
|
||||
if (comparatorClass == FieldComparator.LongComparator.class){
|
||||
return new FilteringNumericComparator<>((FieldComparator.LongComparator) comparator, reverse, singleSort);
|
||||
}
|
||||
if (comparatorClass == FieldComparator.IntComparator.class){
|
||||
return new FilteringNumericComparator<>((FieldComparator.IntComparator) comparator, reverse, singleSort);
|
||||
}
|
||||
if (comparatorClass == FieldComparator.DoubleComparator.class){
|
||||
return new FilteringNumericComparator<>((FieldComparator.DoubleComparator) comparator, reverse, singleSort);
|
||||
}
|
||||
if (comparatorClass == FieldComparator.FloatComparator.class){
|
||||
return new FilteringNumericComparator<>((FieldComparator.FloatComparator) comparator, reverse, singleSort);
|
||||
}
|
||||
return comparator;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
|
@ -1,39 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.search;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
/**
|
||||
* Decorates a wrapped LeafFieldComparator to add a functionality to skip over non-competitive docs.
|
||||
* FilteringLeafFieldComparator provides two additional functions to a LeafFieldComparator:
|
||||
* {@code competitiveIterator()} and {@code setCanUpdateIterator()}.
|
||||
*/
|
||||
public interface FilteringLeafFieldComparator extends LeafFieldComparator {
|
||||
/**
|
||||
* Returns a competitive iterator
|
||||
* @return an iterator over competitive docs that are stronger than already collected docs
|
||||
* or {@code null} if such an iterator is not available for the current segment.
|
||||
*/
|
||||
DocIdSetIterator competitiveIterator() throws IOException;
|
||||
|
||||
/**
|
||||
* Informs this leaf comparator that it is allowed to start updating its competitive iterator.
|
||||
* This method is called from a collector when the queue becomes full and the hits threshold is reached.
|
||||
*/
|
||||
void setCanUpdateIterator() throws IOException;
|
||||
}
|
|
@ -1,52 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.search;
|
||||
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
/**
|
||||
* A wrapper over {@code NumericComparator} that provides a leaf comparator that can filter non-competitive docs.
|
||||
*/
|
||||
class FilteringNumericComparator<T extends Number> extends FilteringFieldComparator<T> {
|
||||
public FilteringNumericComparator(NumericComparator<T> in, boolean reverse, boolean singleSort) {
|
||||
super(in, reverse, singleSort);
|
||||
}
|
||||
|
||||
@Override
|
||||
public final FilteringLeafFieldComparator getLeafComparator(LeafReaderContext context) throws IOException {
|
||||
LeafFieldComparator inLeafComparator = in.getLeafComparator(context);
|
||||
Class<?> comparatorClass = inLeafComparator.getClass();
|
||||
if (comparatorClass == FieldComparator.LongComparator.class) {
|
||||
return new FilteringNumericLeafComparator.FilteringLongLeafComparator((FieldComparator.LongComparator) inLeafComparator, context,
|
||||
((LongComparator) inLeafComparator).field, reverse, singleSort, hasTopValue);
|
||||
} if (comparatorClass == FieldComparator.IntComparator.class) {
|
||||
return new FilteringNumericLeafComparator.FilteringIntLeafComparator((FieldComparator.IntComparator) inLeafComparator, context,
|
||||
((IntComparator) inLeafComparator).field, reverse, singleSort, hasTopValue);
|
||||
} else if (comparatorClass == FieldComparator.DoubleComparator.class) {
|
||||
return new FilteringNumericLeafComparator.FilteringDoubleLeafComparator((FieldComparator.DoubleComparator) inLeafComparator, context,
|
||||
((DoubleComparator) inLeafComparator).field, reverse, singleSort, hasTopValue);
|
||||
} else if (comparatorClass == FieldComparator.FloatComparator.class) {
|
||||
return new FilteringNumericLeafComparator.FilteringFloatLeafComparator((FieldComparator.FloatComparator) inLeafComparator, context,
|
||||
((FloatComparator) inLeafComparator).field, reverse, singleSort, hasTopValue);
|
||||
} else {
|
||||
throw new IllegalStateException("Unexpected numeric class of ["+ comparatorClass + "] for [FieldComparator]!");
|
||||
}
|
||||
}
|
||||
|
||||
}
|
|
@ -1,336 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.search;
|
||||
|
||||
import org.apache.lucene.document.DoublePoint;
|
||||
import org.apache.lucene.document.FloatPoint;
|
||||
import org.apache.lucene.document.IntPoint;
|
||||
import org.apache.lucene.document.LongPoint;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.PointValues;
|
||||
import org.apache.lucene.util.DocIdSetBuilder;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
|
||||
/**
|
||||
* A {@code FilteringLeafFieldComparator} that provides a functionality to skip over non-competitive documents
|
||||
* for numeric fields indexed with points.
|
||||
*/
|
||||
abstract class FilteringNumericLeafComparator implements FilteringLeafFieldComparator {
|
||||
protected final LeafFieldComparator in;
|
||||
protected final boolean reverse;
|
||||
protected final boolean singleSort; //if sort is based on a single sort field as opposed to multiple sort fields
|
||||
private final boolean hasTopValue;
|
||||
private final PointValues pointValues;
|
||||
private final int bytesCount;
|
||||
private final int maxDoc;
|
||||
private final byte[] minValueAsBytes;
|
||||
private final byte[] maxValueAsBytes;
|
||||
|
||||
private long iteratorCost;
|
||||
private int maxDocVisited = 0;
|
||||
private int updateCounter = 0;
|
||||
private boolean canUpdateIterator = false; // set to true when queue becomes full and hitsThreshold is reached
|
||||
private DocIdSetIterator competitiveIterator;
|
||||
|
||||
public FilteringNumericLeafComparator(LeafFieldComparator in, LeafReaderContext context, String field,
|
||||
boolean reverse, boolean singleSort, boolean hasTopValue, int bytesCount) throws IOException {
|
||||
this.in = in;
|
||||
this.pointValues = context.reader().getPointValues(field);
|
||||
this.reverse = reverse;
|
||||
this.singleSort = singleSort;
|
||||
this.hasTopValue = hasTopValue;
|
||||
this.maxDoc = context.reader().maxDoc();
|
||||
this.bytesCount = bytesCount;
|
||||
this.maxValueAsBytes = reverse == false ? new byte[bytesCount] : hasTopValue ? new byte[bytesCount] : null;
|
||||
this.minValueAsBytes = reverse ? new byte[bytesCount] : hasTopValue ? new byte[bytesCount] : null;
|
||||
|
||||
// TODO: optimize a case when pointValues are missing only on this segment
|
||||
this.competitiveIterator = pointValues == null ? null : DocIdSetIterator.all(maxDoc);
|
||||
this.iteratorCost = maxDoc;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setBottom(int slot) throws IOException {
|
||||
in.setBottom(slot);
|
||||
updateCompetitiveIterator(); // update an iterator if we set a new bottom
|
||||
}
|
||||
|
||||
@Override
|
||||
public int compareBottom(int doc) throws IOException {
|
||||
return in.compareBottom(doc);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int compareTop(int doc) throws IOException {
|
||||
return in.compareTop(doc);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void copy(int slot, int doc) throws IOException {
|
||||
in.copy(slot, doc);
|
||||
maxDocVisited = doc;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setScorer(Scorable scorer) throws IOException {
|
||||
in.setScorer(scorer);
|
||||
if (scorer instanceof Scorer) {
|
||||
iteratorCost = ((Scorer) scorer).iterator().cost(); // starting iterator cost is the scorer's cost
|
||||
updateCompetitiveIterator(); // update an iterator when we have a new segment
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setCanUpdateIterator() throws IOException {
|
||||
this.canUpdateIterator = true;
|
||||
updateCompetitiveIterator();
|
||||
}
|
||||
|
||||
@Override
|
||||
public DocIdSetIterator competitiveIterator() {
|
||||
if (competitiveIterator == null) return null;
|
||||
return new DocIdSetIterator() {
|
||||
private int doc;
|
||||
|
||||
@Override
|
||||
public int nextDoc() throws IOException {
|
||||
return doc = competitiveIterator.nextDoc();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int docID() {
|
||||
return doc;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long cost() {
|
||||
return competitiveIterator.cost();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int advance(int target) throws IOException {
|
||||
return doc = competitiveIterator.advance(target);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
// update its iterator to include possibly only docs that are "stronger" than the current bottom entry
|
||||
private void updateCompetitiveIterator() throws IOException {
|
||||
if (canUpdateIterator == false) return;
|
||||
if (pointValues == null) return;
|
||||
// if some documents have missing points, check that missing values prohibits optimization
|
||||
if ((pointValues.getDocCount() < maxDoc) && isMissingValueCompetitive()) {
|
||||
return; // we can't filter out documents, as documents with missing values are competitive
|
||||
}
|
||||
|
||||
updateCounter++;
|
||||
if (updateCounter > 256 && (updateCounter & 0x1f) != 0x1f) { // Start sampling if we get called too much
|
||||
return;
|
||||
}
|
||||
if (reverse == false) {
|
||||
encodeBottom(maxValueAsBytes);
|
||||
if (hasTopValue) {
|
||||
encodeTop(minValueAsBytes);
|
||||
}
|
||||
} else {
|
||||
encodeBottom(minValueAsBytes);
|
||||
if (hasTopValue) {
|
||||
encodeTop(maxValueAsBytes);
|
||||
}
|
||||
}
|
||||
|
||||
DocIdSetBuilder result = new DocIdSetBuilder(maxDoc);
|
||||
PointValues.IntersectVisitor visitor = new PointValues.IntersectVisitor() {
|
||||
DocIdSetBuilder.BulkAdder adder;
|
||||
|
||||
@Override
|
||||
public void grow(int count) {
|
||||
adder = result.grow(count);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void visit(int docID) {
|
||||
if (docID <= maxDocVisited) {
|
||||
return; // Already visited or skipped
|
||||
}
|
||||
adder.add(docID);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void visit(int docID, byte[] packedValue) {
|
||||
if (docID <= maxDocVisited) {
|
||||
return; // already visited or skipped
|
||||
}
|
||||
if (maxValueAsBytes != null) {
|
||||
int cmp = Arrays.compareUnsigned(packedValue, 0, bytesCount, maxValueAsBytes, 0, bytesCount);
|
||||
// if doc's value is too high or for single sort even equal, it is not competitive and the doc can be skipped
|
||||
if (cmp > 0 || (singleSort && cmp == 0)) return;
|
||||
}
|
||||
if (minValueAsBytes != null) {
|
||||
int cmp = Arrays.compareUnsigned(packedValue, 0, bytesCount, minValueAsBytes, 0, bytesCount);
|
||||
// if doc's value is too low or for single sort even equal, it is not competitive and the doc can be skipped
|
||||
if (cmp < 0 || (singleSort && cmp == 0)) return;
|
||||
}
|
||||
adder.add(docID); // doc is competitive
|
||||
}
|
||||
|
||||
@Override
|
||||
public PointValues.Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
|
||||
if (maxValueAsBytes != null) {
|
||||
int cmp = Arrays.compareUnsigned(minPackedValue, 0, bytesCount, maxValueAsBytes, 0, bytesCount);
|
||||
if (cmp > 0 || (singleSort && cmp == 0)) return PointValues.Relation.CELL_OUTSIDE_QUERY;
|
||||
}
|
||||
if (minValueAsBytes != null) {
|
||||
int cmp = Arrays.compareUnsigned(maxPackedValue, 0, bytesCount, minValueAsBytes, 0, bytesCount);
|
||||
if (cmp < 0 || (singleSort && cmp == 0)) return PointValues.Relation.CELL_OUTSIDE_QUERY;
|
||||
}
|
||||
if ((maxValueAsBytes != null && Arrays.compareUnsigned(maxPackedValue, 0, bytesCount, maxValueAsBytes, 0, bytesCount) > 0) ||
|
||||
(minValueAsBytes != null && Arrays.compareUnsigned(minPackedValue, 0, bytesCount, minValueAsBytes, 0, bytesCount) < 0)) {
|
||||
return PointValues.Relation.CELL_CROSSES_QUERY;
|
||||
}
|
||||
return PointValues.Relation.CELL_INSIDE_QUERY;
|
||||
}
|
||||
};
|
||||
final long threshold = iteratorCost >>> 3;
|
||||
long estimatedNumberOfMatches = pointValues.estimatePointCount(visitor); // runs in O(log(numPoints))
|
||||
if (estimatedNumberOfMatches >= threshold) {
|
||||
// the new range is not selective enough to be worth materializing, it doesn't reduce number of docs at least 8x
|
||||
return;
|
||||
}
|
||||
pointValues.intersect(visitor);
|
||||
competitiveIterator = result.build().iterator();
|
||||
iteratorCost = competitiveIterator.cost();
|
||||
}
|
||||
|
||||
/**
 * Tells whether documents without a value are competitive. When this returns {@code true}
 * and some documents of the segment have no points, the competitive iterator is left unchanged.
 */
protected abstract boolean isMissingValueCompetitive();
|
||||
|
||||
/**
 * Encodes the current bottom value into {@code packedValue}. The buffer passed in is the
 * upper bound for a non-reversed sort and the lower bound for a reversed sort.
 */
protected abstract void encodeBottom(byte[] packedValue);
|
||||
|
||||
/**
 * Encodes the top value into {@code packedValue}. Only called when a top value is available;
 * the buffer passed in is the lower bound for a non-reversed sort and the upper bound for a reversed sort.
 */
protected abstract void encodeTop(byte[] packedValue);
|
||||
|
||||
|
||||
/**
|
||||
* A wrapper over double long comparator that adds a functionality to filter non-competitive docs.
|
||||
*/
|
||||
static class FilteringLongLeafComparator extends FilteringNumericLeafComparator {
|
||||
public FilteringLongLeafComparator(FieldComparator.LongComparator in, LeafReaderContext context,
|
||||
String field, boolean reverse, boolean singleSort, boolean hasTopValue) throws IOException {
|
||||
super(in, context, field, reverse, singleSort, hasTopValue, Long.BYTES);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected boolean isMissingValueCompetitive() {
|
||||
int result = Long.compare(((FieldComparator.LongComparator) in).missingValue, ((FieldComparator.LongComparator) in).bottom);
|
||||
// in reverse (desc) sort missingValue is competitive when it's greater or equal to bottom,
|
||||
// in asc sort missingValue is competitive when it's smaller or equal to bottom
|
||||
return reverse ? (result >= 0) : (result <= 0);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void encodeBottom(byte[] packedValue) {
|
||||
LongPoint.encodeDimension(((FieldComparator.LongComparator) in).bottom, packedValue, 0);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void encodeTop(byte[] packedValue) {
|
||||
LongPoint.encodeDimension(((FieldComparator.LongComparator) in).topValue, packedValue, 0);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* A wrapper over integer leaf comparator that adds a functionality to filter non-competitive docs.
|
||||
*/
|
||||
static class FilteringIntLeafComparator extends FilteringNumericLeafComparator {
|
||||
public FilteringIntLeafComparator(FieldComparator.IntComparator in, LeafReaderContext context,
|
||||
String field, boolean reverse, boolean singleSort, boolean hasTopValue) throws IOException {
|
||||
super(in, context, field, reverse, singleSort, hasTopValue, Integer.BYTES);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected boolean isMissingValueCompetitive() {
|
||||
int result = Integer.compare(((FieldComparator.IntComparator) in).missingValue, ((FieldComparator.IntComparator) in).bottom);
|
||||
// in reverse (desc) sort missingValue is competitive when it's greater or equal to bottom,
|
||||
// in asc sort missingValue is competitive when it's smaller or equal to bottom
|
||||
return reverse ? (result >= 0) : (result <= 0);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void encodeBottom(byte[] packedValue) {
|
||||
IntPoint.encodeDimension(((FieldComparator.IntComparator) in).bottom, packedValue, 0);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void encodeTop(byte[] packedValue) {
|
||||
IntPoint.encodeDimension(((FieldComparator.IntComparator) in).topValue, packedValue, 0);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* A wrapper over double leaf comparator that adds a functionality to filter non-competitive docs.
|
||||
*/
|
||||
static class FilteringDoubleLeafComparator extends FilteringNumericLeafComparator {
|
||||
public FilteringDoubleLeafComparator(FieldComparator.DoubleComparator in, LeafReaderContext context,
|
||||
String field, boolean reverse, boolean singleSort, boolean hasTopValue) throws IOException {
|
||||
super(in, context, field, reverse, singleSort, hasTopValue, Double.BYTES);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected boolean isMissingValueCompetitive() {
|
||||
int result = Double.compare(((FieldComparator.DoubleComparator) in).missingValue, ((FieldComparator.DoubleComparator) in).bottom);
|
||||
return reverse ? (result >= 0) : (result <= 0);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void encodeBottom(byte[] packedValue) {
|
||||
DoublePoint.encodeDimension(((FieldComparator.DoubleComparator) in).bottom, packedValue, 0);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void encodeTop(byte[] packedValue) {
|
||||
DoublePoint.encodeDimension(((FieldComparator.DoubleComparator) in).topValue, packedValue, 0);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* A wrapper over float leaf comparator that adds a functionality to filter non-competitive docs.
|
||||
*/
|
||||
static class FilteringFloatLeafComparator extends FilteringNumericLeafComparator {
|
||||
public FilteringFloatLeafComparator(FieldComparator.FloatComparator in, LeafReaderContext context,
|
||||
String field, boolean reverse, boolean singleSort, boolean hasTopValue) throws IOException {
|
||||
super(in, context, field, reverse, singleSort, hasTopValue, Float.BYTES);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected boolean isMissingValueCompetitive() {
|
||||
int result = Float.compare(((FieldComparator.FloatComparator) in).missingValue, ((FieldComparator.FloatComparator) in).bottom);
|
||||
return reverse ? (result >= 0) : (result <= 0);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void encodeBottom(byte[] packedValue) {
|
||||
FloatPoint.encodeDimension(((FieldComparator.FloatComparator) in).bottom, packedValue, 0);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void encodeTop(byte[] packedValue) {
|
||||
FloatPoint.encodeDimension(((FieldComparator.FloatComparator) in).topValue, packedValue, 0);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
|
@ -116,4 +116,20 @@ public interface LeafFieldComparator {
|
|||
* obtain the current hit's score, if necessary. */
|
||||
void setScorer(Scorable scorer) throws IOException;
|
||||
|
||||
/**
|
||||
* Returns a competitive iterator
|
||||
* @return an iterator over competitive docs that are stronger than already collected docs
|
||||
* or {@code null} if such an iterator is not available for the current comparator or segment.
|
||||
*/
|
||||
default DocIdSetIterator competitiveIterator() throws IOException {
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Informs this leaf comparator that hits threshold is reached.
|
||||
* This method is called from a collector when hits threshold is reached.
|
||||
*/
|
||||
default void setHitsThresholdReached() throws IOException{
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -23,6 +23,7 @@ import java.util.Objects;
|
|||
import org.apache.lucene.index.DocValues;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.NumericDocValues;
|
||||
import org.apache.lucene.search.comparators.LongComparator;
|
||||
|
||||
/**
|
||||
* Base class for producing {@link LongValues}
|
||||
|
@ -337,13 +338,16 @@ public abstract class LongValuesSource implements SegmentCacheable {
|
|||
@Override
|
||||
public FieldComparator<Long> newComparator(String fieldname, int numHits,
|
||||
int sortPos, boolean reversed) {
|
||||
return new FieldComparator.LongComparator(numHits, fieldname, missingValue) {
|
||||
|
||||
LeafReaderContext ctx;
|
||||
return new LongComparator(numHits, fieldname, missingValue, reversed, sortPos) {
|
||||
@Override
|
||||
public LeafFieldComparator getLeafComparator(LeafReaderContext context) throws IOException {
|
||||
LongValuesHolder holder = new LongValuesHolder();
|
||||
|
||||
return new LongComparator.LongLeafComparator(context) {
|
||||
LeafReaderContext ctx;
|
||||
|
||||
@Override
|
||||
protected NumericDocValues getNumericDocValues(LeafReaderContext context, String field) throws IOException {
|
||||
protected NumericDocValues getNumericDocValues(LeafReaderContext context, String field) {
|
||||
ctx = context;
|
||||
return asNumericDocValues(holder);
|
||||
}
|
||||
|
@ -351,6 +355,9 @@ public abstract class LongValuesSource implements SegmentCacheable {
|
|||
@Override
|
||||
public void setScorer(Scorable scorer) throws IOException {
|
||||
holder.values = producer.getValues(ctx, DoubleValuesSource.fromScorer(scorer));
|
||||
super.setScorer(scorer);
|
||||
}
|
||||
};
|
||||
}
|
||||
};
|
||||
}
|
||||
|
|
|
@ -89,4 +89,15 @@ final class MultiLeafFieldComparator implements LeafFieldComparator {
|
|||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setHitsThresholdReached() throws IOException {
|
||||
// this is needed for skipping functionality that is only relevant for the 1st comparator
|
||||
firstComparator.setHitsThresholdReached();
|
||||
}
|
||||
|
||||
@Override
|
||||
public DocIdSetIterator competitiveIterator() throws IOException {
|
||||
// this is needed for skipping functionality that is only relevant for the 1st comparator
|
||||
return firstComparator.competitiveIterator();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -24,6 +24,11 @@ import java.util.Objects;
|
|||
import org.apache.lucene.index.DocValues;
|
||||
import org.apache.lucene.index.IndexSorter;
|
||||
import org.apache.lucene.index.SortFieldProvider;
|
||||
import org.apache.lucene.search.comparators.DocComparator;
|
||||
import org.apache.lucene.search.comparators.DoubleComparator;
|
||||
import org.apache.lucene.search.comparators.FloatComparator;
|
||||
import org.apache.lucene.search.comparators.IntComparator;
|
||||
import org.apache.lucene.search.comparators.LongComparator;
|
||||
import org.apache.lucene.store.DataInput;
|
||||
import org.apache.lucene.store.DataOutput;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
|
@ -452,19 +457,19 @@ public class SortField {
|
|||
return new FieldComparator.RelevanceComparator(numHits);
|
||||
|
||||
case DOC:
|
||||
return new FieldComparator.DocComparator(numHits);
|
||||
return new DocComparator(numHits, reverse, sortPos);
|
||||
|
||||
case INT:
|
||||
return new FieldComparator.IntComparator(numHits, field, (Integer) missingValue);
|
||||
return new IntComparator(numHits, field, (Integer) missingValue, reverse, sortPos);
|
||||
|
||||
case FLOAT:
|
||||
return new FieldComparator.FloatComparator(numHits, field, (Float) missingValue);
|
||||
return new FloatComparator(numHits, field, (Float) missingValue, reverse, sortPos);
|
||||
|
||||
case LONG:
|
||||
return new FieldComparator.LongComparator(numHits, field, (Long) missingValue);
|
||||
return new LongComparator(numHits, field, (Long) missingValue, reverse, sortPos);
|
||||
|
||||
case DOUBLE:
|
||||
return new FieldComparator.DoubleComparator(numHits, field, (Double) missingValue);
|
||||
return new DoubleComparator(numHits, field, (Double) missingValue, reverse, sortPos);
|
||||
|
||||
case CUSTOM:
|
||||
assert comparatorSource != null;
|
||||
|
|
|
@ -26,6 +26,10 @@ import org.apache.lucene.index.LeafReaderContext;
|
|||
import org.apache.lucene.index.NumericDocValues;
|
||||
import org.apache.lucene.index.SortFieldProvider;
|
||||
import org.apache.lucene.index.SortedNumericDocValues;
|
||||
import org.apache.lucene.search.comparators.DoubleComparator;
|
||||
import org.apache.lucene.search.comparators.FloatComparator;
|
||||
import org.apache.lucene.search.comparators.IntComparator;
|
||||
import org.apache.lucene.search.comparators.LongComparator;
|
||||
import org.apache.lucene.store.DataInput;
|
||||
import org.apache.lucene.store.DataOutput;
|
||||
import org.apache.lucene.util.NumericUtils;
|
||||
|
@ -225,33 +229,53 @@ public class SortedNumericSortField extends SortField {
|
|||
public FieldComparator<?> getComparator(int numHits, int sortPos) {
|
||||
switch(type) {
|
||||
case INT:
|
||||
return new FieldComparator.IntComparator(numHits, getField(), (Integer) missingValue) {
|
||||
return new IntComparator(numHits, getField(), (Integer) missingValue, reverse, sortPos) {
|
||||
@Override
|
||||
public LeafFieldComparator getLeafComparator(LeafReaderContext context) throws IOException {
|
||||
return new IntLeafComparator(context) {
|
||||
@Override
|
||||
protected NumericDocValues getNumericDocValues(LeafReaderContext context, String field) throws IOException {
|
||||
return SortedNumericSelector.wrap(DocValues.getSortedNumeric(context.reader(), field), selector, type);
|
||||
}
|
||||
};
|
||||
}
|
||||
};
|
||||
case FLOAT:
|
||||
return new FieldComparator.FloatComparator(numHits, getField(), (Float) missingValue) {
|
||||
return new FloatComparator(numHits, getField(), (Float) missingValue, reverse, sortPos) {
|
||||
@Override
|
||||
public LeafFieldComparator getLeafComparator(LeafReaderContext context) throws IOException {
|
||||
return new FloatLeafComparator(context) {
|
||||
@Override
|
||||
protected NumericDocValues getNumericDocValues(LeafReaderContext context, String field) throws IOException {
|
||||
return SortedNumericSelector.wrap(DocValues.getSortedNumeric(context.reader(), field), selector, type);
|
||||
}
|
||||
};
|
||||
}
|
||||
};
|
||||
case LONG:
|
||||
return new FieldComparator.LongComparator(numHits, getField(), (Long) missingValue) {
|
||||
return new LongComparator(numHits, getField(), (Long) missingValue, reverse, sortPos) {
|
||||
@Override
|
||||
public LeafFieldComparator getLeafComparator(LeafReaderContext context) throws IOException {
|
||||
return new LongLeafComparator(context) {
|
||||
@Override
|
||||
protected NumericDocValues getNumericDocValues(LeafReaderContext context, String field) throws IOException {
|
||||
return SortedNumericSelector.wrap(DocValues.getSortedNumeric(context.reader(), field), selector, type);
|
||||
}
|
||||
};
|
||||
}
|
||||
};
|
||||
case DOUBLE:
|
||||
return new FieldComparator.DoubleComparator(numHits, getField(), (Double) missingValue) {
|
||||
return new DoubleComparator(numHits, getField(), (Double) missingValue, reverse, sortPos) {
|
||||
@Override
|
||||
public LeafFieldComparator getLeafComparator(LeafReaderContext context) throws IOException {
|
||||
return new DoubleLeafComparator(context) {
|
||||
@Override
|
||||
protected NumericDocValues getNumericDocValues(LeafReaderContext context, String field) throws IOException {
|
||||
return SortedNumericSelector.wrap(DocValues.getSortedNumeric(context.reader(), field), selector, type);
|
||||
}
|
||||
};
|
||||
}
|
||||
};
|
||||
default:
|
||||
throw new AssertionError();
|
||||
}
|
||||
|
|
|
@ -49,13 +49,10 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
|
|||
private static abstract class MultiComparatorLeafCollector implements LeafCollector {
|
||||
|
||||
final LeafFieldComparator comparator;
|
||||
final FilteringLeafFieldComparator filteringLeafComparator;
|
||||
final int reverseMul;
|
||||
Scorable scorer;
|
||||
|
||||
MultiComparatorLeafCollector(LeafFieldComparator[] comparators, int[] reverseMul) {
|
||||
this.filteringLeafComparator = comparators[0] instanceof FilteringLeafFieldComparator ?
|
||||
(FilteringLeafFieldComparator) comparators[0] : null;
|
||||
if (comparators.length == 1) {
|
||||
this.reverseMul = reverseMul[0];
|
||||
this.comparator = comparators[0];
|
||||
|
@ -90,12 +87,10 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
|
|||
if (minScoreAcc != null && (totalHits & minScoreAcc.modInterval) == 0) {
|
||||
updateGlobalMinCompetitiveScore(scorer);
|
||||
}
|
||||
|
||||
if (filteringLeafComparator != null && queueFull &&
|
||||
hitsThresholdChecker.isThresholdReached() && totalHitsRelation == TotalHits.Relation.EQUAL_TO) {
|
||||
// for the first time queue becomes full and hitsThreshold is reached,
|
||||
// notify leaf comparator that its competitive iterator can be updated
|
||||
filteringLeafComparator.setCanUpdateIterator();
|
||||
if (scoreMode.isExhaustive() == false && totalHitsRelation == TotalHits.Relation.EQUAL_TO &&
|
||||
hitsThresholdChecker.isThresholdReached()) {
|
||||
// for the first time hitsThreshold is reached, notify comparator about this
|
||||
comparator.setHitsThresholdReached();
|
||||
totalHitsRelation = TotalHits.Relation.GREATER_THAN_OR_EQUAL_TO;
|
||||
}
|
||||
}
|
||||
|
@ -150,25 +145,17 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
|
|||
if (minScoreAcc != null) {
|
||||
updateGlobalMinCompetitiveScore(scorer);
|
||||
}
|
||||
if (filteringLeafComparator != null && queueFull && hitsThresholdChecker.isThresholdReached()) {
|
||||
// if queue became full and hitsThreshold was reached in previous segments,
|
||||
// notify this segment's leaf comparator that its competitive iterator can be updated
|
||||
filteringLeafComparator.setCanUpdateIterator();
|
||||
totalHitsRelation = TotalHits.Relation.GREATER_THAN_OR_EQUAL_TO;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public DocIdSetIterator competitiveIterator() throws IOException {
|
||||
if (filteringLeafComparator == null) {
|
||||
return null;
|
||||
} else {
|
||||
return filteringLeafComparator.competitiveIterator();
|
||||
}
|
||||
return comparator.competitiveIterator();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// TODO: remove this code when all bulk scores similar to {@code DefaultBulkScorer} use collectors' iterator,
|
||||
// as early termination should be implemented in their respective comparators and removed from a collector
|
||||
static boolean canEarlyTerminate(Sort searchSort, Sort indexSort) {
|
||||
return canEarlyTerminateOnDocId(searchSort) ||
|
||||
canEarlyTerminateOnPrefix(searchSort, indexSort);
|
||||
|
@ -339,8 +326,7 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
|
|||
} else {
|
||||
relevanceComparator = null;
|
||||
canSetMinScore = false;
|
||||
if (firstComparator instanceof FilteringFieldComparator) {
|
||||
assert hitsThresholdChecker.getHitsThreshold() != Integer.MAX_VALUE;
|
||||
if (hitsThresholdChecker.getHitsThreshold() != Integer.MAX_VALUE) {
|
||||
scoreMode = needsScores ? ScoreMode.TOP_DOCS_WITH_SCORES : ScoreMode.TOP_DOCS;
|
||||
} else {
|
||||
scoreMode = needsScores ? ScoreMode.COMPLETE : ScoreMode.COMPLETE_NO_SCORES;
|
||||
|
@ -469,9 +455,7 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
|
|||
throw new IllegalArgumentException("hitsThresholdChecker should not be null");
|
||||
}
|
||||
|
||||
// here we assume that if hitsThreshold was set, we let a comparator to skip non-competitive docs
|
||||
boolean filterNonCompetitiveDocs = hitsThresholdChecker.getHitsThreshold() == Integer.MAX_VALUE ? false : true;
|
||||
FieldValueHitQueue<Entry> queue = FieldValueHitQueue.create(sort.fields, numHits, filterNonCompetitiveDocs);
|
||||
FieldValueHitQueue<Entry> queue = FieldValueHitQueue.create(sort.fields, numHits);
|
||||
|
||||
if (after == null) {
|
||||
return new SimpleFieldCollector(sort, queue, numHits, hitsThresholdChecker, minScoreAcc);
|
||||
|
|
|
@ -0,0 +1,185 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.lucene.search.comparators;
|
||||
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
import org.apache.lucene.search.FieldComparator;
|
||||
import org.apache.lucene.search.LeafFieldComparator;
|
||||
import org.apache.lucene.search.Scorable;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
/**
|
||||
* Comparator that sorts by asc _doc
|
||||
*/
|
||||
public class DocComparator extends FieldComparator<Integer> {
|
||||
private final int[] docIDs;
|
||||
private final boolean enableSkipping; // if skipping functionality should be enabled
|
||||
private int bottom;
|
||||
private int topValue;
|
||||
private boolean topValueSet;
|
||||
private boolean bottomValueSet;
|
||||
private boolean hitsThresholdReached;
|
||||
|
||||
/** Creates a new comparator based on document ids for {@code numHits} */
|
||||
public DocComparator(int numHits, boolean reverse, int sortPost) {
|
||||
this.docIDs = new int[numHits];
|
||||
// skipping functionality is enabled if we are sorting by _doc in asc order as a primary sort
|
||||
this.enableSkipping = (reverse == false && sortPost == 0);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int compare(int slot1, int slot2) {
|
||||
// No overflow risk because docIDs are non-negative
|
||||
return docIDs[slot1] - docIDs[slot2];
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public LeafFieldComparator getLeafComparator(LeafReaderContext context) {
|
||||
// TODO: can we "map" our docIDs to the current
|
||||
// reader? saves having to then subtract on every
|
||||
// compare call
|
||||
return new DocLeafComparator(context);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setTopValue(Integer value) {
|
||||
topValue = value;
|
||||
topValueSet = true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Integer value(int slot) {
|
||||
return Integer.valueOf(docIDs[slot]);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* DocLeafComparator with skipping functionality.
|
||||
* When sort by _doc asc, after collecting top N matches and enough hits, the comparator
|
||||
* can skip all the following documents.
|
||||
* When sort by _doc asc and "top" document is set after which search should start,
|
||||
* the comparator provides an iterator that can quickly skip to the desired "top" document.
|
||||
*/
|
||||
private class DocLeafComparator implements LeafFieldComparator {
|
||||
private final int docBase;
|
||||
private final int minDoc;
|
||||
private final int maxDoc;
|
||||
private DocIdSetIterator competitiveIterator; // iterator that starts from topValue
|
||||
|
||||
public DocLeafComparator(LeafReaderContext context) {
|
||||
this.docBase = context.docBase;
|
||||
if (enableSkipping) {
|
||||
this.minDoc = topValue + 1;
|
||||
this.maxDoc = context.reader().maxDoc();
|
||||
this.competitiveIterator = DocIdSetIterator.all(maxDoc);
|
||||
} else {
|
||||
this.minDoc = -1;
|
||||
this.maxDoc = -1;
|
||||
this.competitiveIterator = null;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setBottom(int slot) {
|
||||
bottom = docIDs[slot];
|
||||
bottomValueSet = true;
|
||||
updateIterator();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int compareBottom(int doc) {
|
||||
// No overflow risk because docIDs are non-negative
|
||||
return bottom - (docBase + doc);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int compareTop(int doc) {
|
||||
int docValue = docBase + doc;
|
||||
return Integer.compare(topValue, docValue);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void copy(int slot, int doc) throws IOException {
|
||||
docIDs[slot] = docBase + doc;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setScorer(Scorable scorer) throws IOException {
|
||||
// update an iterator on a new segment
|
||||
updateIterator();
|
||||
}
|
||||
|
||||
@Override
|
||||
public DocIdSetIterator competitiveIterator() {
|
||||
if (enableSkipping == false) {
|
||||
return null;
|
||||
} else {
|
||||
return new DocIdSetIterator() {
|
||||
private int doc;
|
||||
|
||||
@Override
|
||||
public int nextDoc() throws IOException {
|
||||
return doc = competitiveIterator.nextDoc();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int docID() {
|
||||
return doc;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long cost() {
|
||||
return competitiveIterator.cost();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int advance(int target) throws IOException {
|
||||
return doc = competitiveIterator.advance(target);
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setHitsThresholdReached() {
|
||||
hitsThresholdReached = true;
|
||||
updateIterator();
|
||||
}
|
||||
|
||||
private void updateIterator() {
|
||||
if (enableSkipping == false || hitsThresholdReached == false) return;
|
||||
if (bottomValueSet) {
|
||||
// since we've collected top N matches, we can early terminate
|
||||
// Currently early termination on _doc is also implemented in TopFieldCollector, but this will be removed
|
||||
// once all bulk scores uses collectors' iterators
|
||||
competitiveIterator = DocIdSetIterator.empty();
|
||||
} else if (topValueSet) {
|
||||
// skip to the desired top doc
|
||||
if (docBase + maxDoc <= minDoc) {
|
||||
competitiveIterator = DocIdSetIterator.empty(); // skip this segment
|
||||
} else {
|
||||
int segmentMinDoc = Math.max(0, minDoc - docBase);
|
||||
competitiveIterator = new MinDocIterator(segmentMinDoc, maxDoc);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,117 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.lucene.search.comparators;
|
||||
|
||||
import org.apache.lucene.document.DoublePoint;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.search.LeafFieldComparator;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
/**
|
||||
* Comparator based on {@link Double#compare} for {@code numHits}.
|
||||
* This comparator provides a skipping functionality – an iterator that can skip over non-competitive documents.
|
||||
*/
|
||||
public class DoubleComparator extends NumericComparator<Double> {
|
||||
private final double[] values;
|
||||
protected double topValue;
|
||||
protected double bottom;
|
||||
|
||||
public DoubleComparator(int numHits, String field, Double missingValue, boolean reverse, int sortPos) {
|
||||
super(field, missingValue != null ? missingValue : 0.0, reverse, sortPos, Double.BYTES);
|
||||
values = new double[numHits];
|
||||
}
|
||||
|
||||
@Override
|
||||
public int compare(int slot1, int slot2) {
|
||||
return Double.compare(values[slot1], values[slot2]);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setTopValue(Double value) {
|
||||
super.setTopValue(value);
|
||||
topValue = value;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Double value(int slot) {
|
||||
return Double.valueOf(values[slot]);
|
||||
}
|
||||
|
||||
@Override
|
||||
public LeafFieldComparator getLeafComparator(LeafReaderContext context) throws IOException {
|
||||
return new DoubleLeafComparator(context);
|
||||
}
|
||||
|
||||
/**
|
||||
* Leaf comparator for {@link DoubleComparator} that provides skipping functionality
|
||||
*/
|
||||
public class DoubleLeafComparator extends NumericLeafComparator {
|
||||
|
||||
public DoubleLeafComparator(LeafReaderContext context) throws IOException {
|
||||
super(context);
|
||||
}
|
||||
|
||||
private double getValueForDoc(int doc) throws IOException {
|
||||
if (docValues.advanceExact(doc)) {
|
||||
return Double.longBitsToDouble(docValues.longValue());
|
||||
} else {
|
||||
return missingValue;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setBottom(int slot) throws IOException {
|
||||
bottom = values[slot];
|
||||
super.setBottom(slot);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int compareBottom(int doc) throws IOException {
|
||||
return Double.compare(bottom, getValueForDoc(doc));
|
||||
}
|
||||
|
||||
@Override
|
||||
public int compareTop(int doc) throws IOException {
|
||||
return Double.compare(topValue, getValueForDoc(doc));
|
||||
}
|
||||
|
||||
@Override
|
||||
public void copy(int slot, int doc) throws IOException {
|
||||
values[slot] = getValueForDoc(doc);
|
||||
super.copy(slot, doc);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected boolean isMissingValueCompetitive() {
|
||||
int result = Double.compare(missingValue, bottom);
|
||||
return reverse ? (result >= 0) : (result <= 0);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void encodeBottom(byte[] packedValue) {
|
||||
DoublePoint.encodeDimension(bottom, packedValue, 0);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void encodeTop(byte[] packedValue) {
|
||||
DoublePoint.encodeDimension(topValue, packedValue, 0);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,117 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.lucene.search.comparators;
|
||||
|
||||
import org.apache.lucene.document.FloatPoint;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.search.LeafFieldComparator;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
/**
|
||||
* Comparator based on {@link Float#compare} for {@code numHits}.
|
||||
* This comparator provides a skipping functionality – an iterator that can skip over non-competitive documents.
|
||||
*/
|
||||
public class FloatComparator extends NumericComparator<Float> {
|
||||
private final float[] values;
|
||||
protected float topValue;
|
||||
protected float bottom;
|
||||
|
||||
public FloatComparator(int numHits, String field, Float missingValue, boolean reverse, int sortPos) {
|
||||
super(field, missingValue != null ? missingValue : 0.0f, reverse, sortPos, Float.BYTES);
|
||||
values = new float[numHits];
|
||||
}
|
||||
|
||||
@Override
|
||||
public int compare(int slot1, int slot2) {
|
||||
return Float.compare(values[slot1], values[slot2]);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setTopValue(Float value) {
|
||||
super.setTopValue(value);
|
||||
topValue = value;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Float value(int slot) {
|
||||
return Float.valueOf(values[slot]);
|
||||
}
|
||||
|
||||
@Override
|
||||
public LeafFieldComparator getLeafComparator(LeafReaderContext context) throws IOException {
|
||||
return new FloatLeafComparator(context);
|
||||
}
|
||||
|
||||
/**
|
||||
* Leaf comparator for {@link FloatComparator} that provides skipping functionality
|
||||
*/
|
||||
public class FloatLeafComparator extends NumericLeafComparator {
|
||||
|
||||
public FloatLeafComparator(LeafReaderContext context) throws IOException {
|
||||
super(context);
|
||||
}
|
||||
|
||||
private float getValueForDoc(int doc) throws IOException {
|
||||
if (docValues.advanceExact(doc)) {
|
||||
return Float.intBitsToFloat((int) docValues.longValue());
|
||||
} else {
|
||||
return missingValue;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setBottom(int slot) throws IOException {
|
||||
bottom = values[slot];
|
||||
super.setBottom(slot);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int compareBottom(int doc) throws IOException {
|
||||
return Float.compare(bottom, getValueForDoc(doc));
|
||||
}
|
||||
|
||||
@Override
|
||||
public int compareTop(int doc) throws IOException {
|
||||
return Float.compare(topValue, getValueForDoc(doc));
|
||||
}
|
||||
|
||||
@Override
|
||||
public void copy(int slot, int doc) throws IOException {
|
||||
values[slot] = getValueForDoc(doc);
|
||||
super.copy(slot, doc);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected boolean isMissingValueCompetitive() {
|
||||
int result = Float.compare(missingValue, bottom);
|
||||
return reverse ? (result >= 0) : (result <= 0);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void encodeBottom(byte[] packedValue) {
|
||||
FloatPoint.encodeDimension(bottom, packedValue, 0);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void encodeTop(byte[] packedValue) {
|
||||
FloatPoint.encodeDimension(topValue, packedValue, 0);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,119 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.lucene.search.comparators;
|
||||
|
||||
import org.apache.lucene.document.IntPoint;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.search.LeafFieldComparator;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
/**
|
||||
* Comparator based on {@link Integer#compare} for {@code numHits}.
|
||||
* This comparator provides a skipping functionality – an iterator that can skip over non-competitive documents.
|
||||
*/
|
||||
public class IntComparator extends NumericComparator<Integer> {
|
||||
private final int[] values;
|
||||
protected int topValue;
|
||||
protected int bottom;
|
||||
|
||||
public IntComparator(int numHits, String field, Integer missingValue, boolean reverse, int sortPos) {
|
||||
super(field, missingValue != null ? missingValue : 0, reverse, sortPos, Integer.BYTES);
|
||||
values = new int[numHits];
|
||||
}
|
||||
|
||||
@Override
|
||||
public int compare(int slot1, int slot2) {
|
||||
return Integer.compare(values[slot1], values[slot2]);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setTopValue(Integer value) {
|
||||
super.setTopValue(value);
|
||||
topValue = value;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Integer value(int slot) {
|
||||
return Integer.valueOf(values[slot]);
|
||||
}
|
||||
|
||||
@Override
|
||||
public LeafFieldComparator getLeafComparator(LeafReaderContext context) throws IOException {
|
||||
return new IntLeafComparator(context);
|
||||
}
|
||||
|
||||
/**
|
||||
* Leaf comparator for {@link IntComparator} that provides skipping functionality
|
||||
*/
|
||||
public class IntLeafComparator extends NumericLeafComparator {
|
||||
|
||||
public IntLeafComparator(LeafReaderContext context) throws IOException {
|
||||
super(context);
|
||||
}
|
||||
|
||||
private int getValueForDoc(int doc) throws IOException {
|
||||
if (docValues.advanceExact(doc)) {
|
||||
return (int) docValues.longValue();
|
||||
} else {
|
||||
return missingValue;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setBottom(int slot) throws IOException {
|
||||
bottom = values[slot];
|
||||
super.setBottom(slot);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int compareBottom(int doc) throws IOException {
|
||||
return Integer.compare(bottom, getValueForDoc(doc));
|
||||
}
|
||||
|
||||
@Override
|
||||
public int compareTop(int doc) throws IOException {
|
||||
return Integer.compare(topValue, getValueForDoc(doc));
|
||||
}
|
||||
|
||||
@Override
|
||||
public void copy(int slot, int doc) throws IOException {
|
||||
values[slot] = getValueForDoc(doc);
|
||||
super.copy(slot, doc);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected boolean isMissingValueCompetitive() {
|
||||
int result = Integer.compare(missingValue, bottom);
|
||||
// in reverse (desc) sort missingValue is competitive when it's greater or equal to bottom,
|
||||
// in asc sort missingValue is competitive when it's smaller or equal to bottom
|
||||
return reverse ? (result >= 0) : (result <= 0);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void encodeBottom(byte[] packedValue) {
|
||||
IntPoint.encodeDimension(bottom, packedValue, 0);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void encodeTop(byte[] packedValue) {
|
||||
IntPoint.encodeDimension(topValue, packedValue, 0);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,119 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.lucene.search.comparators;
|
||||
|
||||
import org.apache.lucene.document.LongPoint;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.search.LeafFieldComparator;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
/**
|
||||
* Comparator based on {@link Long#compare} for {@code numHits}.
|
||||
* This comparator provides a skipping functionality – an iterator that can skip over non-competitive documents.
|
||||
*/
|
||||
public class LongComparator extends NumericComparator<Long> {
|
||||
private final long[] values;
|
||||
protected long topValue;
|
||||
protected long bottom;
|
||||
|
||||
public LongComparator(int numHits, String field, Long missingValue, boolean reverse, int sortPos) {
|
||||
super(field,missingValue != null ? missingValue : 0L, reverse, sortPos, Long.BYTES);
|
||||
values = new long[numHits];
|
||||
}
|
||||
|
||||
@Override
|
||||
public int compare(int slot1, int slot2) {
|
||||
return Long.compare(values[slot1], values[slot2]);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setTopValue(Long value) {
|
||||
super.setTopValue(value);
|
||||
topValue = value;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Long value(int slot) {
|
||||
return Long.valueOf(values[slot]);
|
||||
}
|
||||
|
||||
@Override
|
||||
public LeafFieldComparator getLeafComparator(LeafReaderContext context) throws IOException {
|
||||
return new LongLeafComparator(context);
|
||||
}
|
||||
|
||||
/**
|
||||
* Leaf comparator for {@link LongComparator} that provides skipping functionality
|
||||
*/
|
||||
public class LongLeafComparator extends NumericLeafComparator {
|
||||
|
||||
public LongLeafComparator(LeafReaderContext context) throws IOException {
|
||||
super(context);
|
||||
}
|
||||
|
||||
private long getValueForDoc(int doc) throws IOException {
|
||||
if (docValues.advanceExact(doc)) {
|
||||
return docValues.longValue();
|
||||
} else {
|
||||
return missingValue;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setBottom(int slot) throws IOException {
|
||||
bottom = values[slot];
|
||||
super.setBottom(slot);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int compareBottom(int doc) throws IOException {
|
||||
return Long.compare(bottom, getValueForDoc(doc));
|
||||
}
|
||||
|
||||
@Override
|
||||
public int compareTop(int doc) throws IOException {
|
||||
return Long.compare(topValue, getValueForDoc(doc));
|
||||
}
|
||||
|
||||
@Override
|
||||
public void copy(int slot, int doc) throws IOException {
|
||||
values[slot] = getValueForDoc(doc);
|
||||
super.copy(slot, doc);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected boolean isMissingValueCompetitive() {
|
||||
int result = Long.compare(missingValue, bottom);
|
||||
// in reverse (desc) sort missingValue is competitive when it's greater or equal to bottom,
|
||||
// in asc sort missingValue is competitive when it's smaller or equal to bottom
|
||||
return reverse ? (result >= 0) : (result <= 0);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void encodeBottom(byte[] packedValue) {
|
||||
LongPoint.encodeDimension(bottom, packedValue, 0);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void encodeTop(byte[] packedValue) {
|
||||
LongPoint.encodeDimension(topValue, packedValue, 0);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,66 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.lucene.search.comparators;
|
||||
|
||||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
/**
|
||||
* Docs iterator that starts iterating from a configurable minimum document
|
||||
*/
|
||||
public class MinDocIterator extends DocIdSetIterator {
|
||||
final int segmentMinDoc;
|
||||
final int maxDoc;
|
||||
int doc = -1;
|
||||
|
||||
MinDocIterator(int segmentMinDoc, int maxDoc) {
|
||||
this.segmentMinDoc = segmentMinDoc;
|
||||
this.maxDoc = maxDoc;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int docID() {
|
||||
return doc;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int nextDoc() throws IOException {
|
||||
return advance(doc + 1);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int advance(int target) throws IOException {
|
||||
assert target > doc;
|
||||
if (doc == -1) {
|
||||
// skip directly to minDoc
|
||||
doc = Math.max(target, segmentMinDoc);
|
||||
} else {
|
||||
doc = target;
|
||||
}
|
||||
if (doc >= maxDoc) {
|
||||
doc = NO_MORE_DOCS;
|
||||
}
|
||||
return doc;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long cost() {
|
||||
return maxDoc - segmentMinDoc;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,253 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.lucene.search.comparators;
|
||||
|
||||
import org.apache.lucene.index.DocValues;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.NumericDocValues;
|
||||
import org.apache.lucene.index.PointValues;
|
||||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
import org.apache.lucene.search.FieldComparator;
|
||||
import org.apache.lucene.search.LeafFieldComparator;
|
||||
import org.apache.lucene.search.Scorable;
|
||||
import org.apache.lucene.search.Scorer;
|
||||
import org.apache.lucene.util.DocIdSetBuilder;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
|
||||
/**
|
||||
* Abstract numeric comparator for comparing numeric values.
|
||||
* This comparator provides a skipping functionality – an iterator that can skip over non-competitive documents.
|
||||
*/
|
||||
public abstract class NumericComparator<T extends Number> extends FieldComparator<T> {
|
||||
protected final T missingValue;
|
||||
protected final String field;
|
||||
protected final boolean reverse;
|
||||
protected final boolean primarySort;
|
||||
private final int bytesCount; // how many bytes are used to encode this number
|
||||
|
||||
protected boolean topValueSet;
|
||||
protected boolean singleSort; // singleSort is true, if sort is based on a single sort field.
|
||||
protected boolean hitsThresholdReached;
|
||||
protected boolean queueFull;
|
||||
|
||||
protected NumericComparator(String field, T missingValue, boolean reverse, int sortPos, int bytesCount) {
|
||||
this.field = field;
|
||||
this.missingValue = missingValue;
|
||||
this.reverse = reverse;
|
||||
this.primarySort = (sortPos == 0);
|
||||
this.bytesCount = bytesCount;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setTopValue(T value) {
|
||||
topValueSet = true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setSingleSort() {
|
||||
singleSort = true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Leaf comparator for {@link NumericComparator} that provides skipping functionality
|
||||
*/
|
||||
public abstract class NumericLeafComparator implements LeafFieldComparator {
|
||||
protected final NumericDocValues docValues;
|
||||
private final PointValues pointValues;
|
||||
private final boolean enableSkipping; // if skipping functionality should be enabled
|
||||
private final int maxDoc;
|
||||
private final byte[] minValueAsBytes;
|
||||
private final byte[] maxValueAsBytes;
|
||||
|
||||
private DocIdSetIterator competitiveIterator;
|
||||
private long iteratorCost;
|
||||
private int maxDocVisited = 0;
|
||||
private int updateCounter = 0;
|
||||
|
||||
public NumericLeafComparator(LeafReaderContext context) throws IOException {
|
||||
this.docValues = getNumericDocValues(context, field);
|
||||
this.pointValues = primarySort ? context.reader().getPointValues(field) : null;
|
||||
if (pointValues != null) {
|
||||
this.enableSkipping = true; // skipping is enabled on primarySort and when points are available
|
||||
this.maxDoc = context.reader().maxDoc();
|
||||
this.maxValueAsBytes = reverse == false ? new byte[bytesCount] : topValueSet ? new byte[bytesCount] : null;
|
||||
this.minValueAsBytes = reverse ? new byte[bytesCount] : topValueSet ? new byte[bytesCount] : null;
|
||||
this.competitiveIterator = DocIdSetIterator.all(maxDoc);
|
||||
this.iteratorCost = maxDoc;
|
||||
} else {
|
||||
this.enableSkipping = false;
|
||||
this.maxDoc = 0;
|
||||
this.maxValueAsBytes = null;
|
||||
this.minValueAsBytes = null;
|
||||
}
|
||||
}
|
||||
|
||||
/** Retrieves the NumericDocValues for the field in this segment */
|
||||
protected NumericDocValues getNumericDocValues(LeafReaderContext context, String field) throws IOException {
|
||||
return DocValues.getNumeric(context.reader(), field);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setBottom(int slot) throws IOException {
|
||||
queueFull = true; // if we are setting bottom, it means that we have collected enough hits
|
||||
updateCompetitiveIterator(); // update an iterator if we set a new bottom
|
||||
}
|
||||
|
||||
@Override
|
||||
public void copy(int slot, int doc) throws IOException {
|
||||
maxDocVisited = doc;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setScorer(Scorable scorer) throws IOException {
|
||||
if (scorer instanceof Scorer) {
|
||||
iteratorCost = ((Scorer) scorer).iterator().cost(); // starting iterator cost is the scorer's cost
|
||||
updateCompetitiveIterator(); // update an iterator when we have a new segment
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setHitsThresholdReached() throws IOException {
|
||||
hitsThresholdReached = true;
|
||||
updateCompetitiveIterator();
|
||||
}
|
||||
|
||||
// update its iterator to include possibly only docs that are "stronger" than the current bottom entry
|
||||
private void updateCompetitiveIterator() throws IOException {
|
||||
if (enableSkipping == false || hitsThresholdReached == false || queueFull == false) return;
|
||||
// if some documents have missing points, check that missing values prohibits optimization
|
||||
if ((pointValues.getDocCount() < maxDoc) && isMissingValueCompetitive()) {
|
||||
return; // we can't filter out documents, as documents with missing values are competitive
|
||||
}
|
||||
|
||||
updateCounter++;
|
||||
if (updateCounter > 256 && (updateCounter & 0x1f) != 0x1f) { // Start sampling if we get called too much
|
||||
return;
|
||||
}
|
||||
if (reverse == false) {
|
||||
encodeBottom(maxValueAsBytes);
|
||||
if (topValueSet) {
|
||||
encodeTop(minValueAsBytes);
|
||||
}
|
||||
} else {
|
||||
encodeBottom(minValueAsBytes);
|
||||
if (topValueSet) {
|
||||
encodeTop(maxValueAsBytes);
|
||||
}
|
||||
}
|
||||
|
||||
DocIdSetBuilder result = new DocIdSetBuilder(maxDoc);
|
||||
PointValues.IntersectVisitor visitor = new PointValues.IntersectVisitor() {
|
||||
DocIdSetBuilder.BulkAdder adder;
|
||||
|
||||
@Override
|
||||
public void grow(int count) {
|
||||
adder = result.grow(count);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void visit(int docID) {
|
||||
if (docID <= maxDocVisited) {
|
||||
return; // Already visited or skipped
|
||||
}
|
||||
adder.add(docID);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void visit(int docID, byte[] packedValue) {
|
||||
if (docID <= maxDocVisited) {
|
||||
return; // already visited or skipped
|
||||
}
|
||||
if (maxValueAsBytes != null) {
|
||||
int cmp = Arrays.compareUnsigned(packedValue, 0, bytesCount, maxValueAsBytes, 0, bytesCount);
|
||||
// if doc's value is too high or for single sort even equal, it is not competitive and the doc can be skipped
|
||||
if (cmp > 0 || (singleSort && cmp == 0)) return;
|
||||
}
|
||||
if (minValueAsBytes != null) {
|
||||
int cmp = Arrays.compareUnsigned(packedValue, 0, bytesCount, minValueAsBytes, 0, bytesCount);
|
||||
// if doc's value is too low or for single sort even equal, it is not competitive and the doc can be skipped
|
||||
if (cmp < 0 || (singleSort && cmp == 0)) return;
|
||||
}
|
||||
adder.add(docID); // doc is competitive
|
||||
}
|
||||
|
||||
@Override
|
||||
public PointValues.Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
|
||||
if (maxValueAsBytes != null) {
|
||||
int cmp = Arrays.compareUnsigned(minPackedValue, 0, bytesCount, maxValueAsBytes, 0, bytesCount);
|
||||
if (cmp > 0 || (singleSort && cmp == 0)) return PointValues.Relation.CELL_OUTSIDE_QUERY;
|
||||
}
|
||||
if (minValueAsBytes != null) {
|
||||
int cmp = Arrays.compareUnsigned(maxPackedValue, 0, bytesCount, minValueAsBytes, 0, bytesCount);
|
||||
if (cmp < 0 || (singleSort && cmp == 0)) return PointValues.Relation.CELL_OUTSIDE_QUERY;
|
||||
}
|
||||
if ((maxValueAsBytes != null && Arrays.compareUnsigned(maxPackedValue, 0, bytesCount, maxValueAsBytes, 0, bytesCount) > 0) ||
|
||||
(minValueAsBytes != null && Arrays.compareUnsigned(minPackedValue, 0, bytesCount, minValueAsBytes, 0, bytesCount) < 0)) {
|
||||
return PointValues.Relation.CELL_CROSSES_QUERY;
|
||||
}
|
||||
return PointValues.Relation.CELL_INSIDE_QUERY;
|
||||
}
|
||||
};
|
||||
final long threshold = iteratorCost >>> 3;
|
||||
long estimatedNumberOfMatches = pointValues.estimatePointCount(visitor); // runs in O(log(numPoints))
|
||||
if (estimatedNumberOfMatches >= threshold) {
|
||||
// the new range is not selective enough to be worth materializing, it doesn't reduce number of docs at least 8x
|
||||
return;
|
||||
}
|
||||
pointValues.intersect(visitor);
|
||||
competitiveIterator = result.build().iterator();
|
||||
iteratorCost = competitiveIterator.cost();
|
||||
}
|
||||
|
||||
@Override
|
||||
public DocIdSetIterator competitiveIterator() {
|
||||
if (enableSkipping == false) return null;
|
||||
return new DocIdSetIterator() {
|
||||
private int doc;
|
||||
|
||||
@Override
|
||||
public int nextDoc() throws IOException {
|
||||
return doc = competitiveIterator.nextDoc();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int docID() {
|
||||
return doc;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long cost() {
|
||||
return competitiveIterator.cost();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int advance(int target) throws IOException {
|
||||
return doc = competitiveIterator.advance(target);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
protected abstract boolean isMissingValueCompetitive();
|
||||
|
||||
protected abstract void encodeBottom(byte[] packedValue);
|
||||
|
||||
protected abstract void encodeTop(byte[] packedValue);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,23 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Comparators, used to compare hits so as to determine their
|
||||
* sort order when collecting the top results with
|
||||
* {@link org.apache.lucene.search.TopFieldCollector}.
|
||||
*/
|
||||
package org.apache.lucene.search.comparators;
|
|
@ -17,20 +17,25 @@
|
|||
package org.apache.lucene.search;
|
||||
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.FloatDocValuesField;
|
||||
import org.apache.lucene.document.LongPoint;
|
||||
import org.apache.lucene.document.IntPoint;
|
||||
import org.apache.lucene.document.FloatPoint;
|
||||
import org.apache.lucene.document.NumericDocValuesField;
|
||||
import org.apache.lucene.document.StoredField;
|
||||
import org.apache.lucene.document.StringField;
|
||||
import org.apache.lucene.index.DirectoryReader;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.IndexWriter;
|
||||
import org.apache.lucene.index.IndexWriterConfig;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import static org.apache.lucene.search.SortField.FIELD_DOC;
|
||||
import static org.apache.lucene.search.SortField.FIELD_SCORE;
|
||||
|
||||
public class TestFieldSortOptimizationSkipping extends LuceneTestCase {
|
||||
|
@ -97,6 +102,14 @@ public class TestFieldSortOptimizationSkipping extends LuceneTestCase {
|
|||
assertTrue(topDocs.totalHits.value < numDocs);
|
||||
}
|
||||
|
||||
{ // test that if numeric field is a secondary sort, no optimization is run
|
||||
final TopFieldCollector collector = TopFieldCollector.create(new Sort(FIELD_SCORE, sortField), numHits, null, totalHitsThreshold);
|
||||
searcher.search(new MatchAllDocsQuery(), collector);
|
||||
TopDocs topDocs = collector.topDocs();
|
||||
assertEquals(topDocs.scoreDocs.length, numHits);
|
||||
assertEquals(topDocs.totalHits.value, numDocs); // assert that all documents were collected => optimization was not run
|
||||
}
|
||||
|
||||
writer.close();
|
||||
reader.close();
|
||||
dir.close();
|
||||
|
@ -290,5 +303,138 @@ public class TestFieldSortOptimizationSkipping extends LuceneTestCase {
|
|||
dir.close();
|
||||
}
|
||||
|
||||
public void testDocSortOptimizationWithAfter() throws IOException {
|
||||
final Directory dir = newDirectory();
|
||||
final IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig());
|
||||
final int numDocs = atLeast(150);
|
||||
for (int i = 0; i < numDocs; ++i) {
|
||||
final Document doc = new Document();
|
||||
writer.addDocument(doc);
|
||||
if ((i > 0) && (i % 50 == 0)) {
|
||||
writer.commit();
|
||||
}
|
||||
}
|
||||
final IndexReader reader = DirectoryReader.open(writer);
|
||||
IndexSearcher searcher = new IndexSearcher(reader);
|
||||
final int numHits = 3;
|
||||
final int totalHitsThreshold = 3;
|
||||
final int[] searchAfters = {10, 140, numDocs - 4};
|
||||
for (int searchAfter : searchAfters) {
|
||||
// sort by _doc with search after should trigger optimization
|
||||
{
|
||||
final Sort sort = new Sort(FIELD_DOC);
|
||||
FieldDoc after = new FieldDoc(searchAfter, Float.NaN, new Integer[]{searchAfter});
|
||||
final TopFieldCollector collector = TopFieldCollector.create(sort, numHits, after, totalHitsThreshold);
|
||||
searcher.search(new MatchAllDocsQuery(), collector);
|
||||
TopDocs topDocs = collector.topDocs();
|
||||
assertEquals(numHits, topDocs.scoreDocs.length);
|
||||
for (int i = 0; i < numHits; i++) {
|
||||
int expectedDocID = searchAfter + 1 + i;
|
||||
assertEquals(expectedDocID, topDocs.scoreDocs[i].doc);
|
||||
}
|
||||
assertTrue(collector.isEarlyTerminated());
|
||||
// check that very few docs were collected
|
||||
assertTrue(topDocs.totalHits.value < 10);
|
||||
}
|
||||
|
||||
// sort by _doc + _score with search after should trigger optimization
|
||||
{
|
||||
final Sort sort = new Sort(FIELD_DOC, FIELD_SCORE);
|
||||
FieldDoc after = new FieldDoc(searchAfter, Float.NaN, new Object[]{searchAfter, 1.0f});
|
||||
final TopFieldCollector collector = TopFieldCollector.create(sort, numHits, after, totalHitsThreshold);
|
||||
searcher.search(new MatchAllDocsQuery(), collector);
|
||||
TopDocs topDocs = collector.topDocs();
|
||||
assertEquals(numHits, topDocs.scoreDocs.length);
|
||||
for (int i = 0; i < numHits; i++) {
|
||||
int expectedDocID = searchAfter + 1 + i;
|
||||
assertEquals(expectedDocID, topDocs.scoreDocs[i].doc);
|
||||
}
|
||||
assertTrue(collector.isEarlyTerminated());
|
||||
// assert that very few docs were collected
|
||||
assertTrue(topDocs.totalHits.value < 10);
|
||||
}
|
||||
|
||||
// sort by _doc desc should not trigger optimization
|
||||
{
|
||||
final Sort sort = new Sort(new SortField(null, SortField.Type.DOC, true));
|
||||
FieldDoc after = new FieldDoc(searchAfter, Float.NaN, new Integer[]{searchAfter});
|
||||
final TopFieldCollector collector = TopFieldCollector.create(sort, numHits, after, totalHitsThreshold);
|
||||
searcher.search(new MatchAllDocsQuery(), collector);
|
||||
TopDocs topDocs = collector.topDocs();
|
||||
assertEquals(numHits, topDocs.scoreDocs.length);
|
||||
for (int i = 0; i < numHits; i++) {
|
||||
int expectedDocID = searchAfter - 1 - i;
|
||||
assertEquals(expectedDocID, topDocs.scoreDocs[i].doc);
|
||||
}
|
||||
// assert that all documents were collected
|
||||
assertEquals(numDocs, topDocs.totalHits.value);
|
||||
}
|
||||
}
|
||||
|
||||
writer.close();
|
||||
reader.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
|
||||
public void testDocSortOptimization() throws IOException {
|
||||
final Directory dir = newDirectory();
|
||||
final IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig());
|
||||
final int numDocs = atLeast(100);
|
||||
int seg = 1;
|
||||
for (int i = 0; i < numDocs; ++i) {
|
||||
final Document doc = new Document();
|
||||
doc.add(new LongPoint("lf", i));
|
||||
doc.add(new StoredField("slf", i));
|
||||
doc.add(new StringField("tf", "seg" + seg, Field.Store.YES));
|
||||
writer.addDocument(doc);
|
||||
if ((i > 0) && (i % 50 == 0)) {
|
||||
writer.commit();
|
||||
seg++;
|
||||
}
|
||||
}
|
||||
final IndexReader reader = DirectoryReader.open(writer);
|
||||
IndexSearcher searcher = new IndexSearcher(reader);
|
||||
final int numHits = 3;
|
||||
final int totalHitsThreshold = 3;
|
||||
final Sort sort = new Sort(FIELD_DOC);
|
||||
|
||||
// sort by _doc should skip all non-competitive documents
|
||||
{
|
||||
final TopFieldCollector collector = TopFieldCollector.create(sort, numHits, null, totalHitsThreshold);
|
||||
searcher.search(new MatchAllDocsQuery(), collector);
|
||||
TopDocs topDocs = collector.topDocs();
|
||||
assertEquals(numHits, topDocs.scoreDocs.length);
|
||||
for (int i = 0; i < numHits; i++) {
|
||||
assertEquals(i, topDocs.scoreDocs[i].doc);
|
||||
}
|
||||
assertTrue(collector.isEarlyTerminated());
|
||||
assertTrue(topDocs.totalHits.value < 10); // assert that very few docs were collected
|
||||
}
|
||||
|
||||
// sort by _doc with a bool query should skip all non-competitive documents
|
||||
{
|
||||
final TopFieldCollector collector = TopFieldCollector.create(sort, numHits, null, totalHitsThreshold);
|
||||
int lowerRange = 40;
|
||||
BooleanQuery.Builder bq = new BooleanQuery.Builder();
|
||||
bq.add(LongPoint.newRangeQuery("lf", lowerRange, Long.MAX_VALUE), BooleanClause.Occur.MUST);
|
||||
bq.add(new TermQuery(new Term("tf", "seg1")), BooleanClause.Occur.MUST);
|
||||
searcher.search(bq.build(), collector);
|
||||
|
||||
TopDocs topDocs = collector.topDocs();
|
||||
assertEquals(numHits, topDocs.scoreDocs.length);
|
||||
for (int i = 0; i < numHits; i++) {
|
||||
Document d = searcher.doc(topDocs.scoreDocs[i].doc);
|
||||
assertEquals(Integer.toString(i + lowerRange), d.get("slf"));
|
||||
assertEquals("seg1", d.get("tf"));
|
||||
}
|
||||
assertTrue(collector.isEarlyTerminated());
|
||||
assertTrue(topDocs.totalHits.value < 10); // assert that very few docs were collected
|
||||
}
|
||||
|
||||
writer.close();
|
||||
reader.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -76,7 +76,7 @@ public class TestNeedsScores extends LuceneTestCase {
|
|||
|
||||
/** when not sorting by score */
|
||||
public void testSortByField() throws Exception {
|
||||
Query query = new AssertNeedsScores(new MatchAllDocsQuery(), ScoreMode.COMPLETE_NO_SCORES);
|
||||
Query query = new AssertNeedsScores(new MatchAllDocsQuery(), ScoreMode.TOP_DOCS);
|
||||
assertEquals(5, searcher.search(query, 5, Sort.INDEXORDER).totalHits.value);
|
||||
}
|
||||
|
||||
|
|
|
@ -26,7 +26,12 @@ import org.apache.lucene.index.SortedDocValues;
|
|||
import org.apache.lucene.index.SortedNumericDocValues;
|
||||
import org.apache.lucene.index.SortedSetDocValues;
|
||||
import org.apache.lucene.search.FieldComparator;
|
||||
import org.apache.lucene.search.LeafFieldComparator;
|
||||
import org.apache.lucene.search.SortField;
|
||||
import org.apache.lucene.search.comparators.DoubleComparator;
|
||||
import org.apache.lucene.search.comparators.FloatComparator;
|
||||
import org.apache.lucene.search.comparators.IntComparator;
|
||||
import org.apache.lucene.search.comparators.LongComparator;
|
||||
import org.apache.lucene.util.BitSet;
|
||||
import org.apache.lucene.util.NumericUtils;
|
||||
|
||||
|
@ -94,13 +99,13 @@ public class ToParentBlockJoinSortField extends SortField {
|
|||
case STRING:
|
||||
return getStringComparator(numHits);
|
||||
case DOUBLE:
|
||||
return getDoubleComparator(numHits);
|
||||
return getDoubleComparator(numHits, sortPos);
|
||||
case FLOAT:
|
||||
return getFloatComparator(numHits);
|
||||
return getFloatComparator(numHits, sortPos);
|
||||
case LONG:
|
||||
return getLongComparator(numHits);
|
||||
return getLongComparator(numHits, sortPos);
|
||||
case INT:
|
||||
return getIntComparator(numHits);
|
||||
return getIntComparator(numHits, sortPos);
|
||||
default:
|
||||
throw new UnsupportedOperationException("Sort type " + getType() + " is not supported");
|
||||
}
|
||||
|
@ -126,8 +131,11 @@ public class ToParentBlockJoinSortField extends SortField {
|
|||
};
|
||||
}
|
||||
|
||||
private FieldComparator<?> getIntComparator(int numHits) {
|
||||
return new FieldComparator.IntComparator(numHits, getField(), (Integer) missingValue) {
|
||||
private FieldComparator<?> getIntComparator(int numHits, int sortPos) {
|
||||
return new IntComparator(numHits, getField(), (Integer) missingValue, getReverse(), sortPos) {
|
||||
@Override
|
||||
public LeafFieldComparator getLeafComparator(LeafReaderContext context) throws IOException {
|
||||
return new IntLeafComparator(context) {
|
||||
@Override
|
||||
protected NumericDocValues getNumericDocValues(LeafReaderContext context, String field) throws IOException {
|
||||
SortedNumericDocValues sortedNumeric = DocValues.getSortedNumeric(context.reader(), field);
|
||||
|
@ -143,9 +151,14 @@ public class ToParentBlockJoinSortField extends SortField {
|
|||
}
|
||||
};
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
private FieldComparator<?> getLongComparator(int numHits) {
|
||||
return new FieldComparator.LongComparator(numHits, getField(), (Long) missingValue) {
|
||||
private FieldComparator<?> getLongComparator(int numHits, int sortPos) {
|
||||
return new LongComparator(numHits, getField(), (Long) missingValue, getReverse(), sortPos) {
|
||||
@Override
|
||||
public LeafFieldComparator getLeafComparator(LeafReaderContext context) throws IOException {
|
||||
return new LongLeafComparator(context) {
|
||||
@Override
|
||||
protected NumericDocValues getNumericDocValues(LeafReaderContext context, String field) throws IOException {
|
||||
SortedNumericDocValues sortedNumeric = DocValues.getSortedNumeric(context.reader(), field);
|
||||
|
@ -161,9 +174,14 @@ public class ToParentBlockJoinSortField extends SortField {
|
|||
}
|
||||
};
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
private FieldComparator<?> getFloatComparator(int numHits) {
|
||||
return new FieldComparator.FloatComparator(numHits, getField(), (Float) missingValue) {
|
||||
private FieldComparator<?> getFloatComparator(int numHits, int sortPos) {
|
||||
return new FloatComparator(numHits, getField(), (Float) missingValue, getReverse(), sortPos) {
|
||||
@Override
|
||||
public LeafFieldComparator getLeafComparator(LeafReaderContext context) throws IOException {
|
||||
return new FloatLeafComparator(context) {
|
||||
@Override
|
||||
protected NumericDocValues getNumericDocValues(LeafReaderContext context, String field) throws IOException {
|
||||
SortedNumericDocValues sortedNumeric = DocValues.getSortedNumeric(context.reader(), field);
|
||||
|
@ -184,10 +202,15 @@ public class ToParentBlockJoinSortField extends SortField {
|
|||
};
|
||||
}
|
||||
};
|
||||
};
|
||||
};
|
||||
}
|
||||
|
||||
private FieldComparator<?> getDoubleComparator(int numHits) {
|
||||
return new FieldComparator.DoubleComparator(numHits, getField(), (Double) missingValue) {
|
||||
private FieldComparator<?> getDoubleComparator(int numHits, int sortPost) {
|
||||
return new DoubleComparator(numHits, getField(), (Double) missingValue, getReverse(), sortPost) {
|
||||
@Override
|
||||
public LeafFieldComparator getLeafComparator(LeafReaderContext context) throws IOException {
|
||||
return new DoubleLeafComparator(context) {
|
||||
@Override
|
||||
protected NumericDocValues getNumericDocValues(LeafReaderContext context, String field) throws IOException {
|
||||
SortedNumericDocValues sortedNumeric = DocValues.getSortedNumeric(context.reader(), field);
|
||||
|
@ -209,6 +232,8 @@ public class ToParentBlockJoinSortField extends SortField {
|
|||
}
|
||||
};
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
|
|
Loading…
Reference in New Issue