mirror of https://github.com/apache/lucene.git
Some minor code cleanup in IndexSortSortedNumericDocValuesRangeQuery (#12003)
* Leverage DISI static factory methods more over custom DISI impl where possible. * Assert points field is a single-dim. * Bound cost estimate by the cost of the doc values field (for sparse fields).
This commit is contained in:
parent
54e00df7f6
commit
8671e29929
|
@ -157,6 +157,8 @@ Improvements
|
|||
* GITHUB#11860: Improve storage efficiency of connections in the HNSW graph that Lucene uses for
|
||||
vector search. (Ben Trent)
|
||||
|
||||
* GITHUB#12003: Minor cleanup/improvements to IndexSortSortedNumericDocValuesRangeQuery. (Greg Miller)
|
||||
|
||||
Bug Fixes
|
||||
---------------------
|
||||
* GITHUB#11726: Indexing term vectors on large documents could fail due to
|
||||
|
|
|
@ -176,8 +176,9 @@ public class IndexSortSortedNumericDocValuesRangeQuery extends Query {
|
|||
@Override
|
||||
public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException {
|
||||
final Weight weight = this;
|
||||
DocIdSetIterator disi = getDocIdSetIteratorOrNull(context);
|
||||
if (disi != null) {
|
||||
IteratorAndCount itAndCount = getDocIdSetIteratorOrNull(context);
|
||||
if (itAndCount != null) {
|
||||
DocIdSetIterator disi = itAndCount.it;
|
||||
return new ScorerSupplier() {
|
||||
@Override
|
||||
public Scorer get(long leadCost) throws IOException {
|
||||
|
@ -212,9 +213,9 @@ public class IndexSortSortedNumericDocValuesRangeQuery extends Query {
|
|||
@Override
|
||||
public int count(LeafReaderContext context) throws IOException {
|
||||
if (context.reader().hasDeletions() == false) {
|
||||
BoundedDocIdSetIterator disi = getDocIdSetIteratorOrNull(context);
|
||||
if (disi != null && disi.delegate == null) {
|
||||
return disi.lastDoc - disi.firstDoc;
|
||||
IteratorAndCount itAndCount = getDocIdSetIteratorOrNull(context);
|
||||
if (itAndCount != null && itAndCount.count != -1) {
|
||||
return itAndCount.count;
|
||||
}
|
||||
}
|
||||
return fallbackWeight.count(context);
|
||||
|
@ -406,119 +407,123 @@ public class IndexSortSortedNumericDocValuesRangeQuery extends Query {
|
|||
|
||||
private boolean matchNone(PointValues points, byte[] queryLowerPoint, byte[] queryUpperPoint)
|
||||
throws IOException {
|
||||
assert points.getNumDimensions() == 1;
|
||||
final ByteArrayComparator comparator =
|
||||
ArrayUtil.getUnsignedComparator(points.getBytesPerDimension());
|
||||
for (int dim = 0; dim < points.getNumDimensions(); dim++) {
|
||||
int offset = dim * points.getBytesPerDimension();
|
||||
if (comparator.compare(points.getMinPackedValue(), offset, queryUpperPoint, offset) > 0
|
||||
|| comparator.compare(points.getMaxPackedValue(), offset, queryLowerPoint, offset) < 0) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
return comparator.compare(points.getMinPackedValue(), 0, queryUpperPoint, 0) > 0
|
||||
|| comparator.compare(points.getMaxPackedValue(), 0, queryLowerPoint, 0) < 0;
|
||||
}
|
||||
|
||||
private boolean matchAll(PointValues points, byte[] queryLowerPoint, byte[] queryUpperPoint)
|
||||
throws IOException {
|
||||
assert points.getNumDimensions() == 1;
|
||||
final ByteArrayComparator comparator =
|
||||
ArrayUtil.getUnsignedComparator(points.getBytesPerDimension());
|
||||
for (int dim = 0; dim < points.getNumDimensions(); dim++) {
|
||||
int offset = dim * points.getBytesPerDimension();
|
||||
if (comparator.compare(points.getMinPackedValue(), offset, queryLowerPoint, offset) >= 0
|
||||
&& comparator.compare(points.getMaxPackedValue(), offset, queryUpperPoint, offset) <= 0) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
return comparator.compare(points.getMinPackedValue(), 0, queryLowerPoint, 0) >= 0
|
||||
&& comparator.compare(points.getMaxPackedValue(), 0, queryUpperPoint, 0) <= 0;
|
||||
}
|
||||
|
||||
private BoundedDocIdSetIterator getDocIdSetIteratorOrNullFromBkd(
|
||||
private IteratorAndCount getDocIdSetIteratorOrNullFromBkd(
|
||||
LeafReaderContext context, DocIdSetIterator delegate) throws IOException {
|
||||
Sort indexSort = context.reader().getMetaData().getSort();
|
||||
if (indexSort != null
|
||||
&& indexSort.getSort().length > 0
|
||||
&& indexSort.getSort()[0].getField().equals(field)) {
|
||||
final boolean reverse = indexSort.getSort()[0].getReverse();
|
||||
PointValues points = context.reader().getPointValues(field);
|
||||
if (points == null) {
|
||||
return null;
|
||||
}
|
||||
if (indexSort == null
|
||||
|| indexSort.getSort().length == 0
|
||||
|| indexSort.getSort()[0].getField().equals(field) == false) {
|
||||
return null;
|
||||
}
|
||||
|
||||
if (points.getNumDimensions() != 1) {
|
||||
return null;
|
||||
}
|
||||
final boolean reverse = indexSort.getSort()[0].getReverse();
|
||||
|
||||
if (points.getBytesPerDimension() != Long.BYTES
|
||||
&& points.getBytesPerDimension() != Integer.BYTES) {
|
||||
return null;
|
||||
}
|
||||
PointValues points = context.reader().getPointValues(field);
|
||||
if (points == null) {
|
||||
return null;
|
||||
}
|
||||
|
||||
if (points.size() != points.getDocCount()) {
|
||||
return null;
|
||||
}
|
||||
if (points.getNumDimensions() != 1) {
|
||||
return null;
|
||||
}
|
||||
|
||||
byte[] queryLowerPoint;
|
||||
byte[] queryUpperPoint;
|
||||
if (points.getBytesPerDimension() == Integer.BYTES) {
|
||||
queryLowerPoint = IntPoint.pack((int) lowerValue).bytes;
|
||||
queryUpperPoint = IntPoint.pack((int) upperValue).bytes;
|
||||
if (points.getBytesPerDimension() != Long.BYTES
|
||||
&& points.getBytesPerDimension() != Integer.BYTES) {
|
||||
return null;
|
||||
}
|
||||
|
||||
if (points.size() != points.getDocCount()) {
|
||||
return null;
|
||||
}
|
||||
|
||||
assert lowerValue <= upperValue;
|
||||
byte[] queryLowerPoint;
|
||||
byte[] queryUpperPoint;
|
||||
if (points.getBytesPerDimension() == Integer.BYTES) {
|
||||
queryLowerPoint = IntPoint.pack((int) lowerValue).bytes;
|
||||
queryUpperPoint = IntPoint.pack((int) upperValue).bytes;
|
||||
} else {
|
||||
queryLowerPoint = LongPoint.pack(lowerValue).bytes;
|
||||
queryUpperPoint = LongPoint.pack(upperValue).bytes;
|
||||
}
|
||||
if (matchNone(points, queryLowerPoint, queryUpperPoint)) {
|
||||
return IteratorAndCount.empty();
|
||||
}
|
||||
if (matchAll(points, queryLowerPoint, queryUpperPoint)) {
|
||||
int maxDoc = context.reader().maxDoc();
|
||||
if (points.getDocCount() == maxDoc) {
|
||||
return IteratorAndCount.all(maxDoc);
|
||||
} else {
|
||||
queryLowerPoint = LongPoint.pack(lowerValue).bytes;
|
||||
queryUpperPoint = LongPoint.pack(upperValue).bytes;
|
||||
}
|
||||
if (lowerValue > upperValue || matchNone(points, queryLowerPoint, queryUpperPoint)) {
|
||||
return new BoundedDocIdSetIterator(0, 0, null);
|
||||
}
|
||||
int minDocId, maxDocId;
|
||||
if (matchAll(points, queryLowerPoint, queryUpperPoint)) {
|
||||
minDocId = 0;
|
||||
maxDocId = context.reader().maxDoc();
|
||||
} else {
|
||||
final ByteArrayComparator comparator =
|
||||
ArrayUtil.getUnsignedComparator(points.getBytesPerDimension());
|
||||
|
||||
if (reverse) {
|
||||
minDocId = nextDoc(points.getPointTree(), queryUpperPoint, false, comparator, true) + 1;
|
||||
} else {
|
||||
minDocId = nextDoc(points.getPointTree(), queryLowerPoint, true, comparator, false);
|
||||
if (minDocId == -1) {
|
||||
// No matches
|
||||
return new BoundedDocIdSetIterator(0, 0, null);
|
||||
}
|
||||
}
|
||||
|
||||
if (reverse) {
|
||||
maxDocId = nextDoc(points.getPointTree(), queryLowerPoint, true, comparator, true) + 1;
|
||||
if (maxDocId == 0) {
|
||||
// No matches
|
||||
return new BoundedDocIdSetIterator(0, 0, null);
|
||||
}
|
||||
} else {
|
||||
maxDocId = nextDoc(points.getPointTree(), queryUpperPoint, false, comparator, false);
|
||||
if (maxDocId == -1) {
|
||||
maxDocId = context.reader().maxDoc();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if ((points.getDocCount() == context.reader().maxDoc())) {
|
||||
return new BoundedDocIdSetIterator(minDocId, maxDocId, null);
|
||||
} else {
|
||||
return new BoundedDocIdSetIterator(minDocId, maxDocId, delegate);
|
||||
return IteratorAndCount.sparseRange(0, maxDoc, delegate);
|
||||
}
|
||||
}
|
||||
return null;
|
||||
|
||||
int minDocId, maxDocId;
|
||||
final ByteArrayComparator comparator =
|
||||
ArrayUtil.getUnsignedComparator(points.getBytesPerDimension());
|
||||
|
||||
if (reverse) {
|
||||
minDocId = nextDoc(points.getPointTree(), queryUpperPoint, false, comparator, true) + 1;
|
||||
} else {
|
||||
minDocId = nextDoc(points.getPointTree(), queryLowerPoint, true, comparator, false);
|
||||
if (minDocId == -1) {
|
||||
// No matches
|
||||
return IteratorAndCount.empty();
|
||||
}
|
||||
}
|
||||
|
||||
if (reverse) {
|
||||
maxDocId = nextDoc(points.getPointTree(), queryLowerPoint, true, comparator, true) + 1;
|
||||
if (maxDocId == 0) {
|
||||
// No matches
|
||||
return IteratorAndCount.empty();
|
||||
}
|
||||
} else {
|
||||
maxDocId = nextDoc(points.getPointTree(), queryUpperPoint, false, comparator, false);
|
||||
if (maxDocId == -1) {
|
||||
maxDocId = context.reader().maxDoc();
|
||||
}
|
||||
}
|
||||
|
||||
if (minDocId == maxDocId) {
|
||||
return IteratorAndCount.empty();
|
||||
}
|
||||
|
||||
if ((points.getDocCount() == context.reader().maxDoc())) {
|
||||
return IteratorAndCount.denseRange(minDocId, maxDocId);
|
||||
} else {
|
||||
return IteratorAndCount.sparseRange(minDocId, maxDocId, delegate);
|
||||
}
|
||||
}
|
||||
|
||||
private BoundedDocIdSetIterator getDocIdSetIteratorOrNull(LeafReaderContext context)
|
||||
throws IOException {
|
||||
private IteratorAndCount getDocIdSetIteratorOrNull(LeafReaderContext context) throws IOException {
|
||||
if (lowerValue > upperValue) {
|
||||
return IteratorAndCount.empty();
|
||||
}
|
||||
|
||||
SortedNumericDocValues sortedNumericValues =
|
||||
DocValues.getSortedNumeric(context.reader(), field);
|
||||
NumericDocValues numericValues = DocValues.unwrapSingleton(sortedNumericValues);
|
||||
if (numericValues != null) {
|
||||
BoundedDocIdSetIterator iterator = getDocIdSetIteratorOrNullFromBkd(context, numericValues);
|
||||
if (iterator != null) {
|
||||
return iterator;
|
||||
IteratorAndCount itAndCount = getDocIdSetIteratorOrNullFromBkd(context, numericValues);
|
||||
if (itAndCount != null) {
|
||||
return itAndCount;
|
||||
}
|
||||
Sort indexSort = context.reader().getMetaData().getSort();
|
||||
if (indexSort != null
|
||||
|
@ -548,7 +553,7 @@ public class IndexSortSortedNumericDocValuesRangeQuery extends Query {
|
|||
* {@link DocIdSetIterator} makes sure to wrap the original docvalues to skip over documents with
|
||||
* no value.
|
||||
*/
|
||||
private BoundedDocIdSetIterator getDocIdSetIterator(
|
||||
private IteratorAndCount getDocIdSetIterator(
|
||||
SortField sortField,
|
||||
SortField.Type sortFieldType,
|
||||
LeafReaderContext context,
|
||||
|
@ -592,19 +597,22 @@ public class IndexSortSortedNumericDocValuesRangeQuery extends Query {
|
|||
}
|
||||
|
||||
int lastDocIdExclusive = high + 1;
|
||||
|
||||
if (firstDocIdInclusive == lastDocIdExclusive) {
|
||||
return IteratorAndCount.empty();
|
||||
}
|
||||
|
||||
Object missingValue = sortField.getMissingValue();
|
||||
BoundedDocIdSetIterator disi;
|
||||
LeafReader reader = context.reader();
|
||||
PointValues pointValues = reader.getPointValues(field);
|
||||
final long missingLongValue = missingValue == null ? 0L : (long) missingValue;
|
||||
// all documents have docValues or missing value falls outside the range
|
||||
if ((pointValues != null && pointValues.getDocCount() == reader.maxDoc())
|
||||
|| (missingLongValue < lowerValue || missingLongValue > upperValue)) {
|
||||
disi = new BoundedDocIdSetIterator(firstDocIdInclusive, lastDocIdExclusive, null);
|
||||
return IteratorAndCount.denseRange(firstDocIdInclusive, lastDocIdExclusive);
|
||||
} else {
|
||||
disi = new BoundedDocIdSetIterator(firstDocIdInclusive, lastDocIdExclusive, delegate);
|
||||
return IteratorAndCount.sparseRange(firstDocIdInclusive, lastDocIdExclusive, delegate);
|
||||
}
|
||||
return disi;
|
||||
}
|
||||
|
||||
/** Compares the given document's value with a stored reference value. */
|
||||
|
@ -643,6 +651,29 @@ public class IndexSortSortedNumericDocValuesRangeQuery extends Query {
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Provides a {@code DocIdSetIterator} along with an accurate count of documents provided by the
|
||||
* iterator (or {@code -1} if an accurate count is unknown).
|
||||
*/
|
||||
private record IteratorAndCount(DocIdSetIterator it, int count) {
|
||||
|
||||
static IteratorAndCount empty() {
|
||||
return new IteratorAndCount(DocIdSetIterator.empty(), 0);
|
||||
}
|
||||
|
||||
static IteratorAndCount all(int maxDoc) {
|
||||
return new IteratorAndCount(DocIdSetIterator.all(maxDoc), maxDoc);
|
||||
}
|
||||
|
||||
static IteratorAndCount denseRange(int minDoc, int maxDoc) {
|
||||
return new IteratorAndCount(DocIdSetIterator.range(minDoc, maxDoc), maxDoc - minDoc);
|
||||
}
|
||||
|
||||
static IteratorAndCount sparseRange(int minDoc, int maxDoc, DocIdSetIterator delegate) {
|
||||
return new IteratorAndCount(new BoundedDocIdSetIterator(minDoc, maxDoc, delegate), -1);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* A doc ID set iterator that wraps a delegate iterator and only returns doc IDs in the range
|
||||
* [firstDocInclusive, lastDoc).
|
||||
|
@ -655,6 +686,7 @@ public class IndexSortSortedNumericDocValuesRangeQuery extends Query {
|
|||
private int docID = -1;
|
||||
|
||||
BoundedDocIdSetIterator(int firstDoc, int lastDoc, DocIdSetIterator delegate) {
|
||||
assert delegate != null;
|
||||
this.firstDoc = firstDoc;
|
||||
this.lastDoc = lastDoc;
|
||||
this.delegate = delegate;
|
||||
|
@ -676,12 +708,7 @@ public class IndexSortSortedNumericDocValuesRangeQuery extends Query {
|
|||
target = firstDoc;
|
||||
}
|
||||
|
||||
int result;
|
||||
if (delegate != null) {
|
||||
result = delegate.advance(target);
|
||||
} else {
|
||||
result = target;
|
||||
}
|
||||
int result = delegate.advance(target);
|
||||
if (result < lastDoc) {
|
||||
docID = result;
|
||||
} else {
|
||||
|
@ -692,7 +719,7 @@ public class IndexSortSortedNumericDocValuesRangeQuery extends Query {
|
|||
|
||||
@Override
|
||||
public long cost() {
|
||||
return lastDoc - firstDoc;
|
||||
return Math.min(delegate.cost(), lastDoc - firstDoc);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue