Some minor code cleanup in IndexSortSortedNumericDocValuesRangeQuery (#12003)

* Prefer the static factory methods on DocIdSetIterator (DISI) over custom DISI implementations where possible.
* Assert that the points field is single-dimensional.
* Bound cost estimate by the cost of the doc values field (for sparse fields).
This commit is contained in:
Greg Miller 2022-12-10 12:23:31 -08:00 committed by GitHub
parent 54e00df7f6
commit 8671e29929
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 134 additions and 105 deletions

View File

@ -157,6 +157,8 @@ Improvements
* GITHUB#11860: Improve storage efficiency of connections in the HNSW graph that Lucene uses for
vector search. (Ben Trent)
* GITHUB#12003: Minor cleanup/improvements to IndexSortSortedNumericDocValuesRangeQuery. (Greg Miller)
Bug Fixes
---------------------
* GITHUB#11726: Indexing term vectors on large documents could fail due to

View File

@ -176,8 +176,9 @@ public class IndexSortSortedNumericDocValuesRangeQuery extends Query {
@Override
public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException {
final Weight weight = this;
DocIdSetIterator disi = getDocIdSetIteratorOrNull(context);
if (disi != null) {
IteratorAndCount itAndCount = getDocIdSetIteratorOrNull(context);
if (itAndCount != null) {
DocIdSetIterator disi = itAndCount.it;
return new ScorerSupplier() {
@Override
public Scorer get(long leadCost) throws IOException {
@ -212,9 +213,9 @@ public class IndexSortSortedNumericDocValuesRangeQuery extends Query {
@Override
public int count(LeafReaderContext context) throws IOException {
if (context.reader().hasDeletions() == false) {
BoundedDocIdSetIterator disi = getDocIdSetIteratorOrNull(context);
if (disi != null && disi.delegate == null) {
return disi.lastDoc - disi.firstDoc;
IteratorAndCount itAndCount = getDocIdSetIteratorOrNull(context);
if (itAndCount != null && itAndCount.count != -1) {
return itAndCount.count;
}
}
return fallbackWeight.count(context);
@ -406,119 +407,123 @@ public class IndexSortSortedNumericDocValuesRangeQuery extends Query {
private boolean matchNone(PointValues points, byte[] queryLowerPoint, byte[] queryUpperPoint)
throws IOException {
assert points.getNumDimensions() == 1;
final ByteArrayComparator comparator =
ArrayUtil.getUnsignedComparator(points.getBytesPerDimension());
for (int dim = 0; dim < points.getNumDimensions(); dim++) {
int offset = dim * points.getBytesPerDimension();
if (comparator.compare(points.getMinPackedValue(), offset, queryUpperPoint, offset) > 0
|| comparator.compare(points.getMaxPackedValue(), offset, queryLowerPoint, offset) < 0) {
return true;
}
}
return false;
return comparator.compare(points.getMinPackedValue(), 0, queryUpperPoint, 0) > 0
|| comparator.compare(points.getMaxPackedValue(), 0, queryLowerPoint, 0) < 0;
}
private boolean matchAll(PointValues points, byte[] queryLowerPoint, byte[] queryUpperPoint)
throws IOException {
assert points.getNumDimensions() == 1;
final ByteArrayComparator comparator =
ArrayUtil.getUnsignedComparator(points.getBytesPerDimension());
for (int dim = 0; dim < points.getNumDimensions(); dim++) {
int offset = dim * points.getBytesPerDimension();
if (comparator.compare(points.getMinPackedValue(), offset, queryLowerPoint, offset) >= 0
&& comparator.compare(points.getMaxPackedValue(), offset, queryUpperPoint, offset) <= 0) {
return true;
}
}
return false;
return comparator.compare(points.getMinPackedValue(), 0, queryLowerPoint, 0) >= 0
&& comparator.compare(points.getMaxPackedValue(), 0, queryUpperPoint, 0) <= 0;
}
private BoundedDocIdSetIterator getDocIdSetIteratorOrNullFromBkd(
private IteratorAndCount getDocIdSetIteratorOrNullFromBkd(
LeafReaderContext context, DocIdSetIterator delegate) throws IOException {
Sort indexSort = context.reader().getMetaData().getSort();
if (indexSort != null
&& indexSort.getSort().length > 0
&& indexSort.getSort()[0].getField().equals(field)) {
final boolean reverse = indexSort.getSort()[0].getReverse();
PointValues points = context.reader().getPointValues(field);
if (points == null) {
return null;
}
if (indexSort == null
|| indexSort.getSort().length == 0
|| indexSort.getSort()[0].getField().equals(field) == false) {
return null;
}
if (points.getNumDimensions() != 1) {
return null;
}
final boolean reverse = indexSort.getSort()[0].getReverse();
if (points.getBytesPerDimension() != Long.BYTES
&& points.getBytesPerDimension() != Integer.BYTES) {
return null;
}
PointValues points = context.reader().getPointValues(field);
if (points == null) {
return null;
}
if (points.size() != points.getDocCount()) {
return null;
}
if (points.getNumDimensions() != 1) {
return null;
}
byte[] queryLowerPoint;
byte[] queryUpperPoint;
if (points.getBytesPerDimension() == Integer.BYTES) {
queryLowerPoint = IntPoint.pack((int) lowerValue).bytes;
queryUpperPoint = IntPoint.pack((int) upperValue).bytes;
if (points.getBytesPerDimension() != Long.BYTES
&& points.getBytesPerDimension() != Integer.BYTES) {
return null;
}
if (points.size() != points.getDocCount()) {
return null;
}
assert lowerValue <= upperValue;
byte[] queryLowerPoint;
byte[] queryUpperPoint;
if (points.getBytesPerDimension() == Integer.BYTES) {
queryLowerPoint = IntPoint.pack((int) lowerValue).bytes;
queryUpperPoint = IntPoint.pack((int) upperValue).bytes;
} else {
queryLowerPoint = LongPoint.pack(lowerValue).bytes;
queryUpperPoint = LongPoint.pack(upperValue).bytes;
}
if (matchNone(points, queryLowerPoint, queryUpperPoint)) {
return IteratorAndCount.empty();
}
if (matchAll(points, queryLowerPoint, queryUpperPoint)) {
int maxDoc = context.reader().maxDoc();
if (points.getDocCount() == maxDoc) {
return IteratorAndCount.all(maxDoc);
} else {
queryLowerPoint = LongPoint.pack(lowerValue).bytes;
queryUpperPoint = LongPoint.pack(upperValue).bytes;
}
if (lowerValue > upperValue || matchNone(points, queryLowerPoint, queryUpperPoint)) {
return new BoundedDocIdSetIterator(0, 0, null);
}
int minDocId, maxDocId;
if (matchAll(points, queryLowerPoint, queryUpperPoint)) {
minDocId = 0;
maxDocId = context.reader().maxDoc();
} else {
final ByteArrayComparator comparator =
ArrayUtil.getUnsignedComparator(points.getBytesPerDimension());
if (reverse) {
minDocId = nextDoc(points.getPointTree(), queryUpperPoint, false, comparator, true) + 1;
} else {
minDocId = nextDoc(points.getPointTree(), queryLowerPoint, true, comparator, false);
if (minDocId == -1) {
// No matches
return new BoundedDocIdSetIterator(0, 0, null);
}
}
if (reverse) {
maxDocId = nextDoc(points.getPointTree(), queryLowerPoint, true, comparator, true) + 1;
if (maxDocId == 0) {
// No matches
return new BoundedDocIdSetIterator(0, 0, null);
}
} else {
maxDocId = nextDoc(points.getPointTree(), queryUpperPoint, false, comparator, false);
if (maxDocId == -1) {
maxDocId = context.reader().maxDoc();
}
}
}
if ((points.getDocCount() == context.reader().maxDoc())) {
return new BoundedDocIdSetIterator(minDocId, maxDocId, null);
} else {
return new BoundedDocIdSetIterator(minDocId, maxDocId, delegate);
return IteratorAndCount.sparseRange(0, maxDoc, delegate);
}
}
return null;
int minDocId, maxDocId;
final ByteArrayComparator comparator =
ArrayUtil.getUnsignedComparator(points.getBytesPerDimension());
if (reverse) {
minDocId = nextDoc(points.getPointTree(), queryUpperPoint, false, comparator, true) + 1;
} else {
minDocId = nextDoc(points.getPointTree(), queryLowerPoint, true, comparator, false);
if (minDocId == -1) {
// No matches
return IteratorAndCount.empty();
}
}
if (reverse) {
maxDocId = nextDoc(points.getPointTree(), queryLowerPoint, true, comparator, true) + 1;
if (maxDocId == 0) {
// No matches
return IteratorAndCount.empty();
}
} else {
maxDocId = nextDoc(points.getPointTree(), queryUpperPoint, false, comparator, false);
if (maxDocId == -1) {
maxDocId = context.reader().maxDoc();
}
}
if (minDocId == maxDocId) {
return IteratorAndCount.empty();
}
if ((points.getDocCount() == context.reader().maxDoc())) {
return IteratorAndCount.denseRange(minDocId, maxDocId);
} else {
return IteratorAndCount.sparseRange(minDocId, maxDocId, delegate);
}
}
private BoundedDocIdSetIterator getDocIdSetIteratorOrNull(LeafReaderContext context)
throws IOException {
private IteratorAndCount getDocIdSetIteratorOrNull(LeafReaderContext context) throws IOException {
if (lowerValue > upperValue) {
return IteratorAndCount.empty();
}
SortedNumericDocValues sortedNumericValues =
DocValues.getSortedNumeric(context.reader(), field);
NumericDocValues numericValues = DocValues.unwrapSingleton(sortedNumericValues);
if (numericValues != null) {
BoundedDocIdSetIterator iterator = getDocIdSetIteratorOrNullFromBkd(context, numericValues);
if (iterator != null) {
return iterator;
IteratorAndCount itAndCount = getDocIdSetIteratorOrNullFromBkd(context, numericValues);
if (itAndCount != null) {
return itAndCount;
}
Sort indexSort = context.reader().getMetaData().getSort();
if (indexSort != null
@ -548,7 +553,7 @@ public class IndexSortSortedNumericDocValuesRangeQuery extends Query {
* {@link DocIdSetIterator} makes sure to wrap the original docvalues to skip over documents with
* no value.
*/
private BoundedDocIdSetIterator getDocIdSetIterator(
private IteratorAndCount getDocIdSetIterator(
SortField sortField,
SortField.Type sortFieldType,
LeafReaderContext context,
@ -592,19 +597,22 @@ public class IndexSortSortedNumericDocValuesRangeQuery extends Query {
}
int lastDocIdExclusive = high + 1;
if (firstDocIdInclusive == lastDocIdExclusive) {
return IteratorAndCount.empty();
}
Object missingValue = sortField.getMissingValue();
BoundedDocIdSetIterator disi;
LeafReader reader = context.reader();
PointValues pointValues = reader.getPointValues(field);
final long missingLongValue = missingValue == null ? 0L : (long) missingValue;
// all documents have docValues or missing value falls outside the range
if ((pointValues != null && pointValues.getDocCount() == reader.maxDoc())
|| (missingLongValue < lowerValue || missingLongValue > upperValue)) {
disi = new BoundedDocIdSetIterator(firstDocIdInclusive, lastDocIdExclusive, null);
return IteratorAndCount.denseRange(firstDocIdInclusive, lastDocIdExclusive);
} else {
disi = new BoundedDocIdSetIterator(firstDocIdInclusive, lastDocIdExclusive, delegate);
return IteratorAndCount.sparseRange(firstDocIdInclusive, lastDocIdExclusive, delegate);
}
return disi;
}
/** Compares the given document's value with a stored reference value. */
@ -643,6 +651,29 @@ public class IndexSortSortedNumericDocValuesRangeQuery extends Query {
}
}
/**
 * Pairs a {@code DocIdSetIterator} with the exact number of documents that iterator returns. A
 * count of {@code -1} means the exact number is not known.
 */
private record IteratorAndCount(DocIdSetIterator it, int count) {

  /** Returns a pair holding {@code DocIdSetIterator.empty()} and a count of {@code 0}. */
  static IteratorAndCount empty() {
    final DocIdSetIterator nothing = DocIdSetIterator.empty();
    return new IteratorAndCount(nothing, 0);
  }

  /** Returns a pair holding {@code DocIdSetIterator.all(maxDoc)} and a count of {@code maxDoc}. */
  static IteratorAndCount all(int maxDoc) {
    final DocIdSetIterator everything = DocIdSetIterator.all(maxDoc);
    return new IteratorAndCount(everything, maxDoc);
  }

  /**
   * Returns a pair holding {@code DocIdSetIterator.range(minDoc, maxDoc)} and a count of {@code
   * maxDoc - minDoc}.
   */
  static IteratorAndCount denseRange(int minDoc, int maxDoc) {
    final DocIdSetIterator contiguous = DocIdSetIterator.range(minDoc, maxDoc);
    final int rangeSize = maxDoc - minDoc;
    return new IteratorAndCount(contiguous, rangeSize);
  }

  /**
   * Returns a pair holding a {@code BoundedDocIdSetIterator} built from {@code minDoc}, {@code
   * maxDoc} and {@code delegate}; the count is reported as unknown ({@code -1}).
   */
  static IteratorAndCount sparseRange(int minDoc, int maxDoc, DocIdSetIterator delegate) {
    final DocIdSetIterator bounded = new BoundedDocIdSetIterator(minDoc, maxDoc, delegate);
    return new IteratorAndCount(bounded, -1);
  }
}
/**
* A doc ID set iterator that wraps a delegate iterator and only returns doc IDs in the range
* [firstDocInclusive, lastDoc).
@ -655,6 +686,7 @@ public class IndexSortSortedNumericDocValuesRangeQuery extends Query {
private int docID = -1;
BoundedDocIdSetIterator(int firstDoc, int lastDoc, DocIdSetIterator delegate) {
assert delegate != null;
this.firstDoc = firstDoc;
this.lastDoc = lastDoc;
this.delegate = delegate;
@ -676,12 +708,7 @@ public class IndexSortSortedNumericDocValuesRangeQuery extends Query {
target = firstDoc;
}
int result;
if (delegate != null) {
result = delegate.advance(target);
} else {
result = target;
}
int result = delegate.advance(target);
if (result < lastDoc) {
docID = result;
} else {
@ -692,7 +719,7 @@ public class IndexSortSortedNumericDocValuesRangeQuery extends Query {
@Override
public long cost() {
return lastDoc - firstDoc;
return Math.min(delegate.cost(), lastDoc - firstDoc);
}
}
}