Break point estimation when threshold exceeded (#13199)

This commit is contained in:
gf2121 2024-03-26 19:29:34 +08:00 committed by GitHub
parent 8c4ec1dbef
commit 99b9636fd8
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 36 additions and 16 deletions

View File

@ -237,6 +237,8 @@ Optimizations
(Ben Trent)
* GITHUB#13184: Make the HitQueue size more appropriate for KNN exact search (Pan Guixin)
* GITHUB#13199: Speed up dynamic pruning by breaking point estimation when the threshold gets exceeded. (Guo Feng)
Bug Fixes
---------------------

View File

@ -473,11 +473,10 @@ final class LatLonPointDistanceFeatureQuery extends Query {
};
final long currentQueryCost = Math.min(leadCost, it.cost());
final long threshold = currentQueryCost >>> 3;
long estimatedNumberOfMatches =
pointValues.estimatePointCount(visitor); // runs in O(log(numPoints))
// TODO: what is the right factor compared to the current disi? Is 8 optimal?
if (estimatedNumberOfMatches >= threshold) {
final long threshold = currentQueryCost >>> 3;
if (PointValues.isEstimatedPointCountGreaterThanOrEqualTo(
visitor, pointValues.getPointTree(), threshold)) {
// the new range is not selective enough to be worth materializing
return;
}

View File

@ -436,11 +436,10 @@ final class LongDistanceFeatureQuery extends Query {
};
final long currentQueryCost = Math.min(leadCost, it.cost());
final long threshold = currentQueryCost >>> 3;
long estimatedNumberOfMatches =
pointValues.estimatePointCount(visitor); // runs in O(log(numPoints))
// TODO: what is the right factor compared to the current disi? Is 8 optimal?
if (estimatedNumberOfMatches >= threshold) {
final long threshold = currentQueryCost >>> 3;
if (PointValues.isEstimatedPointCountGreaterThanOrEqualTo(
visitor, pointValues.getPointTree(), threshold)) {
// the new range is not selective enough to be worth materializing
return;
}

View File

@ -375,7 +375,7 @@ public abstract class PointValues {
public final long estimatePointCount(IntersectVisitor visitor) {
try {
final PointTree pointTree = getPointTree();
final long count = estimatePointCount(visitor, pointTree);
final long count = estimatePointCount(visitor, pointTree, Long.MAX_VALUE);
assert pointTree.moveToParent() == false;
return count;
} catch (IOException ioe) {
@ -383,8 +383,26 @@ public abstract class PointValues {
}
}
private long estimatePointCount(IntersectVisitor visitor, PointTree pointTree)
throws IOException {
/**
 * Checks whether the estimated number of points that {@link #intersect} would match for the given
 * {@link IntersectVisitor} reaches the supplied upper bound.
 *
 * @param visitor the visitor whose matches are being estimated
 * @param pointTree the tree over which the estimate is computed
 * @param upperBound the bound the estimate is compared against; it is also handed to the
 *     estimation itself
 * @return {@code true} if the estimate is greater than or equal to {@code upperBound}
 * @throws IOException propagated from the underlying estimation
 * @lucene.internal
 */
public static boolean isEstimatedPointCountGreaterThanOrEqualTo(
    IntersectVisitor visitor, PointTree pointTree, long upperBound) throws IOException {
  final long estimate = estimatePointCount(visitor, pointTree, upperBound);
  return estimate >= upperBound;
}
/**
* Estimate the number of documents that would be matched by {@link #intersect} with the given
* {@link IntersectVisitor}. The estimation will terminate when the point count gets greater than
* or equal to the upper bound.
*
* <p>TODO: would a breadth-first traversal help the estimation terminate earlier?
*/
private static long estimatePointCount(
IntersectVisitor visitor, PointTree pointTree, long upperBound) throws IOException {
Relation r = visitor.compare(pointTree.getMinPackedValue(), pointTree.getMaxPackedValue());
switch (r) {
case CELL_OUTSIDE_QUERY:
@ -398,8 +416,8 @@ public abstract class PointValues {
if (pointTree.moveToChild()) {
long cost = 0;
do {
cost += estimatePointCount(visitor, pointTree);
} while (pointTree.moveToSibling());
cost += estimatePointCount(visitor, pointTree, upperBound - cost);
} while (cost < upperBound && pointTree.moveToSibling());
pointTree.moveToParent();
return cost;
} else {

View File

@ -90,6 +90,7 @@ public abstract class NumericComparator<T extends Number> extends FieldComparato
private final LeafReaderContext context;
protected final NumericDocValues docValues;
private final PointValues pointValues;
private final PointValues.PointTree pointTree;
// if skipping functionality should be enabled on this segment
private final boolean enableSkipping;
private final int maxDoc;
@ -129,10 +130,12 @@ public abstract class NumericComparator<T extends Number> extends FieldComparato
+ " expected "
+ bytesCount);
}
this.pointTree = pointValues.getPointTree();
this.enableSkipping = true; // skipping is enabled when points are available
this.maxDoc = context.reader().maxDoc();
this.competitiveIterator = DocIdSetIterator.all(maxDoc);
} else {
this.pointTree = null;
this.enableSkipping = false;
this.maxDoc = 0;
}
@ -282,9 +285,8 @@ public abstract class NumericComparator<T extends Number> extends FieldComparato
}
};
final long threshold = iteratorCost >>> 3;
long estimatedNumberOfMatches =
pointValues.estimatePointCount(visitor); // runs in O(log(numPoints))
if (estimatedNumberOfMatches >= threshold) {
if (PointValues.isEstimatedPointCountGreaterThanOrEqualTo(visitor, pointTree, threshold)) {
// the new range is not selective enough to be worth materializing, it doesn't reduce number
// of docs at least 8x
updateSkipInterval(false);