mirror of https://github.com/apache/lucene.git
Break point estimation when threshold exceeded (#13199)
This commit is contained in:
parent
8c4ec1dbef
commit
99b9636fd8
|
@ -237,6 +237,8 @@ Optimizations
|
|||
(Ben Trent)
|
||||
* GITHUB#13184: Make the HitQueue size more appropriate for KNN exact search (Pan Guixin)
|
||||
|
||||
* GITHUB#13199: Speed up dynamic pruning by breaking point estimation when threshold gets exceeded. (Guo Feng)
|
||||
|
||||
Bug Fixes
|
||||
---------------------
|
||||
|
||||
|
|
|
@ -473,11 +473,10 @@ final class LatLonPointDistanceFeatureQuery extends Query {
|
|||
};
|
||||
|
||||
final long currentQueryCost = Math.min(leadCost, it.cost());
|
||||
final long threshold = currentQueryCost >>> 3;
|
||||
long estimatedNumberOfMatches =
|
||||
pointValues.estimatePointCount(visitor); // runs in O(log(numPoints))
|
||||
// TODO: what is the right factor compared to the current disi? Is 8 optimal?
|
||||
if (estimatedNumberOfMatches >= threshold) {
|
||||
final long threshold = currentQueryCost >>> 3;
|
||||
if (PointValues.isEstimatedPointCountGreaterThanOrEqualTo(
|
||||
visitor, pointValues.getPointTree(), threshold)) {
|
||||
// the new range is not selective enough to be worth materializing
|
||||
return;
|
||||
}
|
||||
|
|
|
@ -436,11 +436,10 @@ final class LongDistanceFeatureQuery extends Query {
|
|||
};
|
||||
|
||||
final long currentQueryCost = Math.min(leadCost, it.cost());
|
||||
final long threshold = currentQueryCost >>> 3;
|
||||
long estimatedNumberOfMatches =
|
||||
pointValues.estimatePointCount(visitor); // runs in O(log(numPoints))
|
||||
// TODO: what is the right factor compared to the current disi? Is 8 optimal?
|
||||
if (estimatedNumberOfMatches >= threshold) {
|
||||
final long threshold = currentQueryCost >>> 3;
|
||||
if (PointValues.isEstimatedPointCountGreaterThanOrEqualTo(
|
||||
visitor, pointValues.getPointTree(), threshold)) {
|
||||
// the new range is not selective enough to be worth materializing
|
||||
return;
|
||||
}
|
||||
|
|
|
@ -375,7 +375,7 @@ public abstract class PointValues {
|
|||
public final long estimatePointCount(IntersectVisitor visitor) {
|
||||
try {
|
||||
final PointTree pointTree = getPointTree();
|
||||
final long count = estimatePointCount(visitor, pointTree);
|
||||
final long count = estimatePointCount(visitor, pointTree, Long.MAX_VALUE);
|
||||
assert pointTree.moveToParent() == false;
|
||||
return count;
|
||||
} catch (IOException ioe) {
|
||||
|
@ -383,8 +383,26 @@ public abstract class PointValues {
|
|||
}
|
||||
}
|
||||
|
||||
private long estimatePointCount(IntersectVisitor visitor, PointTree pointTree)
|
||||
throws IOException {
|
||||
/**
|
||||
* Estimate if the point count that would be matched by {@link #intersect} with the given {@link
|
||||
* IntersectVisitor} is greater than or equal to the upperBound.
|
||||
*
|
||||
* @lucene.internal
|
||||
*/
|
||||
public static boolean isEstimatedPointCountGreaterThanOrEqualTo(
|
||||
IntersectVisitor visitor, PointTree pointTree, long upperBound) throws IOException {
|
||||
return estimatePointCount(visitor, pointTree, upperBound) >= upperBound;
|
||||
}
|
||||
|
||||
/**
|
||||
* Estimate the number of documents that would be matched by {@link #intersect} with the given
|
||||
* {@link IntersectVisitor}. The estimation will terminate when the point count gets greater than
|
||||
* or equal to the upper bound.
|
||||
*
|
||||
* <p>TODO: will breadth-first traversal help estimation terminate earlier?
|
||||
*/
|
||||
private static long estimatePointCount(
|
||||
IntersectVisitor visitor, PointTree pointTree, long upperBound) throws IOException {
|
||||
Relation r = visitor.compare(pointTree.getMinPackedValue(), pointTree.getMaxPackedValue());
|
||||
switch (r) {
|
||||
case CELL_OUTSIDE_QUERY:
|
||||
|
@ -398,8 +416,8 @@ public abstract class PointValues {
|
|||
if (pointTree.moveToChild()) {
|
||||
long cost = 0;
|
||||
do {
|
||||
cost += estimatePointCount(visitor, pointTree);
|
||||
} while (pointTree.moveToSibling());
|
||||
cost += estimatePointCount(visitor, pointTree, upperBound - cost);
|
||||
} while (cost < upperBound && pointTree.moveToSibling());
|
||||
pointTree.moveToParent();
|
||||
return cost;
|
||||
} else {
|
||||
|
|
|
@ -90,6 +90,7 @@ public abstract class NumericComparator<T extends Number> extends FieldComparato
|
|||
private final LeafReaderContext context;
|
||||
protected final NumericDocValues docValues;
|
||||
private final PointValues pointValues;
|
||||
private final PointValues.PointTree pointTree;
|
||||
// if skipping functionality should be enabled on this segment
|
||||
private final boolean enableSkipping;
|
||||
private final int maxDoc;
|
||||
|
@ -129,10 +130,12 @@ public abstract class NumericComparator<T extends Number> extends FieldComparato
|
|||
+ " expected "
|
||||
+ bytesCount);
|
||||
}
|
||||
this.pointTree = pointValues.getPointTree();
|
||||
this.enableSkipping = true; // skipping is enabled when points are available
|
||||
this.maxDoc = context.reader().maxDoc();
|
||||
this.competitiveIterator = DocIdSetIterator.all(maxDoc);
|
||||
} else {
|
||||
this.pointTree = null;
|
||||
this.enableSkipping = false;
|
||||
this.maxDoc = 0;
|
||||
}
|
||||
|
@ -282,9 +285,8 @@ public abstract class NumericComparator<T extends Number> extends FieldComparato
|
|||
}
|
||||
};
|
||||
final long threshold = iteratorCost >>> 3;
|
||||
long estimatedNumberOfMatches =
|
||||
pointValues.estimatePointCount(visitor); // runs in O(log(numPoints))
|
||||
if (estimatedNumberOfMatches >= threshold) {
|
||||
|
||||
if (PointValues.isEstimatedPointCountGreaterThanOrEqualTo(visitor, pointTree, threshold)) {
|
||||
// the new range is not selective enough to be worth materializing, it doesn't reduce number
|
||||
// of docs at least 8x
|
||||
updateSkipInterval(false);
|
||||
|
|
Loading…
Reference in New Issue