mirror of https://github.com/apache/lucene.git
LUCENE-10378 Implement Weight#count for PointRangeQuery (#658)
Implement Weight#count for PointRangeQuery to provide a faster way to calculate the number of matching range docs when each doc has at most one point and the points are 1-dimensional.
This commit is contained in:
parent
6157854523
commit
dd25fabb03
|
@ -150,6 +150,10 @@ New Features
|
||||||
* LUCENE-10414: Add fn:fuzzyTerm interval function to flexible query parser (Dawid Weiss,
|
* LUCENE-10414: Add fn:fuzzyTerm interval function to flexible query parser (Dawid Weiss,
|
||||||
Alan Woodward)
|
Alan Woodward)
|
||||||
|
|
||||||
|
* LUCENE-10378: Implement Weight#count for PointRangeQuery to provide a faster way to calculate
|
||||||
|
the number of matching range docs when each doc has at most one point and the points are 1-dimensional.
|
||||||
|
(Gautam Worah, Ignacio Vera, Adrien Grand)
|
||||||
|
|
||||||
Improvements
|
Improvements
|
||||||
---------------------
|
---------------------
|
||||||
|
|
||||||
|
|
|
@ -82,7 +82,7 @@ public final class DocValuesFieldExistsQuery extends Query {
|
||||||
final FieldInfo fieldInfo = reader.getFieldInfos().fieldInfo(field);
|
final FieldInfo fieldInfo = reader.getFieldInfos().fieldInfo(field);
|
||||||
if (fieldInfo == null || fieldInfo.getDocValuesType() == DocValuesType.NONE) {
|
if (fieldInfo == null || fieldInfo.getDocValuesType() == DocValuesType.NONE) {
|
||||||
return 0; // the field doesn't index doc values
|
return 0; // the field doesn't index doc values
|
||||||
} else if (!reader.hasDeletions()) {
|
} else if (reader.hasDeletions() == false) {
|
||||||
if (fieldInfo.getPointDimensionCount() > 0) {
|
if (fieldInfo.getPointDimensionCount() > 0) {
|
||||||
return reader.getPointValues(field).getDocCount();
|
return reader.getPointValues(field).getDocCount();
|
||||||
} else if (fieldInfo.getIndexOptions() != IndexOptions.NONE) {
|
} else if (fieldInfo.getIndexOptions() != IndexOptions.NONE) {
|
||||||
|
|
|
@ -19,6 +19,8 @@ package org.apache.lucene.search;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.util.Objects;
|
import java.util.Objects;
|
||||||
|
import java.util.function.BiFunction;
|
||||||
|
import java.util.function.Predicate;
|
||||||
import org.apache.lucene.document.IntPoint;
|
import org.apache.lucene.document.IntPoint;
|
||||||
import org.apache.lucene.index.LeafReader;
|
import org.apache.lucene.index.LeafReader;
|
||||||
import org.apache.lucene.index.LeafReaderContext;
|
import org.apache.lucene.index.LeafReaderContext;
|
||||||
|
@ -258,14 +260,10 @@ public abstract class PointRangeQuery extends Query {
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
private boolean checkValidPointValues(PointValues values) throws IOException {
|
||||||
public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException {
|
|
||||||
LeafReader reader = context.reader();
|
|
||||||
|
|
||||||
PointValues values = reader.getPointValues(field);
|
|
||||||
if (values == null) {
|
if (values == null) {
|
||||||
// No docs in this segment/field indexed any points
|
// No docs in this segment/field indexed any points
|
||||||
return null;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (values.getNumIndexDimensions() != numDims) {
|
if (values.getNumIndexDimensions() != numDims) {
|
||||||
|
@ -286,6 +284,17 @@ public abstract class PointRangeQuery extends Query {
|
||||||
+ " but this query has bytesPerDim="
|
+ " but this query has bytesPerDim="
|
||||||
+ bytesPerDim);
|
+ bytesPerDim);
|
||||||
}
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException {
|
||||||
|
LeafReader reader = context.reader();
|
||||||
|
|
||||||
|
PointValues values = reader.getPointValues(field);
|
||||||
|
if (checkValidPointValues(values) == false) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
boolean allDocsMatch;
|
boolean allDocsMatch;
|
||||||
if (values.getDocCount() == reader.maxDoc()) {
|
if (values.getDocCount() == reader.maxDoc()) {
|
||||||
|
@ -369,6 +378,106 @@ public abstract class PointRangeQuery extends Query {
|
||||||
return scorerSupplier.get(Long.MAX_VALUE);
|
return scorerSupplier.get(Long.MAX_VALUE);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int count(LeafReaderContext context) throws IOException {
|
||||||
|
LeafReader reader = context.reader();
|
||||||
|
|
||||||
|
PointValues values = reader.getPointValues(field);
|
||||||
|
if (checkValidPointValues(values) == false) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (reader.hasDeletions() == false
|
||||||
|
&& numDims == 1
|
||||||
|
&& values.getDocCount() == values.size()) {
|
||||||
|
// if all documents have at-most one point
|
||||||
|
return (int) pointCount(values.getPointTree(), this::relate, this::matches);
|
||||||
|
}
|
||||||
|
return super.count(context);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Finds the number of points matching the provided range conditions. Using this method is
|
||||||
|
* faster than calling {@link PointValues#intersect(IntersectVisitor)} to get the count of
|
||||||
|
* intersecting points. This method does not enforce live documents, therefore it should only
|
||||||
|
* be used when there are no deleted documents.
|
||||||
|
*
|
||||||
|
* @param pointTree start node of the count operation
|
||||||
|
* @param nodeComparator comparator to be used for checking whether the internal node is
|
||||||
|
* inside the range
|
||||||
|
* @param leafComparator comparator to be used for checking whether the leaf node is inside
|
||||||
|
* the range
|
||||||
|
* @return count of points that match the range
|
||||||
|
*/
|
||||||
|
private long pointCount(
|
||||||
|
PointValues.PointTree pointTree,
|
||||||
|
BiFunction<byte[], byte[], Relation> nodeComparator,
|
||||||
|
Predicate<byte[]> leafComparator)
|
||||||
|
throws IOException {
|
||||||
|
final int[] matchingNodeCount = {0};
|
||||||
|
// create a custom IntersectVisitor that records the number of leafNodes that matched
|
||||||
|
final IntersectVisitor visitor =
|
||||||
|
new IntersectVisitor() {
|
||||||
|
@Override
|
||||||
|
public void visit(int docID) {
|
||||||
|
// this branch should be unreachable
|
||||||
|
throw new UnsupportedOperationException(
|
||||||
|
"This IntersectVisitor does not perform any actions on a "
|
||||||
|
+ "docID="
|
||||||
|
+ docID
|
||||||
|
+ " node being visited");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void visit(int docID, byte[] packedValue) {
|
||||||
|
if (leafComparator.test(packedValue)) {
|
||||||
|
matchingNodeCount[0]++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
|
||||||
|
return nodeComparator.apply(minPackedValue, maxPackedValue);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
pointCount(visitor, pointTree, matchingNodeCount);
|
||||||
|
return matchingNodeCount[0];
|
||||||
|
}
|
||||||
|
|
||||||
|
private void pointCount(
|
||||||
|
IntersectVisitor visitor, PointValues.PointTree pointTree, int[] matchingNodeCount)
|
||||||
|
throws IOException {
|
||||||
|
Relation r = visitor.compare(pointTree.getMinPackedValue(), pointTree.getMaxPackedValue());
|
||||||
|
switch (r) {
|
||||||
|
case CELL_OUTSIDE_QUERY:
|
||||||
|
// This cell is fully outside the query shape: return 0 as the count of its nodes
|
||||||
|
return;
|
||||||
|
case CELL_INSIDE_QUERY:
|
||||||
|
// This cell is fully inside the query shape: return the size of the entire node as the
|
||||||
|
// count
|
||||||
|
matchingNodeCount[0] += pointTree.size();
|
||||||
|
return;
|
||||||
|
case CELL_CROSSES_QUERY:
|
||||||
|
/*
|
||||||
|
The cell crosses the shape boundary, or the cell fully contains the query, so we fall
|
||||||
|
through and do full counting.
|
||||||
|
*/
|
||||||
|
if (pointTree.moveToChild()) {
|
||||||
|
do {
|
||||||
|
pointCount(visitor, pointTree, matchingNodeCount);
|
||||||
|
} while (pointTree.moveToSibling());
|
||||||
|
pointTree.moveToParent();
|
||||||
|
} else {
|
||||||
|
// we have reached a leaf node here.
|
||||||
|
pointTree.visitDocValues(visitor);
|
||||||
|
// leaf node count is saved in the matchingNodeCount array by the visitor
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
default:
|
||||||
|
throw new IllegalArgumentException("Unreachable code");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public boolean isCacheable(LeafReaderContext ctx) {
|
public boolean isCacheable(LeafReaderContext ctx) {
|
||||||
return true;
|
return true;
|
||||||
|
|
|
@ -2194,6 +2194,60 @@ public class TestPointQueries extends LuceneTestCase {
|
||||||
dir.close();
|
dir.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testPointRangeWeightCount() throws IOException {
|
||||||
|
// the optimization for Weight#count kicks in only when the number of dimensions is 1
|
||||||
|
Directory dir = newDirectory();
|
||||||
|
RandomIndexWriter w = new RandomIndexWriter(random(), dir);
|
||||||
|
|
||||||
|
int numPoints = random().nextInt(1, 10);
|
||||||
|
int[] points = new int[numPoints];
|
||||||
|
|
||||||
|
int numQueries = random().nextInt(1, 10);
|
||||||
|
int[] lowerBound = new int[numQueries];
|
||||||
|
int[] upperBound = new int[numQueries];
|
||||||
|
int[] expectedCount = new int[numQueries];
|
||||||
|
|
||||||
|
for (int i = 0; i < numQueries; i++) {
|
||||||
|
// generate random queries
|
||||||
|
lowerBound[i] = random().nextInt(1, 10);
|
||||||
|
// allow malformed ranges where upperBound could be less than lowerBound
|
||||||
|
upperBound[i] = random().nextInt(1, 10);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int i = 0; i < numPoints; i++) {
|
||||||
|
// generate random 1D points
|
||||||
|
points[i] = random().nextInt(1, 10);
|
||||||
|
if (random().nextBoolean()) {
|
||||||
|
// the doc may have at-most 1 point
|
||||||
|
Document doc = new Document();
|
||||||
|
doc.add(new IntPoint("point", points[i]));
|
||||||
|
w.addDocument(doc);
|
||||||
|
for (int j = 0; j < numQueries; j++) {
|
||||||
|
// calculate the number of points that lie within the query range
|
||||||
|
if (lowerBound[j] <= points[i] && points[i] <= upperBound[j]) {
|
||||||
|
expectedCount[j]++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
w.commit();
|
||||||
|
w.forceMerge(1);
|
||||||
|
|
||||||
|
IndexReader reader = w.getReader();
|
||||||
|
IndexSearcher searcher = new IndexSearcher(reader);
|
||||||
|
if (searcher.leafContexts.isEmpty() == false) { // we need at least 1 leaf in the segment
|
||||||
|
for (int i = 0; i < numQueries; i++) {
|
||||||
|
Query query = IntPoint.newRangeQuery("point", lowerBound[i], upperBound[i]);
|
||||||
|
Weight weight = searcher.createWeight(query, ScoreMode.COMPLETE_NO_SCORES, 1);
|
||||||
|
assertEquals(expectedCount[i], weight.count(searcher.leafContexts.get(0)));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
reader.close();
|
||||||
|
w.close();
|
||||||
|
dir.close();
|
||||||
|
}
|
||||||
|
|
||||||
public void testPointRangeEquals() {
|
public void testPointRangeEquals() {
|
||||||
Query q1, q2;
|
Query q1, q2;
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue