From 082d318abd791e129283018a4a359e7f2081149a Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Fri, 26 Jan 2024 15:47:01 +0100 Subject: [PATCH] Early exit range queries on non-matching segments. (#13033) This change makes `PointRangeQuery` exit early when it knows that it doesn't match a segment. In the case when this query is part of a conjunction, this helps make sure `ScorerSupplier#get` doesn't get called on other required clauses, which is sometimes an expensive operation (e.g. multi-term queries). This is especially relevant for time-based data combined with `LogByteSizeMergePolicy`, which gives non-adjacent ranges of timestamps to each segment, which in turn increases the likelihood that some segments won't match a range query at all. --- lucene/CHANGES.txt | 5 +++ .../apache/lucene/search/PointRangeQuery.java | 18 ++++++++++ .../lucene/search/TestPointQueries.java | 33 +++++++++++++++++++ 3 files changed, 56 insertions(+) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 4d042cb72c6..6a54fecb9d8 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -216,6 +216,11 @@ Optimizations * GITHUB#12989: Split taxonomy facet arrays across reusable chunks of elements to reduce allocations. (Michael Froh, Stefan Vodita) +* GITHUB#13033: PointRangeQuery now exits earlier on segments whose values + don't intersect with the query range. When a PointRangeQuery is a required + clause of a boolean query, this helps save work on other required clauses of + the same boolean query. (Adrien Grand) + Bug Fixes --------------------- * GITHUB#12866: Prevent extra similarity computation for single-level HNSW graphs. 
(Kaival Parikh) diff --git a/lucene/core/src/java/org/apache/lucene/search/PointRangeQuery.java b/lucene/core/src/java/org/apache/lucene/search/PointRangeQuery.java index 60875bffe81..64fb00bdb34 100644 --- a/lucene/core/src/java/org/apache/lucene/search/PointRangeQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/PointRangeQuery.java @@ -290,6 +290,24 @@ public abstract class PointRangeQuery extends Query { return null; } + if (values.getDocCount() == 0) { + return null; + } else { + final byte[] fieldPackedLower = values.getMinPackedValue(); + final byte[] fieldPackedUpper = values.getMaxPackedValue(); + for (int i = 0; i < numDims; ++i) { + int offset = i * bytesPerDim; + if (comparator.compare(lowerPoint, offset, fieldPackedUpper, offset) > 0 + || comparator.compare(upperPoint, offset, fieldPackedLower, offset) < 0) { + // If this query is a required clause of a boolean query, then returning null here + // will help make sure that we don't call ScorerSupplier#get on other required clauses + // of the same boolean query, which is an expensive operation for some queries (e.g. + // multi-term queries). 
+              return null;
+            }
+          }
+        }
+
         boolean allDocsMatch;
         if (values.getDocCount() == reader.maxDoc()) {
           final byte[] fieldPackedLower = values.getMinPackedValue();
diff --git a/lucene/core/src/test/org/apache/lucene/search/TestPointQueries.java b/lucene/core/src/test/org/apache/lucene/search/TestPointQueries.java
index 341f75e280e..93646bfafa7 100644
--- a/lucene/core/src/test/org/apache/lucene/search/TestPointQueries.java
+++ b/lucene/core/src/test/org/apache/lucene/search/TestPointQueries.java
@@ -2454,4 +2454,37 @@ public class TestPointQueries extends LuceneTestCase {
     r.close();
     dir.close();
   }
+
+  public void testRangeQuerySkipsNonMatchingSegments() throws IOException {
+    Directory dir = newDirectory();
+    IndexWriter w = new IndexWriter(dir, newIndexWriterConfig());
+    Document doc = new Document();
+    doc.add(new IntPoint("field", 2)); // one-dimensional point: 2
+    doc.add(new IntPoint("field2d", 1, 3)); // two-dimensional point: (1, 3)
+    w.addDocument(doc);
+
+    DirectoryReader reader = DirectoryReader.open(w);
+    IndexSearcher searcher = newSearcher(reader);
+    // Every range below is disjoint from the indexed values, so no ScorerSupplier is expected.
+    Query query = IntPoint.newRangeQuery("field", 0, 1); // range lies below the value 2
+    Weight weight =
+        searcher.createWeight(searcher.rewrite(query), ScoreMode.COMPLETE_NO_SCORES, 1f);
+    assertNull(weight.scorerSupplier(reader.leaves().get(0)));
+
+    query = IntPoint.newRangeQuery("field", 3, 4); // range lies above the value 2
+    weight = searcher.createWeight(searcher.rewrite(query), ScoreMode.COMPLETE_NO_SCORES, 1f);
+    assertNull(weight.scorerSupplier(reader.leaves().get(0)));
+    // The second dimension holds 3, which is above the queried upper bound of 2.
+    query = IntPoint.newRangeQuery("field2d", new int[] {0, 0}, new int[] {2, 2});
+    weight = searcher.createWeight(searcher.rewrite(query), ScoreMode.COMPLETE_NO_SCORES, 1f);
+    assertNull(weight.scorerSupplier(reader.leaves().get(0)));
+    // The first dimension holds 1, which is below the queried lower bound of 2.
+    query = IntPoint.newRangeQuery("field2d", new int[] {2, 2}, new int[] {4, 4});
+    weight = searcher.createWeight(searcher.rewrite(query), ScoreMode.COMPLETE_NO_SCORES, 1f);
+    assertNull(weight.scorerSupplier(reader.leaves().get(0)));
+
+    reader.close();
+    w.close();
+    dir.close();
+  }
 }