Early exit range queries on non-matching segments. (#13033)

This change makes `PointRangeQuery` exit early when it knows that it doesn't match a segment. When this query is part of a conjunction, this helps make sure `ScorerSupplier#get` doesn't get called on other required clauses, which is sometimes an expensive operation (e.g. multi-term queries). This is especially relevant for time-based data combined with `LogByteSizeMergePolicy`, which gives non-adjacent ranges of timestamps to each segment, which in turn increases the likelihood that some segments won't match a range query at all.
2025-03-07 00:39:21 +00:00 · 2024-01-26 15:47:01 +01:00 · 2024-01-26 15:47:01 +01:00 · 082d318abd
commit 082d318abd
parent fb5037f841
3 changed files with 56 additions and 0 deletions
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@ -216,6 +216,11 @@ Optimizations

 * GITHUB#12989: Split taxonomy facet arrays across reusable chunks of elements to reduce allocations. (Michael Froh, Stefan Vodita)

+* GITHUB#13033: PointRangeQuery now exits earlier on segments whose values
+  don't intersect with the query range. When a PointRangeQuery is a required
+  clause of a boolean query, this helps save work on other required clauses of
+  the same boolean query. (Adrien Grand)
+
 Bug Fixes
 ---------------------
 * GITHUB#12866: Prevent extra similarity computation for single-level HNSW graphs. (Kaival Parikh)
--- a/lucene/core/src/java/org/apache/lucene/search/PointRangeQuery.java
+++ b/lucene/core/src/java/org/apache/lucene/search/PointRangeQuery.java
@ -290,6 +290,24 @@ public abstract class PointRangeQuery extends Query {
          return null;
        }

+        if (values.getDocCount() == 0) {
+          return null;
+        } else {
+          final byte[] fieldPackedLower = values.getMinPackedValue();
+          final byte[] fieldPackedUpper = values.getMaxPackedValue();
+          for (int i = 0; i < numDims; ++i) {
+            int offset = i * bytesPerDim;
+            if (comparator.compare(lowerPoint, offset, fieldPackedUpper, offset) > 0
+                || comparator.compare(upperPoint, offset, fieldPackedLower, offset) < 0) {
+              // If this query is a required clause of a boolean query, then returning null here
+              // will help make sure that we don't call ScorerSupplier#get on other required clauses
+              // of the same boolean query, which is an expensive operation for some queries (e.g.
+              // multi-term queries).
+              return null;
+            }
+          }
+        }
+
        boolean allDocsMatch;
        if (values.getDocCount() == reader.maxDoc()) {
          final byte[] fieldPackedLower = values.getMinPackedValue();
--- a/lucene/core/src/test/org/apache/lucene/search/TestPointQueries.java
+++ b/lucene/core/src/test/org/apache/lucene/search/TestPointQueries.java
@ -2454,4 +2454,37 @@ public class TestPointQueries extends LuceneTestCase {
    r.close();
    dir.close();
  }
+
+  public void testRangeQuerySkipsNonMatchingSegments() throws IOException {
+    Directory dir = newDirectory();
+    IndexWriter w = new IndexWriter(dir, newIndexWriterConfig());
+    Document doc = new Document();
+    doc.add(new IntPoint("field", 2));
+    doc.add(new IntPoint("field2d", 1, 3));
+    w.addDocument(doc);
+
+    DirectoryReader reader = DirectoryReader.open(w);
+    IndexSearcher searcher = newSearcher(reader);
+
+    Query query = IntPoint.newRangeQuery("field", 0, 1);
+    Weight weight =
+        searcher.createWeight(searcher.rewrite(query), ScoreMode.COMPLETE_NO_SCORES, 1f);
+    assertNull(weight.scorerSupplier(reader.leaves().get(0)));
+
+    query = IntPoint.newRangeQuery("field", 3, 4);
+    weight = searcher.createWeight(searcher.rewrite(query), ScoreMode.COMPLETE_NO_SCORES, 1f);
+    assertNull(weight.scorerSupplier(reader.leaves().get(0)));
+
+    query = IntPoint.newRangeQuery("field2d", new int[] {0, 0}, new int[] {2, 2});
+    weight = searcher.createWeight(searcher.rewrite(query), ScoreMode.COMPLETE_NO_SCORES, 1f);
+    assertNull(weight.scorerSupplier(reader.leaves().get(0)));
+
+    query = IntPoint.newRangeQuery("field2d", new int[] {2, 2}, new int[] {4, 4});
+    weight = searcher.createWeight(searcher.rewrite(query), ScoreMode.COMPLETE_NO_SCORES, 1f);
+    assertNull(weight.scorerSupplier(reader.leaves().get(0)));
+
+    reader.close();
+    w.close();
+    dir.close();
+  }
 }