Early exit range queries on non-matching segments. (#13033)

This change makes `PointRangeQuery` exit early when it knows that it doesn't match a segment. When this query is part of a conjunction, this helps make sure `ScorerSupplier#get` doesn't get called on other required clauses, which is sometimes an expensive operation (e.g. multi-term queries). This is especially relevant for time-based data combined with `LogByteSizeMergePolicy`, which gives non-adjacent ranges of timestamps to each segment, which in turn increases the likelihood that some segments won't match a range query at all.
2024-01-26 15:47:01 +01:00 · 2024-01-26 15:47:01 +01:00 · 082d318abd
parent fb5037f841
commit 082d318abd
3 changed files with 56 additions and 0 deletions
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@ -216,6 +216,11 @@ Optimizations
 * GITHUB#12989: Split taxonomy facet arrays across reusable chunks of elements to reduce allocations. (Michael Froh, Stefan Vodita)
 * GITHUB#13033: PointRangeQuery now exits earlier on segments whose values
  don't intersect with the query range. When a PointRangeQuery is a required
  clause of a boolean query, this helps save work on other required clauses of
  the same boolean query. (Adrien Grand)
 Bug Fixes
 ---------------------
 * GITHUB#12866: Prevent extra similarity computation for single-level HNSW graphs. (Kaival Parikh)
--- a/lucene/core/src/java/org/apache/lucene/search/PointRangeQuery.java
+++ b/lucene/core/src/java/org/apache/lucene/search/PointRangeQuery.java
@ -290,6 +290,24 @@ public abstract class PointRangeQuery extends Query {
          return null;
        }
        if (values.getDocCount() == 0) {
          return null;
        } else {
          final byte[] fieldPackedLower = values.getMinPackedValue();
          final byte[] fieldPackedUpper = values.getMaxPackedValue();
          for (int i = 0; i < numDims; ++i) {
            int offset = i * bytesPerDim;
            if (comparator.compare(lowerPoint, offset, fieldPackedUpper, offset) > 0
                || comparator.compare(upperPoint, offset, fieldPackedLower, offset) < 0) {
              // If this query is a required clause of a boolean query, then returning null here
              // will help make sure that we don't call ScorerSupplier#get on other required clauses
              // of the same boolean query, which is an expensive operation for some queries (e.g.
              // multi-term queries).
              return null;
            }
          }
        }
        boolean allDocsMatch;
        if (values.getDocCount() == reader.maxDoc()) {
          final byte[] fieldPackedLower = values.getMinPackedValue();
--- a/lucene/core/src/test/org/apache/lucene/search/TestPointQueries.java
+++ b/lucene/core/src/test/org/apache/lucene/search/TestPointQueries.java
@ -2454,4 +2454,37 @@ public class TestPointQueries extends LuceneTestCase {
    r.close();
    dir.close();
  }
  /**
   * Checks that a point range query whose range does not intersect the values indexed in a leaf
   * returns a {@code null} scorer supplier for that leaf.
   *
   * <p>A single document is indexed with a 1D point ({@code field = 2}) and a 2D point ({@code
   * field2d = (1, 3)}). Each query below is disjoint from those values on at least one dimension.
   *
   * @throws IOException if indexing or searching fails
   */
  public void testRangeQuerySkipsNonMatchingSegments() throws IOException {
    // try-with-resources guarantees that the directory, writer and reader are released even when
    // an assertion fails; resources are closed in reverse order: reader, then writer, then dir.
    try (Directory dir = newDirectory();
        IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
      Document doc = new Document();
      doc.add(new IntPoint("field", 2));
      doc.add(new IntPoint("field2d", 1, 3));
      w.addDocument(doc);

      try (DirectoryReader reader = DirectoryReader.open(w)) {
        IndexSearcher searcher = newSearcher(reader);

        Query[] nonMatchingQueries = {
          // 1D: the whole range [0, 1] is below the indexed value 2.
          IntPoint.newRangeQuery("field", 0, 1),
          // 1D: the whole range [3, 4] is above the indexed value 2.
          IntPoint.newRangeQuery("field", 3, 4),
          // 2D: the first dimension [0, 2] contains 1, but the second dimension [0, 2] is below 3.
          IntPoint.newRangeQuery("field2d", new int[] {0, 0}, new int[] {2, 2}),
          // 2D: the second dimension [2, 4] contains 3, but the first dimension [2, 4] is above 1.
          IntPoint.newRangeQuery("field2d", new int[] {2, 2}, new int[] {4, 4}),
        };

        for (Query query : nonMatchingQueries) {
          Weight weight =
              searcher.createWeight(searcher.rewrite(query), ScoreMode.COMPLETE_NO_SCORES, 1f);
          assertNull(
              "Expected no ScorerSupplier for non-matching query " + query,
              weight.scorerSupplier(reader.leaves().get(0)));
        }
      }
    }
  }
 }