Early exit range queries on non-matching segments. (#13033)

This change makes `PointRangeQuery` exit early when it knows that it doesn't
match a segment. In the case when this query is part of a conjunction, this
helps make sure `ScorerSupplier#get` doesn't get called on other required
clauses, which is sometimes an expensive operation (e.g. multi-term queries).

This is especially relevant for time-based data combined with
`LogByteSizeMergePolicy`, which gives non-overlapping ranges of timestamps to
each segment, which in turn increases the likelihood that some segments won't
match a range query at all.
This commit is contained in:
Adrien Grand 2024-01-26 15:47:01 +01:00 committed by GitHub
parent fb5037f841
commit 082d318abd
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 56 additions and 0 deletions

View File

@ -216,6 +216,11 @@ Optimizations
* GITHUB#12989: Split taxonomy facet arrays across reusable chunks of elements to reduce allocations. (Michael Froh, Stefan Vodita)
* GITHUB#13033: PointRangeQuery now exits earlier on segments whose values
don't intersect with the query range. When a PointRangeQuery is a required
clause of a boolean query, this helps save work on other required clauses of
the same boolean query. (Adrien Grand)
Bug Fixes
---------------------
* GITHUB#12866: Prevent extra similarity computation for single-level HNSW graphs. (Kaival Parikh)

View File

@ -290,6 +290,24 @@ public abstract class PointRangeQuery extends Query {
return null;
}
if (values.getDocCount() == 0) {
return null;
} else {
final byte[] fieldPackedLower = values.getMinPackedValue();
final byte[] fieldPackedUpper = values.getMaxPackedValue();
for (int i = 0; i < numDims; ++i) {
int offset = i * bytesPerDim;
if (comparator.compare(lowerPoint, offset, fieldPackedUpper, offset) > 0
|| comparator.compare(upperPoint, offset, fieldPackedLower, offset) < 0) {
// If this query is a required clause of a boolean query, then returning null here
// will help make sure that we don't call ScorerSupplier#get on other required clauses
// of the same boolean query, which is an expensive operation for some queries (e.g.
// multi-term queries).
return null;
}
}
}
boolean allDocsMatch;
if (values.getDocCount() == reader.maxDoc()) {
final byte[] fieldPackedLower = values.getMinPackedValue();

View File

@ -2454,4 +2454,37 @@ public class TestPointQueries extends LuceneTestCase {
r.close();
dir.close();
}
/**
 * Checks that a point range query returns a {@code null} scorer supplier for a segment whose
 * indexed values cannot intersect the query range.
 *
 * <p>The index holds a single document with a 1D point {@code field=2} and a 2D point {@code
 * field2d=(1, 3)}. Each query below is built so that its range misses those values.
 *
 * @throws IOException if indexing, opening the reader, or closing a resource fails
 */
public void testRangeQuerySkipsNonMatchingSegments() throws IOException {
  // try-with-resources closes reader, then writer, then directory (the same order as explicit
  // close() calls would), and does so even when an assertion below fails.
  try (Directory dir = newDirectory();
      IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
    Document doc = new Document();
    doc.add(new IntPoint("field", 2));
    doc.add(new IntPoint("field2d", 1, 3));
    w.addDocument(doc);

    try (DirectoryReader reader = DirectoryReader.open(w)) {
      IndexSearcher searcher = newSearcher(reader);

      Query[] nonMatchingQueries = {
        // 1D: range entirely below the only indexed value (2).
        IntPoint.newRangeQuery("field", 0, 1),
        // 1D: range entirely above the only indexed value (2).
        IntPoint.newRangeQuery("field", 3, 4),
        // 2D: first dimension (1) is inside [0, 2] but second dimension (3) is above [0, 2].
        IntPoint.newRangeQuery("field2d", new int[] {0, 0}, new int[] {2, 2}),
        // 2D: second dimension (3) is inside [2, 4] but first dimension (1) is below [2, 4].
        IntPoint.newRangeQuery("field2d", new int[] {2, 2}, new int[] {4, 4})
      };

      for (Query query : nonMatchingQueries) {
        Weight weight =
            searcher.createWeight(searcher.rewrite(query), ScoreMode.COMPLETE_NO_SCORES, 1f);
        assertNull(
            "expected no scorer supplier for non-matching query: " + query,
            weight.scorerSupplier(reader.leaves().get(0)));
      }
    }
  }
}
}