From b12404b09b3a744323ceddff841dc2e39f1d5024 Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Fri, 22 Nov 2024 18:18:51 +0100 Subject: [PATCH] Make CombinedFieldQuery eligible for WAND/MAXSCORE. (#13999) `CombinedFieldQuery` currently returns an infinite maximum score. We can do better by returning the maximum score that the sim scorer can return, which in the case of BM25 is bounded by the IDF. This makes CombinedFieldQuery eligible for WAND/MAXSCORE (not their block-max variants though, since we return the same score upper bound for the whole index). --- lucene/CHANGES.txt | 3 +++ .../org/apache/lucene/sandbox/search/CombinedFieldQuery.java | 4 +++- .../apache/lucene/sandbox/search/MultiNormsLeafSimScorer.java | 4 ++++ 3 files changed, 10 insertions(+), 1 deletion(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 00cf6bb8874..dabb7a6f28b 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -76,6 +76,9 @@ Optimizations * GITHUB#14000: Speed up top-k retrieval of filtered disjunctions. (Adrien Grand) +* GITHUB#13999: CombinedFieldQuery now returns non-infinite maximum scores, + making it eligible for dynamic pruning. 
(Adrien Grand) + Bug Fixes --------------------- * GITHUB#13832: Fixed an issue where the DefaultPassageFormatter.format method did not format passages as intended diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/search/CombinedFieldQuery.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/search/CombinedFieldQuery.java index 8f21c8e1850..f709cfe2f75 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/search/CombinedFieldQuery.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/search/CombinedFieldQuery.java @@ -438,12 +438,14 @@ public final class CombinedFieldQuery extends Query implements Accountable { private final DisiPriorityQueue queue; private final DocIdSetIterator iterator; private final MultiNormsLeafSimScorer simScorer; + private final float maxScore; CombinedFieldScorer( DisiPriorityQueue queue, DocIdSetIterator iterator, MultiNormsLeafSimScorer simScorer) { this.queue = queue; this.iterator = iterator; this.simScorer = simScorer; + this.maxScore = simScorer.getSimScorer().score(Float.POSITIVE_INFINITY, 1L); } @Override @@ -475,7 +477,7 @@ public final class CombinedFieldQuery extends Query implements Accountable { @Override public float getMaxScore(int upTo) throws IOException { - return Float.POSITIVE_INFINITY; + return maxScore; } } } diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/search/MultiNormsLeafSimScorer.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/search/MultiNormsLeafSimScorer.java index 026fa4130d3..e13fc903999 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/search/MultiNormsLeafSimScorer.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/search/MultiNormsLeafSimScorer.java @@ -90,6 +90,10 @@ final class MultiNormsLeafSimScorer { } } + SimScorer getSimScorer() { + return scorer; + } + private long getNormValue(int doc) throws IOException { if (norms != null) { boolean found = norms.advanceExact(doc);