Don't let merged passages push out lower-scoring ones (#11990)

PassageSelector uses a priority queue of size maxPassages to keep track of
which highlighted passages are worth returning to the user. Once all
passages have been collected, we go through and merge overlapping
passages together, but this reduction in the number of passages is not
compensated for by re-adding the highest-scoring passages that were pushed
out of the queue by passages which have been merged away.

This commit gives the priority queue a minimum size of 16 to try to account
for overlapping passages that will subsequently be merged together; the
merged result is still truncated to at most maxPassages passages.
This commit is contained in:
Alan Woodward 2022-12-01 12:25:29 +00:00 committed by GitHub
parent bd168ac2a8
commit 72ff140f5a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 22 additions and 1 deletions

View File

@ -166,6 +166,10 @@ Bug Fixes
* GITHUB#11950: Fix NPE in BinaryRangeFieldRangeQuery variants when the queried field doesn't exist
in a segment or is of the wrong type. (Greg Miller)
* GITHUB#11990: PassageSelector now has a larger minimum size for its priority queue,
so that subsequent passage merges don't mean that we return too few passages in
total. (Alan Woodward, Dawid Weiss)
Optimizations
---------------------
* GITHUB#11738: Optimize MultiTermQueryConstantScoreWrapper when a term is present that matches all

View File

@ -88,9 +88,13 @@ public class PassageSelector {
return Collections.emptyList();
}
// minimum priority queue size of 16 so that small maxPassages values don't
// return too few passages due to subsequent passage merges
int pqSize = Math.max(16, maxPassages);
// Best passages so far.
PriorityQueue<Passage> pq =
new PriorityQueue<>(maxPassages) {
new PriorityQueue<>(pqSize) {
@Override
protected boolean lessThan(Passage a, Passage b) {
return passageScorer.compare(a, b) < 0;
@ -215,6 +219,7 @@ public class PassageSelector {
}
// Remove nullified slots.
last = Math.min(last, maxPassages);
if (passages.length != last) {
passages = ArrayUtil.copyOfSubArray(passages, 0, last);
}

View File

@ -103,6 +103,18 @@ public class TestPassageSelector extends LuceneTestCase {
checkPassages("...>123456<...", value, 6, 1, new OffsetRange(1, value.length()));
}
// Regression test for GITHUB#11990: passages that end up merged together must not
// push a separate passage out of the result.
@Test
public void overlappingResultsDontPushOutLowerScoringResults() {
String value =
"a fair amount of space and then two matches here and then two big long spaces for matches";
// The highlight ranges below cover "two" (32-35), "matches" (36-43) and the second
// "two" (58-61). The expected string shows the first two highlights inside a single
// passage and the third highlight in a second passage (presumably '|' separates
// passages in checkPassages' expected format).
// NOTE(review): 20 and 2 are presumably the passage window size and maxPassages
// arguments of checkPassages - confirm against its signature.
checkPassages(
"...and then >two< >matches<...|...and then >two< big lon...",
value,
20,
2,
ranges(new OffsetRange(32, 35), new OffsetRange(36, 43), new OffsetRange(58, 61)));
}
@Test
public void highlightLargerThanWindowWithSubranges() {
String value = "0123456789a";