mirror of https://github.com/apache/lucene.git
Don't let merged passages push out lower-scoring ones (#11990)
PassageSelector uses a priority queue of size maxPassages to keep track of which highlighted passages are worth returning to the user. Once all passages have been collected, we go through and merge overlapping passages together. However, this reduction in the number of passages is not compensated for by re-adding the highest-scoring passages that were pushed out of the queue by passages which have since been merged away. This commit increases the size of the priority queue to try to account for overlapping passages that will subsequently be merged together.
This commit is contained in:
parent
bd168ac2a8
commit
72ff140f5a
|
@@ -166,6 +166,10 @@ Bug Fixes
|
|||
* GITHUB#11950: Fix NPE in BinaryRangeFieldRangeQuery variants when the queried field doesn't exist
|
||||
in a segment or is of the wrong type. (Greg Miller)
|
||||
|
||||
* GITHUB#11990: PassageSelector now has a larger minimum size for its priority queue,
|
||||
so that subsequent passage merges don't mean that we return too few passages in
|
||||
total. (Alan Woodward, Dawid Weiss)
|
||||
|
||||
Optimizations
|
||||
---------------------
|
||||
* GITHUB#11738: Optimize MultiTermQueryConstantScoreWrapper when a term is present that matches all
|
||||
|
|
|
@@ -88,9 +88,13 @@ public class PassageSelector {
|
|||
return Collections.emptyList();
|
||||
}
|
||||
|
||||
// minimum priority queue size of 16 so that small maxPassages values don't
|
||||
// return too few passages due to subsequent passage merges
|
||||
int pqSize = Math.max(16, maxPassages);
|
||||
|
||||
// Best passages so far.
|
||||
PriorityQueue<Passage> pq =
|
||||
new PriorityQueue<>(maxPassages) {
|
||||
new PriorityQueue<>(pqSize) {
|
||||
@Override
|
||||
protected boolean lessThan(Passage a, Passage b) {
|
||||
return passageScorer.compare(a, b) < 0;
|
||||
|
@@ -215,6 +219,7 @@ public class PassageSelector {
|
|||
}
|
||||
|
||||
// Remove nullified slots.
|
||||
last = Math.min(last, maxPassages);
|
||||
if (passages.length != last) {
|
||||
passages = ArrayUtil.copyOfSubArray(passages, 0, last);
|
||||
}
|
||||
|
|
|
@@ -103,6 +103,18 @@ public class TestPassageSelector extends LuceneTestCase {
|
|||
checkPassages("...>123456<...", value, 6, 1, new OffsetRange(1, value.length()));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void overlappingResultsDontPushOutLowerScoringResults() {
|
||||
String value =
|
||||
"a fair amount of space and then two matches here and then two big long spaces for matches";
|
||||
checkPassages(
|
||||
"...and then >two< >matches<...|...and then >two< big lon...",
|
||||
value,
|
||||
20,
|
||||
2,
|
||||
ranges(new OffsetRange(32, 35), new OffsetRange(36, 43), new OffsetRange(58, 61)));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void highlightLargerThanWindowWithSubranges() {
|
||||
String value = "0123456789a";
|
||||
|
|
Loading…
Reference in New Issue