From cf29597fecb894c0562fda97f4fc1c877cebb550 Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Tue, 15 Oct 2024 15:20:34 +0200 Subject: [PATCH] Make MaxScoreBulkScorer repartition scorers when the min competitive increases. (#13800) MaxScoreBulkScorer partitions scorers into a set of essential scorers and a set of non-essential scorers, depending on the maximum scores produced by scorers and on the current minimum competitive score. An increase of the minimum competitive score has the potential to yield a more favorable partitioning, but repartitioning can also be expensive. In order to repartition when necessary while avoiding repartitioning too often, this PR tracks the minimum value of the minimum competitive score that would produce a more favorable partitioning, and repartitions scorers whenever the minimum competitive score exceeds this threshold. --- lucene/CHANGES.txt | 4 +++ .../lucene/search/MaxScoreBulkScorer.java | 11 +++++++- .../lucene/search/TestMaxScoreBulkScorer.java | 27 ------------------- 3 files changed, 14 insertions(+), 28 deletions(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 48a7e784481..5d9343bca2a 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -47,8 +47,12 @@ Improvements Optimizations --------------------- + * GITHUB#13828: Reduce long[] array allocation for bitset in readBitSetIterator. (Zhang Chao) +* GITHUB#13800: MaxScoreBulkScorer now recomputes scorer partitions when the + minimum competitive allows for a more favorable partitioning. 
(Adrien Grand) + Bug Fixes --------------------- * GITHUB#13832: Fixed an issue where the DefaultPassageFormatter.format method did not format passages as intended diff --git a/lucene/core/src/java/org/apache/lucene/search/MaxScoreBulkScorer.java b/lucene/core/src/java/org/apache/lucene/search/MaxScoreBulkScorer.java index 8786343ccec..18f5b83e93a 100644 --- a/lucene/core/src/java/org/apache/lucene/search/MaxScoreBulkScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/MaxScoreBulkScorer.java @@ -40,6 +40,8 @@ final class MaxScoreBulkScorer extends BulkScorer { // Index of the first scorer that is required, this scorer and all following scorers are required // for a document to match. int firstRequiredScorer; + // The minimum value of minCompetitiveScore that would produce a more favorable partitioning. + float nextMinCompetitiveScore; private final long cost; float minCompetitiveScore; private final Score scorable = new Score(); @@ -114,9 +116,14 @@ final class MaxScoreBulkScorer extends BulkScorer { while (top.doc < outerWindowMax) { scoreInnerWindow(collector, acceptDocs, outerWindowMax); top = essentialQueue.top(); + if (minCompetitiveScore >= nextMinCompetitiveScore) { + // The minimum competitive score increased substantially, so we can now partition scorers + // in a more favorable way. 
+ break; + } } - outerWindowMin = outerWindowMax; + outerWindowMin = Math.min(top.doc, outerWindowMax); } return nextCandidate(max); @@ -337,6 +344,7 @@ final class MaxScoreBulkScorer extends BulkScorer { }); double maxScoreSum = 0; firstEssentialScorer = 0; + nextMinCompetitiveScore = Float.POSITIVE_INFINITY; for (int i = 0; i < allScorers.length; ++i) { final DisiWrapper w = scratch[i]; double newMaxScoreSum = maxScoreSum + w.maxWindowScore; @@ -349,6 +357,7 @@ final class MaxScoreBulkScorer extends BulkScorer { firstEssentialScorer++; } else { allScorers[allScorers.length - 1 - (i - firstEssentialScorer)] = w; + nextMinCompetitiveScore = Math.min(maxScoreSumFloat, nextMinCompetitiveScore); } } diff --git a/lucene/core/src/test/org/apache/lucene/search/TestMaxScoreBulkScorer.java b/lucene/core/src/test/org/apache/lucene/search/TestMaxScoreBulkScorer.java index c6920403c91..6973cc0025a 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestMaxScoreBulkScorer.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestMaxScoreBulkScorer.java @@ -38,23 +38,6 @@ import org.apache.lucene.util.Bits; // These basic tests are similar to some of the tests in TestWANDScorer, and may not need to be kept public class TestMaxScoreBulkScorer extends LuceneTestCase { - private static class CapMaxScoreWindowAt2048Scorer extends FilterScorer { - - public CapMaxScoreWindowAt2048Scorer(Scorer in) { - super(in); - } - - @Override - public int advanceShallow(int target) throws IOException { - return Math.min(target | 0x7FF, in.advanceShallow(target)); - } - - @Override - public float getMaxScore(int upTo) throws IOException { - return in.getMaxScore(upTo); - } - } - private void writeDocuments(Directory dir) throws IOException { try (IndexWriter w = new IndexWriter(dir, newIndexWriterConfig().setMergePolicy(newLogMergePolicy()))) { @@ -96,12 +79,10 @@ public class TestMaxScoreBulkScorer extends LuceneTestCase { searcher .createWeight(searcher.rewrite(clause1), 
ScoreMode.TOP_SCORES, 1f) .scorer(context); - scorer1 = new CapMaxScoreWindowAt2048Scorer(scorer1); Scorer scorer2 = searcher .createWeight(searcher.rewrite(clause2), ScoreMode.TOP_SCORES, 1f) .scorer(context); - scorer2 = new CapMaxScoreWindowAt2048Scorer(scorer2); BulkScorer scorer = new MaxScoreBulkScorer(context.reader().maxDoc(), Arrays.asList(scorer1, scorer2)); @@ -168,12 +149,10 @@ public class TestMaxScoreBulkScorer extends LuceneTestCase { searcher .createWeight(searcher.rewrite(clause1), ScoreMode.TOP_SCORES, 1f) .scorer(context); - scorer1 = new CapMaxScoreWindowAt2048Scorer(scorer1); Scorer scorer2 = searcher .createWeight(searcher.rewrite(clause2), ScoreMode.TOP_SCORES, 1f) .scorer(context); - scorer2 = new CapMaxScoreWindowAt2048Scorer(scorer2); BulkScorer scorer = new MaxScoreBulkScorer(context.reader().maxDoc(), Arrays.asList(scorer1, scorer2)); @@ -237,17 +216,14 @@ public class TestMaxScoreBulkScorer extends LuceneTestCase { searcher .createWeight(searcher.rewrite(clause1), ScoreMode.TOP_SCORES, 1f) .scorer(context); - scorer1 = new CapMaxScoreWindowAt2048Scorer(scorer1); Scorer scorer2 = searcher .createWeight(searcher.rewrite(clause2), ScoreMode.TOP_SCORES, 1f) .scorer(context); - scorer2 = new CapMaxScoreWindowAt2048Scorer(scorer2); Scorer scorer3 = searcher .createWeight(searcher.rewrite(clause3), ScoreMode.TOP_SCORES, 1f) .scorer(context); - scorer3 = new CapMaxScoreWindowAt2048Scorer(scorer3); BulkScorer scorer = new MaxScoreBulkScorer( @@ -317,17 +293,14 @@ public class TestMaxScoreBulkScorer extends LuceneTestCase { searcher .createWeight(searcher.rewrite(clause1), ScoreMode.TOP_SCORES, 1f) .scorer(context); - scorer1 = new CapMaxScoreWindowAt2048Scorer(scorer1); Scorer scorer2 = searcher .createWeight(searcher.rewrite(clause2), ScoreMode.TOP_SCORES, 1f) .scorer(context); - scorer2 = new CapMaxScoreWindowAt2048Scorer(scorer2); Scorer scorer3 = searcher .createWeight(searcher.rewrite(clause3), ScoreMode.TOP_SCORES, 1f) .scorer(context); - 
scorer3 = new CapMaxScoreWindowAt2048Scorer(scorer3); BulkScorer scorer = new MaxScoreBulkScorer(