From 8703e449cee0693e50a7922a86c1cbc7dcf95d13 Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Tue, 20 Jun 2023 18:55:03 +0200 Subject: [PATCH] Change the MAXSCORE scorer to a bulk scorer. (#12361) --- lucene/CHANGES.txt | 4 + .../lucene/search/BlockMaxMaxscoreScorer.java | 318 ----------------- .../apache/lucene/search/BooleanWeight.java | 29 +- .../org/apache/lucene/search/DisiWrapper.java | 4 +- .../lucene/search/MaxScoreBulkScorer.java | 222 ++++++++++++ .../lucene/search/MaxScoreSumPropagator.java | 4 + .../search/TestBlockMaxMaxscoreScorer.java | 333 ------------------ .../lucene/search/TestMaxScoreBulkScorer.java | 325 +++++++++++++++++ 8 files changed, 558 insertions(+), 681 deletions(-) delete mode 100644 lucene/core/src/java/org/apache/lucene/search/BlockMaxMaxscoreScorer.java create mode 100644 lucene/core/src/java/org/apache/lucene/search/MaxScoreBulkScorer.java delete mode 100644 lucene/core/src/test/org/apache/lucene/search/TestBlockMaxMaxscoreScorer.java create mode 100644 lucene/core/src/test/org/apache/lucene/search/TestMaxScoreBulkScorer.java diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 1bb61027d0e..174872df1eb 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -128,8 +128,12 @@ Improvements Optimizations --------------------- + * GITHUB#12377: Avoid redundant loop for compute min value in DirectMonotonicWriter. (Chao Zhang) +* GITHUB#12361: Faster top-level disjunctions sorted by descending score. + (Adrien Grand) + Bug Fixes --------------------- (No changes) diff --git a/lucene/core/src/java/org/apache/lucene/search/BlockMaxMaxscoreScorer.java b/lucene/core/src/java/org/apache/lucene/search/BlockMaxMaxscoreScorer.java deleted file mode 100644 index c6fa22a66e0..00000000000 --- a/lucene/core/src/java/org/apache/lucene/search/BlockMaxMaxscoreScorer.java +++ /dev/null @@ -1,318 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. 
See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.lucene.search; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collection; -import java.util.Comparator; -import java.util.List; - -/** Scorer implementing Block-Max Maxscore algorithm */ -class BlockMaxMaxscoreScorer extends Scorer { - // current doc ID of the leads - private int doc; - - // doc id boundary that all scorers maxScore are valid - private int upTo; - - // heap of scorers ordered by doc ID - private final DisiPriorityQueue essentialsScorers; - - // array of scorers ordered by maxScore - private final DisiWrapper[] allScorers; - - // index of the first essential scorer in the `allScorers` array. All scorers before this index - // are non-essential. All scorers on and after this index are essential. - private int firstEssentialScorerIndex; - - // sum of max scores of scorers in nonEssentialScorers list - private double nonEssentialMaxScoreSum; - - private final long cost; - - private final MaxScoreSumPropagator maxScoreSumPropagator; - - private float minCompetitiveScore; - - private double score; - - /** - * Constructs a Scorer that scores doc based on Block-Max-Maxscore (BMM) algorithm - * http://engineering.nyu.edu/~suel/papers/bmm.pdf . 
This algorithm has lower overhead compared to - * WANDScorer, and could be used for simple disjunction queries. - * - * @param weight The weight to be used. - * @param scorers The sub scorers this Scorer should iterate on for optional clauses. - */ - public BlockMaxMaxscoreScorer(Weight weight, List scorers) throws IOException { - super(weight); - - this.upTo = -1; - this.doc = -1; - this.minCompetitiveScore = 0; - this.allScorers = new DisiWrapper[scorers.size()]; - this.essentialsScorers = new DisiPriorityQueue(scorers.size()); - this.firstEssentialScorerIndex = 0; - - long cost = 0; - for (int i = 0; i < scorers.size(); i++) { - DisiWrapper w = new DisiWrapper(scorers.get(i)); - cost += w.cost; - allScorers[i] = w; - } - - this.cost = cost; - maxScoreSumPropagator = new MaxScoreSumPropagator(scorers); - } - - @Override - public DocIdSetIterator iterator() { - // twoPhaseIterator needed to honor scorer.setMinCompetitiveScore guarantee - return TwoPhaseIterator.asDocIdSetIterator(twoPhaseIterator()); - } - - @Override - public TwoPhaseIterator twoPhaseIterator() { - DocIdSetIterator approximation = - new DocIdSetIterator() { - - @Override - public int docID() { - return doc; - } - - @Override - public int nextDoc() throws IOException { - return advance(doc + 1); - } - - @Override - public int advance(int target) throws IOException { - while (true) { - - if (target > upTo) { - updateMaxScoresAndLists(target); - } else { - // minCompetitiveScore might have increased, - // move potentially no-longer-competitive scorers from essential to non-essential - // list - movePotentiallyNonCompetitiveScorers(); - } - - assert target <= upTo; - - DisiWrapper top = essentialsScorers.top(); - - if (top == null) { - // all scorers in non-essential list, skip to next boundary or return no_more_docs - if (upTo == NO_MORE_DOCS) { - return doc = NO_MORE_DOCS; - } else { - target = upTo + 1; - } - } else { - // position all scorers in essential list to on or after target - while 
(top.doc < target) { - top.doc = top.iterator.advance(target); - top = essentialsScorers.updateTop(); - } - - if (top.doc == NO_MORE_DOCS) { - return doc = NO_MORE_DOCS; - } else if (top.doc > upTo) { - target = upTo + 1; - } else { - return doc = top.doc; - } - } - } - } - - private void movePotentiallyNonCompetitiveScorers() { - boolean removedEssentialScorer = false; - while (firstEssentialScorerIndex < allScorers.length - && maxScoreSumPropagator.scoreSumUpperBound( - nonEssentialMaxScoreSum + allScorers[firstEssentialScorerIndex].maxScore) - < minCompetitiveScore) { - DisiWrapper nextLeastContributingScorer = allScorers[firstEssentialScorerIndex++]; - nonEssentialMaxScoreSum += nextLeastContributingScorer.maxScore; - removedEssentialScorer = true; - } - - // list adjusted - if (removedEssentialScorer) { - essentialsScorers.clear(); - essentialsScorers.addAll( - allScorers, - firstEssentialScorerIndex, - allScorers.length - firstEssentialScorerIndex); - } - } - - private void updateMaxScoresAndLists(int target) throws IOException { - assert target > upTo; - // Next candidate doc id is above interval boundary, or minCompetitive has increased. - // Find next interval boundary. - // Block boundary alignment strategy is adapted from "Optimizing Top-k Document - // Retrieval Strategies for Block-Max Indexes" by Dimopoulos, Nepomnyachiy and Suel. - // Find the block interval boundary by computing statistics (max, avg etc.) from all - // participating scorer's block boundary. Then run BMM within the boundary. 
- updateUpToAndMaxScore(target); - repartitionLists(); - } - - private void updateUpToAndMaxScore(int target) throws IOException { - // reset upTo - upTo = -1; - for (DisiWrapper w : allScorers) { - // using Math.max here is a good approach when there are only two clauses, - // but when this scorer is used for more than two clauses, we may need to - // consider other approaches such as avg, as the further out the boundary, - // the higher maxScore would be for a scorer, which makes skipping based on - // comparison with minCompetitiveScore harder / less effective. - upTo = Math.max(w.scorer.advanceShallow(Math.max(w.doc, target)), upTo); - } - assert target <= upTo; - - for (DisiWrapper w : allScorers) { - // The assertion below will hold as long as upTo was computed using Math.max - // However, when the upTo computation method changes (to Math.avg etc), - // we may need to also handle the scenario where w.doc > upTo - assert w.doc <= upTo; - w.maxScore = w.scorer.getMaxScore(upTo); - } - } - - private void repartitionLists() { - firstEssentialScorerIndex = 0; - Arrays.sort(allScorers, Comparator.comparingDouble(scorer -> scorer.maxScore)); - - // Re-partition the scorers into non-essential list and essential list, as defined in - // the "Optimizing Top-k Document Retrieval Strategies for Block-Max Indexes" paper. 
- nonEssentialMaxScoreSum = 0; - for (DisiWrapper w : allScorers) { - if (maxScoreSumPropagator.scoreSumUpperBound(nonEssentialMaxScoreSum + w.maxScore) - >= minCompetitiveScore) { - break; - } - firstEssentialScorerIndex++; - nonEssentialMaxScoreSum += w.maxScore; - } - essentialsScorers.clear(); - essentialsScorers.addAll( - allScorers, - firstEssentialScorerIndex, - allScorers.length - firstEssentialScorerIndex); - } - - @Override - public long cost() { - // fixed at initialization - return cost; - } - }; - - return new TwoPhaseIterator(approximation) { - - @Override - public boolean matches() throws IOException { - // Start evaluating the score of the new document. It initially only includes essential - // clauses and abort / return early if a match is not possible. - // Scores of non-essential clauses get added later on to determine actual matches. - score = 0; - for (DisiWrapper w = essentialsScorers.topList(); w != null; w = w.next) { - score += w.scorer.score(); - } - - final double docScoreUpperBound = score + nonEssentialMaxScoreSum; - - if (maxScoreSumPropagator.scoreSumUpperBound(docScoreUpperBound) < minCompetitiveScore) { - return false; - } - - // Continue to add scores of non-essential scorers - for (int i = 0; i < firstEssentialScorerIndex; ++i) { - DisiWrapper w = allScorers[i]; - if (w.doc < doc) { - w.doc = w.iterator.advance(doc); - } - if (w.doc == doc) { - score += allScorers[i].scorer.score(); - } - } - - return score() >= minCompetitiveScore; - } - - @Override - public float matchCost() { - // over-estimate - return allScorers.length; - } - }; - } - - @Override - public int advanceShallow(int target) throws IOException { - // Propagate to improve score bounds - maxScoreSumPropagator.advanceShallow(target); - - int result = DocIdSetIterator.NO_MORE_DOCS; - for (DisiWrapper s : allScorers) { - if (s.doc < target) { - result = Math.min(result, s.scorer.advanceShallow(target)); - } - } - - return result; - } - - @Override - public float 
getMaxScore(int upTo) throws IOException { - return maxScoreSumPropagator.getMaxScore(upTo); - } - - @Override - public float score() throws IOException { - return (float) score; - } - - @Override - public int docID() { - return doc; - } - - @Override - public final Collection getChildren() { - List matchingChildren = new ArrayList<>(); - for (DisiWrapper s : allScorers) { - if (s.doc == doc) { - matchingChildren.add(new ChildScorable(s.scorer, "SHOULD")); - } - } - return matchingChildren; - } - - @Override - public void setMinCompetitiveScore(float minScore) throws IOException { - assert minScore >= 0; - minCompetitiveScore = minScore; - maxScoreSumPropagator.setMinCompetitiveScore(minScore); - } -} diff --git a/lucene/core/src/java/org/apache/lucene/search/BooleanWeight.java b/lucene/core/src/java/org/apache/lucene/search/BooleanWeight.java index ce417e3cb79..2d55e468536 100644 --- a/lucene/core/src/java/org/apache/lucene/search/BooleanWeight.java +++ b/lucene/core/src/java/org/apache/lucene/search/BooleanWeight.java @@ -218,34 +218,7 @@ final class BooleanWeight extends Weight { optionalScorers.add(ss.get(Long.MAX_VALUE)); } - return new BulkScorer() { - final Scorer bmmScorer = new BlockMaxMaxscoreScorer(BooleanWeight.this, optionalScorers); - final DocIdSetIterator iterator = bmmScorer.iterator(); - - @Override - public int score(LeafCollector collector, Bits acceptDocs, int min, int max) - throws IOException { - collector.setScorer(bmmScorer); - - int doc = bmmScorer.docID(); - if (doc < min) { - doc = iterator.advance(min); - } - while (doc < max) { - if (acceptDocs == null || acceptDocs.get(doc)) { - collector.collect(doc); - } - - doc = iterator.nextDoc(); - } - return doc; - } - - @Override - public long cost() { - return iterator.cost(); - } - }; + return new MaxScoreBulkScorer(optionalScorers); } List optional = new ArrayList(); diff --git a/lucene/core/src/java/org/apache/lucene/search/DisiWrapper.java 
b/lucene/core/src/java/org/apache/lucene/search/DisiWrapper.java index d910cb3c781..350bffc940e 100644 --- a/lucene/core/src/java/org/apache/lucene/search/DisiWrapper.java +++ b/lucene/core/src/java/org/apache/lucene/search/DisiWrapper.java @@ -39,8 +39,8 @@ public class DisiWrapper { // For WANDScorer long scaledMaxScore; - // For BlockMaxMaxscoreScorer - float maxScore; + // for MaxScoreBulkScorer + float maxWindowScore; public DisiWrapper(Scorer scorer) { this.scorer = scorer; diff --git a/lucene/core/src/java/org/apache/lucene/search/MaxScoreBulkScorer.java b/lucene/core/src/java/org/apache/lucene/search/MaxScoreBulkScorer.java new file mode 100644 index 00000000000..2bb475136b3 --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/search/MaxScoreBulkScorer.java @@ -0,0 +1,222 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.search; + +import java.io.IOException; +import java.util.Arrays; +import java.util.Comparator; +import java.util.List; +import org.apache.lucene.util.Bits; + +final class MaxScoreBulkScorer extends BulkScorer { + + // All scorers, sorted by increasing max score. 
+ private final DisiWrapper[] allScorers; + // These are the last scorers from `allScorers` that are "essential", ie. required for a match to + // have a competitive score. + private final DisiPriorityQueue essentialQueue; + // Index of the first essential scorer, ie. essentialQueue contains all scorers from + // allScorers[firstEssentialScorer:]. All scorers below this index are non-essential. + private int firstEssentialScorer; + private final MaxScoreSumPropagator maxScorePropagator; + private final long cost; + private float minCompetitiveScore; + private boolean minCompetitiveScoreUpdated; + private ScoreAndDoc scorable = new ScoreAndDoc(); + private final double[] maxScoreSums; + + MaxScoreBulkScorer(List scorers) throws IOException { + allScorers = new DisiWrapper[scorers.size()]; + int i = 0; + long cost = 0; + for (Scorer scorer : scorers) { + DisiWrapper w = new DisiWrapper(scorer); + cost += w.cost; + allScorers[i++] = w; + } + this.cost = cost; + maxScorePropagator = new MaxScoreSumPropagator(scorers); + essentialQueue = new DisiPriorityQueue(allScorers.length); + maxScoreSums = new double[allScorers.length]; + } + + @Override + public int score(LeafCollector collector, Bits acceptDocs, int min, int max) throws IOException { + collector.setScorer(scorable); + + int windowMin = min; + main: + while (windowMin < max) { + int windowMax = updateMaxWindowScores(windowMin); + windowMax = Math.min(windowMax, max); + if (partitionScorers() == false) { + // No matches in this window + windowMin = windowMax; + continue; + } + + DisiWrapper top = essentialQueue.top(); + while (top.doc < windowMin) { + top.doc = top.iterator.advance(windowMin); + top = essentialQueue.updateTop(); + } + + while (top.doc < windowMax) { + if (acceptDocs == null || acceptDocs.get(top.doc)) { + DisiWrapper topList = essentialQueue.topList(); + double score = topList.scorer.score(); + for (DisiWrapper w = topList.next; w != null; w = w.next) { + score += w.scorer.score(); + } + + boolean 
possibleMatch = true; + for (int i = firstEssentialScorer - 1; i >= 0; --i) { + float maxPossibleScore = maxScorePropagator.scoreSumUpperBound(score + maxScoreSums[i]); + if (maxPossibleScore < minCompetitiveScore) { + possibleMatch = false; + break; + } + + DisiWrapper scorer = allScorers[i]; + if (scorer.doc < top.doc) { + scorer.doc = scorer.iterator.advance(top.doc); + } + if (scorer.doc == top.doc) { + score += scorer.scorer.score(); + } + } + + if (possibleMatch) { + scorable.doc = top.doc; + scorable.score = (float) score; + collector.collect(top.doc); + } + } + int doc = top.doc; + do { + top.doc = top.iterator.nextDoc(); + top = essentialQueue.updateTop(); + } while (top.doc == doc); + + if (minCompetitiveScoreUpdated) { + minCompetitiveScoreUpdated = false; + if (partitionScorers()) { + top = essentialQueue.top(); + } else { + windowMin = windowMax; + continue main; + } + } + } + windowMin = windowMax; + } + + return nextCandidate(max); + } + + private int updateMaxWindowScores(int windowMin) throws IOException { + // Only use essential scorers to compute the window's max doc ID, in order to avoid constantly + // recomputing max scores over small windows + final int firstWindowLead = Math.min(firstEssentialScorer, allScorers.length - 1); + for (int i = 0; i < firstWindowLead; ++i) { + final DisiWrapper scorer = allScorers[i]; + if (scorer.doc < windowMin) { + scorer.scorer.advanceShallow(windowMin); + } + } + int windowMax = DocIdSetIterator.NO_MORE_DOCS; + for (int i = firstWindowLead; i < allScorers.length; ++i) { + final DisiWrapper scorer = allScorers[i]; + final int upTo = scorer.scorer.advanceShallow(Math.max(scorer.doc, windowMin)); + windowMax = (int) Math.min(windowMax, upTo + 1L); // upTo is inclusive + } + for (DisiWrapper scorer : allScorers) { + if (scorer.doc < windowMax) { + scorer.maxWindowScore = scorer.scorer.getMaxScore(windowMax - 1); + } else { + scorer.maxWindowScore = 0; + } + } + return windowMax; + } + + private boolean 
partitionScorers() { + Arrays.sort(allScorers, Comparator.comparingDouble(scorer -> scorer.maxWindowScore)); + double maxScoreSum = 0; + for (firstEssentialScorer = 0; + firstEssentialScorer < allScorers.length; + ++firstEssentialScorer) { + maxScoreSum += allScorers[firstEssentialScorer].maxWindowScore; + maxScoreSums[firstEssentialScorer] = maxScoreSum; + float maxScoreSumFloat = + MaxScoreSumPropagator.scoreSumUpperBound(maxScoreSum, firstEssentialScorer + 1); + if (maxScoreSumFloat >= minCompetitiveScore) { + break; + } + } + if (firstEssentialScorer == allScorers.length) { + return false; + } + + essentialQueue.clear(); + for (int i = firstEssentialScorer; i < allScorers.length; ++i) { + essentialQueue.add(allScorers[i]); + } + return true; + } + + /** Return the next candidate on or after {@code rangeEnd}. */ + private int nextCandidate(int rangeEnd) { + int next = DocIdSetIterator.NO_MORE_DOCS; + for (DisiWrapper scorer : allScorers) { + if (scorer.doc < rangeEnd) { + return rangeEnd; + } else { + next = Math.min(next, scorer.doc); + } + } + return next; + } + + @Override + public long cost() { + return cost; + } + + private class ScoreAndDoc extends Scorable { + + float score; + int doc = -1; + + @Override + public int docID() { + return doc; + } + + @Override + public float score() { + return score; + } + + @Override + public void setMinCompetitiveScore(float minScore) throws IOException { + MaxScoreBulkScorer.this.minCompetitiveScore = minScore; + maxScorePropagator.setMinCompetitiveScore(minScore); + minCompetitiveScoreUpdated = true; + } + } +} diff --git a/lucene/core/src/java/org/apache/lucene/search/MaxScoreSumPropagator.java b/lucene/core/src/java/org/apache/lucene/search/MaxScoreSumPropagator.java index 276674a8a8c..676dee04e4d 100644 --- a/lucene/core/src/java/org/apache/lucene/search/MaxScoreSumPropagator.java +++ b/lucene/core/src/java/org/apache/lucene/search/MaxScoreSumPropagator.java @@ -167,6 +167,10 @@ final class MaxScoreSumPropagator { } 
float scoreSumUpperBound(double sum) { + return scoreSumUpperBound(sum, numClauses); + } + + static float scoreSumUpperBound(double sum, int numClauses) { if (numClauses <= 2) { // When there are only two clauses, the sum is always the same regardless // of the order. diff --git a/lucene/core/src/test/org/apache/lucene/search/TestBlockMaxMaxscoreScorer.java b/lucene/core/src/test/org/apache/lucene/search/TestBlockMaxMaxscoreScorer.java deleted file mode 100644 index 72c31750e56..00000000000 --- a/lucene/core/src/test/org/apache/lucene/search/TestBlockMaxMaxscoreScorer.java +++ /dev/null @@ -1,333 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.lucene.search; - -import java.io.IOException; -import java.util.Arrays; -import java.util.List; -import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; -import org.apache.lucene.document.StringField; -import org.apache.lucene.index.DirectoryReader; -import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.IndexWriter; -import org.apache.lucene.index.LeafReaderContext; -import org.apache.lucene.index.Term; -import org.apache.lucene.store.Directory; -import org.apache.lucene.tests.util.LuceneTestCase; - -// These basic tests are similar to some of the tests in TestWANDScorer, and may not need to be kept -public class TestBlockMaxMaxscoreScorer extends LuceneTestCase { - private void writeDocuments(Directory dir) throws IOException { - try (IndexWriter w = - new IndexWriter(dir, newIndexWriterConfig().setMergePolicy(newLogMergePolicy()))) { - - for (String[] values : - Arrays.asList( - new String[] {"A", "B"}, // 0 - new String[] {"A"}, // 1 - new String[] {}, // 2 - new String[] {"A", "B", "C"}, // 3 - new String[] {"B"}, // 4 - new String[] {"B", "C"} // 5 - )) { - Document doc = new Document(); - for (String value : values) { - doc.add(new StringField("foo", value, Field.Store.NO)); - } - w.addDocument(doc); - } - w.forceMerge(1); - } - } - - public void testBasicsWithTwoDisjunctionClauses() throws Exception { - try (Directory dir = newDirectory()) { - writeDocuments(dir); - - try (IndexReader reader = DirectoryReader.open(dir)) { - IndexSearcher searcher = newSearcher(reader); - - Query query = - new BlockMaxMaxscoreQuery( - new BooleanQuery.Builder() - .add( - new BoostQuery( - new ConstantScoreQuery(new TermQuery(new Term("foo", "A"))), 2), - BooleanClause.Occur.SHOULD) - .add( - new ConstantScoreQuery(new TermQuery(new Term("foo", "B"))), - BooleanClause.Occur.SHOULD) - .build()); - - Scorer scorer = - searcher - .createWeight(searcher.rewrite(query), ScoreMode.TOP_SCORES, 1) - 
.scorer(searcher.getIndexReader().leaves().get(0)); - - assertEquals(0, scorer.iterator().nextDoc()); - assertEquals(2 + 1, scorer.score(), 0); - - assertEquals(1, scorer.iterator().nextDoc()); - assertEquals(2, scorer.score(), 0); - - assertEquals(3, scorer.iterator().nextDoc()); - assertEquals(2 + 1, scorer.score(), 0); - - assertEquals(4, scorer.iterator().nextDoc()); - assertEquals(1, scorer.score(), 0); - - assertEquals(5, scorer.iterator().nextDoc()); - assertEquals(1, scorer.score(), 0); - - assertEquals(DocIdSetIterator.NO_MORE_DOCS, scorer.iterator().nextDoc()); - } - } - } - - public void testBasicsWithThreeDisjunctionClauses() throws Exception { - try (Directory dir = newDirectory()) { - writeDocuments(dir); - - try (IndexReader reader = DirectoryReader.open(dir)) { - IndexSearcher searcher = newSearcher(reader); - - Query query = - new BlockMaxMaxscoreQuery( - new BooleanQuery.Builder() - .add( - new BoostQuery( - new ConstantScoreQuery(new TermQuery(new Term("foo", "A"))), 2), - BooleanClause.Occur.SHOULD) - .add( - new ConstantScoreQuery(new TermQuery(new Term("foo", "B"))), - BooleanClause.Occur.SHOULD) - .add( - new BoostQuery( - new ConstantScoreQuery(new TermQuery(new Term("foo", "C"))), 3), - BooleanClause.Occur.SHOULD) - .build()); - - Scorer scorer = - searcher - .createWeight(searcher.rewrite(query), ScoreMode.TOP_SCORES, 1) - .scorer(searcher.getIndexReader().leaves().get(0)); - - assertEquals(0, scorer.iterator().nextDoc()); - assertEquals(2 + 1, scorer.score(), 0); - - assertEquals(1, scorer.iterator().nextDoc()); - assertEquals(2, scorer.score(), 0); - - assertEquals(3, scorer.iterator().nextDoc()); - assertEquals(2 + 1 + 3, scorer.score(), 0); - - assertEquals(4, scorer.iterator().nextDoc()); - assertEquals(1, scorer.score(), 0); - - assertEquals(5, scorer.iterator().nextDoc()); - assertEquals(1 + 3, scorer.score(), 0); - - assertEquals(DocIdSetIterator.NO_MORE_DOCS, scorer.iterator().nextDoc()); - } - } - } - - public void 
testBasicsWithFilteredDisjunction() throws Exception { - try (Directory dir = newDirectory()) { - writeDocuments(dir); - - try (IndexReader reader = DirectoryReader.open(dir)) { - IndexSearcher searcher = newSearcher(reader); - - Query query = - new BooleanQuery.Builder() - .add( - new BlockMaxMaxscoreQuery( - new BooleanQuery.Builder() - .add( - new BoostQuery( - new ConstantScoreQuery(new TermQuery(new Term("foo", "A"))), 2), - BooleanClause.Occur.SHOULD) - .add( - new ConstantScoreQuery(new TermQuery(new Term("foo", "B"))), - BooleanClause.Occur.SHOULD) - .build()), - BooleanClause.Occur.MUST) - .add(new TermQuery(new Term("foo", "C")), BooleanClause.Occur.FILTER) - .build(); - - Scorer scorer = - searcher - .createWeight(searcher.rewrite(query), ScoreMode.TOP_SCORES, 1) - .scorer(searcher.getIndexReader().leaves().get(0)); - - assertEquals(3, scorer.iterator().nextDoc()); - assertEquals(2 + 1, scorer.score(), 0); - - assertEquals(5, scorer.iterator().nextDoc()); - assertEquals(1, scorer.score(), 0); - - assertEquals(DocIdSetIterator.NO_MORE_DOCS, scorer.iterator().nextDoc()); - - scorer = - searcher - .createWeight(searcher.rewrite(query), ScoreMode.TOP_SCORES, 1) - .scorer(searcher.getIndexReader().leaves().get(0)); - - scorer.setMinCompetitiveScore(2); - - assertEquals(3, scorer.iterator().nextDoc()); - assertEquals(2 + 1, scorer.score(), 0); - - assertEquals(DocIdSetIterator.NO_MORE_DOCS, scorer.iterator().nextDoc()); - } - } - } - - public void testBasicsWithExclusion() throws Exception { - try (Directory dir = newDirectory()) { - writeDocuments(dir); - - try (IndexReader reader = DirectoryReader.open(dir)) { - IndexSearcher searcher = newSearcher(reader); - - Query query = - new BooleanQuery.Builder() - .add( - new BlockMaxMaxscoreQuery( - new BooleanQuery.Builder() - .add( - new BoostQuery( - new ConstantScoreQuery(new TermQuery(new Term("foo", "A"))), 2), - BooleanClause.Occur.SHOULD) - .add( - new ConstantScoreQuery(new TermQuery(new Term("foo", "B"))), 
- BooleanClause.Occur.SHOULD) - .build()), - BooleanClause.Occur.MUST) - .add(new TermQuery(new Term("foo", "C")), BooleanClause.Occur.MUST_NOT) - .build(); - - Scorer scorer = - searcher - .createWeight(searcher.rewrite(query), ScoreMode.TOP_SCORES, 1) - .scorer(searcher.getIndexReader().leaves().get(0)); - - assertEquals(0, scorer.iterator().nextDoc()); - assertEquals(2 + 1, scorer.score(), 0); - - assertEquals(1, scorer.iterator().nextDoc()); - assertEquals(2, scorer.score(), 0); - - assertEquals(4, scorer.iterator().nextDoc()); - assertEquals(1, scorer.score(), 0); - - assertEquals(DocIdSetIterator.NO_MORE_DOCS, scorer.iterator().nextDoc()); - - scorer = - searcher - .createWeight(searcher.rewrite(query), ScoreMode.TOP_SCORES, 1) - .scorer(searcher.getIndexReader().leaves().get(0)); - - scorer.setMinCompetitiveScore(3); - - assertEquals(0, scorer.iterator().nextDoc()); - assertEquals(2 + 1, scorer.score(), 0); - - assertEquals(DocIdSetIterator.NO_MORE_DOCS, scorer.iterator().nextDoc()); - } - } - } - - private static class BlockMaxMaxscoreQuery extends Query { - private final BooleanQuery query; - - private BlockMaxMaxscoreQuery(BooleanQuery query) { - assert query.isPureDisjunction() - : "This test utility query is only used to create BlockMaxMaxscoreScorer for disjunctions."; - assert query.clauses().size() >= 2 - : "There must be at least two optional clauses to use this test utility query."; - this.query = query; - } - - @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) - throws IOException { - return new Weight(query) { - - @Override - public Explanation explain(LeafReaderContext context, int doc) throws IOException { - // no-ops - return null; - } - - @Override - public Scorer scorer(LeafReaderContext context) throws IOException { - BooleanWeight weight = (BooleanWeight) query.createWeight(searcher, scoreMode, boost); - List optionalScorers = - weight.weightedClauses.stream() - .map(wc -> wc.weight) - .map( - 
w -> { - try { - return w.scorerSupplier(context); - } catch (IOException e) { - throw new AssertionError(e); - } - }) - .map( - ss -> { - try { - return ss.get(Long.MAX_VALUE); - } catch (IOException e) { - throw new AssertionError(e); - } - }) - .toList(); - - return new BlockMaxMaxscoreScorer(weight, optionalScorers); - } - - @Override - public boolean isCacheable(LeafReaderContext ctx) { - return false; - } - }; - } - - @Override - public String toString(String field) { - return "BlockMaxMaxscoreQuery"; - } - - @Override - public void visit(QueryVisitor visitor) { - // no-ops - } - - @Override - public boolean equals(Object other) { - return sameClassAs(other) && query.equals(((BlockMaxMaxscoreQuery) other).query); - } - - @Override - public int hashCode() { - return 31 * classHash() + query.hashCode(); - } - } -} diff --git a/lucene/core/src/test/org/apache/lucene/search/TestMaxScoreBulkScorer.java b/lucene/core/src/test/org/apache/lucene/search/TestMaxScoreBulkScorer.java new file mode 100644 index 00000000000..f9eb96dc1e5 --- /dev/null +++ b/lucene/core/src/test/org/apache/lucene/search/TestMaxScoreBulkScorer.java @@ -0,0 +1,325 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+package org.apache.lucene.search;
+
+import java.io.IOException;
+import java.util.Arrays;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.StringField;
+import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.tests.util.LuceneTestCase;
+
+// These basic tests are similar to some of the tests in TestWANDScorer, and may not need to be kept
+/**
+ * Basic tests for top-level disjunctions of constant-score term clauses, scored through the
+ * {@link BulkScorer} that the query's {@link Weight} returns for {@link ScoreMode#TOP_SCORES}.
+ *
+ * <p>Every test scores the same six-document index (see {@link #writeDocuments(Directory)}) and
+ * checks the exact sequence of collected doc IDs and scores, including the total number of hits.
+ * The "AndSkipping" variants additionally raise the minimum competitive score while collecting
+ * and verify that hits which can no longer be competitive are not collected.
+ *
+ * <p>NOTE(review): going by the class name, the bulk scorer under test is presumably
+ * MaxScoreBulkScorer; the tests only go through {@code Weight#bulkScorer} and do not verify
+ * which implementation is returned.
+ */
+public class TestMaxScoreBulkScorer extends LuceneTestCase {
+
+  /** Marker for "leave the minimum competitive score unchanged after this hit". */
+  private static final float KEEP_MIN_COMPETITIVE_SCORE = Float.NaN;
+
+  /**
+   * Indexes six documents into {@code dir} and force-merges them into a single segment.
+   *
+   * <p>The values of the {@code foo} field are, in indexing order: {A, B}, {A}, {}, {A, B, C},
+   * {B}, {B, C}. The tests below rely on these documents getting doc IDs 0 to 5 in that order.
+   *
+   * @param dir directory to write the index to
+   * @throws IOException if writing the index fails
+   */
+  private void writeDocuments(Directory dir) throws IOException {
+    // NOTE(review): a log merge policy is presumably used so that merging keeps documents in
+    // indexing order - confirm.
+    try (IndexWriter w =
+        new IndexWriter(dir, newIndexWriterConfig().setMergePolicy(newLogMergePolicy()))) {
+
+      for (String[] values :
+          Arrays.asList(
+              new String[] {"A", "B"}, // 0
+              new String[] {"A"}, // 1
+              new String[] {}, // 2
+              new String[] {"A", "B", "C"}, // 3
+              new String[] {"B"}, // 4
+              new String[] {"B", "C"} // 5
+              )) {
+        Document doc = new Document();
+        for (String value : values) {
+          doc.add(new StringField("foo", value, Field.Store.NO));
+        }
+        w.addDocument(doc);
+      }
+      // A single segment lets the tests score everything through leaves().get(0).
+      w.forceMerge(1);
+    }
+  }
+
+  /** Returns a constant-score query on the {@code foo} field for the given term value. */
+  private static Query constantScoreTerm(String value) {
+    return new ConstantScoreQuery(new TermQuery(new Term("foo", value)));
+  }
+
+  /** Returns the disjunction {@code (foo:A)^2 OR foo:B} over constant-score clauses. */
+  private static Query twoClauseDisjunction() {
+    return new BooleanQuery.Builder()
+        .add(new BoostQuery(constantScoreTerm("A"), 2), BooleanClause.Occur.SHOULD)
+        .add(constantScoreTerm("B"), BooleanClause.Occur.SHOULD)
+        .build();
+  }
+
+  /** Returns the disjunction {@code (foo:A)^2 OR foo:B OR (foo:C)^3} over constant-score clauses. */
+  private static Query threeClauseDisjunction() {
+    return new BooleanQuery.Builder()
+        .add(new BoostQuery(constantScoreTerm("A"), 2), BooleanClause.Occur.SHOULD)
+        .add(constantScoreTerm("B"), BooleanClause.Occur.SHOULD)
+        .add(new BoostQuery(constantScoreTerm("C"), 3), BooleanClause.Occur.SHOULD)
+        .build();
+  }
+
+  /** A hit that is expected to be collected, without touching the minimum competitive score. */
+  private static ExpectedHit hit(int doc, float score) {
+    return new ExpectedHit(doc, score, KEEP_MIN_COMPETITIVE_SCORE);
+  }
+
+  /**
+   * A hit that is expected to be collected, after which the collector raises the minimum
+   * competitive score to {@code minCompetitiveScoreAfter}.
+   */
+  private static ExpectedHit hit(int doc, float score, float minCompetitiveScoreAfter) {
+    return new ExpectedHit(doc, score, minCompetitiveScoreAfter);
+  }
+
+  /**
+   * Scores {@code query} against the index built by {@link #writeDocuments(Directory)} and
+   * asserts that exactly {@code expectedHits} are collected, in order.
+   *
+   * @param query the disjunction to score
+   * @param expectedHits the hits that must be collected, in collection order
+   * @throws IOException if indexing or scoring fails
+   */
+  private void assertHits(Query query, ExpectedHit... expectedHits) throws IOException {
+    try (Directory dir = newDirectory()) {
+      writeDocuments(dir);
+
+      try (IndexReader reader = DirectoryReader.open(dir)) {
+        IndexSearcher searcher = newSearcher(reader);
+
+        BulkScorer scorer =
+            searcher
+                .createWeight(searcher.rewrite(query), ScoreMode.TOP_SCORES, 1)
+                .bulkScorer(searcher.getIndexReader().leaves().get(0));
+
+        ExpectedHitsCollector collector = new ExpectedHitsCollector(expectedHits);
+        scorer.score(collector, null);
+        // Without this check a scorer that collects too few hits would go unnoticed.
+        collector.assertAllCollected();
+      }
+    }
+  }
+
+  public void testBasicsWithTwoDisjunctionClauses() throws Exception {
+    assertHits(
+        twoClauseDisjunction(), hit(0, 2 + 1), hit(1, 2), hit(3, 2 + 1), hit(4, 1), hit(5, 1));
+  }
+
+  public void testBasicsWithTwoDisjunctionClausesAndSkipping() throws Exception {
+    assertHits(
+        twoClauseDisjunction(),
+        hit(0, 2 + 1),
+        // simulate top-2 retrieval
+        hit(1, 2, Math.nextUp(2f)),
+        // docs 4 and 5 only match B (score 1) and must be skipped from here on
+        hit(3, 2 + 1, Math.nextUp(3f)));
+  }
+
+  public void testBasicsWithThreeDisjunctionClauses() throws Exception {
+    assertHits(
+        threeClauseDisjunction(),
+        hit(0, 2 + 1),
+        hit(1, 2),
+        hit(3, 2 + 1 + 3),
+        hit(4, 1),
+        hit(5, 1 + 3));
+  }
+
+  public void testBasicsWithThreeDisjunctionClausesAndSkipping() throws Exception {
+    assertHits(
+        threeClauseDisjunction(),
+        hit(0, 2 + 1),
+        // simulate top-2 retrieval
+        hit(1, 2, Math.nextUp(2f)),
+        // doc 4 only matches B (score 1) and must be skipped
+        hit(3, 2 + 1 + 3, Math.nextUp(3f)),
+        hit(5, 1 + 3, Math.nextUp(4f)));
+  }
+
+  /** Doc ID and score of one expected hit, plus the optional threshold to set afterwards. */
+  private static final class ExpectedHit {
+    private final int doc;
+    private final float score;
+    // KEEP_MIN_COMPETITIVE_SCORE (NaN) means the threshold is left untouched after this hit.
+    private final float minCompetitiveScoreAfter;
+
+    private ExpectedHit(int doc, float score, float minCompetitiveScoreAfter) {
+      this.doc = doc;
+      this.score = score;
+      this.minCompetitiveScoreAfter = minCompetitiveScoreAfter;
+    }
+  }
+
+  /**
+   * Collector that checks every collected hit against a fixed, ordered list of expected hits and
+   * fails on any hit beyond that list.
+   */
+  private static final class ExpectedHitsCollector implements LeafCollector {
+    private final ExpectedHit[] expectedHits;
+    private int collected;
+    private Scorable scorer;
+
+    private ExpectedHitsCollector(ExpectedHit[] expectedHits) {
+      this.expectedHits = expectedHits;
+    }
+
+    @Override
+    public void setScorer(Scorable scorer) throws IOException {
+      this.scorer = scorer;
+    }
+
+    @Override
+    public void collect(int doc) throws IOException {
+      if (collected >= expectedHits.length) {
+        fail("unexpected extra hit: doc=" + doc);
+      }
+      ExpectedHit expected = expectedHits[collected++];
+      assertEquals(expected.doc, doc);
+      assertEquals(expected.score, scorer.score(), 0f);
+      if (Float.isNaN(expected.minCompetitiveScoreAfter) == false) {
+        scorer.setMinCompetitiveScore(expected.minCompetitiveScoreAfter);
+      }
+    }
+
+    /** Fails unless every expected hit has been collected. */
+    private void assertAllCollected() {
+      assertEquals("number of collected hits", expectedHits.length, collected);
+    }
+  }
+}