mirror of https://github.com/apache/lucene.git
LUCENE-8922: Better impacts for DisjunctionMaxQuery. (#791)
This commit is contained in:
parent
85814e262c
commit
1ea8419336
|
@ -71,6 +71,11 @@ Improvements
|
|||
* LUCENE-8916: GraphTokenStreamFiniteStrings preserves all Token attributes
|
||||
through its finite strings TokenStreams (Alan Woodward)
|
||||
|
||||
Optimizations
|
||||
|
||||
* LUCENE-8922: DisjunctionMaxQuery more efficiently leverages impacts to skip
|
||||
non-competitive hits. (Adrien Grand)
|
||||
|
||||
Other
|
||||
|
||||
* LUCENE-8778 LUCENE-8911: Define analyzer SPI names as static final fields and document the names in Javadocs.
|
||||
|
|
|
@ -21,8 +21,6 @@ import java.util.List;
|
|||
|
||||
import org.apache.lucene.util.MathUtil;
|
||||
|
||||
import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
|
||||
|
||||
/**
|
||||
* The Scorer for DisjunctionMaxQuery. The union of all documents generated by the subquery scorers
|
||||
* is generated in document number order. The score for each document is the maximum of the scores computed
|
||||
|
@ -34,6 +32,8 @@ final class DisjunctionMaxScorer extends DisjunctionScorer {
|
|||
/* Multiplier applied to non-maximum-scoring subqueries for a document as they are summed into the result. */
|
||||
private final float tieBreakerMultiplier;
|
||||
|
||||
private final DisjunctionScoreBlockBoundaryPropagator disjunctionBlockPropagator;
|
||||
|
||||
/**
|
||||
* Creates a new instance of DisjunctionMaxScorer
|
||||
*
|
||||
|
@ -52,6 +52,11 @@ final class DisjunctionMaxScorer extends DisjunctionScorer {
|
|||
if (tieBreakerMultiplier < 0 || tieBreakerMultiplier > 1) {
|
||||
throw new IllegalArgumentException("tieBreakerMultiplier must be in [0, 1]");
|
||||
}
|
||||
if (scoreMode == ScoreMode.TOP_SCORES) {
|
||||
this.disjunctionBlockPropagator = new DisjunctionScoreBlockBoundaryPropagator(subScorers);
|
||||
} else {
|
||||
this.disjunctionBlockPropagator = null;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -72,15 +77,7 @@ final class DisjunctionMaxScorer extends DisjunctionScorer {
|
|||
|
||||
@Override
|
||||
public int advanceShallow(int target) throws IOException {
|
||||
int upTo = NO_MORE_DOCS;
|
||||
for (Scorer scorer : subScorers) {
|
||||
if (scorer.docID() <= target) {
|
||||
upTo = Math.min(scorer.advanceShallow(target), upTo);
|
||||
} else if (scorer.docID() < NO_MORE_DOCS) {
|
||||
upTo = Math.min(scorer.docID()-1, upTo);
|
||||
}
|
||||
}
|
||||
return upTo;
|
||||
return disjunctionBlockPropagator.advanceShallow(target);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -112,7 +109,14 @@ final class DisjunctionMaxScorer extends DisjunctionScorer {
|
|||
}
|
||||
|
||||
@Override
|
||||
public void setMinCompetitiveScore(float minScore) {
|
||||
public void setMinCompetitiveScore(float minScore) throws IOException {
|
||||
getBlockMaxApprox().setMinCompetitiveScore(minScore);
|
||||
disjunctionBlockPropagator.setMinCompetitiveScore(minScore);
|
||||
if (tieBreakerMultiplier == 0) {
|
||||
// TODO: we could even remove some scorers from the priority queue?
|
||||
for (Scorer scorer : subScorers) {
|
||||
scorer.setMinCompetitiveScore(minScore);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,112 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.search;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collection;
|
||||
import java.util.Comparator;
|
||||
|
||||
/**
|
||||
* A helper to propagate block boundaries for disjunctions.
|
||||
* Because a disjunction matches if any of its sub clauses matches, it is
|
||||
* tempting to return the minimum block boundary across all clauses. The problem
|
||||
* is that it might then make the query slow when the minimum competitive score
|
||||
* is high and low-scoring clauses don't drive iteration anymore. So this class
|
||||
* computes block boundaries only across clauses whose maximum score is greater
|
||||
* than or equal to the minimum competitive score, or the maximum scoring clause
|
||||
* if there is no such clause.
|
||||
*/
|
||||
final class DisjunctionScoreBlockBoundaryPropagator {
|
||||
|
||||
private static final Comparator<Scorer> MAX_SCORE_COMPARATOR = Comparator.comparing((Scorer s) -> {
|
||||
try {
|
||||
return s.getMaxScore(DocIdSetIterator.NO_MORE_DOCS);
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}).thenComparing(Comparator.comparing(s -> s.iterator().cost()));
|
||||
|
||||
private final Scorer[] scorers;
|
||||
private final float[] maxScores;
|
||||
private int leadIndex = 0;
|
||||
|
||||
DisjunctionScoreBlockBoundaryPropagator(Collection<Scorer> scorers) throws IOException {
|
||||
this.scorers = scorers.toArray(Scorer[]::new);
|
||||
for (Scorer scorer : this.scorers) {
|
||||
scorer.advanceShallow(0);
|
||||
}
|
||||
Arrays.sort(this.scorers, MAX_SCORE_COMPARATOR);
|
||||
|
||||
maxScores = new float[this.scorers.length];
|
||||
for (int i = 0; i < this.scorers.length; ++i) {
|
||||
maxScores[i] = this.scorers[i].getMaxScore(DocIdSetIterator.NO_MORE_DOCS);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* See {@link Scorer#advanceShallow(int)}.
|
||||
*/
|
||||
int advanceShallow(int target) throws IOException {
|
||||
// For scorers that are below the lead index, just propagate.
|
||||
for (int i = 0; i < leadIndex; ++i) {
|
||||
Scorer s = scorers[i];
|
||||
if (s.docID() < target) {
|
||||
s.advanceShallow(target);
|
||||
}
|
||||
}
|
||||
|
||||
// For scorers above the lead index, we take the minimum
|
||||
// boundary.
|
||||
Scorer leadScorer = scorers[leadIndex];
|
||||
int upTo = leadScorer.advanceShallow(Math.max(leadScorer.docID(), target));
|
||||
|
||||
for (int i = leadIndex + 1; i < scorers.length; ++i) {
|
||||
Scorer scorer = scorers[i];
|
||||
if (scorer.docID() <= target) {
|
||||
upTo = Math.min(scorer.advanceShallow(target), upTo);
|
||||
}
|
||||
}
|
||||
|
||||
// If the maximum scoring clauses are beyond `target`, then we use their
|
||||
// docID as a boundary. It helps not consider them when computing the
|
||||
// maximum score and get a lower score upper bound.
|
||||
for (int i = scorers.length - 1; i > leadIndex; --i) {
|
||||
Scorer scorer = scorers[i];
|
||||
if (scorer.docID() > target) {
|
||||
upTo = Math.min(upTo, scorer.docID() - 1);
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return upTo;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the minimum competitive score to filter out clauses that score less
|
||||
* than this threshold.
|
||||
* @see Scorer#setMinCompetitiveScore
|
||||
*/
|
||||
void setMinCompetitiveScore(float minScore) throws IOException {
|
||||
// Update the lead index if necessary
|
||||
while (leadIndex < maxScores.length - 1 && minScore > maxScores[leadIndex]) {
|
||||
leadIndex++;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,121 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.search;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
|
||||
public class TestDisjunctionScoreBlockBoundaryPropagator extends LuceneTestCase {
|
||||
|
||||
private static class FakeWeight extends Weight {
|
||||
|
||||
FakeWeight() {
|
||||
super(new MatchNoDocsQuery());
|
||||
}
|
||||
|
||||
@Override
|
||||
public Explanation explain(LeafReaderContext context, int doc) throws IOException {
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Scorer scorer(LeafReaderContext context) throws IOException {
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isCacheable(LeafReaderContext ctx) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
private static class FakeScorer extends Scorer {
|
||||
|
||||
final int boundary;
|
||||
final float maxScore;
|
||||
|
||||
FakeScorer(int boundary, float maxScore) throws IOException {
|
||||
super(new FakeWeight());
|
||||
this.boundary = boundary;
|
||||
this.maxScore = maxScore;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int docID() {
|
||||
return 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public float score() {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public DocIdSetIterator iterator() {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setMinCompetitiveScore(float minCompetitiveScore) {}
|
||||
|
||||
@Override
|
||||
public float getMaxScore(int upTo) throws IOException {
|
||||
return maxScore;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int advanceShallow(int target) {
|
||||
assert target <= boundary;
|
||||
return boundary;
|
||||
}
|
||||
}
|
||||
|
||||
public void testBasics() throws IOException {
|
||||
Scorer scorer1 = new FakeScorer(20, 0.5f);
|
||||
Scorer scorer2 = new FakeScorer(50, 1.5f);
|
||||
Scorer scorer3 = new FakeScorer(30, 2f);
|
||||
Scorer scorer4 = new FakeScorer(80, 3f);
|
||||
List<Scorer> scorers = Arrays.asList(scorer1, scorer2, scorer3, scorer4);
|
||||
Collections.shuffle(scorers, random());
|
||||
DisjunctionScoreBlockBoundaryPropagator propagator = new DisjunctionScoreBlockBoundaryPropagator(scorers);
|
||||
assertEquals(20, propagator.advanceShallow(0));
|
||||
|
||||
propagator.setMinCompetitiveScore(0.2f);
|
||||
assertEquals(20, propagator.advanceShallow(0));
|
||||
|
||||
propagator.setMinCompetitiveScore(0.7f);
|
||||
assertEquals(30, propagator.advanceShallow(0));
|
||||
|
||||
propagator.setMinCompetitiveScore(1.2f);
|
||||
assertEquals(30, propagator.advanceShallow(0));
|
||||
|
||||
propagator.setMinCompetitiveScore(1.7f);
|
||||
assertEquals(30, propagator.advanceShallow(0));
|
||||
|
||||
propagator.setMinCompetitiveScore(2.2f);
|
||||
assertEquals(80, propagator.advanceShallow(0));
|
||||
|
||||
propagator.setMinCompetitiveScore(5f);
|
||||
assertEquals(80, propagator.advanceShallow(0));
|
||||
}
|
||||
|
||||
}
|
Loading…
Reference in New Issue