mirror of https://github.com/apache/lucene.git
LUCENE-4923: remove minShouldMatch/speed up DisjunctionSumScorer
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1466545 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
e27b3f24c0
commit
b2348c8494
|
@ -196,6 +196,9 @@ Optimizations
|
|||
* LUCENE-4889: UnicodeUtil.codePointCount implementation replaced with a
|
||||
non-array-lookup version. (Dawid Weiss)
|
||||
|
||||
* LUCENE-4923: Speed up BooleanQuerys processing of in-order disjunctions.
|
||||
(Robert Muir)
|
||||
|
||||
API Changes
|
||||
|
||||
* LUCENE-4844: removed TaxonomyReader.getParent(), you should use
|
||||
|
|
|
@ -346,11 +346,21 @@ public class BooleanQuery extends Query implements Iterable<BooleanClause> {
|
|||
return null;
|
||||
}
|
||||
|
||||
// simple conjunction
|
||||
if (optional.size() == 0 && prohibited.size() == 0) {
|
||||
float coord = disableCoord ? 1.0f : coord(required.size(), maxCoord);
|
||||
return new ConjunctionScorer(this, required.toArray(new Scorer[required.size()]), coord);
|
||||
}
|
||||
|
||||
// simple disjunction
|
||||
if (required.size() == 0 && prohibited.size() == 0 && minNrShouldMatch <= 1 && optional.size() > 1) {
|
||||
float coord[] = new float[optional.size()+1];
|
||||
for (int i = 0; i < coord.length; i++) {
|
||||
coord[i] = disableCoord ? 1.0f : coord(i, maxCoord);
|
||||
}
|
||||
return new DisjunctionSumScorer(this, optional.toArray(new Scorer[optional.size()]), coord);
|
||||
}
|
||||
|
||||
// Return a BooleanScorer2
|
||||
return new BooleanScorer2(this, disableCoord, minNrShouldMatch, required, prohibited, optional, maxCoord);
|
||||
}
|
||||
|
|
|
@ -159,38 +159,19 @@ class BooleanScorer2 extends Scorer {
|
|||
// each scorer from the list counted as a single matcher
|
||||
if (minNrShouldMatch > 1) {
|
||||
return new MinShouldMatchSumScorer(weight, scorers, minNrShouldMatch) {
|
||||
private int lastScoredDoc = -1;
|
||||
// Save the score of lastScoredDoc, so that we don't compute it more than
|
||||
// once in score().
|
||||
private float lastDocScore = Float.NaN;
|
||||
@Override public float score() throws IOException {
|
||||
int doc = docID();
|
||||
if (doc >= lastScoredDoc) {
|
||||
if (doc > lastScoredDoc) {
|
||||
lastDocScore = super.score();
|
||||
lastScoredDoc = doc;
|
||||
}
|
||||
coordinator.nrMatchers += super.nrMatchers;
|
||||
}
|
||||
return lastDocScore;
|
||||
@Override
|
||||
public float score() throws IOException {
|
||||
coordinator.nrMatchers += super.nrMatchers;
|
||||
return super.score();
|
||||
}
|
||||
};
|
||||
} else {
|
||||
return new DisjunctionSumScorer(weight, scorers) {
|
||||
private int lastScoredDoc = -1;
|
||||
// Save the score of lastScoredDoc, so that we don't compute it more than
|
||||
// once in score().
|
||||
private float lastDocScore = Float.NaN;
|
||||
@Override public float score() throws IOException {
|
||||
int doc = docID();
|
||||
if (doc >= lastScoredDoc) {
|
||||
if (doc > lastScoredDoc) {
|
||||
lastDocScore = super.score();
|
||||
lastScoredDoc = doc;
|
||||
}
|
||||
coordinator.nrMatchers += super.nrMatchers;
|
||||
}
|
||||
return lastDocScore;
|
||||
// we pass null for coord[] since we coordinate ourselves and override score()
|
||||
return new DisjunctionSumScorer(weight, scorers.toArray(new Scorer[scorers.size()]), null) {
|
||||
@Override
|
||||
public float score() throws IOException {
|
||||
coordinator.nrMatchers += super.nrMatchers;
|
||||
return (float) super.score;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
|
|
@ -17,84 +17,58 @@ package org.apache.lucene.search;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.util.List;
|
||||
import java.io.IOException;
|
||||
|
||||
/** A Scorer for OR like queries, counterpart of <code>ConjunctionScorer</code>.
|
||||
* This Scorer implements {@link Scorer#advance(int)} and uses advance() on the given Scorers.
|
||||
*/
|
||||
class DisjunctionSumScorer extends DisjunctionScorer {
|
||||
/** The minimum number of scorers that should match. */
|
||||
private final int minimumNrMatchers;
|
||||
|
||||
/** The document number of the current match. */
|
||||
private int doc = -1;
|
||||
|
||||
/** The number of subscorers that provide the current match. */
|
||||
protected int nrMatchers = -1;
|
||||
|
||||
private double score = Float.NaN;
|
||||
protected double score = Float.NaN;
|
||||
private final float[] coord;
|
||||
|
||||
/** Construct a <code>DisjunctionScorer</code>.
|
||||
* @param weight The weight to be used.
|
||||
* @param subScorers A collection of at least two subscorers.
|
||||
* @param minimumNrMatchers The positive minimum number of subscorers that should
|
||||
* match to match this query.
|
||||
* <br>When <code>minimumNrMatchers</code> is bigger than
|
||||
* the number of <code>subScorers</code>,
|
||||
* no matches will be produced.
|
||||
* <br>When minimumNrMatchers equals the number of subScorers,
|
||||
* it more efficient to use <code>ConjunctionScorer</code>.
|
||||
* @param subScorers Array of at least two subscorers.
|
||||
* @param coord Table of coordination factors
|
||||
*/
|
||||
public DisjunctionSumScorer(Weight weight, List<Scorer> subScorers, int minimumNrMatchers) throws IOException {
|
||||
super(weight, subScorers.toArray(new Scorer[subScorers.size()]), subScorers.size());
|
||||
DisjunctionSumScorer(Weight weight, Scorer[] subScorers, float[] coord) throws IOException {
|
||||
super(weight, subScorers, subScorers.length);
|
||||
|
||||
if (minimumNrMatchers <= 0) {
|
||||
throw new IllegalArgumentException("Minimum nr of matchers must be positive");
|
||||
}
|
||||
if (numScorers <= 1) {
|
||||
throw new IllegalArgumentException("There must be at least 2 subScorers");
|
||||
}
|
||||
|
||||
this.minimumNrMatchers = minimumNrMatchers;
|
||||
}
|
||||
|
||||
/** Construct a <code>DisjunctionScorer</code>, using one as the minimum number
|
||||
* of matching subscorers.
|
||||
*/
|
||||
public DisjunctionSumScorer(Weight weight, List<Scorer> subScorers) throws IOException {
|
||||
this(weight, subScorers, 1);
|
||||
this.coord = coord;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int nextDoc() throws IOException {
|
||||
assert doc != NO_MORE_DOCS;
|
||||
while(true) {
|
||||
while (subScorers[0].docID() == doc) {
|
||||
if (subScorers[0].nextDoc() != NO_MORE_DOCS) {
|
||||
heapAdjust(0);
|
||||
} else {
|
||||
heapRemoveRoot();
|
||||
if (numScorers < minimumNrMatchers) {
|
||||
return doc = NO_MORE_DOCS;
|
||||
}
|
||||
if (subScorers[0].nextDoc() != NO_MORE_DOCS) {
|
||||
heapAdjust(0);
|
||||
} else {
|
||||
heapRemoveRoot();
|
||||
if (numScorers == 0) {
|
||||
return doc = NO_MORE_DOCS;
|
||||
}
|
||||
}
|
||||
afterNext();
|
||||
if (nrMatchers >= minimumNrMatchers) {
|
||||
break;
|
||||
if (subScorers[0].docID() != doc) {
|
||||
afterNext();
|
||||
return doc;
|
||||
}
|
||||
}
|
||||
|
||||
return doc;
|
||||
}
|
||||
|
||||
private void afterNext() throws IOException {
|
||||
final Scorer sub = subScorers[0];
|
||||
doc = sub.docID();
|
||||
if (doc == NO_MORE_DOCS) {
|
||||
nrMatchers = Integer.MAX_VALUE; // stop looping
|
||||
} else {
|
||||
if (doc != NO_MORE_DOCS) {
|
||||
score = sub.score();
|
||||
nrMatchers = 1;
|
||||
countMatches(1);
|
||||
|
@ -104,9 +78,8 @@ class DisjunctionSumScorer extends DisjunctionScorer {
|
|||
|
||||
// TODO: this currently scores, but so did the previous impl
|
||||
// TODO: remove recursion.
|
||||
// TODO: if we separate scoring, out of here, modify this
|
||||
// and afterNext() to terminate when nrMatchers == minimumNrMatchers
|
||||
// then also change freq() to just always compute it from scratch
|
||||
// TODO: if we separate scoring, out of here,
|
||||
// then change freq() to just always compute it from scratch
|
||||
private void countMatches(int root) throws IOException {
|
||||
if (root < numScorers && subScorers[root].docID() == doc) {
|
||||
nrMatchers++;
|
||||
|
@ -121,7 +94,7 @@ class DisjunctionSumScorer extends DisjunctionScorer {
|
|||
*/
|
||||
@Override
|
||||
public float score() throws IOException {
|
||||
return (float)score;
|
||||
return (float)score * coord[nrMatchers];
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -146,8 +119,8 @@ class DisjunctionSumScorer extends DisjunctionScorer {
|
|||
*/
|
||||
@Override
|
||||
public int advance(int target) throws IOException {
|
||||
if (numScorers == 0) return doc = NO_MORE_DOCS;
|
||||
while (subScorers[0].docID() < target) {
|
||||
assert doc != NO_MORE_DOCS;
|
||||
while(true) {
|
||||
if (subScorers[0].advance(target) != NO_MORE_DOCS) {
|
||||
heapAdjust(0);
|
||||
} else {
|
||||
|
@ -156,14 +129,10 @@ class DisjunctionSumScorer extends DisjunctionScorer {
|
|||
return doc = NO_MORE_DOCS;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
afterNext();
|
||||
|
||||
if (nrMatchers >= minimumNrMatchers) {
|
||||
return doc;
|
||||
} else {
|
||||
return nextDoc();
|
||||
if (subScorers[0].docID() >= target) {
|
||||
afterNext();
|
||||
return doc;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue