LUCENE-4923: remove minShouldMatch/speed up DisjunctionSumScorer

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1466545 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Robert Muir 2013-04-10 16:21:07 +00:00
parent e27b3f24c0
commit b2348c8494
4 changed files with 49 additions and 86 deletions

View File

@ -196,6 +196,9 @@ Optimizations
* LUCENE-4889: UnicodeUtil.codePointCount implementation replaced with a
non-array-lookup version. (Dawid Weiss)
* LUCENE-4923: Speed up BooleanQuerys processing of in-order disjunctions.
(Robert Muir)
API Changes
* LUCENE-4844: removed TaxonomyReader.getParent(), you should use

View File

@ -346,11 +346,21 @@ public class BooleanQuery extends Query implements Iterable<BooleanClause> {
return null;
}
// simple conjunction
if (optional.size() == 0 && prohibited.size() == 0) {
float coord = disableCoord ? 1.0f : coord(required.size(), maxCoord);
return new ConjunctionScorer(this, required.toArray(new Scorer[required.size()]), coord);
}
// simple disjunction
if (required.size() == 0 && prohibited.size() == 0 && minNrShouldMatch <= 1 && optional.size() > 1) {
float coord[] = new float[optional.size()+1];
for (int i = 0; i < coord.length; i++) {
coord[i] = disableCoord ? 1.0f : coord(i, maxCoord);
}
return new DisjunctionSumScorer(this, optional.toArray(new Scorer[optional.size()]), coord);
}
// Return a BooleanScorer2
return new BooleanScorer2(this, disableCoord, minNrShouldMatch, required, prohibited, optional, maxCoord);
}

View File

@ -159,38 +159,19 @@ class BooleanScorer2 extends Scorer {
// each scorer from the list counted as a single matcher
if (minNrShouldMatch > 1) {
return new MinShouldMatchSumScorer(weight, scorers, minNrShouldMatch) {
private int lastScoredDoc = -1;
// Save the score of lastScoredDoc, so that we don't compute it more than
// once in score().
private float lastDocScore = Float.NaN;
@Override public float score() throws IOException {
int doc = docID();
if (doc >= lastScoredDoc) {
if (doc > lastScoredDoc) {
lastDocScore = super.score();
lastScoredDoc = doc;
}
coordinator.nrMatchers += super.nrMatchers;
}
return lastDocScore;
@Override
public float score() throws IOException {
coordinator.nrMatchers += super.nrMatchers;
return super.score();
}
};
} else {
return new DisjunctionSumScorer(weight, scorers) {
private int lastScoredDoc = -1;
// Save the score of lastScoredDoc, so that we don't compute it more than
// once in score().
private float lastDocScore = Float.NaN;
@Override public float score() throws IOException {
int doc = docID();
if (doc >= lastScoredDoc) {
if (doc > lastScoredDoc) {
lastDocScore = super.score();
lastScoredDoc = doc;
}
coordinator.nrMatchers += super.nrMatchers;
}
return lastDocScore;
// we pass null for coord[] since we coordinate ourselves and override score()
return new DisjunctionSumScorer(weight, scorers.toArray(new Scorer[scorers.size()]), null) {
@Override
public float score() throws IOException {
coordinator.nrMatchers += super.nrMatchers;
return (float) super.score;
}
};
}

View File

@ -17,84 +17,58 @@ package org.apache.lucene.search;
* limitations under the License.
*/
import java.util.List;
import java.io.IOException;
/** A Scorer for OR like queries, counterpart of <code>ConjunctionScorer</code>.
* This Scorer implements {@link Scorer#advance(int)} and uses advance() on the given Scorers.
*/
class DisjunctionSumScorer extends DisjunctionScorer {
/** The minimum number of scorers that should match. */
private final int minimumNrMatchers;
/** The document number of the current match. */
private int doc = -1;
/** The number of subscorers that provide the current match. */
protected int nrMatchers = -1;
private double score = Float.NaN;
protected double score = Float.NaN;
private final float[] coord;
/** Construct a <code>DisjunctionScorer</code>.
* @param weight The weight to be used.
* @param subScorers A collection of at least two subscorers.
* @param minimumNrMatchers The positive minimum number of subscorers that should
* match to match this query.
* <br>When <code>minimumNrMatchers</code> is bigger than
* the number of <code>subScorers</code>,
* no matches will be produced.
* <br>When minimumNrMatchers equals the number of subScorers,
* it more efficient to use <code>ConjunctionScorer</code>.
* @param subScorers Array of at least two subscorers.
* @param coord Table of coordination factors
*/
public DisjunctionSumScorer(Weight weight, List<Scorer> subScorers, int minimumNrMatchers) throws IOException {
super(weight, subScorers.toArray(new Scorer[subScorers.size()]), subScorers.size());
DisjunctionSumScorer(Weight weight, Scorer[] subScorers, float[] coord) throws IOException {
super(weight, subScorers, subScorers.length);
if (minimumNrMatchers <= 0) {
throw new IllegalArgumentException("Minimum nr of matchers must be positive");
}
if (numScorers <= 1) {
throw new IllegalArgumentException("There must be at least 2 subScorers");
}
this.minimumNrMatchers = minimumNrMatchers;
}
/** Construct a <code>DisjunctionScorer</code>, using one as the minimum number
* of matching subscorers.
*/
public DisjunctionSumScorer(Weight weight, List<Scorer> subScorers) throws IOException {
this(weight, subScorers, 1);
this.coord = coord;
}
@Override
public int nextDoc() throws IOException {
assert doc != NO_MORE_DOCS;
while(true) {
while (subScorers[0].docID() == doc) {
if (subScorers[0].nextDoc() != NO_MORE_DOCS) {
heapAdjust(0);
} else {
heapRemoveRoot();
if (numScorers < minimumNrMatchers) {
return doc = NO_MORE_DOCS;
}
if (subScorers[0].nextDoc() != NO_MORE_DOCS) {
heapAdjust(0);
} else {
heapRemoveRoot();
if (numScorers == 0) {
return doc = NO_MORE_DOCS;
}
}
afterNext();
if (nrMatchers >= minimumNrMatchers) {
break;
if (subScorers[0].docID() != doc) {
afterNext();
return doc;
}
}
return doc;
}
private void afterNext() throws IOException {
final Scorer sub = subScorers[0];
doc = sub.docID();
if (doc == NO_MORE_DOCS) {
nrMatchers = Integer.MAX_VALUE; // stop looping
} else {
if (doc != NO_MORE_DOCS) {
score = sub.score();
nrMatchers = 1;
countMatches(1);
@ -104,9 +78,8 @@ class DisjunctionSumScorer extends DisjunctionScorer {
// TODO: this currently scores, but so did the previous impl
// TODO: remove recursion.
// TODO: if we separate scoring, out of here, modify this
// and afterNext() to terminate when nrMatchers == minimumNrMatchers
// then also change freq() to just always compute it from scratch
// TODO: if we separate scoring, out of here,
// then change freq() to just always compute it from scratch
private void countMatches(int root) throws IOException {
if (root < numScorers && subScorers[root].docID() == doc) {
nrMatchers++;
@ -121,7 +94,7 @@ class DisjunctionSumScorer extends DisjunctionScorer {
*/
@Override
public float score() throws IOException {
return (float)score;
return (float)score * coord[nrMatchers];
}
@Override
@ -146,8 +119,8 @@ class DisjunctionSumScorer extends DisjunctionScorer {
*/
@Override
public int advance(int target) throws IOException {
if (numScorers == 0) return doc = NO_MORE_DOCS;
while (subScorers[0].docID() < target) {
assert doc != NO_MORE_DOCS;
while(true) {
if (subScorers[0].advance(target) != NO_MORE_DOCS) {
heapAdjust(0);
} else {
@ -156,14 +129,10 @@ class DisjunctionSumScorer extends DisjunctionScorer {
return doc = NO_MORE_DOCS;
}
}
}
afterNext();
if (nrMatchers >= minimumNrMatchers) {
return doc;
} else {
return nextDoc();
if (subScorers[0].docID() >= target) {
afterNext();
return doc;
}
}
}
}