LUCENE-4923: remove minShouldMatch/speed up DisjunctionSumScorer

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1466545 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Robert Muir 2013-04-10 16:21:07 +00:00
parent e27b3f24c0
commit b2348c8494
4 changed files with 49 additions and 86 deletions

View File

@ -196,6 +196,9 @@ Optimizations
* LUCENE-4889: UnicodeUtil.codePointCount implementation replaced with a * LUCENE-4889: UnicodeUtil.codePointCount implementation replaced with a
non-array-lookup version. (Dawid Weiss) non-array-lookup version. (Dawid Weiss)
* LUCENE-4923: Speed up BooleanQuery's processing of in-order disjunctions.
(Robert Muir)
API Changes API Changes
* LUCENE-4844: removed TaxonomyReader.getParent(), you should use * LUCENE-4844: removed TaxonomyReader.getParent(), you should use

View File

@ -346,11 +346,21 @@ public class BooleanQuery extends Query implements Iterable<BooleanClause> {
return null; return null;
} }
// simple conjunction
if (optional.size() == 0 && prohibited.size() == 0) { if (optional.size() == 0 && prohibited.size() == 0) {
float coord = disableCoord ? 1.0f : coord(required.size(), maxCoord); float coord = disableCoord ? 1.0f : coord(required.size(), maxCoord);
return new ConjunctionScorer(this, required.toArray(new Scorer[required.size()]), coord); return new ConjunctionScorer(this, required.toArray(new Scorer[required.size()]), coord);
} }
// simple disjunction
if (required.size() == 0 && prohibited.size() == 0 && minNrShouldMatch <= 1 && optional.size() > 1) {
float coord[] = new float[optional.size()+1];
for (int i = 0; i < coord.length; i++) {
coord[i] = disableCoord ? 1.0f : coord(i, maxCoord);
}
return new DisjunctionSumScorer(this, optional.toArray(new Scorer[optional.size()]), coord);
}
// Return a BooleanScorer2 // Return a BooleanScorer2
return new BooleanScorer2(this, disableCoord, minNrShouldMatch, required, prohibited, optional, maxCoord); return new BooleanScorer2(this, disableCoord, minNrShouldMatch, required, prohibited, optional, maxCoord);
} }

View File

@ -159,38 +159,19 @@ class BooleanScorer2 extends Scorer {
// each scorer from the list counted as a single matcher // each scorer from the list counted as a single matcher
if (minNrShouldMatch > 1) { if (minNrShouldMatch > 1) {
return new MinShouldMatchSumScorer(weight, scorers, minNrShouldMatch) { return new MinShouldMatchSumScorer(weight, scorers, minNrShouldMatch) {
private int lastScoredDoc = -1; @Override
// Save the score of lastScoredDoc, so that we don't compute it more than public float score() throws IOException {
// once in score().
private float lastDocScore = Float.NaN;
@Override public float score() throws IOException {
int doc = docID();
if (doc >= lastScoredDoc) {
if (doc > lastScoredDoc) {
lastDocScore = super.score();
lastScoredDoc = doc;
}
coordinator.nrMatchers += super.nrMatchers; coordinator.nrMatchers += super.nrMatchers;
} return super.score();
return lastDocScore;
} }
}; };
} else { } else {
return new DisjunctionSumScorer(weight, scorers) { // we pass null for coord[] since we coordinate ourselves and override score()
private int lastScoredDoc = -1; return new DisjunctionSumScorer(weight, scorers.toArray(new Scorer[scorers.size()]), null) {
// Save the score of lastScoredDoc, so that we don't compute it more than @Override
// once in score(). public float score() throws IOException {
private float lastDocScore = Float.NaN;
@Override public float score() throws IOException {
int doc = docID();
if (doc >= lastScoredDoc) {
if (doc > lastScoredDoc) {
lastDocScore = super.score();
lastScoredDoc = doc;
}
coordinator.nrMatchers += super.nrMatchers; coordinator.nrMatchers += super.nrMatchers;
} return (float) super.score;
return lastDocScore;
} }
}; };
} }

View File

@ -17,84 +17,58 @@ package org.apache.lucene.search;
* limitations under the License. * limitations under the License.
*/ */
import java.util.List;
import java.io.IOException; import java.io.IOException;
/** A Scorer for OR like queries, counterpart of <code>ConjunctionScorer</code>. /** A Scorer for OR like queries, counterpart of <code>ConjunctionScorer</code>.
* This Scorer implements {@link Scorer#advance(int)} and uses advance() on the given Scorers. * This Scorer implements {@link Scorer#advance(int)} and uses advance() on the given Scorers.
*/ */
class DisjunctionSumScorer extends DisjunctionScorer { class DisjunctionSumScorer extends DisjunctionScorer {
/** The minimum number of scorers that should match. */
private final int minimumNrMatchers;
/** The document number of the current match. */ /** The document number of the current match. */
private int doc = -1; private int doc = -1;
/** The number of subscorers that provide the current match. */ /** The number of subscorers that provide the current match. */
protected int nrMatchers = -1; protected int nrMatchers = -1;
private double score = Float.NaN; protected double score = Float.NaN;
private final float[] coord;
/** Construct a <code>DisjunctionScorer</code>. /** Construct a <code>DisjunctionScorer</code>.
* @param weight The weight to be used. * @param weight The weight to be used.
* @param subScorers A collection of at least two subscorers. * @param subScorers Array of at least two subscorers.
* @param minimumNrMatchers The positive minimum number of subscorers that should * @param coord Table of coordination factors
* match to match this query.
* <br>When <code>minimumNrMatchers</code> is bigger than
* the number of <code>subScorers</code>,
* no matches will be produced.
* <br>When minimumNrMatchers equals the number of subScorers,
* it more efficient to use <code>ConjunctionScorer</code>.
*/ */
public DisjunctionSumScorer(Weight weight, List<Scorer> subScorers, int minimumNrMatchers) throws IOException { DisjunctionSumScorer(Weight weight, Scorer[] subScorers, float[] coord) throws IOException {
super(weight, subScorers.toArray(new Scorer[subScorers.size()]), subScorers.size()); super(weight, subScorers, subScorers.length);
if (minimumNrMatchers <= 0) {
throw new IllegalArgumentException("Minimum nr of matchers must be positive");
}
if (numScorers <= 1) { if (numScorers <= 1) {
throw new IllegalArgumentException("There must be at least 2 subScorers"); throw new IllegalArgumentException("There must be at least 2 subScorers");
} }
this.coord = coord;
this.minimumNrMatchers = minimumNrMatchers;
}
/** Construct a <code>DisjunctionScorer</code>, using one as the minimum number
* of matching subscorers.
*/
public DisjunctionSumScorer(Weight weight, List<Scorer> subScorers) throws IOException {
this(weight, subScorers, 1);
} }
@Override @Override
public int nextDoc() throws IOException { public int nextDoc() throws IOException {
assert doc != NO_MORE_DOCS; assert doc != NO_MORE_DOCS;
while(true) { while(true) {
while (subScorers[0].docID() == doc) {
if (subScorers[0].nextDoc() != NO_MORE_DOCS) { if (subScorers[0].nextDoc() != NO_MORE_DOCS) {
heapAdjust(0); heapAdjust(0);
} else { } else {
heapRemoveRoot(); heapRemoveRoot();
if (numScorers < minimumNrMatchers) { if (numScorers == 0) {
return doc = NO_MORE_DOCS; return doc = NO_MORE_DOCS;
} }
} }
} if (subScorers[0].docID() != doc) {
afterNext(); afterNext();
if (nrMatchers >= minimumNrMatchers) {
break;
}
}
return doc; return doc;
} }
}
}
private void afterNext() throws IOException { private void afterNext() throws IOException {
final Scorer sub = subScorers[0]; final Scorer sub = subScorers[0];
doc = sub.docID(); doc = sub.docID();
if (doc == NO_MORE_DOCS) { if (doc != NO_MORE_DOCS) {
nrMatchers = Integer.MAX_VALUE; // stop looping
} else {
score = sub.score(); score = sub.score();
nrMatchers = 1; nrMatchers = 1;
countMatches(1); countMatches(1);
@ -104,9 +78,8 @@ class DisjunctionSumScorer extends DisjunctionScorer {
// TODO: this currently scores, but so did the previous impl // TODO: this currently scores, but so did the previous impl
// TODO: remove recursion. // TODO: remove recursion.
// TODO: if we separate scoring, out of here, modify this // TODO: if we separate scoring, out of here,
// and afterNext() to terminate when nrMatchers == minimumNrMatchers // then change freq() to just always compute it from scratch
// then also change freq() to just always compute it from scratch
private void countMatches(int root) throws IOException { private void countMatches(int root) throws IOException {
if (root < numScorers && subScorers[root].docID() == doc) { if (root < numScorers && subScorers[root].docID() == doc) {
nrMatchers++; nrMatchers++;
@ -121,7 +94,7 @@ class DisjunctionSumScorer extends DisjunctionScorer {
*/ */
@Override @Override
public float score() throws IOException { public float score() throws IOException {
return (float)score; return (float)score * coord[nrMatchers];
} }
@Override @Override
@ -146,8 +119,8 @@ class DisjunctionSumScorer extends DisjunctionScorer {
*/ */
@Override @Override
public int advance(int target) throws IOException { public int advance(int target) throws IOException {
if (numScorers == 0) return doc = NO_MORE_DOCS; assert doc != NO_MORE_DOCS;
while (subScorers[0].docID() < target) { while(true) {
if (subScorers[0].advance(target) != NO_MORE_DOCS) { if (subScorers[0].advance(target) != NO_MORE_DOCS) {
heapAdjust(0); heapAdjust(0);
} else { } else {
@ -156,14 +129,10 @@ class DisjunctionSumScorer extends DisjunctionScorer {
return doc = NO_MORE_DOCS; return doc = NO_MORE_DOCS;
} }
} }
} if (subScorers[0].docID() >= target) {
afterNext(); afterNext();
if (nrMatchers >= minimumNrMatchers) {
return doc; return doc;
} else { }
return nextDoc();
} }
} }
} }