mirror of https://github.com/apache/lucene.git
LUCENE-4923: remove minShouldMatch/speed up DisjunctionSumScorer
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1466545 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
e27b3f24c0
commit
b2348c8494
|
@ -196,6 +196,9 @@ Optimizations
|
||||||
* LUCENE-4889: UnicodeUtil.codePointCount implementation replaced with a
|
* LUCENE-4889: UnicodeUtil.codePointCount implementation replaced with a
|
||||||
non-array-lookup version. (Dawid Weiss)
|
non-array-lookup version. (Dawid Weiss)
|
||||||
|
|
||||||
|
* LUCENE-4923: Speed up BooleanQuery's processing of in-order disjunctions.
|
||||||
|
(Robert Muir)
|
||||||
|
|
||||||
API Changes
|
API Changes
|
||||||
|
|
||||||
* LUCENE-4844: removed TaxonomyReader.getParent(), you should use
|
* LUCENE-4844: removed TaxonomyReader.getParent(), you should use
|
||||||
|
|
|
@ -346,11 +346,21 @@ public class BooleanQuery extends Query implements Iterable<BooleanClause> {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// simple conjunction
|
||||||
if (optional.size() == 0 && prohibited.size() == 0) {
|
if (optional.size() == 0 && prohibited.size() == 0) {
|
||||||
float coord = disableCoord ? 1.0f : coord(required.size(), maxCoord);
|
float coord = disableCoord ? 1.0f : coord(required.size(), maxCoord);
|
||||||
return new ConjunctionScorer(this, required.toArray(new Scorer[required.size()]), coord);
|
return new ConjunctionScorer(this, required.toArray(new Scorer[required.size()]), coord);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// simple disjunction
|
||||||
|
if (required.size() == 0 && prohibited.size() == 0 && minNrShouldMatch <= 1 && optional.size() > 1) {
|
||||||
|
float coord[] = new float[optional.size()+1];
|
||||||
|
for (int i = 0; i < coord.length; i++) {
|
||||||
|
coord[i] = disableCoord ? 1.0f : coord(i, maxCoord);
|
||||||
|
}
|
||||||
|
return new DisjunctionSumScorer(this, optional.toArray(new Scorer[optional.size()]), coord);
|
||||||
|
}
|
||||||
|
|
||||||
// Return a BooleanScorer2
|
// Return a BooleanScorer2
|
||||||
return new BooleanScorer2(this, disableCoord, minNrShouldMatch, required, prohibited, optional, maxCoord);
|
return new BooleanScorer2(this, disableCoord, minNrShouldMatch, required, prohibited, optional, maxCoord);
|
||||||
}
|
}
|
||||||
|
|
|
@ -159,38 +159,19 @@ class BooleanScorer2 extends Scorer {
|
||||||
// each scorer from the list counted as a single matcher
|
// each scorer from the list counted as a single matcher
|
||||||
if (minNrShouldMatch > 1) {
|
if (minNrShouldMatch > 1) {
|
||||||
return new MinShouldMatchSumScorer(weight, scorers, minNrShouldMatch) {
|
return new MinShouldMatchSumScorer(weight, scorers, minNrShouldMatch) {
|
||||||
private int lastScoredDoc = -1;
|
@Override
|
||||||
// Save the score of lastScoredDoc, so that we don't compute it more than
|
public float score() throws IOException {
|
||||||
// once in score().
|
|
||||||
private float lastDocScore = Float.NaN;
|
|
||||||
@Override public float score() throws IOException {
|
|
||||||
int doc = docID();
|
|
||||||
if (doc >= lastScoredDoc) {
|
|
||||||
if (doc > lastScoredDoc) {
|
|
||||||
lastDocScore = super.score();
|
|
||||||
lastScoredDoc = doc;
|
|
||||||
}
|
|
||||||
coordinator.nrMatchers += super.nrMatchers;
|
coordinator.nrMatchers += super.nrMatchers;
|
||||||
}
|
return super.score();
|
||||||
return lastDocScore;
|
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
} else {
|
} else {
|
||||||
return new DisjunctionSumScorer(weight, scorers) {
|
// we pass null for coord[] since we coordinate ourselves and override score()
|
||||||
private int lastScoredDoc = -1;
|
return new DisjunctionSumScorer(weight, scorers.toArray(new Scorer[scorers.size()]), null) {
|
||||||
// Save the score of lastScoredDoc, so that we don't compute it more than
|
@Override
|
||||||
// once in score().
|
public float score() throws IOException {
|
||||||
private float lastDocScore = Float.NaN;
|
|
||||||
@Override public float score() throws IOException {
|
|
||||||
int doc = docID();
|
|
||||||
if (doc >= lastScoredDoc) {
|
|
||||||
if (doc > lastScoredDoc) {
|
|
||||||
lastDocScore = super.score();
|
|
||||||
lastScoredDoc = doc;
|
|
||||||
}
|
|
||||||
coordinator.nrMatchers += super.nrMatchers;
|
coordinator.nrMatchers += super.nrMatchers;
|
||||||
}
|
return (float) super.score;
|
||||||
return lastDocScore;
|
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
|
@ -17,84 +17,58 @@ package org.apache.lucene.search;
|
||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import java.util.List;
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
|
||||||
/** A Scorer for OR like queries, counterpart of <code>ConjunctionScorer</code>.
|
/** A Scorer for OR like queries, counterpart of <code>ConjunctionScorer</code>.
|
||||||
* This Scorer implements {@link Scorer#advance(int)} and uses advance() on the given Scorers.
|
* This Scorer implements {@link Scorer#advance(int)} and uses advance() on the given Scorers.
|
||||||
*/
|
*/
|
||||||
class DisjunctionSumScorer extends DisjunctionScorer {
|
class DisjunctionSumScorer extends DisjunctionScorer {
|
||||||
/** The minimum number of scorers that should match. */
|
|
||||||
private final int minimumNrMatchers;
|
|
||||||
|
|
||||||
/** The document number of the current match. */
|
/** The document number of the current match. */
|
||||||
private int doc = -1;
|
private int doc = -1;
|
||||||
|
|
||||||
/** The number of subscorers that provide the current match. */
|
/** The number of subscorers that provide the current match. */
|
||||||
protected int nrMatchers = -1;
|
protected int nrMatchers = -1;
|
||||||
|
|
||||||
private double score = Float.NaN;
|
protected double score = Float.NaN;
|
||||||
|
private final float[] coord;
|
||||||
|
|
||||||
/** Construct a <code>DisjunctionScorer</code>.
|
/** Construct a <code>DisjunctionScorer</code>.
|
||||||
* @param weight The weight to be used.
|
* @param weight The weight to be used.
|
||||||
* @param subScorers A collection of at least two subscorers.
|
* @param subScorers Array of at least two subscorers.
|
||||||
* @param minimumNrMatchers The positive minimum number of subscorers that should
|
* @param coord Table of coordination factors
|
||||||
* match to match this query.
|
|
||||||
* <br>When <code>minimumNrMatchers</code> is bigger than
|
|
||||||
* the number of <code>subScorers</code>,
|
|
||||||
* no matches will be produced.
|
|
||||||
* <br>When minimumNrMatchers equals the number of subScorers,
|
|
||||||
* it is more efficient to use <code>ConjunctionScorer</code>.
|
|
||||||
*/
|
*/
|
||||||
public DisjunctionSumScorer(Weight weight, List<Scorer> subScorers, int minimumNrMatchers) throws IOException {
|
DisjunctionSumScorer(Weight weight, Scorer[] subScorers, float[] coord) throws IOException {
|
||||||
super(weight, subScorers.toArray(new Scorer[subScorers.size()]), subScorers.size());
|
super(weight, subScorers, subScorers.length);
|
||||||
|
|
||||||
if (minimumNrMatchers <= 0) {
|
|
||||||
throw new IllegalArgumentException("Minimum nr of matchers must be positive");
|
|
||||||
}
|
|
||||||
if (numScorers <= 1) {
|
if (numScorers <= 1) {
|
||||||
throw new IllegalArgumentException("There must be at least 2 subScorers");
|
throw new IllegalArgumentException("There must be at least 2 subScorers");
|
||||||
}
|
}
|
||||||
|
this.coord = coord;
|
||||||
this.minimumNrMatchers = minimumNrMatchers;
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Construct a <code>DisjunctionScorer</code>, using one as the minimum number
|
|
||||||
* of matching subscorers.
|
|
||||||
*/
|
|
||||||
public DisjunctionSumScorer(Weight weight, List<Scorer> subScorers) throws IOException {
|
|
||||||
this(weight, subScorers, 1);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int nextDoc() throws IOException {
|
public int nextDoc() throws IOException {
|
||||||
assert doc != NO_MORE_DOCS;
|
assert doc != NO_MORE_DOCS;
|
||||||
while(true) {
|
while(true) {
|
||||||
while (subScorers[0].docID() == doc) {
|
|
||||||
if (subScorers[0].nextDoc() != NO_MORE_DOCS) {
|
if (subScorers[0].nextDoc() != NO_MORE_DOCS) {
|
||||||
heapAdjust(0);
|
heapAdjust(0);
|
||||||
} else {
|
} else {
|
||||||
heapRemoveRoot();
|
heapRemoveRoot();
|
||||||
if (numScorers < minimumNrMatchers) {
|
if (numScorers == 0) {
|
||||||
return doc = NO_MORE_DOCS;
|
return doc = NO_MORE_DOCS;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
if (subScorers[0].docID() != doc) {
|
||||||
afterNext();
|
afterNext();
|
||||||
if (nrMatchers >= minimumNrMatchers) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return doc;
|
return doc;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
private void afterNext() throws IOException {
|
private void afterNext() throws IOException {
|
||||||
final Scorer sub = subScorers[0];
|
final Scorer sub = subScorers[0];
|
||||||
doc = sub.docID();
|
doc = sub.docID();
|
||||||
if (doc == NO_MORE_DOCS) {
|
if (doc != NO_MORE_DOCS) {
|
||||||
nrMatchers = Integer.MAX_VALUE; // stop looping
|
|
||||||
} else {
|
|
||||||
score = sub.score();
|
score = sub.score();
|
||||||
nrMatchers = 1;
|
nrMatchers = 1;
|
||||||
countMatches(1);
|
countMatches(1);
|
||||||
|
@ -104,9 +78,8 @@ class DisjunctionSumScorer extends DisjunctionScorer {
|
||||||
|
|
||||||
// TODO: this currently scores, but so did the previous impl
|
// TODO: this currently scores, but so did the previous impl
|
||||||
// TODO: remove recursion.
|
// TODO: remove recursion.
|
||||||
// TODO: if we separate scoring, out of here, modify this
|
// TODO: if we separate scoring, out of here,
|
||||||
// and afterNext() to terminate when nrMatchers == minimumNrMatchers
|
// then change freq() to just always compute it from scratch
|
||||||
// then also change freq() to just always compute it from scratch
|
|
||||||
private void countMatches(int root) throws IOException {
|
private void countMatches(int root) throws IOException {
|
||||||
if (root < numScorers && subScorers[root].docID() == doc) {
|
if (root < numScorers && subScorers[root].docID() == doc) {
|
||||||
nrMatchers++;
|
nrMatchers++;
|
||||||
|
@ -121,7 +94,7 @@ class DisjunctionSumScorer extends DisjunctionScorer {
|
||||||
*/
|
*/
|
||||||
@Override
|
@Override
|
||||||
public float score() throws IOException {
|
public float score() throws IOException {
|
||||||
return (float)score;
|
return (float)score * coord[nrMatchers];
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -146,8 +119,8 @@ class DisjunctionSumScorer extends DisjunctionScorer {
|
||||||
*/
|
*/
|
||||||
@Override
|
@Override
|
||||||
public int advance(int target) throws IOException {
|
public int advance(int target) throws IOException {
|
||||||
if (numScorers == 0) return doc = NO_MORE_DOCS;
|
assert doc != NO_MORE_DOCS;
|
||||||
while (subScorers[0].docID() < target) {
|
while(true) {
|
||||||
if (subScorers[0].advance(target) != NO_MORE_DOCS) {
|
if (subScorers[0].advance(target) != NO_MORE_DOCS) {
|
||||||
heapAdjust(0);
|
heapAdjust(0);
|
||||||
} else {
|
} else {
|
||||||
|
@ -156,14 +129,10 @@ class DisjunctionSumScorer extends DisjunctionScorer {
|
||||||
return doc = NO_MORE_DOCS;
|
return doc = NO_MORE_DOCS;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
if (subScorers[0].docID() >= target) {
|
||||||
|
|
||||||
afterNext();
|
afterNext();
|
||||||
|
|
||||||
if (nrMatchers >= minimumNrMatchers) {
|
|
||||||
return doc;
|
return doc;
|
||||||
} else {
|
}
|
||||||
return nextDoc();
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue