LUCENE-5694: don't score subscorers in DisjunctionSum/Max unless score is called

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1596907 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Robert Muir 2014-05-22 16:01:06 +00:00
parent 75b9821eb4
commit c42b921376
7 changed files with 97 additions and 108 deletions

View File

@ -188,6 +188,9 @@ Optimizations
* LUCENE-4236: Optimize BooleanQuery's in-order scoring. This speeds up
some types of boolean queries. (Robert Muir)
* LUCENE-5694: Don't score() subscorers in DisjunctionSumScorer or
DisjunctionMaxScorer unless score() is called. (Robert Muir)
Bug fixes
* LUCENE-5673: MMapDirectory: Work around a "bug" in the JDK that throws

View File

@ -478,7 +478,6 @@ public class BooleanQuery extends Query implements Iterable<BooleanClause> {
} else {
float coords[] = new float[prohibited.size()+1];
Arrays.fill(coords, 1F);
// TODO: don't score here.
return new ReqExclScorer(main,
new DisjunctionSumScorer(this,
prohibited.toArray(new Scorer[prohibited.size()]),

View File

@ -107,13 +107,11 @@ class BooleanTopLevelScorers {
static class ReqMultiOptScorer extends ReqOptSumScorer {
private final int requiredCount;
private final float coords[];
private final Scorer disjunction;
public ReqMultiOptScorer(Scorer reqScorer, Scorer optScorer, int requiredCount, float coords[]) {
super(reqScorer, optScorer);
this.requiredCount = requiredCount;
this.coords = coords;
this.disjunction = optScorer;
}
@Override
@ -130,7 +128,7 @@ class BooleanTopLevelScorers {
return reqScore * coords[requiredCount];
}
return optScorerDoc == curDoc ? (reqScore + optScorer.score()) * coords[requiredCount + disjunction.freq()] : reqScore * coords[requiredCount];
return optScorerDoc == curDoc ? (reqScore + optScorer.score()) * coords[requiredCount + optScorer.freq()] : reqScore * coords[requiredCount];
}
}
}

View File

@ -160,15 +160,17 @@ public class DisjunctionMaxQuery extends Query implements Iterable<Query> {
Scorer subScorer = w.scorer(context, acceptDocs);
if (subScorer != null) {
scorers.add(subScorer);
}
}
if (scorers.isEmpty()) {
// no sub-scorers had any documents
return null;
} else if (scorers.size() == 1) {
// only one sub-scorer in this segment
return scorers.get(0);
} else {
return new DisjunctionMaxScorer(this, tieBreakerMultiplier, scorers.toArray(new Scorer[scorers.size()]));
}
DisjunctionMaxScorer result = new DisjunctionMaxScorer(this, tieBreakerMultiplier, scorers.toArray(new Scorer[scorers.size()]));
return result;
}
/** Explain the score we computed for doc */

View File

@ -1,6 +1,6 @@
package org.apache.lucene.search;
/**
/*
* Copyright 2004 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
@ -24,10 +24,9 @@ import java.io.IOException;
* by the subquery scorers that generate that document, plus tieBreakerMultiplier times the sum of the scores
* for the other subqueries that generate the document.
*/
class DisjunctionMaxScorer extends DisjunctionScorer {
final class DisjunctionMaxScorer extends DisjunctionScorer {
/* Multiplier applied to non-maximum-scoring subqueries for a document as they are summed into the result. */
private final float tieBreakerMultiplier;
private int freq = -1;
/* Used when scoring currently matching doc. */
private float scoreSum;
@ -44,45 +43,27 @@ class DisjunctionMaxScorer extends DisjunctionScorer {
* @param subScorers
* The sub scorers this Scorer should iterate on
*/
public DisjunctionMaxScorer(Weight weight, float tieBreakerMultiplier,
Scorer[] subScorers) {
DisjunctionMaxScorer(Weight weight, float tieBreakerMultiplier, Scorer[] subScorers) {
super(weight, subScorers);
this.tieBreakerMultiplier = tieBreakerMultiplier;
}
/** Determine the current document score. Initially invalid, until {@link #nextDoc()} is called the first time.
* @return the score of the current generated document
*/
@Override
public float score() throws IOException {
return scoreMax + (scoreSum - scoreMax) * tieBreakerMultiplier;
protected void reset() {
scoreSum = scoreMax = 0;
}
@Override
protected void afterNext() throws IOException {
doc = subScorers[0].docID();
if (doc != NO_MORE_DOCS) {
scoreSum = scoreMax = subScorers[0].score();
freq = 1;
scoreAll(1);
scoreAll(2);
protected void accum(Scorer subScorer) throws IOException {
float subScore = subScorer.score();
scoreSum += subScore;
if (subScore > scoreMax) {
scoreMax = subScore;
}
}
// Recursively iterate all subScorers that generated last doc computing sum and max
private void scoreAll(int root) throws IOException {
if (root < numScorers && subScorers[root].docID() == doc) {
float sub = subScorers[root].score();
freq++;
scoreSum += sub;
scoreMax = Math.max(scoreMax, sub);
scoreAll((root<<1)+1);
scoreAll((root<<1)+2);
}
}
@Override
public int freq() throws IOException {
return freq;
protected float getFinal() {
return scoreMax + (scoreSum - scoreMax) * tieBreakerMultiplier;
}
}

View File

@ -23,25 +23,30 @@ import java.util.Collection;
/**
* Base class for Scorers that score disjunctions.
* Currently this just provides helper methods to manage the heap.
*/
abstract class DisjunctionScorer extends Scorer {
protected final Scorer subScorers[];
private final Scorer subScorers[];
private int numScorers;
/** The document number of the current match. */
protected int doc = -1;
protected int numScorers;
/** Number of matching scorers for the current match. */
protected int freq = -1;
protected DisjunctionScorer(Weight weight, Scorer subScorers[]) {
super(weight);
this.subScorers = subScorers;
this.numScorers = subScorers.length;
if (numScorers <= 1) {
throw new IllegalArgumentException("There must be at least 2 subScorers");
}
heapify();
}
/**
* Organize subScorers into a min heap with scorers generating the earliest document on top.
*/
protected final void heapify() {
private void heapify() {
for (int i = (numScorers >>> 1) - 1; i >= 0; i--) {
heapAdjust(i);
}
@ -51,7 +56,7 @@ abstract class DisjunctionScorer extends Scorer {
* The subtree of subScorers at root is a min heap except possibly for its root element.
* Bubble the root down as required to make the subtree a heap.
*/
protected final void heapAdjust(int root) {
private void heapAdjust(int root) {
Scorer scorer = subScorers[root];
int doc = scorer.docID();
int i = root;
@ -88,7 +93,7 @@ abstract class DisjunctionScorer extends Scorer {
/**
* Remove the root Scorer from subScorers and re-establish it as a heap
*/
protected final void heapRemoveRoot() {
private void heapRemoveRoot() {
if (numScorers == 1) {
subScorers[0] = null;
numScorers = 0;
@ -110,7 +115,7 @@ abstract class DisjunctionScorer extends Scorer {
}
@Override
public long cost() {
public final long cost() {
long sum = 0;
for (int i = 0; i < numScorers; i++) {
sum += subScorers[i].cost();
@ -119,12 +124,12 @@ abstract class DisjunctionScorer extends Scorer {
}
@Override
public int docID() {
public final int docID() {
return doc;
}
@Override
public int nextDoc() throws IOException {
public final int nextDoc() throws IOException {
assert doc != NO_MORE_DOCS;
while(true) {
if (subScorers[0].nextDoc() != NO_MORE_DOCS) {
@ -135,15 +140,16 @@ abstract class DisjunctionScorer extends Scorer {
return doc = NO_MORE_DOCS;
}
}
if (subScorers[0].docID() != doc) {
afterNext();
return doc;
int docID = subScorers[0].docID();
if (docID != doc) {
freq = -1;
return doc = docID;
}
}
}
@Override
public int advance(int target) throws IOException {
public final int advance(int target) throws IOException {
assert doc != NO_MORE_DOCS;
while(true) {
if (subScorers[0].advance(target) != NO_MORE_DOCS) {
@ -154,22 +160,53 @@ abstract class DisjunctionScorer extends Scorer {
return doc = NO_MORE_DOCS;
}
}
if (subScorers[0].docID() >= target) {
afterNext();
return doc;
int docID = subScorers[0].docID();
if (docID >= target) {
freq = -1;
return doc = docID;
}
}
}
/**
* Called after next() or advance() land on a new document.
* <p>
* {@code subScorers[0]} will be positioned to the new docid,
* which could be {@code NO_MORE_DOCS} (subclass must handle this).
* <p>
* implementations should assign {@code doc} appropriately, and do any
* other work necessary to implement {@code score()} and {@code freq()}
*/
// TODO: make this less horrible
protected abstract void afterNext() throws IOException;
// if we haven't already computed freq + score, do so
private void visitScorers() throws IOException {
reset();
freq = 1;
accum(subScorers[0]);
visit(1);
visit(2);
}
// TODO: remove recursion.
private void visit(int root) throws IOException {
if (root < numScorers && subScorers[root].docID() == doc) {
freq++;
accum(subScorers[root]);
visit((root<<1)+1);
visit((root<<1)+2);
}
}
@Override
public final float score() throws IOException {
visitScorers();
return getFinal();
}
@Override
public final int freq() throws IOException {
if (freq < 0) {
visitScorers();
}
return freq;
}
/** Reset score state for a new match */
protected abstract void reset();
/** Factor in sub-scorer match */
protected abstract void accum(Scorer subScorer) throws IOException;
/** Return final score */
protected abstract float getFinal();
}

View File

@ -22,12 +22,8 @@ import java.io.IOException;
/** A Scorer for OR like queries, counterpart of <code>ConjunctionScorer</code>.
* This Scorer implements {@link Scorer#advance(int)} and uses advance() on the given Scorers.
*/
class DisjunctionSumScorer extends DisjunctionScorer {
/** The number of subscorers that provide the current match. */
protected int nrMatchers = -1;
protected double score = Float.NaN;
final class DisjunctionSumScorer extends DisjunctionScorer {
private double score;
private final float[] coord;
/** Construct a <code>DisjunctionScorer</code>.
@ -35,50 +31,23 @@ class DisjunctionSumScorer extends DisjunctionScorer {
* @param subScorers Array of at least two subscorers.
* @param coord Table of coordination factors
*/
DisjunctionSumScorer(Weight weight, Scorer[] subScorers, float[] coord) throws IOException {
DisjunctionSumScorer(Weight weight, Scorer[] subScorers, float[] coord) {
super(weight, subScorers);
if (numScorers <= 1) {
throw new IllegalArgumentException("There must be at least 2 subScorers");
}
this.coord = coord;
}
@Override
protected void afterNext() throws IOException {
final Scorer sub = subScorers[0];
doc = sub.docID();
if (doc != NO_MORE_DOCS) {
score = sub.score();
nrMatchers = 1;
countMatches(1);
countMatches(2);
}
protected void reset() {
score = 0;
}
// TODO: this currently scores, but so did the previous impl
// TODO: remove recursion.
// TODO: if we separate scoring, out of here,
// then change freq() to just always compute it from scratch
private void countMatches(int root) throws IOException {
if (root < numScorers && subScorers[root].docID() == doc) {
nrMatchers++;
score += subScorers[root].score();
countMatches((root<<1)+1);
countMatches((root<<1)+2);
}
@Override
protected void accum(Scorer subScorer) throws IOException {
score += subScorer.score();
}
/** Returns the score of the current document matching the query.
* Initially invalid, until {@link #nextDoc()} is called the first time.
*/
@Override
public float score() throws IOException {
return (float)score * coord[nrMatchers];
}
@Override
public int freq() throws IOException {
return nrMatchers;
protected float getFinal() {
return (float)score * coord[freq];
}
}