mirror of https://github.com/apache/lucene.git
LUCENE-5694: don't score subscorers in DisjunctionSum/Max unless score is called
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1596907 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
75b9821eb4
commit
c42b921376
|
@ -188,6 +188,9 @@ Optimizations
|
|||
* LUCENE-4236: Optimize BooleanQuery's in-order scoring. This speeds up
|
||||
some types of boolean queries. (Robert Muir)
|
||||
|
||||
* LUCENE-5694: Don't score() subscorers in DisjunctionSumScorer or
|
||||
DisjunctionMaxScorer unless score() is called. (Robert Muir)
|
||||
|
||||
Bug fixes
|
||||
|
||||
* LUCENE-5673: MMapDirectory: Work around a "bug" in the JDK that throws
|
||||
|
|
|
@ -478,7 +478,6 @@ public class BooleanQuery extends Query implements Iterable<BooleanClause> {
|
|||
} else {
|
||||
float coords[] = new float[prohibited.size()+1];
|
||||
Arrays.fill(coords, 1F);
|
||||
// TODO: don't score here.
|
||||
return new ReqExclScorer(main,
|
||||
new DisjunctionSumScorer(this,
|
||||
prohibited.toArray(new Scorer[prohibited.size()]),
|
||||
|
|
|
@ -107,13 +107,11 @@ class BooleanTopLevelScorers {
|
|||
static class ReqMultiOptScorer extends ReqOptSumScorer {
|
||||
private final int requiredCount;
|
||||
private final float coords[];
|
||||
private final Scorer disjunction;
|
||||
|
||||
public ReqMultiOptScorer(Scorer reqScorer, Scorer optScorer, int requiredCount, float coords[]) {
|
||||
super(reqScorer, optScorer);
|
||||
this.requiredCount = requiredCount;
|
||||
this.coords = coords;
|
||||
this.disjunction = optScorer;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -130,7 +128,7 @@ class BooleanTopLevelScorers {
|
|||
return reqScore * coords[requiredCount];
|
||||
}
|
||||
|
||||
return optScorerDoc == curDoc ? (reqScore + optScorer.score()) * coords[requiredCount + disjunction.freq()] : reqScore * coords[requiredCount];
|
||||
return optScorerDoc == curDoc ? (reqScore + optScorer.score()) * coords[requiredCount + optScorer.freq()] : reqScore * coords[requiredCount];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -160,15 +160,17 @@ public class DisjunctionMaxQuery extends Query implements Iterable<Query> {
|
|||
Scorer subScorer = w.scorer(context, acceptDocs);
|
||||
if (subScorer != null) {
|
||||
scorers.add(subScorer);
|
||||
|
||||
}
|
||||
}
|
||||
if (scorers.isEmpty()) {
|
||||
// no sub-scorers had any documents
|
||||
return null;
|
||||
} else if (scorers.size() == 1) {
|
||||
// only one sub-scorer in this segment
|
||||
return scorers.get(0);
|
||||
} else {
|
||||
return new DisjunctionMaxScorer(this, tieBreakerMultiplier, scorers.toArray(new Scorer[scorers.size()]));
|
||||
}
|
||||
DisjunctionMaxScorer result = new DisjunctionMaxScorer(this, tieBreakerMultiplier, scorers.toArray(new Scorer[scorers.size()]));
|
||||
return result;
|
||||
}
|
||||
|
||||
/** Explain the score we computed for doc */
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
package org.apache.lucene.search;
|
||||
|
||||
/**
|
||||
/*
|
||||
* Copyright 2004 The Apache Software Foundation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
|
@ -24,10 +24,9 @@ import java.io.IOException;
|
|||
* by the subquery scorers that generate that document, plus tieBreakerMultiplier times the sum of the scores
|
||||
* for the other subqueries that generate the document.
|
||||
*/
|
||||
class DisjunctionMaxScorer extends DisjunctionScorer {
|
||||
final class DisjunctionMaxScorer extends DisjunctionScorer {
|
||||
/* Multiplier applied to non-maximum-scoring subqueries for a document as they are summed into the result. */
|
||||
private final float tieBreakerMultiplier;
|
||||
private int freq = -1;
|
||||
|
||||
/* Used when scoring currently matching doc. */
|
||||
private float scoreSum;
|
||||
|
@ -44,45 +43,27 @@ class DisjunctionMaxScorer extends DisjunctionScorer {
|
|||
* @param subScorers
|
||||
* The sub scorers this Scorer should iterate on
|
||||
*/
|
||||
public DisjunctionMaxScorer(Weight weight, float tieBreakerMultiplier,
|
||||
Scorer[] subScorers) {
|
||||
DisjunctionMaxScorer(Weight weight, float tieBreakerMultiplier, Scorer[] subScorers) {
|
||||
super(weight, subScorers);
|
||||
this.tieBreakerMultiplier = tieBreakerMultiplier;
|
||||
}
|
||||
|
||||
/** Determine the current document score. Initially invalid, until {@link #nextDoc()} is called the first time.
|
||||
* @return the score of the current generated document
|
||||
*/
|
||||
|
||||
@Override
|
||||
public float score() throws IOException {
|
||||
return scoreMax + (scoreSum - scoreMax) * tieBreakerMultiplier;
|
||||
protected void reset() {
|
||||
scoreSum = scoreMax = 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void afterNext() throws IOException {
|
||||
doc = subScorers[0].docID();
|
||||
if (doc != NO_MORE_DOCS) {
|
||||
scoreSum = scoreMax = subScorers[0].score();
|
||||
freq = 1;
|
||||
scoreAll(1);
|
||||
scoreAll(2);
|
||||
protected void accum(Scorer subScorer) throws IOException {
|
||||
float subScore = subScorer.score();
|
||||
scoreSum += subScore;
|
||||
if (subScore > scoreMax) {
|
||||
scoreMax = subScore;
|
||||
}
|
||||
}
|
||||
|
||||
// Recursively iterate all subScorers that generated last doc computing sum and max
|
||||
private void scoreAll(int root) throws IOException {
|
||||
if (root < numScorers && subScorers[root].docID() == doc) {
|
||||
float sub = subScorers[root].score();
|
||||
freq++;
|
||||
scoreSum += sub;
|
||||
scoreMax = Math.max(scoreMax, sub);
|
||||
scoreAll((root<<1)+1);
|
||||
scoreAll((root<<1)+2);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public int freq() throws IOException {
|
||||
return freq;
|
||||
protected float getFinal() {
|
||||
return scoreMax + (scoreSum - scoreMax) * tieBreakerMultiplier;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -23,25 +23,30 @@ import java.util.Collection;
|
|||
|
||||
/**
|
||||
* Base class for Scorers that score disjunctions.
|
||||
* Currently this just provides helper methods to manage the heap.
|
||||
*/
|
||||
abstract class DisjunctionScorer extends Scorer {
|
||||
protected final Scorer subScorers[];
|
||||
private final Scorer subScorers[];
|
||||
private int numScorers;
|
||||
|
||||
/** The document number of the current match. */
|
||||
protected int doc = -1;
|
||||
protected int numScorers;
|
||||
/** Number of matching scorers for the current match. */
|
||||
protected int freq = -1;
|
||||
|
||||
protected DisjunctionScorer(Weight weight, Scorer subScorers[]) {
|
||||
super(weight);
|
||||
this.subScorers = subScorers;
|
||||
this.numScorers = subScorers.length;
|
||||
if (numScorers <= 1) {
|
||||
throw new IllegalArgumentException("There must be at least 2 subScorers");
|
||||
}
|
||||
heapify();
|
||||
}
|
||||
|
||||
/**
|
||||
* Organize subScorers into a min heap with scorers generating the earliest document on top.
|
||||
*/
|
||||
protected final void heapify() {
|
||||
private void heapify() {
|
||||
for (int i = (numScorers >>> 1) - 1; i >= 0; i--) {
|
||||
heapAdjust(i);
|
||||
}
|
||||
|
@ -51,7 +56,7 @@ abstract class DisjunctionScorer extends Scorer {
|
|||
* The subtree of subScorers at root is a min heap except possibly for its root element.
|
||||
* Bubble the root down as required to make the subtree a heap.
|
||||
*/
|
||||
protected final void heapAdjust(int root) {
|
||||
private void heapAdjust(int root) {
|
||||
Scorer scorer = subScorers[root];
|
||||
int doc = scorer.docID();
|
||||
int i = root;
|
||||
|
@ -88,7 +93,7 @@ abstract class DisjunctionScorer extends Scorer {
|
|||
/**
|
||||
* Remove the root Scorer from subScorers and re-establish it as a heap
|
||||
*/
|
||||
protected final void heapRemoveRoot() {
|
||||
private void heapRemoveRoot() {
|
||||
if (numScorers == 1) {
|
||||
subScorers[0] = null;
|
||||
numScorers = 0;
|
||||
|
@ -110,7 +115,7 @@ abstract class DisjunctionScorer extends Scorer {
|
|||
}
|
||||
|
||||
@Override
|
||||
public long cost() {
|
||||
public final long cost() {
|
||||
long sum = 0;
|
||||
for (int i = 0; i < numScorers; i++) {
|
||||
sum += subScorers[i].cost();
|
||||
|
@ -119,12 +124,12 @@ abstract class DisjunctionScorer extends Scorer {
|
|||
}
|
||||
|
||||
@Override
|
||||
public int docID() {
|
||||
public final int docID() {
|
||||
return doc;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int nextDoc() throws IOException {
|
||||
public final int nextDoc() throws IOException {
|
||||
assert doc != NO_MORE_DOCS;
|
||||
while(true) {
|
||||
if (subScorers[0].nextDoc() != NO_MORE_DOCS) {
|
||||
|
@ -135,15 +140,16 @@ abstract class DisjunctionScorer extends Scorer {
|
|||
return doc = NO_MORE_DOCS;
|
||||
}
|
||||
}
|
||||
if (subScorers[0].docID() != doc) {
|
||||
afterNext();
|
||||
return doc;
|
||||
int docID = subScorers[0].docID();
|
||||
if (docID != doc) {
|
||||
freq = -1;
|
||||
return doc = docID;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public int advance(int target) throws IOException {
|
||||
public final int advance(int target) throws IOException {
|
||||
assert doc != NO_MORE_DOCS;
|
||||
while(true) {
|
||||
if (subScorers[0].advance(target) != NO_MORE_DOCS) {
|
||||
|
@ -154,22 +160,53 @@ abstract class DisjunctionScorer extends Scorer {
|
|||
return doc = NO_MORE_DOCS;
|
||||
}
|
||||
}
|
||||
if (subScorers[0].docID() >= target) {
|
||||
afterNext();
|
||||
return doc;
|
||||
int docID = subScorers[0].docID();
|
||||
if (docID >= target) {
|
||||
freq = -1;
|
||||
return doc = docID;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Called after next() or advance() land on a new document.
|
||||
* <p>
|
||||
* {@code subScorers[0]} will be positioned to the new docid,
|
||||
* which could be {@code NO_MORE_DOCS} (subclass must handle this).
|
||||
* <p>
|
||||
* implementations should assign {@code doc} appropriately, and do any
|
||||
* other work necessary to implement {@code score()} and {@code freq()}
|
||||
*/
|
||||
// TODO: make this less horrible
|
||||
protected abstract void afterNext() throws IOException;
|
||||
// if we haven't already computed freq + score, do so
|
||||
private void visitScorers() throws IOException {
|
||||
reset();
|
||||
freq = 1;
|
||||
accum(subScorers[0]);
|
||||
visit(1);
|
||||
visit(2);
|
||||
}
|
||||
|
||||
// TODO: remove recursion.
|
||||
private void visit(int root) throws IOException {
|
||||
if (root < numScorers && subScorers[root].docID() == doc) {
|
||||
freq++;
|
||||
accum(subScorers[root]);
|
||||
visit((root<<1)+1);
|
||||
visit((root<<1)+2);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public final float score() throws IOException {
|
||||
visitScorers();
|
||||
return getFinal();
|
||||
}
|
||||
|
||||
@Override
|
||||
public final int freq() throws IOException {
|
||||
if (freq < 0) {
|
||||
visitScorers();
|
||||
}
|
||||
return freq;
|
||||
}
|
||||
|
||||
/** Reset score state for a new match */
|
||||
protected abstract void reset();
|
||||
|
||||
/** Factor in sub-scorer match */
|
||||
protected abstract void accum(Scorer subScorer) throws IOException;
|
||||
|
||||
/** Return final score */
|
||||
protected abstract float getFinal();
|
||||
}
|
||||
|
|
|
@ -22,12 +22,8 @@ import java.io.IOException;
|
|||
/** A Scorer for OR like queries, counterpart of <code>ConjunctionScorer</code>.
|
||||
* This Scorer implements {@link Scorer#advance(int)} and uses advance() on the given Scorers.
|
||||
*/
|
||||
class DisjunctionSumScorer extends DisjunctionScorer {
|
||||
|
||||
/** The number of subscorers that provide the current match. */
|
||||
protected int nrMatchers = -1;
|
||||
|
||||
protected double score = Float.NaN;
|
||||
final class DisjunctionSumScorer extends DisjunctionScorer {
|
||||
private double score;
|
||||
private final float[] coord;
|
||||
|
||||
/** Construct a <code>DisjunctionScorer</code>.
|
||||
|
@ -35,50 +31,23 @@ class DisjunctionSumScorer extends DisjunctionScorer {
|
|||
* @param subScorers Array of at least two subscorers.
|
||||
* @param coord Table of coordination factors
|
||||
*/
|
||||
DisjunctionSumScorer(Weight weight, Scorer[] subScorers, float[] coord) throws IOException {
|
||||
DisjunctionSumScorer(Weight weight, Scorer[] subScorers, float[] coord) {
|
||||
super(weight, subScorers);
|
||||
|
||||
if (numScorers <= 1) {
|
||||
throw new IllegalArgumentException("There must be at least 2 subScorers");
|
||||
}
|
||||
this.coord = coord;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void afterNext() throws IOException {
|
||||
final Scorer sub = subScorers[0];
|
||||
doc = sub.docID();
|
||||
if (doc != NO_MORE_DOCS) {
|
||||
score = sub.score();
|
||||
nrMatchers = 1;
|
||||
countMatches(1);
|
||||
countMatches(2);
|
||||
}
|
||||
protected void reset() {
|
||||
score = 0;
|
||||
}
|
||||
|
||||
// TODO: this currently scores, but so did the previous impl
|
||||
// TODO: remove recursion.
|
||||
// TODO: if we separate scoring, out of here,
|
||||
// then change freq() to just always compute it from scratch
|
||||
private void countMatches(int root) throws IOException {
|
||||
if (root < numScorers && subScorers[root].docID() == doc) {
|
||||
nrMatchers++;
|
||||
score += subScorers[root].score();
|
||||
countMatches((root<<1)+1);
|
||||
countMatches((root<<1)+2);
|
||||
}
|
||||
@Override
|
||||
protected void accum(Scorer subScorer) throws IOException {
|
||||
score += subScorer.score();
|
||||
}
|
||||
|
||||
/** Returns the score of the current document matching the query.
|
||||
* Initially invalid, until {@link #nextDoc()} is called the first time.
|
||||
*/
|
||||
@Override
|
||||
public float score() throws IOException {
|
||||
return (float)score * coord[nrMatchers];
|
||||
}
|
||||
|
||||
@Override
|
||||
public int freq() throws IOException {
|
||||
return nrMatchers;
|
||||
protected float getFinal() {
|
||||
return (float)score * coord[freq];
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue