diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 271263dd93a..cedb02f2237 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -188,6 +188,9 @@ Optimizations * LUCENE-4236: Optimize BooleanQuery's in-order scoring. This speeds up some types of boolean queries. (Robert Muir) +* LUCENE-5694: Don't score() subscorers in DisjunctionSumScorer or + DisjunctionMaxScorer unless score() is called. (Robert Muir) + Bug fixes * LUCENE-5673: MMapDirectory: Work around a "bug" in the JDK that throws diff --git a/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java b/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java index f6de71293d7..4d7635d4d7a 100644 --- a/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java @@ -478,7 +478,6 @@ public class BooleanQuery extends Query implements Iterable { } else { float coords[] = new float[prohibited.size()+1]; Arrays.fill(coords, 1F); - // TODO: don't score here. return new ReqExclScorer(main, new DisjunctionSumScorer(this, prohibited.toArray(new Scorer[prohibited.size()]), diff --git a/lucene/core/src/java/org/apache/lucene/search/BooleanTopLevelScorers.java b/lucene/core/src/java/org/apache/lucene/search/BooleanTopLevelScorers.java index 6edcf2cba6b..aedc5b09de0 100644 --- a/lucene/core/src/java/org/apache/lucene/search/BooleanTopLevelScorers.java +++ b/lucene/core/src/java/org/apache/lucene/search/BooleanTopLevelScorers.java @@ -107,13 +107,11 @@ class BooleanTopLevelScorers { static class ReqMultiOptScorer extends ReqOptSumScorer { private final int requiredCount; private final float coords[]; - private final Scorer disjunction; public ReqMultiOptScorer(Scorer reqScorer, Scorer optScorer, int requiredCount, float coords[]) { super(reqScorer, optScorer); this.requiredCount = requiredCount; this.coords = coords; - this.disjunction = optScorer; } @Override @@ -130,7 +128,7 @@ class BooleanTopLevelScorers { return reqScore * coords[requiredCount]; } - return optScorerDoc == curDoc ? (reqScore + optScorer.score()) * coords[requiredCount + disjunction.freq()] : reqScore * coords[requiredCount]; + return optScorerDoc == curDoc ? (reqScore + optScorer.score()) * coords[requiredCount + optScorer.freq()] : reqScore * coords[requiredCount]; } } } diff --git a/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxQuery.java b/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxQuery.java index e81097866a9..c1954976cd4 100644 --- a/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxQuery.java @@ -160,15 +160,17 @@ public class DisjunctionMaxQuery extends Query implements Iterable { Scorer subScorer = w.scorer(context, acceptDocs); if (subScorer != null) { scorers.add(subScorer); - } } if (scorers.isEmpty()) { // no sub-scorers had any documents return null; + } else if (scorers.size() == 1) { + // only one sub-scorer in this segment + return scorers.get(0); + } else { + return new DisjunctionMaxScorer(this, tieBreakerMultiplier, scorers.toArray(new Scorer[scorers.size()])); } - DisjunctionMaxScorer result = new DisjunctionMaxScorer(this, tieBreakerMultiplier, scorers.toArray(new Scorer[scorers.size()])); - return result; } /** Explain the score we computed for doc */ diff --git a/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxScorer.java b/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxScorer.java index 205e78e3242..b5d0a0df9e8 100644 --- a/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxScorer.java @@ -1,6 +1,6 @@ package org.apache.lucene.search; -/** +/* * Copyright 2004 The Apache Software Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -24,10 +24,9 @@ import java.io.IOException; * by the subquery scorers that generate that document, plus tieBreakerMultiplier times the sum of the scores * for the other subqueries that generate the document. */ -class DisjunctionMaxScorer extends DisjunctionScorer { +final class DisjunctionMaxScorer extends DisjunctionScorer { /* Multiplier applied to non-maximum-scoring subqueries for a document as they are summed into the result. */ private final float tieBreakerMultiplier; - private int freq = -1; /* Used when scoring currently matching doc. */ private float scoreSum; @@ -44,45 +43,27 @@ class DisjunctionMaxScorer extends DisjunctionScorer { * @param subScorers * The sub scorers this Scorer should iterate on */ - public DisjunctionMaxScorer(Weight weight, float tieBreakerMultiplier, - Scorer[] subScorers) { + DisjunctionMaxScorer(Weight weight, float tieBreakerMultiplier, Scorer[] subScorers) { super(weight, subScorers); this.tieBreakerMultiplier = tieBreakerMultiplier; } - - /** Determine the current document score. Initially invalid, until {@link #nextDoc()} is called the first time. - * @return the score of the current generated document - */ + @Override - public float score() throws IOException { - return scoreMax + (scoreSum - scoreMax) * tieBreakerMultiplier; + protected void reset() { + scoreSum = scoreMax = 0; } @Override - protected void afterNext() throws IOException { - doc = subScorers[0].docID(); - if (doc != NO_MORE_DOCS) { - scoreSum = scoreMax = subScorers[0].score(); - freq = 1; - scoreAll(1); - scoreAll(2); + protected void accum(Scorer subScorer) throws IOException { + float subScore = subScorer.score(); + scoreSum += subScore; + if (subScore > scoreMax) { + scoreMax = subScore; } } - - // Recursively iterate all subScorers that generated last doc computing sum and max - private void scoreAll(int root) throws IOException { - if (root < numScorers && subScorers[root].docID() == doc) { - float sub = subScorers[root].score(); - freq++; - scoreSum += sub; - scoreMax = Math.max(scoreMax, sub); - scoreAll((root<<1)+1); - scoreAll((root<<1)+2); - } - } - + @Override - public int freq() throws IOException { - return freq; + protected float getFinal() { + return scoreMax + (scoreSum - scoreMax) * tieBreakerMultiplier; } } diff --git a/lucene/core/src/java/org/apache/lucene/search/DisjunctionScorer.java b/lucene/core/src/java/org/apache/lucene/search/DisjunctionScorer.java index 6f49e09c68f..5b7e2ff1cd8 100644 --- a/lucene/core/src/java/org/apache/lucene/search/DisjunctionScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/DisjunctionScorer.java @@ -23,25 +23,30 @@ import java.util.Collection; /** * Base class for Scorers that score disjunctions. - * Currently this just provides helper methods to manage the heap. */ abstract class DisjunctionScorer extends Scorer { - protected final Scorer subScorers[]; + private final Scorer subScorers[]; + private int numScorers; + /** The document number of the current match. */ protected int doc = -1; - protected int numScorers; + /** Number of matching scorers for the current match. */ + protected int freq = -1; protected DisjunctionScorer(Weight weight, Scorer subScorers[]) { super(weight); this.subScorers = subScorers; this.numScorers = subScorers.length; + if (numScorers <= 1) { + throw new IllegalArgumentException("There must be at least 2 subScorers"); + } heapify(); } /** * Organize subScorers into a min heap with scorers generating the earliest document on top. */ - protected final void heapify() { + private void heapify() { for (int i = (numScorers >>> 1) - 1; i >= 0; i--) { heapAdjust(i); } @@ -51,7 +56,7 @@ abstract class DisjunctionScorer extends Scorer { * The subtree of subScorers at root is a min heap except possibly for its root element. * Bubble the root down as required to make the subtree a heap. */ - protected final void heapAdjust(int root) { + private void heapAdjust(int root) { Scorer scorer = subScorers[root]; int doc = scorer.docID(); int i = root; @@ -88,7 +93,7 @@ abstract class DisjunctionScorer extends Scorer { /** * Remove the root Scorer from subScorers and re-establish it as a heap */ - protected final void heapRemoveRoot() { + private void heapRemoveRoot() { if (numScorers == 1) { subScorers[0] = null; numScorers = 0; @@ -110,7 +115,7 @@ abstract class DisjunctionScorer extends Scorer { } @Override - public long cost() { + public final long cost() { long sum = 0; for (int i = 0; i < numScorers; i++) { sum += subScorers[i].cost(); @@ -119,12 +124,12 @@ abstract class DisjunctionScorer extends Scorer { } @Override - public int docID() { + public final int docID() { return doc; } @Override - public int nextDoc() throws IOException { + public final int nextDoc() throws IOException { assert doc != NO_MORE_DOCS; while(true) { if (subScorers[0].nextDoc() != NO_MORE_DOCS) { @@ -135,15 +140,16 @@ abstract class DisjunctionScorer extends Scorer { return doc = NO_MORE_DOCS; } } - if (subScorers[0].docID() != doc) { - afterNext(); - return doc; + int docID = subScorers[0].docID(); + if (docID != doc) { + freq = -1; + return doc = docID; } } } @Override - public int advance(int target) throws IOException { + public final int advance(int target) throws IOException { assert doc != NO_MORE_DOCS; while(true) { if (subScorers[0].advance(target) != NO_MORE_DOCS) { @@ -154,22 +160,53 @@ abstract class DisjunctionScorer extends Scorer { return doc = NO_MORE_DOCS; } } - if (subScorers[0].docID() >= target) { - afterNext(); - return doc; + int docID = subScorers[0].docID(); + if (docID >= target) { + freq = -1; + return doc = docID; } } } - /** - * Called after next() or advance() land on a new document. - *

- * {@code subScorers[0]} will be positioned to the new docid, - * which could be {@code NO_MORE_DOCS} (subclass must handle this). - *

- * implementations should assign {@code doc} appropriately, and do any - * other work necessary to implement {@code score()} and {@code freq()} - */ - // TODO: make this less horrible - protected abstract void afterNext() throws IOException; + // if we haven't already computed freq + score, do so + private void visitScorers() throws IOException { + reset(); + freq = 1; + accum(subScorers[0]); + visit(1); + visit(2); + } + + // TODO: remove recursion. + private void visit(int root) throws IOException { + if (root < numScorers && subScorers[root].docID() == doc) { + freq++; + accum(subScorers[root]); + visit((root<<1)+1); + visit((root<<1)+2); + } + } + + @Override + public final float score() throws IOException { + visitScorers(); + return getFinal(); + } + + @Override + public final int freq() throws IOException { + if (freq < 0) { + visitScorers(); + } + return freq; + } + + /** Reset score state for a new match */ + protected abstract void reset(); + + /** Factor in sub-scorer match */ + protected abstract void accum(Scorer subScorer) throws IOException; + + /** Return final score */ + protected abstract float getFinal(); } diff --git a/lucene/core/src/java/org/apache/lucene/search/DisjunctionSumScorer.java b/lucene/core/src/java/org/apache/lucene/search/DisjunctionSumScorer.java index 49a06757287..f291695a352 100644 --- a/lucene/core/src/java/org/apache/lucene/search/DisjunctionSumScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/DisjunctionSumScorer.java @@ -22,12 +22,8 @@ import java.io.IOException; /** A Scorer for OR like queries, counterpart of ConjunctionScorer. * This Scorer implements {@link Scorer#advance(int)} and uses advance() on the given Scorers. */ -class DisjunctionSumScorer extends DisjunctionScorer { - - /** The number of subscorers that provide the current match. */ - protected int nrMatchers = -1; - - protected double score = Float.NaN; +final class DisjunctionSumScorer extends DisjunctionScorer { + private double score; private final float[] coord; /** Construct a DisjunctionScorer. @@ -35,50 +31,23 @@ class DisjunctionSumScorer extends DisjunctionScorer { * @param subScorers Array of at least two subscorers. * @param coord Table of coordination factors */ - DisjunctionSumScorer(Weight weight, Scorer[] subScorers, float[] coord) throws IOException { + DisjunctionSumScorer(Weight weight, Scorer[] subScorers, float[] coord) { super(weight, subScorers); - - if (numScorers <= 1) { - throw new IllegalArgumentException("There must be at least 2 subScorers"); - } this.coord = coord; } @Override - protected void afterNext() throws IOException { - final Scorer sub = subScorers[0]; - doc = sub.docID(); - if (doc != NO_MORE_DOCS) { - score = sub.score(); - nrMatchers = 1; - countMatches(1); - countMatches(2); - } + protected void reset() { + score = 0; } - // TODO: this currently scores, but so did the previous impl - // TODO: remove recursion. - // TODO: if we separate scoring, out of here, - // then change freq() to just always compute it from scratch - private void countMatches(int root) throws IOException { - if (root < numScorers && subScorers[root].docID() == doc) { - nrMatchers++; - score += subScorers[root].score(); - countMatches((root<<1)+1); - countMatches((root<<1)+2); - } + @Override + protected void accum(Scorer subScorer) throws IOException { + score += subScorer.score(); } - /** Returns the score of the current document matching the query. - * Initially invalid, until {@link #nextDoc()} is called the first time. - */ @Override - public float score() throws IOException { - return (float)score * coord[nrMatchers]; - } - - @Override - public int freq() throws IOException { - return nrMatchers; + protected float getFinal() { + return (float)score * coord[freq]; } }