diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 304480681ba..602924ba9c4 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -206,6 +206,9 @@ Optimizations * LUCENE-6892: various lucene.index initialCapacity tweaks (Christine Poerschke) +* LUCENE-6276: Added TwoPhaseIterator.matchCost() which allows to confirm the + least costly TwoPhaseIterators first. (Paul Elschot via Adrien Grand) + Bug Fixes * LUCENE-6817: ComplexPhraseQueryParser.ComplexPhraseQuery does not display diff --git a/lucene/core/src/java/org/apache/lucene/search/ConjunctionDISI.java b/lucene/core/src/java/org/apache/lucene/search/ConjunctionDISI.java index fd7cccd99e8..07227d2c501 100644 --- a/lucene/core/src/java/org/apache/lucene/search/ConjunctionDISI.java +++ b/lucene/core/src/java/org/apache/lucene/search/ConjunctionDISI.java @@ -155,7 +155,7 @@ public class ConjunctionDISI extends DocIdSetIterator { @Override public long cost() { - return lead.cost(); + return lead.cost(); // overestimate } /** @@ -164,16 +164,33 @@ public class ConjunctionDISI extends DocIdSetIterator { private static class TwoPhaseConjunctionDISI extends TwoPhaseIterator { private final TwoPhaseIterator[] twoPhaseIterators; + private final float matchCost; private TwoPhaseConjunctionDISI(List iterators, List twoPhaseIterators) { super(new ConjunctionDISI(iterators)); assert twoPhaseIterators.size() > 0; + + CollectionUtil.timSort(twoPhaseIterators, new Comparator() { + @Override + public int compare(TwoPhaseIterator o1, TwoPhaseIterator o2) { + return Float.compare(o1.matchCost(), o2.matchCost()); + } + }); + this.twoPhaseIterators = twoPhaseIterators.toArray(new TwoPhaseIterator[twoPhaseIterators.size()]); + + // Compute the matchCost as the total matchCost of the sub iterators. + // TODO: This could be too high because the matching is done cheapest first: give the lower matchCosts a higher weight. 
+ float totalMatchCost = 0; + for (TwoPhaseIterator tpi : twoPhaseIterators) { + totalMatchCost += tpi.matchCost(); + } + matchCost = totalMatchCost; } @Override public boolean matches() throws IOException { - for (TwoPhaseIterator twoPhaseIterator : twoPhaseIterators) { + for (TwoPhaseIterator twoPhaseIterator : twoPhaseIterators) { // match cheapest first if (twoPhaseIterator.matches() == false) { return false; } @@ -181,6 +198,11 @@ public class ConjunctionDISI extends DocIdSetIterator { return true; } + @Override + public float matchCost() { + return matchCost; + } + } /** diff --git a/lucene/core/src/java/org/apache/lucene/search/DisjunctionScorer.java b/lucene/core/src/java/org/apache/lucene/search/DisjunctionScorer.java index c32a520e416..e02efbaaaa9 100644 --- a/lucene/core/src/java/org/apache/lucene/search/DisjunctionScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/DisjunctionScorer.java @@ -52,19 +52,25 @@ abstract class DisjunctionScorer extends Scorer { @Override public TwoPhaseIterator asTwoPhaseIterator() { - boolean hasApproximation = false; + float sumMatchCost = 0; + long sumApproxCost = 0; + + // Compute matchCost as the average over the matchCost of the subScorers. + // This is weighted by the cost, which is an expected number of matching documents. for (DisiWrapper w : subScorers) { if (w.twoPhaseView != null) { - hasApproximation = true; - break; + long costWeight = (w.cost <= 1) ? 1 : w.cost; + sumMatchCost += w.twoPhaseView.matchCost() * costWeight; + sumApproxCost += costWeight; } } - if (! hasApproximation) { - // none of the sub scorers supports approximations + if (sumApproxCost == 0) { // no sub scorer supports approximations return null; } + final float matchCost = sumMatchCost / sumApproxCost; + // note it is important to share the same pq as this scorer so that // rebalancing the pq through the approximation will also rebalance // the pq in this scorer. 
@@ -105,6 +111,11 @@ abstract class DisjunctionScorer extends Scorer { DisjunctionScorer.this.topScorers = topScorers; return true; } + + @Override + public float matchCost() { + return matchCost; + } }; } diff --git a/lucene/core/src/java/org/apache/lucene/search/ExactPhraseScorer.java b/lucene/core/src/java/org/apache/lucene/search/ExactPhraseScorer.java index 48060ef70f3..248a948a341 100644 --- a/lucene/core/src/java/org/apache/lucene/search/ExactPhraseScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/ExactPhraseScorer.java @@ -44,9 +44,11 @@ final class ExactPhraseScorer extends Scorer { private final Similarity.SimScorer docScorer; private final boolean needsScores; + private float matchCost; ExactPhraseScorer(Weight weight, PhraseQuery.PostingsAndFreq[] postings, - Similarity.SimScorer docScorer, boolean needsScores) throws IOException { + Similarity.SimScorer docScorer, boolean needsScores, + float matchCost) throws IOException { super(weight); this.docScorer = docScorer; this.needsScores = needsScores; @@ -59,6 +61,7 @@ final class ExactPhraseScorer extends Scorer { } conjunction = ConjunctionDISI.intersect(iterators); this.postings = postingsAndPositions.toArray(new PostingsAndPosition[postingsAndPositions.size()]); + this.matchCost = matchCost; } @Override @@ -68,6 +71,11 @@ final class ExactPhraseScorer extends Scorer { public boolean matches() throws IOException { return phraseFreq() > 0; } + + @Override + public float matchCost() { + return matchCost; + } }; } diff --git a/lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java b/lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java index f29d86a758d..f36c1763de3 100644 --- a/lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java @@ -189,6 +189,7 @@ public class MultiPhraseQuery extends Query { // Reuse single TermsEnum below: final TermsEnum termsEnum = fieldTerms.iterator(); 
+ float totalMatchCost = 0; for (int pos=0; pos + * Aside: Instead of being constant this could depend among others on + * {@link Lucene50PostingsFormat#BLOCK_SIZE}, + * {@link TermsEnum#docFreq()}, + * {@link TermsEnum#totalTermFreq()}, + * {@link DocIdSetIterator#cost()} (expected number of matching docs), + * {@link LeafReader#maxDoc()} (total number of docs in the segment), + * and the seek time and block size of the device storing the index. + */ + private static final int TERM_POSNS_SEEK_OPS_PER_DOC = 128; + + /** Number of simple operations in {@link Lucene50PostingsReader.BlockPostingsEnum#nextPosition()} + * when no seek or buffer refill is done. + */ + private static final int TERM_OPS_PER_POS = 7; + + /** Returns an expected cost in simple operations + * of processing the occurrences of a term + * in a document that contains the term. + * This is for use by {@link TwoPhaseIterator#matchCost} implementations. + *
This may be inaccurate when {@link TermsEnum#totalTermFreq()} is not available. + * @param termsEnum The term is the term at which this TermsEnum is positioned. + */ + static float termPositionsCost(TermsEnum termsEnum) throws IOException { + int docFreq = termsEnum.docFreq(); + assert docFreq > 0; + long totalTermFreq = termsEnum.totalTermFreq(); // -1 when not available + float expOccurrencesInMatchingDoc = (totalTermFreq < docFreq) ? 1 : (totalTermFreq / (float) docFreq); + return TERM_POSNS_SEEK_OPS_PER_DOC + expOccurrencesInMatchingDoc * TERM_OPS_PER_POS; + } + + @Override public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException { return new PhraseWeight(searcher, needsScores); diff --git a/lucene/core/src/java/org/apache/lucene/search/RandomAccessWeight.java b/lucene/core/src/java/org/apache/lucene/search/RandomAccessWeight.java index 5e920cb3db9..2d25e296a46 100644 --- a/lucene/core/src/java/org/apache/lucene/search/RandomAccessWeight.java +++ b/lucene/core/src/java/org/apache/lucene/search/RandomAccessWeight.java @@ -62,6 +62,11 @@ public abstract class RandomAccessWeight extends ConstantScoreWeight { return matchingDocs.get(doc); } + + @Override + public float matchCost() { + return 10; // TODO: use some cost of matchingDocs + } }; return new ConstantScoreScorer(this, score(), twoPhase); diff --git a/lucene/core/src/java/org/apache/lucene/search/ReqExclScorer.java b/lucene/core/src/java/org/apache/lucene/search/ReqExclScorer.java index 125d8872255..d401cde1abf 100644 --- a/lucene/core/src/java/org/apache/lucene/search/ReqExclScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/ReqExclScorer.java @@ -149,6 +149,10 @@ class ReqExclScorer extends Scorer { return ReqExclScorer.matches(doc, exclDoc, reqTwoPhaseIterator, exclTwoPhaseIterator); } + @Override + public float matchCost() { + return reqTwoPhaseIterator.matchCost(); // TODO: also use cost of exclApproximation.advance() + } }; } } diff --git 
a/lucene/core/src/java/org/apache/lucene/search/SloppyPhraseScorer.java b/lucene/core/src/java/org/apache/lucene/search/SloppyPhraseScorer.java index 4ee2bf63f70..5c565ce9638 100644 --- a/lucene/core/src/java/org/apache/lucene/search/SloppyPhraseScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/SloppyPhraseScorer.java @@ -52,9 +52,11 @@ final class SloppyPhraseScorer extends Scorer { private int numMatches; final boolean needsScores; + private final float matchCost; SloppyPhraseScorer(Weight weight, PhraseQuery.PostingsAndFreq[] postings, - int slop, Similarity.SimScorer docScorer, boolean needsScores) { + int slop, Similarity.SimScorer docScorer, boolean needsScores, + float matchCost) { super(weight); this.docScorer = docScorer; this.needsScores = needsScores; @@ -68,6 +70,7 @@ final class SloppyPhraseScorer extends Scorer { phrasePositions[i] = new PhrasePositions(postings[i].postings, postings[i].position, i, postings[i].terms); } conjunction = ConjunctionDISI.intersect(Arrays.asList(iterators)); + this.matchCost = matchCost; } /** @@ -596,6 +599,16 @@ final class SloppyPhraseScorer extends Scorer { sloppyFreq = phraseFreq(); // check for phrase return sloppyFreq != 0F; } + + @Override + public float matchCost() { + return matchCost; + } + + @Override + public String toString() { + return "SloppyPhraseScorer@asTwoPhaseIterator(" + SloppyPhraseScorer.this + ")"; + } }; } } diff --git a/lucene/core/src/java/org/apache/lucene/search/TwoPhaseIterator.java b/lucene/core/src/java/org/apache/lucene/search/TwoPhaseIterator.java index 3d774c5bdc5..cb947921450 100644 --- a/lucene/core/src/java/org/apache/lucene/search/TwoPhaseIterator.java +++ b/lucene/core/src/java/org/apache/lucene/search/TwoPhaseIterator.java @@ -84,15 +84,23 @@ public abstract class TwoPhaseIterator { return approximation; } - /** Return whether the current doc ID that the iterator is on matches. This + /** Return whether the current doc ID that {@link #approximation()} is on matches. 
This * method should only be called when the iterator is positioned -- ie. not * when {@link DocIdSetIterator#docID()} is {@code -1} or * {@link DocIdSetIterator#NO_MORE_DOCS} -- and at most once. */ public abstract boolean matches() throws IOException; + /** An estimate of the expected cost to determine that a single document {@link #matches()}. + * This can be called before iterating the documents of {@link #approximation()}. + * Returns an expected cost in number of simple operations like addition, multiplication, + * comparing two numbers and indexing an array. + * The returned value must be positive. + */ + public abstract float matchCost(); + /** * Returns a {@link TwoPhaseIterator} for this {@link DocIdSetIterator} - * when available * otherwise returns null. + * when available, otherwise returns null. */ public static TwoPhaseIterator asTwoPhaseIterator(DocIdSetIterator iter) { return (iter instanceof Scorer) diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/ConjunctionSpans.java b/lucene/core/src/java/org/apache/lucene/search/spans/ConjunctionSpans.java index fcc24846b43..533714dd34c 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/ConjunctionSpans.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/ConjunctionSpans.java @@ -88,14 +88,34 @@ abstract class ConjunctionSpans extends Spans { */ @Override public TwoPhaseIterator asTwoPhaseIterator() { - TwoPhaseIterator res = new TwoPhaseIterator(conjunction) { + float totalMatchCost = 0; + // Compute the matchCost as the total matchCost/positionsCost of the sub spans. 
+ for (Spans spans : subSpans) { + TwoPhaseIterator tpi = spans.asTwoPhaseIterator(); + if (tpi != null) { + totalMatchCost += tpi.matchCost(); + } else { + totalMatchCost += spans.positionsCost(); + } + } + final float matchCost = totalMatchCost; + return new TwoPhaseIterator(conjunction) { @Override public boolean matches() throws IOException { return twoPhaseCurrentDocMatches(); } + + @Override + public float matchCost() { + return matchCost; + } }; - return res; + } + + @Override + public float positionsCost() { + throw new UnsupportedOperationException(); // asTwoPhaseIterator never returns null here. } public Spans[] getSubSpans() { diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/FilterSpans.java b/lucene/core/src/java/org/apache/lucene/search/spans/FilterSpans.java index e4ec1b527c8..21a72f473d2 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/FilterSpans.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/FilterSpans.java @@ -142,6 +142,16 @@ public abstract class FilterSpans extends Spans { public boolean matches() throws IOException { return inner.matches() && twoPhaseCurrentDocMatches(); } + + @Override + public float matchCost() { + return inner.matchCost(); // underestimate + } + + @Override + public String toString() { + return "FilterSpans@asTwoPhaseIterator(inner=" + inner + ", in=" + in + ")"; + } }; } else { // wrapped instance has no approximation, but @@ -151,10 +161,25 @@ public abstract class FilterSpans extends Spans { public boolean matches() throws IOException { return twoPhaseCurrentDocMatches(); } + + @Override + public float matchCost() { + return in.positionsCost(); // overestimate + } + + @Override + public String toString() { + return "FilterSpans@asTwoPhaseIterator(in=" + in + ")"; + } }; } } + @Override + public float positionsCost() { + throw new UnsupportedOperationException(); // asTwoPhaseIterator never returns null + } + /** * Returns true if the current document matches. *

diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/NearSpansUnordered.java b/lucene/core/src/java/org/apache/lucene/search/spans/NearSpansUnordered.java index bd40addf54d..cf92e6f6a64 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/NearSpansUnordered.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/NearSpansUnordered.java @@ -133,6 +133,11 @@ public class NearSpansUnordered extends ConjunctionSpans { return in.asTwoPhaseIterator(); } + @Override + public float positionsCost() { + return in.positionsCost(); + } + @Override public int docID() { return in.docID(); diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/ScoringWrapperSpans.java b/lucene/core/src/java/org/apache/lucene/search/spans/ScoringWrapperSpans.java index a409477da71..62744663750 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/ScoringWrapperSpans.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/ScoringWrapperSpans.java @@ -20,6 +20,7 @@ package org.apache.lucene.search.spans; import java.io.IOException; import org.apache.lucene.search.similarities.Similarity; +import org.apache.lucene.search.TwoPhaseIterator; /** * A Spans that wraps another Spans with a different SimScorer @@ -82,4 +83,14 @@ public class ScoringWrapperSpans extends Spans { public long cost() { return in.cost(); } + + @Override + public TwoPhaseIterator asTwoPhaseIterator() { + return in.asTwoPhaseIterator(); + } + + @Override + public float positionsCost() { + return in.positionsCost(); + } } diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanNearQuery.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanNearQuery.java index 3fd1703bed9..33c7d924d0b 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanNearQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanNearQuery.java @@ -384,6 +384,11 @@ public class SpanNearQuery extends SpanQuery implements Cloneable { public long cost() { 
return 0; } + + @Override + public float positionsCost() { + throw new UnsupportedOperationException(); + } } } diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanOrQuery.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanOrQuery.java index 9c39f4171f6..6fadd60e685 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanOrQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanOrQuery.java @@ -210,26 +210,58 @@ public final class SpanOrQuery extends SpanQuery { @Override public TwoPhaseIterator asTwoPhaseIterator() { - boolean hasApproximation = false; + float sumMatchCost = 0; // See also DisjunctionScorer.asTwoPhaseIterator() + long sumApproxCost = 0; + for (DisiWrapper w : byDocQueue) { if (w.twoPhaseView != null) { - hasApproximation = true; - break; + long costWeight = (w.cost <= 1) ? 1 : w.cost; + sumMatchCost += w.twoPhaseView.matchCost() * costWeight; + sumApproxCost += costWeight; } } - if (!hasApproximation) { // none of the sub spans supports approximations + if (sumApproxCost == 0) { // no sub spans supports approximations + computePositionsCost(); return null; } + final float matchCost = sumMatchCost / sumApproxCost; + return new TwoPhaseIterator(new DisjunctionDISIApproximation(byDocQueue)) { @Override public boolean matches() throws IOException { return twoPhaseCurrentDocMatches(); } + + @Override + public float matchCost() { + return matchCost; + } }; } + float positionsCost = -1; + + void computePositionsCost() { + float sumPositionsCost = 0; + long sumCost = 0; + for (DisiWrapper w : byDocQueue) { + long costWeight = (w.cost <= 1) ? 1 : w.cost; + sumPositionsCost += w.iterator.positionsCost() * costWeight; + sumCost += costWeight; + } + positionsCost = sumPositionsCost / sumCost; + } + + @Override + public float positionsCost() { + // This may be called when asTwoPhaseIterator returned null, + // which happens when none of the sub spans supports approximations. 
+ assert positionsCost > 0; + return positionsCost; + } + int lastDocTwoPhaseMatched = -1; boolean twoPhaseCurrentDocMatches() throws IOException { diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanTermQuery.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanTermQuery.java index be755758bf7..5a8ffb441d3 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanTermQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanTermQuery.java @@ -117,10 +117,40 @@ public class SpanTermQuery extends SpanQuery { termsEnum.seekExact(term.bytes(), state); final PostingsEnum postings = termsEnum.postings(null, requiredPostings.getRequiredPostings()); - return new TermSpans(this, getSimScorer(context), postings, term); + float positionsCost = termPositionsCost(termsEnum) * PHRASE_TO_SPAN_TERM_POSITIONS_COST; + return new TermSpans(this, getSimScorer(context), postings, term, positionsCost); } } + /** A guess of + * the relative cost of dealing with the term positions + * when using a SpanNearQuery instead of a PhraseQuery. + */ + private static final float PHRASE_TO_SPAN_TERM_POSITIONS_COST = 4.0f; + + private static final int TERM_POSNS_SEEK_OPS_PER_DOC = 128; + + private static final int TERM_OPS_PER_POS = 7; + + /** Returns an expected cost in simple operations + * of processing the occurrences of a term + * in a document that contains the term. + *
This may be inaccurate when {@link TermsEnum#totalTermFreq()} is not available. + * @param termsEnum The term is the term at which this TermsEnum is positioned. + *

+ * This is a copy of org.apache.lucene.search.PhraseQuery.termPositionsCost(). + *
+ * TODO: keep only a single copy of this method and the constants used in it + * when SpanTermQuery moves to the o.a.l.search package. + */ + static float termPositionsCost(TermsEnum termsEnum) throws IOException { + int docFreq = termsEnum.docFreq(); + assert docFreq > 0; + long totalTermFreq = termsEnum.totalTermFreq(); // -1 when not available + float expOccurrencesInMatchingDoc = (totalTermFreq < docFreq) ? 1 : (totalTermFreq / (float) docFreq); + return TERM_POSNS_SEEK_OPS_PER_DOC + expOccurrencesInMatchingDoc * TERM_OPS_PER_POS; + } + @Override public String toString(String field) { StringBuilder buffer = new StringBuilder(); diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/Spans.java b/lucene/core/src/java/org/apache/lucene/search/spans/Spans.java index fff328ae1af..82d35378958 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/Spans.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/Spans.java @@ -86,6 +86,17 @@ public abstract class Spans extends Scorer { */ public abstract void collect(SpanCollector collector) throws IOException; + /** + * Return an estimation of the cost of using the positions of + * this {@link Spans} for any single document, but only after + * {@link #asTwoPhaseIterator} returned {@code null}. + * Otherwise this method should not be called. + * The returned value is independent of the current document. 
+ * + * @lucene.experimental + */ + public abstract float positionsCost(); + @Override public String toString() { StringBuilder sb = new StringBuilder(); diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/TermSpans.java b/lucene/core/src/java/org/apache/lucene/search/spans/TermSpans.java index 802b7615404..68f3cd41b4d 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/TermSpans.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/TermSpans.java @@ -37,13 +37,17 @@ public class TermSpans extends Spans { protected int count; protected int position; protected boolean readPayload; + private final float positionsCost; - public TermSpans(SpanWeight weight, Similarity.SimScorer scorer, PostingsEnum postings, Term term) { + public TermSpans(SpanWeight weight, Similarity.SimScorer scorer, + PostingsEnum postings, Term term, float positionsCost) { super(weight, scorer); this.postings = Objects.requireNonNull(postings); this.term = Objects.requireNonNull(term); this.doc = -1; this.position = -1; + assert positionsCost > 0; // otherwise the TermSpans should not be created. 
+ this.positionsCost = positionsCost; } @Override @@ -118,6 +122,11 @@ public class TermSpans extends Spans { collector.collectLeaf(postings, position, term); } + @Override + public float positionsCost() { + return positionsCost; + } + @Override public String toString() { return "spans(" + term.toString() + ")@" + @@ -128,5 +137,4 @@ public class TermSpans extends Spans { public PostingsEnum getPostings() { return postings; } - } diff --git a/lucene/core/src/test/org/apache/lucene/search/TestConjunctionDISI.java b/lucene/core/src/test/org/apache/lucene/search/TestConjunctionDISI.java index f62b19dcf7c..c907e6e4306 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestConjunctionDISI.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestConjunctionDISI.java @@ -37,6 +37,11 @@ public class TestConjunctionDISI extends LuceneTestCase { public boolean matches() throws IOException { return confirmed.get(iterator.docID()); } + + @Override + public float matchCost() { + return 5; // #operations in FixedBitSet#get() + } }; } diff --git a/lucene/core/src/test/org/apache/lucene/search/spans/JustCompileSearchSpans.java b/lucene/core/src/test/org/apache/lucene/search/spans/JustCompileSearchSpans.java index ed91bc67af8..f5680e9a1a7 100644 --- a/lucene/core/src/test/org/apache/lucene/search/spans/JustCompileSearchSpans.java +++ b/lucene/core/src/test/org/apache/lucene/search/spans/JustCompileSearchSpans.java @@ -82,6 +82,11 @@ final class JustCompileSearchSpans { public long cost() { throw new UnsupportedOperationException(UNSUPPORTED_MSG); } + + @Override + public float positionsCost() { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } } static final class JustCompileSpanQuery extends SpanQuery { diff --git a/lucene/facet/src/java/org/apache/lucene/facet/range/DoubleRange.java b/lucene/facet/src/java/org/apache/lucene/facet/range/DoubleRange.java index a819f9bcce7..ee51e2f3f63 100644 --- 
a/lucene/facet/src/java/org/apache/lucene/facet/range/DoubleRange.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/range/DoubleRange.java @@ -176,6 +176,11 @@ public final class DoubleRange extends Range { public boolean matches() throws IOException { return range.accept(values.doubleVal(approximation.docID())); } + + @Override + public float matchCost() { + return 100; // TODO: use cost of range.accept() + } }; return new ConstantScoreScorer(this, score(), twoPhase); } diff --git a/lucene/facet/src/java/org/apache/lucene/facet/range/LongRange.java b/lucene/facet/src/java/org/apache/lucene/facet/range/LongRange.java index 66f6e2e81a8..254bc8a6e27 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/range/LongRange.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/range/LongRange.java @@ -168,6 +168,11 @@ public final class LongRange extends Range { public boolean matches() throws IOException { return range.accept(values.longVal(approximation.docID())); } + + @Override + public float matchCost() { + return 100; // TODO: use cost of range.accept() + } }; return new ConstantScoreScorer(this, score(), twoPhase); } diff --git a/lucene/join/src/java/org/apache/lucene/search/join/GlobalOrdinalsQuery.java b/lucene/join/src/java/org/apache/lucene/search/join/GlobalOrdinalsQuery.java index 366932b781a..e0c788099d0 100644 --- a/lucene/join/src/java/org/apache/lucene/search/join/GlobalOrdinalsQuery.java +++ b/lucene/join/src/java/org/apache/lucene/search/join/GlobalOrdinalsQuery.java @@ -184,6 +184,11 @@ final class GlobalOrdinalsQuery extends Query { } return false; } + + @Override + public float matchCost() { + return 100; // TODO: use cost of values.getOrd() and foundOrds.get() + } }; } } @@ -225,6 +230,11 @@ final class GlobalOrdinalsQuery extends Query { } return false; } + + @Override + public float matchCost() { + return 100; // TODO: use cost of values.getOrd() and foundOrds.get() + } }; } diff --git 
a/lucene/join/src/java/org/apache/lucene/search/join/GlobalOrdinalsWithScoreQuery.java b/lucene/join/src/java/org/apache/lucene/search/join/GlobalOrdinalsWithScoreQuery.java index 385b302c91b..c7763b7684f 100644 --- a/lucene/join/src/java/org/apache/lucene/search/join/GlobalOrdinalsWithScoreQuery.java +++ b/lucene/join/src/java/org/apache/lucene/search/join/GlobalOrdinalsWithScoreQuery.java @@ -211,6 +211,10 @@ final class GlobalOrdinalsWithScoreQuery extends Query { return false; } + @Override + public float matchCost() { + return 100; // TODO: use cost of values.getOrd() and collector.score() + } }; } } @@ -253,6 +257,11 @@ final class GlobalOrdinalsWithScoreQuery extends Query { } return false; } + + @Override + public float matchCost() { + return 100; // TODO: use cost of values.getOrd() and collector.score() + } }; } } diff --git a/lucene/queries/src/java/org/apache/lucene/queries/function/ValueSourceScorer.java b/lucene/queries/src/java/org/apache/lucene/queries/function/ValueSourceScorer.java index c8e946e100c..a071a954725 100644 --- a/lucene/queries/src/java/org/apache/lucene/queries/function/ValueSourceScorer.java +++ b/lucene/queries/src/java/org/apache/lucene/queries/function/ValueSourceScorer.java @@ -53,6 +53,11 @@ public abstract class ValueSourceScorer extends Scorer { public boolean matches() throws IOException { return ValueSourceScorer.this.matches(docID()); } + + @Override + public float matchCost() { + return 100; // TODO: use cost of ValueSourceScorer.this.matches() + } }; this.disi = TwoPhaseIterator.asDocIdSetIterator(twoPhaseIterator); } diff --git a/lucene/queries/src/java/org/apache/lucene/queries/payloads/PayloadScoreQuery.java b/lucene/queries/src/java/org/apache/lucene/queries/payloads/PayloadScoreQuery.java index c805581d020..9602bd689f8 100644 --- a/lucene/queries/src/java/org/apache/lucene/queries/payloads/PayloadScoreQuery.java +++ b/lucene/queries/src/java/org/apache/lucene/queries/payloads/PayloadScoreQuery.java @@ -274,6 +274,11 
@@ public class PayloadScoreQuery extends SpanQuery { public long cost() { return in.cost(); } + + @Override + public float positionsCost() { + return in.positionsCost(); + } } } diff --git a/lucene/spatial/src/java/org/apache/lucene/spatial/composite/CompositeVerifyQuery.java b/lucene/spatial/src/java/org/apache/lucene/spatial/composite/CompositeVerifyQuery.java index d49fb4108b0..a7ccfb5f7a3 100644 --- a/lucene/spatial/src/java/org/apache/lucene/spatial/composite/CompositeVerifyQuery.java +++ b/lucene/spatial/src/java/org/apache/lucene/spatial/composite/CompositeVerifyQuery.java @@ -108,6 +108,11 @@ public class CompositeVerifyQuery extends Query { public boolean matches() throws IOException { return predFuncValues.boolVal(indexQueryScorer.docID()); } + + @Override + public float matchCost() { + return 100; // TODO: use cost of predFuncValues.boolVal() + } }; return new ConstantScoreScorer(this, score(), twoPhaseIterator); diff --git a/lucene/spatial/src/java/org/apache/lucene/spatial/composite/IntersectsRPTVerifyQuery.java b/lucene/spatial/src/java/org/apache/lucene/spatial/composite/IntersectsRPTVerifyQuery.java index 798550f031c..7810c21c8c6 100644 --- a/lucene/spatial/src/java/org/apache/lucene/spatial/composite/IntersectsRPTVerifyQuery.java +++ b/lucene/spatial/src/java/org/apache/lucene/spatial/composite/IntersectsRPTVerifyQuery.java @@ -130,6 +130,11 @@ public class IntersectsRPTVerifyQuery extends Query { return predFuncValues.boolVal(doc); } + + @Override + public float matchCost() { + return 100; // TODO: use cost of exactIterator.advance() and predFuncValues.boolVal() + } }; return new ConstantScoreScorer(this, score(), twoPhaseIterator); diff --git a/lucene/test-framework/src/java/org/apache/lucene/search/AssertingScorer.java b/lucene/test-framework/src/java/org/apache/lucene/search/AssertingScorer.java index 2bc61efca38..78f6f6cc52e 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/search/AssertingScorer.java +++ 
b/lucene/test-framework/src/java/org/apache/lucene/search/AssertingScorer.java @@ -195,6 +195,19 @@ public class AssertingScorer extends Scorer { } return matches; } + + @Override + public float matchCost() { + float matchCost = in.matchCost(); + assert ! Float.isNaN(matchCost); + assert matchCost >= 0; + return matchCost; + } + + @Override + public String toString() { + return "AssertingScorer@asTwoPhaseIterator(" + in + ")"; + } }; } } diff --git a/lucene/test-framework/src/java/org/apache/lucene/search/RandomApproximationQuery.java b/lucene/test-framework/src/java/org/apache/lucene/search/RandomApproximationQuery.java index 88cfd77cb4a..d9f97fef619 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/search/RandomApproximationQuery.java +++ b/lucene/test-framework/src/java/org/apache/lucene/search/RandomApproximationQuery.java @@ -1,16 +1,5 @@ package org.apache.lucene.search; -import java.io.IOException; -import java.util.Random; -import java.util.Set; - -import com.carrotsearch.randomizedtesting.generators.RandomInts; - -import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.LeafReaderContext; -import org.apache.lucene.index.Term; -import org.apache.lucene.util.Bits; - /* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with @@ -28,6 +17,16 @@ import org.apache.lucene.util.Bits; * limitations under the License. */ +import java.io.IOException; +import java.util.Random; +import java.util.Set; + +import com.carrotsearch.randomizedtesting.generators.RandomInts; + +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.Term; + /** * A {@link Query} that adds random approximations to its scorers. 
*/ @@ -172,10 +171,12 @@ public class RandomApproximationQuery extends Query { private final DocIdSetIterator disi; private int lastDoc = -1; + private final float randomMatchCost; RandomTwoPhaseView(Random random, DocIdSetIterator disi) { super(new RandomApproximation(random, disi)); this.disi = disi; + this.randomMatchCost = random.nextFloat() * 200; // between 0 and 200 } @Override @@ -190,6 +191,10 @@ public class RandomApproximationQuery extends Query { return approximation.docID() == disi.docID(); } + @Override + public float matchCost() { + return randomMatchCost; + } } private static class RandomApproximation extends DocIdSetIterator { diff --git a/lucene/test-framework/src/java/org/apache/lucene/search/spans/AssertingSpans.java b/lucene/test-framework/src/java/org/apache/lucene/search/spans/AssertingSpans.java index 89a4ed2d579..221c42b8abf 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/search/spans/AssertingSpans.java +++ b/lucene/test-framework/src/java/org/apache/lucene/search/spans/AssertingSpans.java @@ -190,6 +190,14 @@ class AssertingSpans extends Spans { return in.cost(); } + @Override + public float positionsCost() { + float cost = in.positionsCost(); + assert ! 
Float.isNaN(cost) : "positionsCost() should not be NaN"; + assert cost > 0 : "positionsCost() must be positive"; + return cost; + } + @Override protected float scoreCurrentDoc() throws IOException { assert in.docScorer != null : in.getClass() + " has no docScorer!"; @@ -229,6 +237,18 @@ class AssertingSpans extends Spans { } return v; } + + @Override + public float matchCost() { + float cost = in.matchCost(); + if (Float.isNaN(cost)) { + throw new AssertionError("matchCost()=" + cost + " should not be NaN on doc ID " + approximation.docID()); + } + if (cost < 0) { + throw new AssertionError("matchCost()=" + cost + " should be non negative on doc ID " + approximation.docID()); + } + return cost; + } } class AssertingDISI extends DocIdSetIterator { diff --git a/solr/core/src/java/org/apache/solr/search/Filter.java b/solr/core/src/java/org/apache/solr/search/Filter.java index 6f968a8d634..98c5d2d896d 100644 --- a/solr/core/src/java/org/apache/solr/search/Filter.java +++ b/solr/core/src/java/org/apache/solr/search/Filter.java @@ -129,6 +129,11 @@ public abstract class Filter extends Query { public boolean matches() throws IOException { return bits.get(approximation.docID()); } + + @Override + public float matchCost() { + return 10; // TODO use cost of bits.get() + } }; return new ConstantScoreScorer(this, 0f, twoPhase); }