LUCENE-6276: Added TwoPhaseIterator.matchCost().

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1714261 13f79535-47bb-0310-9956-ffa450edef68
2015-11-13 20:08:01 +00:00 · 2015-11-13 20:08:01 +00:00 · 0ed54b3105
parent 56b0a46f10
commit 0ed54b3105
33 changed files with 408 additions and 35 deletions
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@ -206,6 +206,9 @@ Optimizations
 * LUCENE-6892: various lucene.index initialCapacity tweaks
  (Christine Poerschke)

+* LUCENE-6276: Added TwoPhaseIterator.matchCost(), which allows confirming the
+  least costly TwoPhaseIterators first. (Paul Elschot via Adrien Grand)
+
 Bug Fixes

 * LUCENE-6817: ComplexPhraseQueryParser.ComplexPhraseQuery does not display 
--- a/lucene/core/src/java/org/apache/lucene/search/ConjunctionDISI.java
+++ b/lucene/core/src/java/org/apache/lucene/search/ConjunctionDISI.java
@ -155,7 +155,7 @@ public class ConjunctionDISI extends DocIdSetIterator {

  @Override
  public long cost() {
-    return lead.cost();
+    return lead.cost(); // overestimate
  }

  /**
@ -164,16 +164,33 @@ public class ConjunctionDISI extends DocIdSetIterator {
  private static class TwoPhaseConjunctionDISI extends TwoPhaseIterator {

    private final TwoPhaseIterator[] twoPhaseIterators;
+    private final float matchCost;

    private TwoPhaseConjunctionDISI(List<? extends DocIdSetIterator> iterators, List<TwoPhaseIterator> twoPhaseIterators) {
      super(new ConjunctionDISI(iterators));
      assert twoPhaseIterators.size() > 0;
+
+      CollectionUtil.timSort(twoPhaseIterators, new Comparator<TwoPhaseIterator>() {
+        @Override
+        public int compare(TwoPhaseIterator o1, TwoPhaseIterator o2) {
+          return Float.compare(o1.matchCost(), o2.matchCost());
+        }
+      });
+
      this.twoPhaseIterators = twoPhaseIterators.toArray(new TwoPhaseIterator[twoPhaseIterators.size()]);
+
+      // Compute the matchCost as the total matchCost of the sub iterators.
+      // TODO: This could be too high because the matching is done cheapest first: give the lower matchCosts a higher weight.
+      float totalMatchCost = 0;
+      for (TwoPhaseIterator tpi : twoPhaseIterators) {
+        totalMatchCost += tpi.matchCost();
+      }
+      matchCost = totalMatchCost;
    }

    @Override
    public boolean matches() throws IOException {
-      for (TwoPhaseIterator twoPhaseIterator : twoPhaseIterators) {
+      for (TwoPhaseIterator twoPhaseIterator : twoPhaseIterators) { // match cheapest first
        if (twoPhaseIterator.matches() == false) {
          return false;
        }
@ -181,6 +198,11 @@ public class ConjunctionDISI extends DocIdSetIterator {
      return true;
    }

+    @Override
+    public float matchCost() {
+      return matchCost;
+    }
+
  }

  /**
--- a/lucene/core/src/java/org/apache/lucene/search/DisjunctionScorer.java
+++ b/lucene/core/src/java/org/apache/lucene/search/DisjunctionScorer.java
@ -52,19 +52,25 @@ abstract class DisjunctionScorer extends Scorer {

  @Override
  public TwoPhaseIterator asTwoPhaseIterator() {
-    boolean hasApproximation = false;
+    float sumMatchCost = 0;
+    long sumApproxCost = 0;
+
+    // Compute matchCost as the average over the matchCost of the subScorers.
+    // This is weighted by the cost, which is an expected number of matching documents.
    for (DisiWrapper<Scorer> w : subScorers) {
      if (w.twoPhaseView != null) {
-        hasApproximation = true;
-        break;
+        long costWeight = (w.cost <= 1) ? 1 : w.cost;
+        sumMatchCost += w.twoPhaseView.matchCost() * costWeight;
+        sumApproxCost += costWeight;
      }
    }

-    if (! hasApproximation) {
-      // none of the sub scorers supports approximations
+    if (sumApproxCost == 0) { // no sub scorer supports approximations
      return null;
    }

+    final float matchCost = sumMatchCost / sumApproxCost;
+
    // note it is important to share the same pq as this scorer so that
    // rebalancing the pq through the approximation will also rebalance
    // the pq in this scorer.
@ -105,6 +111,11 @@ abstract class DisjunctionScorer extends Scorer {
        DisjunctionScorer.this.topScorers = topScorers;
        return true;
      }
+
+      @Override
+      public float matchCost() {
+        return matchCost;
+      }
    };
  }

--- a/lucene/core/src/java/org/apache/lucene/search/ExactPhraseScorer.java
+++ b/lucene/core/src/java/org/apache/lucene/search/ExactPhraseScorer.java
@ -44,9 +44,11 @@ final class ExactPhraseScorer extends Scorer {

  private final Similarity.SimScorer docScorer;
  private final boolean needsScores;
+  private float matchCost;

  ExactPhraseScorer(Weight weight, PhraseQuery.PostingsAndFreq[] postings,
-                    Similarity.SimScorer docScorer, boolean needsScores) throws IOException {
+                    Similarity.SimScorer docScorer, boolean needsScores,
+                    float matchCost) throws IOException {
    super(weight);
    this.docScorer = docScorer;
    this.needsScores = needsScores;
@ -59,6 +61,7 @@ final class ExactPhraseScorer extends Scorer {
    }
    conjunction = ConjunctionDISI.intersect(iterators);
    this.postings = postingsAndPositions.toArray(new PostingsAndPosition[postingsAndPositions.size()]);
+    this.matchCost = matchCost;
  }

  @Override
@ -68,6 +71,11 @@ final class ExactPhraseScorer extends Scorer {
      public boolean matches() throws IOException {
        return phraseFreq() > 0;
      }
+
+      @Override
+      public float matchCost() {
+        return matchCost;
+      }
    };
  }

--- a/lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java
+++ b/lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java
@ -189,6 +189,7 @@ public class MultiPhraseQuery extends Query {

      // Reuse single TermsEnum below:
      final TermsEnum termsEnum = fieldTerms.iterator();
+      float totalMatchCost = 0;

      for (int pos=0; pos<postingsFreqs.length; pos++) {
        Term[] terms = termArrays.get(pos);
@ -199,6 +200,7 @@ public class MultiPhraseQuery extends Query {
          if (termState != null) {
            termsEnum.seekExact(term.bytes(), termState);
            postings.add(termsEnum.postings(null, PostingsEnum.POSITIONS));
+            totalMatchCost += PhraseQuery.termPositionsCost(termsEnum);
          }
        }
        
@ -222,9 +224,13 @@ public class MultiPhraseQuery extends Query {
      }

      if (slop == 0) {
-        return new ExactPhraseScorer(this, postingsFreqs, similarity.simScorer(stats, context), needsScores);
+        return new ExactPhraseScorer(this, postingsFreqs,
+                                      similarity.simScorer(stats, context),
+                                      needsScores, totalMatchCost);
      } else {
-        return new SloppyPhraseScorer(this, postingsFreqs, slop, similarity.simScorer(stats, context), needsScores);
+        return new SloppyPhraseScorer(this, postingsFreqs, slop,
+                                        similarity.simScorer(stats, context),
+                                        needsScores, totalMatchCost);
      }
    }

--- a/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java
+++ b/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java
@ -24,6 +24,8 @@ import java.util.Collections;
 import java.util.List;
 import java.util.Set;

+import org.apache.lucene.codecs.lucene50.Lucene50PostingsFormat;
+import org.apache.lucene.codecs.lucene50.Lucene50PostingsReader;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexReaderContext;
 import org.apache.lucene.index.LeafReader;
@ -405,6 +407,7 @@ public class PhraseQuery extends Query {

      // Reuse single TermsEnum below:
      final TermsEnum te = fieldTerms.iterator();
+      float totalMatchCost = 0;
      
      for (int i = 0; i < terms.length; i++) {
        final Term t = terms[i];
@ -416,6 +419,7 @@ public class PhraseQuery extends Query {
        te.seekExact(t.bytes(), state);
        PostingsEnum postingsEnum = te.postings(null, PostingsEnum.POSITIONS);
        postingsFreqs[i] = new PostingsAndFreq(postingsEnum, positions[i], t);
+        totalMatchCost += termPositionsCost(te);
      }

      // sort by increasing docFreq order
@ -424,9 +428,13 @@ public class PhraseQuery extends Query {
      }

      if (slop == 0) {  // optimize exact case
-        return new ExactPhraseScorer(this, postingsFreqs, similarity.simScorer(stats, context), needsScores);
+        return new ExactPhraseScorer(this, postingsFreqs,
+                                      similarity.simScorer(stats, context),
+                                      needsScores, totalMatchCost);
      } else {
-        return new SloppyPhraseScorer(this, postingsFreqs, slop, similarity.simScorer(stats, context), needsScores);
+        return new SloppyPhraseScorer(this, postingsFreqs, slop,
+                                        similarity.simScorer(stats, context),
+                                        needsScores, totalMatchCost);
      }
    }
    
@ -456,6 +464,42 @@ public class PhraseQuery extends Query {
    }
  }

+  /** A guess of
+   * the average number of simple operations for the initial seek and buffer refill
+   * per document for the positions of a term.
+   * See also {@link Lucene50PostingsReader.BlockPostingsEnum#nextPosition()}.
+   * <p>
+   * Aside: Instead of being constant this could depend among others on
+   * {@link Lucene50PostingsFormat#BLOCK_SIZE},
+   * {@link TermsEnum#docFreq()},
+   * {@link TermsEnum#totalTermFreq()},
+   * {@link DocIdSetIterator#cost()} (expected number of matching docs),
+   * {@link LeafReader#maxDoc()} (total number of docs in the segment),
+   * and the seek time and block size of the device storing the index.
+   */
+  private static final int TERM_POSNS_SEEK_OPS_PER_DOC = 128;
+
+  /** Number of simple operations in {@link Lucene50PostingsReader.BlockPostingsEnum#nextPosition()}
+   *  when no seek or buffer refill is done.
+   */
+  private static final int TERM_OPS_PER_POS = 7;
+
+  /** Returns an expected cost in simple operations
+   *  of processing the occurrences of a term
+   *  in a document that contains the term.
+   *  This is for use by {@link TwoPhaseIterator#matchCost} implementations.
+   *  <br>This may be inaccurate when {@link TermsEnum#totalTermFreq()} is not available.
+   *  @param termsEnum The term is the term at which this TermsEnum is positioned.
+   */
+  static float termPositionsCost(TermsEnum termsEnum) throws IOException {
+    int docFreq = termsEnum.docFreq();
+    assert docFreq > 0;
+    long totalTermFreq = termsEnum.totalTermFreq(); // -1 when not available
+    float expOccurrencesInMatchingDoc = (totalTermFreq < docFreq) ? 1 : (totalTermFreq / (float) docFreq);
+    return TERM_POSNS_SEEK_OPS_PER_DOC + expOccurrencesInMatchingDoc * TERM_OPS_PER_POS;
+  }
+
+
  @Override
  public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
    return new PhraseWeight(searcher, needsScores);
--- a/lucene/core/src/java/org/apache/lucene/search/RandomAccessWeight.java
+++ b/lucene/core/src/java/org/apache/lucene/search/RandomAccessWeight.java
@ -62,6 +62,11 @@ public abstract class RandomAccessWeight extends ConstantScoreWeight {

        return matchingDocs.get(doc);
      }
+
+      @Override
+      public float matchCost() {
+        return 10; // TODO: use some cost of matchingDocs
+      }
    };

    return new ConstantScoreScorer(this, score(), twoPhase);
--- a/lucene/core/src/java/org/apache/lucene/search/ReqExclScorer.java
+++ b/lucene/core/src/java/org/apache/lucene/search/ReqExclScorer.java
@ -149,6 +149,10 @@ class ReqExclScorer extends Scorer {
        return ReqExclScorer.matches(doc, exclDoc, reqTwoPhaseIterator, exclTwoPhaseIterator);
      }

+      @Override
+      public float matchCost() {
+        return reqTwoPhaseIterator.matchCost(); // TODO: also use cost of exclApproximation.advance()
+      }
    };
  }
 }
--- a/lucene/core/src/java/org/apache/lucene/search/SloppyPhraseScorer.java
+++ b/lucene/core/src/java/org/apache/lucene/search/SloppyPhraseScorer.java
@ -52,9 +52,11 @@ final class SloppyPhraseScorer extends Scorer {
  
  private int numMatches;
  final boolean needsScores;
+  private final float matchCost;
  
  SloppyPhraseScorer(Weight weight, PhraseQuery.PostingsAndFreq[] postings,
-      int slop, Similarity.SimScorer docScorer, boolean needsScores) {
+      int slop, Similarity.SimScorer docScorer, boolean needsScores,
+      float matchCost) {
    super(weight);
    this.docScorer = docScorer;
    this.needsScores = needsScores;
@ -68,6 +70,7 @@ final class SloppyPhraseScorer extends Scorer {
      phrasePositions[i] = new PhrasePositions(postings[i].postings, postings[i].position, i, postings[i].terms);
    }
    conjunction = ConjunctionDISI.intersect(Arrays.asList(iterators));
+    this.matchCost = matchCost;
  }

  /**
@ -596,6 +599,16 @@ final class SloppyPhraseScorer extends Scorer {
        sloppyFreq = phraseFreq(); // check for phrase
        return sloppyFreq != 0F;
      }
+
+      @Override
+      public float matchCost() {
+        return matchCost;
+      }
+
+      @Override
+      public String toString() {
+        return "SloppyPhraseScorer@asTwoPhaseIterator(" + SloppyPhraseScorer.this + ")";
+      }
    };
  }
 }
--- a/lucene/core/src/java/org/apache/lucene/search/TwoPhaseIterator.java
+++ b/lucene/core/src/java/org/apache/lucene/search/TwoPhaseIterator.java
@ -84,15 +84,23 @@ public abstract class TwoPhaseIterator {
    return approximation;
  }

-  /** Return whether the current doc ID that the iterator is on matches. This
+  /** Return whether the current doc ID that {@link #approximation()} is on matches. This
   *  method should only be called when the iterator is positioned -- ie. not
   *  when {@link DocIdSetIterator#docID()} is {@code -1} or
   *  {@link DocIdSetIterator#NO_MORE_DOCS} -- and at most once. */
  public abstract boolean matches() throws IOException;

+  /** An estimate of the expected cost to determine that a single document {@link #matches()}.
+   *  This can be called before iterating the documents of {@link #approximation()}.
+   *  Returns an expected cost in number of simple operations like addition, multiplication,
+   *  comparing two numbers and indexing an array.
+   *  The returned value must be positive.
+   */
+  public abstract float matchCost();
+
  /**
   * Returns a {@link TwoPhaseIterator} for this {@link DocIdSetIterator}
-   * when available * otherwise returns null.
+   * when available, otherwise returns null.
   */
  public static TwoPhaseIterator asTwoPhaseIterator(DocIdSetIterator iter) {
    return (iter instanceof Scorer)
--- a/lucene/core/src/java/org/apache/lucene/search/spans/ConjunctionSpans.java
+++ b/lucene/core/src/java/org/apache/lucene/search/spans/ConjunctionSpans.java
@ -88,14 +88,34 @@ abstract class ConjunctionSpans extends Spans {
   */
  @Override
  public TwoPhaseIterator asTwoPhaseIterator() {
-    TwoPhaseIterator res = new TwoPhaseIterator(conjunction) {
+    float totalMatchCost = 0;
+    // Compute the matchCost as the total matchCost/positionsCost of the sub spans.
+    for (Spans spans : subSpans) {
+      TwoPhaseIterator tpi = spans.asTwoPhaseIterator();
+      if (tpi != null) {
+        totalMatchCost += tpi.matchCost();
+      } else {
+        totalMatchCost += spans.positionsCost();
+      }
+    }
+    final float matchCost = totalMatchCost;

+    return new TwoPhaseIterator(conjunction) {
      @Override
      public boolean matches() throws IOException {
        return twoPhaseCurrentDocMatches();
      }
+
+      @Override
+      public float matchCost() {
+        return matchCost;
+      }
    };
-    return res;
+  }
+
+  @Override
+  public float positionsCost() {
+    throw new UnsupportedOperationException(); // asTwoPhaseIterator never returns null here.
  }

  public Spans[] getSubSpans() {
--- a/lucene/core/src/java/org/apache/lucene/search/spans/FilterSpans.java
+++ b/lucene/core/src/java/org/apache/lucene/search/spans/FilterSpans.java
@ -142,6 +142,16 @@ public abstract class FilterSpans extends Spans {
        public boolean matches() throws IOException {
          return inner.matches() && twoPhaseCurrentDocMatches();
        }
+
+        @Override
+        public float matchCost() {
+          return inner.matchCost(); // underestimate
+        }
+
+        @Override
+        public String toString() {
+          return "FilterSpans@asTwoPhaseIterator(inner=" + inner + ", in=" + in + ")";
+        }
      };
    } else {
      // wrapped instance has no approximation, but 
@ -151,10 +161,25 @@ public abstract class FilterSpans extends Spans {
        public boolean matches() throws IOException {
          return twoPhaseCurrentDocMatches();
        }
+
+        @Override
+        public float matchCost() {
+          return in.positionsCost(); // overestimate
+        }
+
+        @Override
+        public String toString() {
+          return "FilterSpans@asTwoPhaseIterator(in=" + in + ")";
+        }
      };
    }
  }
  
+  @Override
+  public float positionsCost() {
+    throw new UnsupportedOperationException(); // asTwoPhaseIterator never returns null
+  }
+
  /**
   * Returns true if the current document matches.
   * <p>
--- a/lucene/core/src/java/org/apache/lucene/search/spans/NearSpansUnordered.java
+++ b/lucene/core/src/java/org/apache/lucene/search/spans/NearSpansUnordered.java
@ -133,6 +133,11 @@ public class NearSpansUnordered extends ConjunctionSpans {
      return in.asTwoPhaseIterator();
    }

+    @Override
+    public float positionsCost() {
+      return in.positionsCost();
+    }
+
    @Override
    public int docID() {
      return in.docID();
--- a/lucene/core/src/java/org/apache/lucene/search/spans/ScoringWrapperSpans.java
+++ b/lucene/core/src/java/org/apache/lucene/search/spans/ScoringWrapperSpans.java
@ -20,6 +20,7 @@ package org.apache.lucene.search.spans;
 import java.io.IOException;

 import org.apache.lucene.search.similarities.Similarity;
+import org.apache.lucene.search.TwoPhaseIterator;

 /**
 * A Spans that wraps another Spans with a different SimScorer
@ -82,4 +83,14 @@ public class ScoringWrapperSpans extends Spans {
  public long cost() {
    return in.cost();
  }
+
+  @Override
+  public TwoPhaseIterator asTwoPhaseIterator() {
+    return in.asTwoPhaseIterator();
+  }
+
+  @Override
+  public float positionsCost() {
+    return in.positionsCost();
+  }
 }
--- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanNearQuery.java
+++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanNearQuery.java
@ -384,6 +384,11 @@ public class SpanNearQuery extends SpanQuery implements Cloneable {
    public long cost() {
      return 0;
    }
+
+    @Override
+    public float positionsCost() {
+      throw new UnsupportedOperationException();
+    }
  }

 }
--- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanOrQuery.java
+++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanOrQuery.java
@ -210,26 +210,58 @@ public final class SpanOrQuery extends SpanQuery {

        @Override
        public TwoPhaseIterator asTwoPhaseIterator() {
-          boolean hasApproximation = false;
+          float sumMatchCost = 0; // See also DisjunctionScorer.asTwoPhaseIterator()
+          long sumApproxCost = 0;
+
          for (DisiWrapper<Spans> w : byDocQueue) {
            if (w.twoPhaseView != null) {
-              hasApproximation = true;
-              break;
+              long costWeight = (w.cost <= 1) ? 1 : w.cost;
+              sumMatchCost += w.twoPhaseView.matchCost() * costWeight;
+              sumApproxCost += costWeight;
            }
          }

-          if (!hasApproximation) { // none of the sub spans supports approximations
+          if (sumApproxCost == 0) { // no sub spans supports approximations
+            computePositionsCost();
            return null;
          }

+          final float matchCost = sumMatchCost / sumApproxCost;
+
          return new TwoPhaseIterator(new DisjunctionDISIApproximation<Spans>(byDocQueue)) {
            @Override
            public boolean matches() throws IOException {
              return twoPhaseCurrentDocMatches();
            }
+
+            @Override
+            public float matchCost() {
+              return matchCost;
+            }
          };
        }

+        float positionsCost = -1;
+
+        void computePositionsCost() {
+          float sumPositionsCost = 0;
+          long sumCost = 0;
+          for (DisiWrapper<Spans> w : byDocQueue) {
+            long costWeight = (w.cost <= 1) ? 1 : w.cost;
+            sumPositionsCost += w.iterator.positionsCost() * costWeight;
+            sumCost += costWeight;
+          }
+          positionsCost = sumPositionsCost / sumCost;
+        }
+
+        @Override
+        public float positionsCost() {
+          // This may be called when asTwoPhaseIterator returned null,
+          // which happens when none of the sub spans supports approximations.
+          assert positionsCost > 0;
+          return positionsCost;
+        }
+
        int lastDocTwoPhaseMatched = -1;

        boolean twoPhaseCurrentDocMatches() throws IOException {
--- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanTermQuery.java
+++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanTermQuery.java
@ -117,10 +117,40 @@ public class SpanTermQuery extends SpanQuery {
      termsEnum.seekExact(term.bytes(), state);

      final PostingsEnum postings = termsEnum.postings(null, requiredPostings.getRequiredPostings());
-      return new TermSpans(this, getSimScorer(context), postings, term);
+      float positionsCost = termPositionsCost(termsEnum) * PHRASE_TO_SPAN_TERM_POSITIONS_COST;
+      return new TermSpans(this, getSimScorer(context), postings, term, positionsCost);
    }
  }

+  /** A guess of
+   * the relative cost of dealing with the term positions
+   * when using a SpanNearQuery instead of a PhraseQuery.
+   */
+  private static final float PHRASE_TO_SPAN_TERM_POSITIONS_COST = 4.0f;
+
+  private static final int TERM_POSNS_SEEK_OPS_PER_DOC = 128;
+
+  private static final int TERM_OPS_PER_POS = 7;
+
+  /** Returns an expected cost in simple operations
+   *  of processing the occurrences of a term
+   *  in a document that contains the term.
+   *  <br>This may be inaccurate when {@link TermsEnum#totalTermFreq()} is not available.
+   *  @param termsEnum The term is the term at which this TermsEnum is positioned.
+   *  <p>
+   *  This is a copy of org.apache.lucene.search.PhraseQuery.termPositionsCost().
+   *  <br>
+   *  TODO: keep only a single copy of this method and the constants used in it
+   *  when SpanTermQuery moves to the o.a.l.search package.
+   */
+  static float termPositionsCost(TermsEnum termsEnum) throws IOException {
+    int docFreq = termsEnum.docFreq();
+    assert docFreq > 0;
+    long totalTermFreq = termsEnum.totalTermFreq(); // -1 when not available
+    float expOccurrencesInMatchingDoc = (totalTermFreq < docFreq) ? 1 : (totalTermFreq / (float) docFreq);
+    return TERM_POSNS_SEEK_OPS_PER_DOC + expOccurrencesInMatchingDoc * TERM_OPS_PER_POS;
+  }
+
  @Override
  public String toString(String field) {
    StringBuilder buffer = new StringBuilder();
--- a/lucene/core/src/java/org/apache/lucene/search/spans/Spans.java
+++ b/lucene/core/src/java/org/apache/lucene/search/spans/Spans.java
@ -86,6 +86,17 @@ public abstract class Spans extends Scorer {
   */
  public abstract void collect(SpanCollector collector) throws IOException;

+  /**
+   * Return an estimation of the cost of using the positions of
+   * this {@link Spans} for any single document, but only after
+   * {@link #asTwoPhaseIterator} returned {@code null}.
+   * Otherwise this method should not be called.
+   * The returned value is independent of the current document.
+   *
+   * @lucene.experimental
+   */
+  public abstract float positionsCost();
+
  @Override
  public String toString() {
    StringBuilder sb = new StringBuilder();
--- a/lucene/core/src/java/org/apache/lucene/search/spans/TermSpans.java
+++ b/lucene/core/src/java/org/apache/lucene/search/spans/TermSpans.java
@ -37,13 +37,17 @@ public class TermSpans extends Spans {
  protected int count;
  protected int position;
  protected boolean readPayload;
+  private final float positionsCost;

-  public TermSpans(SpanWeight weight, Similarity.SimScorer scorer, PostingsEnum postings, Term term) {
+  public TermSpans(SpanWeight weight, Similarity.SimScorer scorer,
+                    PostingsEnum postings, Term term, float positionsCost) {
    super(weight, scorer);
    this.postings = Objects.requireNonNull(postings);
    this.term = Objects.requireNonNull(term);
    this.doc = -1;
    this.position = -1;
+    assert positionsCost > 0; // otherwise the TermSpans should not be created.
+    this.positionsCost = positionsCost;
  }

  @Override
@ -118,6 +122,11 @@ public class TermSpans extends Spans {
    collector.collectLeaf(postings, position, term);
  }

+  @Override
+  public float positionsCost() {
+    return positionsCost;
+  }
+
  @Override
  public String toString() {
    return "spans(" + term.toString() + ")@" +
@ -128,5 +137,4 @@ public class TermSpans extends Spans {
  public PostingsEnum getPostings() {
    return postings;
  }
-
 }
--- a/lucene/core/src/test/org/apache/lucene/search/TestConjunctionDISI.java
+++ b/lucene/core/src/test/org/apache/lucene/search/TestConjunctionDISI.java
@ -37,6 +37,11 @@ public class TestConjunctionDISI extends LuceneTestCase {
      public boolean matches() throws IOException {
        return confirmed.get(iterator.docID());
      }
+
+      @Override
+      public float matchCost() {
+        return 5; // #operations in FixedBitSet#get()
+      }
    };
  }

--- a/lucene/core/src/test/org/apache/lucene/search/spans/JustCompileSearchSpans.java
+++ b/lucene/core/src/test/org/apache/lucene/search/spans/JustCompileSearchSpans.java
@ -82,6 +82,11 @@ final class JustCompileSearchSpans {
    public long cost() {
      throw new UnsupportedOperationException(UNSUPPORTED_MSG);
    }
+
+    @Override
+    public float positionsCost() {
+      throw new UnsupportedOperationException(UNSUPPORTED_MSG);
+    }
  }

  static final class JustCompileSpanQuery extends SpanQuery {
--- a/lucene/facet/src/java/org/apache/lucene/facet/range/DoubleRange.java
+++ b/lucene/facet/src/java/org/apache/lucene/facet/range/DoubleRange.java
@ -176,6 +176,11 @@ public final class DoubleRange extends Range {
            public boolean matches() throws IOException {
              return range.accept(values.doubleVal(approximation.docID()));
            }
+
+            @Override
+            public float matchCost() {
+              return 100; // TODO: use cost of range.accept()
+            }
          };
          return new ConstantScoreScorer(this, score(), twoPhase);
        }
--- a/lucene/facet/src/java/org/apache/lucene/facet/range/LongRange.java
+++ b/lucene/facet/src/java/org/apache/lucene/facet/range/LongRange.java
@ -168,6 +168,11 @@ public final class LongRange extends Range {
            public boolean matches() throws IOException {
              return range.accept(values.longVal(approximation.docID()));
            }
+
+            @Override
+            public float matchCost() {
+              return 100; // TODO: use cost of range.accept()
+            }
          };
          return new ConstantScoreScorer(this, score(), twoPhase);
        }
--- a/lucene/join/src/java/org/apache/lucene/search/join/GlobalOrdinalsQuery.java
+++ b/lucene/join/src/java/org/apache/lucene/search/join/GlobalOrdinalsQuery.java
@ -184,6 +184,11 @@ final class GlobalOrdinalsQuery extends Query {
          }
          return false;
        }
+
+        @Override
+        public float matchCost() {
+          return 100; // TODO: use cost of values.getOrd() and foundOrds.get()
+        }
      };
    }
  }
@ -225,6 +230,11 @@ final class GlobalOrdinalsQuery extends Query {
          }
          return false;
        }
+
+        @Override
+        public float matchCost() {
+          return 100; // TODO: use cost of values.getOrd() and foundOrds.get()
+        }
      };
    }

--- a/lucene/join/src/java/org/apache/lucene/search/join/GlobalOrdinalsWithScoreQuery.java
+++ b/lucene/join/src/java/org/apache/lucene/search/join/GlobalOrdinalsWithScoreQuery.java
@ -211,6 +211,10 @@ final class GlobalOrdinalsWithScoreQuery extends Query {
          return false;
        }

+        @Override
+        public float matchCost() {
+          return 100; // TODO: use cost of values.getOrd() and collector.score()
+        }
      };
    }
  }
@ -253,6 +257,11 @@ final class GlobalOrdinalsWithScoreQuery extends Query {
          }
          return false;
        }
+
+        @Override
+        public float matchCost() {
+          return 100; // TODO: use cost of values.getOrd() and collector.score()
+        }
      };
    }
  }
--- a/lucene/queries/src/java/org/apache/lucene/queries/function/ValueSourceScorer.java
+++ b/lucene/queries/src/java/org/apache/lucene/queries/function/ValueSourceScorer.java
@ -53,6 +53,11 @@ public abstract class ValueSourceScorer extends Scorer {
      public boolean matches() throws IOException {
        return ValueSourceScorer.this.matches(docID());
      }
+
+      @Override
+      public float matchCost() {
+        return 100; // TODO: use cost of ValueSourceScorer.this.matches()
+      }
    };
    this.disi = TwoPhaseIterator.asDocIdSetIterator(twoPhaseIterator);
  }
--- a/lucene/queries/src/java/org/apache/lucene/queries/payloads/PayloadScoreQuery.java
+++ b/lucene/queries/src/java/org/apache/lucene/queries/payloads/PayloadScoreQuery.java
@ -274,6 +274,11 @@ public class PayloadScoreQuery extends SpanQuery {
    public long cost() {
      return in.cost();
    }
+
+    @Override
+    public float positionsCost() {
+      return in.positionsCost();
+    }
  }

 }
--- a/lucene/spatial/src/java/org/apache/lucene/spatial/composite/CompositeVerifyQuery.java
+++ b/lucene/spatial/src/java/org/apache/lucene/spatial/composite/CompositeVerifyQuery.java
@ -108,6 +108,11 @@ public class CompositeVerifyQuery extends Query {
          public boolean matches() throws IOException {
            return predFuncValues.boolVal(indexQueryScorer.docID());
          }
+
+          @Override
+          public float matchCost() {
+            return 100; // TODO: use cost of predFuncValues.boolVal()
+          }
        };

        return new ConstantScoreScorer(this, score(), twoPhaseIterator);
--- a/lucene/spatial/src/java/org/apache/lucene/spatial/composite/IntersectsRPTVerifyQuery.java
+++ b/lucene/spatial/src/java/org/apache/lucene/spatial/composite/IntersectsRPTVerifyQuery.java
@ -130,6 +130,11 @@ public class IntersectsRPTVerifyQuery extends Query {

            return predFuncValues.boolVal(doc);
          }
+
+          @Override
+          public float matchCost() {
+            return 100; // TODO: use cost of exactIterator.advance() and predFuncValues.boolVal()
+          }
        };

        return new ConstantScoreScorer(this, score(), twoPhaseIterator);
--- a/lucene/test-framework/src/java/org/apache/lucene/search/AssertingScorer.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/search/AssertingScorer.java
@ -195,6 +195,19 @@ public class AssertingScorer extends Scorer {
        }
        return matches;
      }
+
+      @Override
+      public float matchCost() {
+        float matchCost = in.matchCost();
+        assert ! Float.isNaN(matchCost);
+        assert matchCost >= 0;
+        return matchCost;
+      }
+
+      @Override
+      public String toString() {
+        return "AssertingScorer@asTwoPhaseIterator(" + in + ")";
+      }
    };
  }
 }
--- a/lucene/test-framework/src/java/org/apache/lucene/search/RandomApproximationQuery.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/search/RandomApproximationQuery.java
@ -1,16 +1,5 @@
 package org.apache.lucene.search;

-import java.io.IOException;
-import java.util.Random;
-import java.util.Set;
-
-import com.carrotsearch.randomizedtesting.generators.RandomInts;
-
-import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.LeafReaderContext;
-import org.apache.lucene.index.Term;
-import org.apache.lucene.util.Bits;
-
 /*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
@ -28,6 +17,16 @@ import org.apache.lucene.util.Bits;
 * limitations under the License.
 */

+import java.io.IOException;
+import java.util.Random;
+import java.util.Set;
+
+import com.carrotsearch.randomizedtesting.generators.RandomInts;
+
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.Term;
+
 /**
 * A {@link Query} that adds random approximations to its scorers.
 */
@ -172,10 +171,12 @@ public class RandomApproximationQuery extends Query {

    private final DocIdSetIterator disi;
    private int lastDoc = -1;
+    private final float randomMatchCost;

    RandomTwoPhaseView(Random random, DocIdSetIterator disi) {
      super(new RandomApproximation(random, disi));
      this.disi = disi;
+      this.randomMatchCost = random.nextFloat() * 200; // between 0 and 200
    }

    @Override
@ -190,6 +191,10 @@ public class RandomApproximationQuery extends Query {
      return approximation.docID() == disi.docID();
    }

+    @Override
+    public float matchCost() {
+      return randomMatchCost;
+    }
  }

  private static class RandomApproximation extends DocIdSetIterator {
--- a/lucene/test-framework/src/java/org/apache/lucene/search/spans/AssertingSpans.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/search/spans/AssertingSpans.java
@ -190,6 +190,14 @@ class AssertingSpans extends Spans {
    return in.cost();
  }

+  @Override
+  public float positionsCost() {
+    float cost = in.positionsCost();
+    assert ! Float.isNaN(cost) : "positionsCost() should not be NaN";
+    assert cost > 0 : "positionsCost() must be positive";
+    return cost;
+  }
+
  @Override
  protected float scoreCurrentDoc() throws IOException {
    assert in.docScorer != null : in.getClass() + " has no docScorer!";
@ -229,6 +237,18 @@ class AssertingSpans extends Spans {
      }
      return v;
    }
+
+    @Override
+    public float matchCost() {
+      float cost = in.matchCost();
+      if (Float.isNaN(cost)) {
+        throw new AssertionError("matchCost()=" + cost + " should not be NaN on doc ID " + approximation.docID());
+      }
+      if (cost < 0) {
+        throw new AssertionError("matchCost()=" + cost + " should be non negative on doc ID " + approximation.docID());
+      }
+      return cost;
+    }
  }
  
  class AssertingDISI extends DocIdSetIterator {
--- a/solr/core/src/java/org/apache/solr/search/Filter.java
+++ b/solr/core/src/java/org/apache/solr/search/Filter.java
@ -129,6 +129,11 @@ public abstract class Filter extends Query {
            public boolean matches() throws IOException {
              return bits.get(approximation.docID());
            }
+
+            @Override
+            public float matchCost() {
+              return 10; // TODO use cost of bits.get()
+            }
          };
          return new ConstantScoreScorer(this, 0f, twoPhase);
        }