LUCENE-6276: Added TwoPhaseIterator.matchCost().

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1714261 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Adrien Grand 2015-11-13 20:08:01 +00:00
parent 56b0a46f10
commit 0ed54b3105
33 changed files with 408 additions and 35 deletions

View File

@ -206,6 +206,9 @@ Optimizations
* LUCENE-6892: various lucene.index initialCapacity tweaks * LUCENE-6892: various lucene.index initialCapacity tweaks
(Christine Poerschke) (Christine Poerschke)
* LUCENE-6276: Added TwoPhaseIterator.matchCost() which allows confirming the
least costly TwoPhaseIterators first. (Paul Elschot via Adrien Grand)
Bug Fixes Bug Fixes
* LUCENE-6817: ComplexPhraseQueryParser.ComplexPhraseQuery does not display * LUCENE-6817: ComplexPhraseQueryParser.ComplexPhraseQuery does not display

View File

@ -155,7 +155,7 @@ public class ConjunctionDISI extends DocIdSetIterator {
@Override @Override
public long cost() { public long cost() {
return lead.cost(); return lead.cost(); // overestimate
} }
/** /**
@ -164,16 +164,33 @@ public class ConjunctionDISI extends DocIdSetIterator {
private static class TwoPhaseConjunctionDISI extends TwoPhaseIterator { private static class TwoPhaseConjunctionDISI extends TwoPhaseIterator {
private final TwoPhaseIterator[] twoPhaseIterators; private final TwoPhaseIterator[] twoPhaseIterators;
private final float matchCost;
private TwoPhaseConjunctionDISI(List<? extends DocIdSetIterator> iterators, List<TwoPhaseIterator> twoPhaseIterators) { private TwoPhaseConjunctionDISI(List<? extends DocIdSetIterator> iterators, List<TwoPhaseIterator> twoPhaseIterators) {
super(new ConjunctionDISI(iterators)); super(new ConjunctionDISI(iterators));
assert twoPhaseIterators.size() > 0; assert twoPhaseIterators.size() > 0;
CollectionUtil.timSort(twoPhaseIterators, new Comparator<TwoPhaseIterator>() {
@Override
public int compare(TwoPhaseIterator o1, TwoPhaseIterator o2) {
return Float.compare(o1.matchCost(), o2.matchCost());
}
});
this.twoPhaseIterators = twoPhaseIterators.toArray(new TwoPhaseIterator[twoPhaseIterators.size()]); this.twoPhaseIterators = twoPhaseIterators.toArray(new TwoPhaseIterator[twoPhaseIterators.size()]);
// Compute the matchCost as the total matchCost of the sub iterators.
// TODO: This could be too high because the matching is done cheapest first: give the lower matchCosts a higher weight.
float totalMatchCost = 0;
for (TwoPhaseIterator tpi : twoPhaseIterators) {
totalMatchCost += tpi.matchCost();
}
matchCost = totalMatchCost;
} }
@Override @Override
public boolean matches() throws IOException { public boolean matches() throws IOException {
for (TwoPhaseIterator twoPhaseIterator : twoPhaseIterators) { for (TwoPhaseIterator twoPhaseIterator : twoPhaseIterators) { // match cheapest first
if (twoPhaseIterator.matches() == false) { if (twoPhaseIterator.matches() == false) {
return false; return false;
} }
@ -181,6 +198,11 @@ public class ConjunctionDISI extends DocIdSetIterator {
return true; return true;
} }
@Override
public float matchCost() {
return matchCost;
}
} }
/** /**

View File

@ -52,19 +52,25 @@ abstract class DisjunctionScorer extends Scorer {
@Override @Override
public TwoPhaseIterator asTwoPhaseIterator() { public TwoPhaseIterator asTwoPhaseIterator() {
boolean hasApproximation = false; float sumMatchCost = 0;
long sumApproxCost = 0;
// Compute matchCost as the average over the matchCost of the subScorers.
// This is weighted by the cost, which is an expected number of matching documents.
for (DisiWrapper<Scorer> w : subScorers) { for (DisiWrapper<Scorer> w : subScorers) {
if (w.twoPhaseView != null) { if (w.twoPhaseView != null) {
hasApproximation = true; long costWeight = (w.cost <= 1) ? 1 : w.cost;
break; sumMatchCost += w.twoPhaseView.matchCost() * costWeight;
sumApproxCost += costWeight;
} }
} }
if (! hasApproximation) { if (sumApproxCost == 0) { // no sub scorer supports approximations
// none of the sub scorers supports approximations
return null; return null;
} }
final float matchCost = sumMatchCost / sumApproxCost;
// note it is important to share the same pq as this scorer so that // note it is important to share the same pq as this scorer so that
// rebalancing the pq through the approximation will also rebalance // rebalancing the pq through the approximation will also rebalance
// the pq in this scorer. // the pq in this scorer.
@ -105,6 +111,11 @@ abstract class DisjunctionScorer extends Scorer {
DisjunctionScorer.this.topScorers = topScorers; DisjunctionScorer.this.topScorers = topScorers;
return true; return true;
} }
@Override
public float matchCost() {
return matchCost;
}
}; };
} }

View File

@ -44,9 +44,11 @@ final class ExactPhraseScorer extends Scorer {
private final Similarity.SimScorer docScorer; private final Similarity.SimScorer docScorer;
private final boolean needsScores; private final boolean needsScores;
// Per-document match cost estimate handed in by the caller; assigned once in the constructor.
private final float matchCost;
ExactPhraseScorer(Weight weight, PhraseQuery.PostingsAndFreq[] postings, ExactPhraseScorer(Weight weight, PhraseQuery.PostingsAndFreq[] postings,
Similarity.SimScorer docScorer, boolean needsScores) throws IOException { Similarity.SimScorer docScorer, boolean needsScores,
float matchCost) throws IOException {
super(weight); super(weight);
this.docScorer = docScorer; this.docScorer = docScorer;
this.needsScores = needsScores; this.needsScores = needsScores;
@ -59,6 +61,7 @@ final class ExactPhraseScorer extends Scorer {
} }
conjunction = ConjunctionDISI.intersect(iterators); conjunction = ConjunctionDISI.intersect(iterators);
this.postings = postingsAndPositions.toArray(new PostingsAndPosition[postingsAndPositions.size()]); this.postings = postingsAndPositions.toArray(new PostingsAndPosition[postingsAndPositions.size()]);
this.matchCost = matchCost;
} }
@Override @Override
@ -68,6 +71,11 @@ final class ExactPhraseScorer extends Scorer {
public boolean matches() throws IOException { public boolean matches() throws IOException {
return phraseFreq() > 0; return phraseFreq() > 0;
} }
@Override
public float matchCost() {
return matchCost;
}
}; };
} }

View File

@ -189,6 +189,7 @@ public class MultiPhraseQuery extends Query {
// Reuse single TermsEnum below: // Reuse single TermsEnum below:
final TermsEnum termsEnum = fieldTerms.iterator(); final TermsEnum termsEnum = fieldTerms.iterator();
float totalMatchCost = 0;
for (int pos=0; pos<postingsFreqs.length; pos++) { for (int pos=0; pos<postingsFreqs.length; pos++) {
Term[] terms = termArrays.get(pos); Term[] terms = termArrays.get(pos);
@ -199,6 +200,7 @@ public class MultiPhraseQuery extends Query {
if (termState != null) { if (termState != null) {
termsEnum.seekExact(term.bytes(), termState); termsEnum.seekExact(term.bytes(), termState);
postings.add(termsEnum.postings(null, PostingsEnum.POSITIONS)); postings.add(termsEnum.postings(null, PostingsEnum.POSITIONS));
totalMatchCost += PhraseQuery.termPositionsCost(termsEnum);
} }
} }
@ -222,9 +224,13 @@ public class MultiPhraseQuery extends Query {
} }
if (slop == 0) { if (slop == 0) {
return new ExactPhraseScorer(this, postingsFreqs, similarity.simScorer(stats, context), needsScores); return new ExactPhraseScorer(this, postingsFreqs,
similarity.simScorer(stats, context),
needsScores, totalMatchCost);
} else { } else {
return new SloppyPhraseScorer(this, postingsFreqs, slop, similarity.simScorer(stats, context), needsScores); return new SloppyPhraseScorer(this, postingsFreqs, slop,
similarity.simScorer(stats, context),
needsScores, totalMatchCost);
} }
} }

View File

@ -24,6 +24,8 @@ import java.util.Collections;
import java.util.List; import java.util.List;
import java.util.Set; import java.util.Set;
import org.apache.lucene.codecs.lucene50.Lucene50PostingsFormat;
import org.apache.lucene.codecs.lucene50.Lucene50PostingsReader;
import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexReaderContext; import org.apache.lucene.index.IndexReaderContext;
import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.LeafReader;
@ -405,6 +407,7 @@ public class PhraseQuery extends Query {
// Reuse single TermsEnum below: // Reuse single TermsEnum below:
final TermsEnum te = fieldTerms.iterator(); final TermsEnum te = fieldTerms.iterator();
float totalMatchCost = 0;
for (int i = 0; i < terms.length; i++) { for (int i = 0; i < terms.length; i++) {
final Term t = terms[i]; final Term t = terms[i];
@ -416,6 +419,7 @@ public class PhraseQuery extends Query {
te.seekExact(t.bytes(), state); te.seekExact(t.bytes(), state);
PostingsEnum postingsEnum = te.postings(null, PostingsEnum.POSITIONS); PostingsEnum postingsEnum = te.postings(null, PostingsEnum.POSITIONS);
postingsFreqs[i] = new PostingsAndFreq(postingsEnum, positions[i], t); postingsFreqs[i] = new PostingsAndFreq(postingsEnum, positions[i], t);
totalMatchCost += termPositionsCost(te);
} }
// sort by increasing docFreq order // sort by increasing docFreq order
@ -424,9 +428,13 @@ public class PhraseQuery extends Query {
} }
if (slop == 0) { // optimize exact case if (slop == 0) { // optimize exact case
return new ExactPhraseScorer(this, postingsFreqs, similarity.simScorer(stats, context), needsScores); return new ExactPhraseScorer(this, postingsFreqs,
similarity.simScorer(stats, context),
needsScores, totalMatchCost);
} else { } else {
return new SloppyPhraseScorer(this, postingsFreqs, slop, similarity.simScorer(stats, context), needsScores); return new SloppyPhraseScorer(this, postingsFreqs, slop,
similarity.simScorer(stats, context),
needsScores, totalMatchCost);
} }
} }
@ -456,6 +464,42 @@ public class PhraseQuery extends Query {
} }
} }
/** A guess of
* the average number of simple operations for the initial seek and buffer refill
* per document for the positions of a term.
* See also {@link Lucene50PostingsReader.BlockPostingsEnum#nextPosition()}.
* <p>
* Aside: Instead of being constant this could depend, among other things, on
* {@link Lucene50PostingsFormat#BLOCK_SIZE},
* {@link TermsEnum#docFreq()},
* {@link TermsEnum#totalTermFreq()},
* {@link DocIdSetIterator#cost()} (expected number of matching docs),
* {@link LeafReader#maxDoc()} (total number of docs in the segment),
* and the seek time and block size of the device storing the index.
*/
private static final int TERM_POSNS_SEEK_OPS_PER_DOC = 128;
/** Number of simple operations in {@link Lucene50PostingsReader.BlockPostingsEnum#nextPosition()}
* when no seek or buffer refill is done.
*/
private static final int TERM_OPS_PER_POS = 7;
/**
 * Estimates the cost, counted in simple operations, of processing the
 * occurrences of a term within one document that contains it.
 * <p>
 * The estimate is a fixed per-document charge
 * ({@code TERM_POSNS_SEEK_OPS_PER_DOC}) plus {@code TERM_OPS_PER_POS} for each
 * expected occurrence. The expected number of occurrences is
 * {@code totalTermFreq / docFreq}, or 1 when {@link TermsEnum#totalTermFreq()}
 * is smaller than {@link TermsEnum#docFreq()}; that covers the -1 reported
 * when the total term frequency is not available, in which case the result
 * may be inaccurate.
 * <p>
 * This is for use by {@link TwoPhaseIterator#matchCost} implementations.
 *
 * @param termsEnum positioned at the term whose cost is estimated
 * @throws IOException propagated from the term statistics calls
 */
static float termPositionsCost(TermsEnum termsEnum) throws IOException {
  final int docFreq = termsEnum.docFreq();
  assert docFreq > 0;
  final long totalTermFreq = termsEnum.totalTermFreq(); // -1 when not available
  final float occurrencesPerMatchingDoc;
  if (totalTermFreq >= docFreq) {
    occurrencesPerMatchingDoc = totalTermFreq / (float) docFreq;
  } else {
    // Statistic missing or unusable: assume a single occurrence per document.
    occurrencesPerMatchingDoc = 1;
  }
  return TERM_POSNS_SEEK_OPS_PER_DOC + occurrencesPerMatchingDoc * TERM_OPS_PER_POS;
}
@Override @Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException { public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
return new PhraseWeight(searcher, needsScores); return new PhraseWeight(searcher, needsScores);

View File

@ -62,6 +62,11 @@ public abstract class RandomAccessWeight extends ConstantScoreWeight {
return matchingDocs.get(doc); return matchingDocs.get(doc);
} }
@Override
public float matchCost() {
return 10; // TODO: use some cost of matchingDocs
}
}; };
return new ConstantScoreScorer(this, score(), twoPhase); return new ConstantScoreScorer(this, score(), twoPhase);

View File

@ -149,6 +149,10 @@ class ReqExclScorer extends Scorer {
return ReqExclScorer.matches(doc, exclDoc, reqTwoPhaseIterator, exclTwoPhaseIterator); return ReqExclScorer.matches(doc, exclDoc, reqTwoPhaseIterator, exclTwoPhaseIterator);
} }
@Override
public float matchCost() {
return reqTwoPhaseIterator.matchCost(); // TODO: also use cost of exclApproximation.advance()
}
}; };
} }
} }

View File

@ -52,9 +52,11 @@ final class SloppyPhraseScorer extends Scorer {
private int numMatches; private int numMatches;
final boolean needsScores; final boolean needsScores;
private final float matchCost;
SloppyPhraseScorer(Weight weight, PhraseQuery.PostingsAndFreq[] postings, SloppyPhraseScorer(Weight weight, PhraseQuery.PostingsAndFreq[] postings,
int slop, Similarity.SimScorer docScorer, boolean needsScores) { int slop, Similarity.SimScorer docScorer, boolean needsScores,
float matchCost) {
super(weight); super(weight);
this.docScorer = docScorer; this.docScorer = docScorer;
this.needsScores = needsScores; this.needsScores = needsScores;
@ -68,6 +70,7 @@ final class SloppyPhraseScorer extends Scorer {
phrasePositions[i] = new PhrasePositions(postings[i].postings, postings[i].position, i, postings[i].terms); phrasePositions[i] = new PhrasePositions(postings[i].postings, postings[i].position, i, postings[i].terms);
} }
conjunction = ConjunctionDISI.intersect(Arrays.asList(iterators)); conjunction = ConjunctionDISI.intersect(Arrays.asList(iterators));
this.matchCost = matchCost;
} }
/** /**
@ -596,6 +599,16 @@ final class SloppyPhraseScorer extends Scorer {
sloppyFreq = phraseFreq(); // check for phrase sloppyFreq = phraseFreq(); // check for phrase
return sloppyFreq != 0F; return sloppyFreq != 0F;
} }
@Override
public float matchCost() {
return matchCost;
}
@Override
public String toString() {
return "SloppyPhraseScorer@asTwoPhaseIterator(" + SloppyPhraseScorer.this + ")";
}
}; };
} }
} }

View File

@ -84,15 +84,23 @@ public abstract class TwoPhaseIterator {
return approximation; return approximation;
} }
/** Return whether the current doc ID that the iterator is on matches. This /** Return whether the current doc ID that {@link #approximation()} is on matches. This
* method should only be called when the iterator is positioned -- ie. not * method should only be called when the iterator is positioned -- ie. not
* when {@link DocIdSetIterator#docID()} is {@code -1} or * when {@link DocIdSetIterator#docID()} is {@code -1} or
* {@link DocIdSetIterator#NO_MORE_DOCS} -- and at most once. */ * {@link DocIdSetIterator#NO_MORE_DOCS} -- and at most once. */
public abstract boolean matches() throws IOException; public abstract boolean matches() throws IOException;
/** An estimate of the expected cost to determine that a single document {@link #matches()}.
* This can be called before iterating the documents of {@link #approximation()}.
* Returns an expected cost in number of simple operations like addition, multiplication,
* comparing two numbers and indexing an array.
* The returned value must be positive.
*/
public abstract float matchCost();
/** /**
* Returns a {@link TwoPhaseIterator} for this {@link DocIdSetIterator} * Returns a {@link TwoPhaseIterator} for this {@link DocIdSetIterator}
* when available * otherwise returns null. * when available, otherwise returns null.
*/ */
public static TwoPhaseIterator asTwoPhaseIterator(DocIdSetIterator iter) { public static TwoPhaseIterator asTwoPhaseIterator(DocIdSetIterator iter) {
return (iter instanceof Scorer) return (iter instanceof Scorer)

View File

@ -88,14 +88,34 @@ abstract class ConjunctionSpans extends Spans {
*/ */
@Override @Override
public TwoPhaseIterator asTwoPhaseIterator() { public TwoPhaseIterator asTwoPhaseIterator() {
TwoPhaseIterator res = new TwoPhaseIterator(conjunction) { float totalMatchCost = 0;
// Compute the matchCost as the total matchCost/positionsCost of the sub spans.
for (Spans spans : subSpans) {
TwoPhaseIterator tpi = spans.asTwoPhaseIterator();
if (tpi != null) {
totalMatchCost += tpi.matchCost();
} else {
totalMatchCost += spans.positionsCost();
}
}
final float matchCost = totalMatchCost;
return new TwoPhaseIterator(conjunction) {
@Override @Override
public boolean matches() throws IOException { public boolean matches() throws IOException {
return twoPhaseCurrentDocMatches(); return twoPhaseCurrentDocMatches();
} }
@Override
public float matchCost() {
return matchCost;
}
}; };
return res; }
@Override
public float positionsCost() {
throw new UnsupportedOperationException(); // asTwoPhaseIterator never returns null here.
} }
public Spans[] getSubSpans() { public Spans[] getSubSpans() {

View File

@ -142,6 +142,16 @@ public abstract class FilterSpans extends Spans {
public boolean matches() throws IOException { public boolean matches() throws IOException {
return inner.matches() && twoPhaseCurrentDocMatches(); return inner.matches() && twoPhaseCurrentDocMatches();
} }
@Override
public float matchCost() {
return inner.matchCost(); // underestimate
}
@Override
public String toString() {
return "FilterSpans@asTwoPhaseIterator(inner=" + inner + ", in=" + in + ")";
}
}; };
} else { } else {
// wrapped instance has no approximation, but // wrapped instance has no approximation, but
@ -151,10 +161,25 @@ public abstract class FilterSpans extends Spans {
public boolean matches() throws IOException { public boolean matches() throws IOException {
return twoPhaseCurrentDocMatches(); return twoPhaseCurrentDocMatches();
} }
@Override
public float matchCost() {
return in.positionsCost(); // overestimate
}
@Override
public String toString() {
return "FilterSpans@asTwoPhaseIterator(in=" + in + ")";
}
}; };
} }
} }
@Override
public float positionsCost() {
throw new UnsupportedOperationException(); // asTwoPhaseIterator never returns null
}
/** /**
* Returns true if the current document matches. * Returns true if the current document matches.
* <p> * <p>

View File

@ -133,6 +133,11 @@ public class NearSpansUnordered extends ConjunctionSpans {
return in.asTwoPhaseIterator(); return in.asTwoPhaseIterator();
} }
@Override
public float positionsCost() {
return in.positionsCost();
}
@Override @Override
public int docID() { public int docID() {
return in.docID(); return in.docID();

View File

@ -20,6 +20,7 @@ package org.apache.lucene.search.spans;
import java.io.IOException; import java.io.IOException;
import org.apache.lucene.search.similarities.Similarity; import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.search.TwoPhaseIterator;
/** /**
* A Spans that wraps another Spans with a different SimScorer * A Spans that wraps another Spans with a different SimScorer
@ -82,4 +83,14 @@ public class ScoringWrapperSpans extends Spans {
public long cost() { public long cost() {
return in.cost(); return in.cost();
} }
@Override
public TwoPhaseIterator asTwoPhaseIterator() {
return in.asTwoPhaseIterator();
}
@Override
public float positionsCost() {
return in.positionsCost();
}
} }

View File

@ -384,6 +384,11 @@ public class SpanNearQuery extends SpanQuery implements Cloneable {
public long cost() { public long cost() {
return 0; return 0;
} }
@Override
public float positionsCost() {
throw new UnsupportedOperationException();
}
} }
} }

View File

@ -210,26 +210,58 @@ public final class SpanOrQuery extends SpanQuery {
@Override @Override
public TwoPhaseIterator asTwoPhaseIterator() { public TwoPhaseIterator asTwoPhaseIterator() {
boolean hasApproximation = false; float sumMatchCost = 0; // See also DisjunctionScorer.asTwoPhaseIterator()
long sumApproxCost = 0;
for (DisiWrapper<Spans> w : byDocQueue) { for (DisiWrapper<Spans> w : byDocQueue) {
if (w.twoPhaseView != null) { if (w.twoPhaseView != null) {
hasApproximation = true; long costWeight = (w.cost <= 1) ? 1 : w.cost;
break; sumMatchCost += w.twoPhaseView.matchCost() * costWeight;
sumApproxCost += costWeight;
} }
} }
if (!hasApproximation) { // none of the sub spans supports approximations if (sumApproxCost == 0) { // no sub spans supports approximations
computePositionsCost();
return null; return null;
} }
final float matchCost = sumMatchCost / sumApproxCost;
return new TwoPhaseIterator(new DisjunctionDISIApproximation<Spans>(byDocQueue)) { return new TwoPhaseIterator(new DisjunctionDISIApproximation<Spans>(byDocQueue)) {
@Override @Override
public boolean matches() throws IOException { public boolean matches() throws IOException {
return twoPhaseCurrentDocMatches(); return twoPhaseCurrentDocMatches();
} }
@Override
public float matchCost() {
return matchCost;
}
}; };
} }
float positionsCost = -1;
/**
 * Stores in {@code positionsCost} the average of the sub spans'
 * {@code positionsCost()}, each weighted by its wrapper's {@code cost}
 * (with a minimum weight of 1).
 */
void computePositionsCost() {
  float weightedCostTotal = 0;
  long weightTotal = 0;
  for (DisiWrapper<Spans> wrapper : byDocQueue) {
    // Never weigh a sub spans by less than 1.
    final long weight = Math.max(1L, wrapper.cost);
    weightedCostTotal += wrapper.iterator.positionsCost() * weight;
    weightTotal += weight;
  }
  positionsCost = weightedCostTotal / weightTotal;
}
/**
 * Returns the value stored by {@code computePositionsCost()}.
 * That value is only computed on the path where {@code asTwoPhaseIterator()}
 * returns null, i.e. when none of the sub spans supports approximations;
 * the field still holds its initial -1 otherwise, which the assertion rejects.
 */
@Override
public float positionsCost() {
  final float cost = positionsCost;
  assert cost > 0;
  return cost;
}
int lastDocTwoPhaseMatched = -1; int lastDocTwoPhaseMatched = -1;
boolean twoPhaseCurrentDocMatches() throws IOException { boolean twoPhaseCurrentDocMatches() throws IOException {

View File

@ -117,10 +117,40 @@ public class SpanTermQuery extends SpanQuery {
termsEnum.seekExact(term.bytes(), state); termsEnum.seekExact(term.bytes(), state);
final PostingsEnum postings = termsEnum.postings(null, requiredPostings.getRequiredPostings()); final PostingsEnum postings = termsEnum.postings(null, requiredPostings.getRequiredPostings());
return new TermSpans(this, getSimScorer(context), postings, term); float positionsCost = termPositionsCost(termsEnum) * PHRASE_TO_SPAN_TERM_POSITIONS_COST;
return new TermSpans(this, getSimScorer(context), postings, term, positionsCost);
} }
} }
/** A guess of
* the relative cost of dealing with the term positions
* when using a SpanNearQuery instead of a PhraseQuery.
*/
private static final float PHRASE_TO_SPAN_TERM_POSITIONS_COST = 4.0f;
private static final int TERM_POSNS_SEEK_OPS_PER_DOC = 128;
private static final int TERM_OPS_PER_POS = 7;
/**
 * Returns an expected cost, in simple operations, of processing the
 * occurrences of a term in a document that contains the term.
 * <p>
 * The result is {@code TERM_POSNS_SEEK_OPS_PER_DOC} plus
 * {@code TERM_OPS_PER_POS} per expected occurrence, where the expected number
 * of occurrences is {@code totalTermFreq / docFreq}, falling back to 1 when
 * {@link TermsEnum#totalTermFreq()} is below {@link TermsEnum#docFreq()}
 * (it is -1 when not available, so the result may then be inaccurate).
 * <p>
 * This is a copy of org.apache.lucene.search.PhraseQuery.termPositionsCost().
 * <br>
 * TODO: keep only a single copy of this method and the constants used in it
 * when SpanTermQuery moves to the o.a.l.search package.
 *
 * @param termsEnum The term is the term at which this TermsEnum is positioned.
 * @throws IOException propagated from the term statistics calls
 */
static float termPositionsCost(TermsEnum termsEnum) throws IOException {
  final int docsWithTerm = termsEnum.docFreq();
  assert docsWithTerm > 0;
  final long occurrencesTotal = termsEnum.totalTermFreq(); // -1 when not available
  float occurrencesPerDoc = 1;
  if (occurrencesTotal >= docsWithTerm) {
    occurrencesPerDoc = occurrencesTotal / (float) docsWithTerm;
  }
  return TERM_POSNS_SEEK_OPS_PER_DOC + occurrencesPerDoc * TERM_OPS_PER_POS;
}
@Override @Override
public String toString(String field) { public String toString(String field) {
StringBuilder buffer = new StringBuilder(); StringBuilder buffer = new StringBuilder();

View File

@ -86,6 +86,17 @@ public abstract class Spans extends Scorer {
*/ */
public abstract void collect(SpanCollector collector) throws IOException; public abstract void collect(SpanCollector collector) throws IOException;
/**
* Return an estimation of the cost of using the positions of
* this {@link Spans} for any single document, but only after
* {@link #asTwoPhaseIterator} returned {@code null}.
* Otherwise this method should not be called.
* The returned value is independent of the current document.
*
* @lucene.experimental
*/
public abstract float positionsCost();
@Override @Override
public String toString() { public String toString() {
StringBuilder sb = new StringBuilder(); StringBuilder sb = new StringBuilder();

View File

@ -37,13 +37,17 @@ public class TermSpans extends Spans {
protected int count; protected int count;
protected int position; protected int position;
protected boolean readPayload; protected boolean readPayload;
private final float positionsCost;
public TermSpans(SpanWeight weight, Similarity.SimScorer scorer, PostingsEnum postings, Term term) { public TermSpans(SpanWeight weight, Similarity.SimScorer scorer,
PostingsEnum postings, Term term, float positionsCost) {
super(weight, scorer); super(weight, scorer);
this.postings = Objects.requireNonNull(postings); this.postings = Objects.requireNonNull(postings);
this.term = Objects.requireNonNull(term); this.term = Objects.requireNonNull(term);
this.doc = -1; this.doc = -1;
this.position = -1; this.position = -1;
assert positionsCost > 0; // otherwise the TermSpans should not be created.
this.positionsCost = positionsCost;
} }
@Override @Override
@ -118,6 +122,11 @@ public class TermSpans extends Spans {
collector.collectLeaf(postings, position, term); collector.collectLeaf(postings, position, term);
} }
@Override
public float positionsCost() {
return positionsCost;
}
@Override @Override
public String toString() { public String toString() {
return "spans(" + term.toString() + ")@" + return "spans(" + term.toString() + ")@" +
@ -128,5 +137,4 @@ public class TermSpans extends Spans {
public PostingsEnum getPostings() { public PostingsEnum getPostings() {
return postings; return postings;
} }
} }

View File

@ -37,6 +37,11 @@ public class TestConjunctionDISI extends LuceneTestCase {
public boolean matches() throws IOException { public boolean matches() throws IOException {
return confirmed.get(iterator.docID()); return confirmed.get(iterator.docID());
} }
@Override
public float matchCost() {
return 5; // #operations in FixedBitSet#get()
}
}; };
} }

View File

@ -82,6 +82,11 @@ final class JustCompileSearchSpans {
public long cost() { public long cost() {
throw new UnsupportedOperationException(UNSUPPORTED_MSG); throw new UnsupportedOperationException(UNSUPPORTED_MSG);
} }
@Override
public float positionsCost() {
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
}
} }
static final class JustCompileSpanQuery extends SpanQuery { static final class JustCompileSpanQuery extends SpanQuery {

View File

@ -176,6 +176,11 @@ public final class DoubleRange extends Range {
public boolean matches() throws IOException { public boolean matches() throws IOException {
return range.accept(values.doubleVal(approximation.docID())); return range.accept(values.doubleVal(approximation.docID()));
} }
@Override
public float matchCost() {
return 100; // TODO: use cost of range.accept()
}
}; };
return new ConstantScoreScorer(this, score(), twoPhase); return new ConstantScoreScorer(this, score(), twoPhase);
} }

View File

@ -168,6 +168,11 @@ public final class LongRange extends Range {
public boolean matches() throws IOException { public boolean matches() throws IOException {
return range.accept(values.longVal(approximation.docID())); return range.accept(values.longVal(approximation.docID()));
} }
@Override
public float matchCost() {
return 100; // TODO: use cost of range.accept()
}
}; };
return new ConstantScoreScorer(this, score(), twoPhase); return new ConstantScoreScorer(this, score(), twoPhase);
} }

View File

@ -184,6 +184,11 @@ final class GlobalOrdinalsQuery extends Query {
} }
return false; return false;
} }
@Override
public float matchCost() {
return 100; // TODO: use cost of values.getOrd() and foundOrds.get()
}
}; };
} }
} }
@ -225,6 +230,11 @@ final class GlobalOrdinalsQuery extends Query {
} }
return false; return false;
} }
@Override
public float matchCost() {
return 100; // TODO: use cost of values.getOrd() and foundOrds.get()
}
}; };
} }

View File

@ -211,6 +211,10 @@ final class GlobalOrdinalsWithScoreQuery extends Query {
return false; return false;
} }
@Override
public float matchCost() {
return 100; // TODO: use cost of values.getOrd() and collector.score()
}
}; };
} }
} }
@ -253,6 +257,11 @@ final class GlobalOrdinalsWithScoreQuery extends Query {
} }
return false; return false;
} }
// Returns a fixed placeholder estimate; see the TODO for what it should account for.
@Override
public float matchCost() {
return 100; // TODO: use cost of values.getOrd() and collector.score()
}
}; };
} }
} }

View File

@ -53,6 +53,11 @@ public abstract class ValueSourceScorer extends Scorer {
public boolean matches() throws IOException { public boolean matches() throws IOException {
return ValueSourceScorer.this.matches(docID()); return ValueSourceScorer.this.matches(docID());
} }
@Override
public float matchCost() {
return 100; // TODO: use cost of ValueSourceScorer.this.matches()
}
}; };
this.disi = TwoPhaseIterator.asDocIdSetIterator(twoPhaseIterator); this.disi = TwoPhaseIterator.asDocIdSetIterator(twoPhaseIterator);
} }

View File

@ -274,6 +274,11 @@ public class PayloadScoreQuery extends SpanQuery {
public long cost() { public long cost() {
return in.cost(); return in.cost();
} }
@Override
public float positionsCost() {
return in.positionsCost();
}
} }
} }

View File

@ -108,6 +108,11 @@ public class CompositeVerifyQuery extends Query {
public boolean matches() throws IOException { public boolean matches() throws IOException {
return predFuncValues.boolVal(indexQueryScorer.docID()); return predFuncValues.boolVal(indexQueryScorer.docID());
} }
@Override
public float matchCost() {
return 100; // TODO: use cost of predFuncValues.boolVal()
}
}; };
return new ConstantScoreScorer(this, score(), twoPhaseIterator); return new ConstantScoreScorer(this, score(), twoPhaseIterator);

View File

@ -130,6 +130,11 @@ public class IntersectsRPTVerifyQuery extends Query {
return predFuncValues.boolVal(doc); return predFuncValues.boolVal(doc);
} }
@Override
public float matchCost() {
return 100; // TODO: use cost of exactIterator.advance() and predFuncValues.boolVal()
}
}; };
return new ConstantScoreScorer(this, score(), twoPhaseIterator); return new ConstantScoreScorer(this, score(), twoPhaseIterator);

View File

@ -195,6 +195,19 @@ public class AssertingScorer extends Scorer {
} }
return matches; return matches;
} }
/**
 * Delegates to the wrapped iterator's {@code matchCost()} and checks that
 * the returned estimate is neither NaN nor negative before passing it on.
 */
@Override
public float matchCost() {
  float matchCost = in.matchCost();
  // Messages make a failing estimate identifiable from the assertion error alone.
  assert ! Float.isNaN(matchCost) : "matchCost() should not be NaN";
  assert matchCost >= 0 : "matchCost()=" + matchCost + " should be non negative";
  return matchCost;
}
@Override
public String toString() {
return "AssertingScorer@asTwoPhaseIterator(" + in + ")";
}
}; };
} }
} }

View File

@ -1,16 +1,5 @@
package org.apache.lucene.search; package org.apache.lucene.search;
import java.io.IOException;
import java.util.Random;
import java.util.Set;
import com.carrotsearch.randomizedtesting.generators.RandomInts;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.util.Bits;
/* /*
* Licensed to the Apache Software Foundation (ASF) under one or more * Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with * contributor license agreements. See the NOTICE file distributed with
@ -28,6 +17,16 @@ import org.apache.lucene.util.Bits;
* limitations under the License. * limitations under the License.
*/ */
import java.io.IOException;
import java.util.Random;
import java.util.Set;
import com.carrotsearch.randomizedtesting.generators.RandomInts;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Term;
/** /**
* A {@link Query} that adds random approximations to its scorers. * A {@link Query} that adds random approximations to its scorers.
*/ */
@ -172,10 +171,12 @@ public class RandomApproximationQuery extends Query {
private final DocIdSetIterator disi; private final DocIdSetIterator disi;
private int lastDoc = -1; private int lastDoc = -1;
private final float randomMatchCost;
RandomTwoPhaseView(Random random, DocIdSetIterator disi) { RandomTwoPhaseView(Random random, DocIdSetIterator disi) {
super(new RandomApproximation(random, disi)); super(new RandomApproximation(random, disi));
this.disi = disi; this.disi = disi;
this.randomMatchCost = random.nextFloat() * 200; // between 0 and 200
} }
@Override @Override
@ -190,6 +191,10 @@ public class RandomApproximationQuery extends Query {
return approximation.docID() == disi.docID(); return approximation.docID() == disi.docID();
} }
@Override
public float matchCost() {
return randomMatchCost;
}
} }
private static class RandomApproximation extends DocIdSetIterator { private static class RandomApproximation extends DocIdSetIterator {

View File

@ -190,6 +190,14 @@ class AssertingSpans extends Spans {
return in.cost(); return in.cost();
} }
/**
 * Delegates to the wrapped spans' {@code positionsCost()} and asserts that
 * the estimate is a positive number that is not NaN.
 */
@Override
public float positionsCost() {
  final float estimate = in.positionsCost();
  assert Float.isNaN(estimate) == false : "positionsCost() should not be NaN";
  assert estimate > 0 : "positionsCost() must be positive";
  return estimate;
}
@Override @Override
protected float scoreCurrentDoc() throws IOException { protected float scoreCurrentDoc() throws IOException {
assert in.docScorer != null : in.getClass() + " has no docScorer!"; assert in.docScorer != null : in.getClass() + " has no docScorer!";
@ -229,6 +237,18 @@ class AssertingSpans extends Spans {
} }
return v; return v;
} }
/**
 * Delegates to the wrapped two-phase iterator's {@code matchCost()} and
 * throws an {@link AssertionError} naming the approximation's current doc ID
 * when the estimate is NaN or negative.
 */
@Override
public float matchCost() {
  final float estimate = in.matchCost();
  if (Float.isNaN(estimate)) {
    throw new AssertionError("matchCost()=" + estimate + " should not be NaN on doc ID " + approximation.docID());
  } else if (estimate < 0) {
    throw new AssertionError("matchCost()=" + estimate + " should be non negative on doc ID " + approximation.docID());
  }
  return estimate;
}
} }
class AssertingDISI extends DocIdSetIterator { class AssertingDISI extends DocIdSetIterator {

View File

@ -129,6 +129,11 @@ public abstract class Filter extends Query {
public boolean matches() throws IOException { public boolean matches() throws IOException {
return bits.get(approximation.docID()); return bits.get(approximation.docID());
} }
@Override
public float matchCost() {
return 10; // TODO use cost of bits.get()
}
}; };
return new ConstantScoreScorer(this, 0f, twoPhase); return new ConstantScoreScorer(this, 0f, twoPhase);
} }